nokogiri 1.10.10 → 1.12.5
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +3 -0
- data/LICENSE-DEPENDENCIES.md +1173 -884
- data/LICENSE.md +1 -1
- data/README.md +176 -96
- data/dependencies.yml +12 -12
- data/ext/nokogiri/depend +38 -358
- data/ext/nokogiri/extconf.rb +716 -414
- data/ext/nokogiri/gumbo.c +584 -0
- data/ext/nokogiri/html4_document.c +166 -0
- data/ext/nokogiri/html4_element_description.c +294 -0
- data/ext/nokogiri/html4_entity_lookup.c +37 -0
- data/ext/nokogiri/html4_sax_parser_context.c +120 -0
- data/ext/nokogiri/html4_sax_push_parser.c +95 -0
- data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
- data/ext/nokogiri/nokogiri.c +228 -91
- data/ext/nokogiri/nokogiri.h +191 -89
- data/ext/nokogiri/test_global_handlers.c +40 -0
- data/ext/nokogiri/xml_attr.c +15 -15
- data/ext/nokogiri/xml_attribute_decl.c +18 -18
- data/ext/nokogiri/xml_cdata.c +13 -18
- data/ext/nokogiri/xml_comment.c +19 -26
- data/ext/nokogiri/xml_document.c +267 -195
- data/ext/nokogiri/xml_document_fragment.c +13 -15
- data/ext/nokogiri/xml_dtd.c +54 -48
- data/ext/nokogiri/xml_element_content.c +31 -26
- data/ext/nokogiri/xml_element_decl.c +22 -22
- data/ext/nokogiri/xml_encoding_handler.c +28 -17
- data/ext/nokogiri/xml_entity_decl.c +32 -30
- data/ext/nokogiri/xml_entity_reference.c +16 -18
- data/ext/nokogiri/xml_namespace.c +60 -51
- data/ext/nokogiri/xml_node.c +493 -407
- data/ext/nokogiri/xml_node_set.c +174 -162
- data/ext/nokogiri/xml_processing_instruction.c +17 -19
- data/ext/nokogiri/xml_reader.c +197 -172
- data/ext/nokogiri/xml_relax_ng.c +52 -28
- data/ext/nokogiri/xml_sax_parser.c +112 -112
- data/ext/nokogiri/xml_sax_parser_context.c +105 -86
- data/ext/nokogiri/xml_sax_push_parser.c +36 -27
- data/ext/nokogiri/xml_schema.c +96 -46
- data/ext/nokogiri/xml_syntax_error.c +42 -21
- data/ext/nokogiri/xml_text.c +13 -17
- data/ext/nokogiri/xml_xpath_context.c +158 -73
- data/ext/nokogiri/xslt_stylesheet.c +158 -164
- data/gumbo-parser/CHANGES.md +63 -0
- data/gumbo-parser/Makefile +101 -0
- data/gumbo-parser/THANKS +27 -0
- data/gumbo-parser/src/Makefile +34 -0
- data/gumbo-parser/src/README.md +41 -0
- data/gumbo-parser/src/ascii.c +75 -0
- data/gumbo-parser/src/ascii.h +115 -0
- data/gumbo-parser/src/attribute.c +42 -0
- data/gumbo-parser/src/attribute.h +17 -0
- data/gumbo-parser/src/char_ref.c +22225 -0
- data/gumbo-parser/src/char_ref.h +29 -0
- data/gumbo-parser/src/char_ref.rl +2154 -0
- data/gumbo-parser/src/error.c +626 -0
- data/gumbo-parser/src/error.h +148 -0
- data/gumbo-parser/src/foreign_attrs.c +104 -0
- data/gumbo-parser/src/foreign_attrs.gperf +27 -0
- data/gumbo-parser/src/gumbo.h +943 -0
- data/gumbo-parser/src/insertion_mode.h +33 -0
- data/gumbo-parser/src/macros.h +91 -0
- data/gumbo-parser/src/parser.c +4886 -0
- data/gumbo-parser/src/parser.h +41 -0
- data/gumbo-parser/src/replacement.h +33 -0
- data/gumbo-parser/src/string_buffer.c +103 -0
- data/gumbo-parser/src/string_buffer.h +68 -0
- data/gumbo-parser/src/string_piece.c +48 -0
- data/gumbo-parser/src/svg_attrs.c +174 -0
- data/gumbo-parser/src/svg_attrs.gperf +77 -0
- data/gumbo-parser/src/svg_tags.c +137 -0
- data/gumbo-parser/src/svg_tags.gperf +55 -0
- data/gumbo-parser/src/tag.c +222 -0
- data/gumbo-parser/src/tag_lookup.c +382 -0
- data/gumbo-parser/src/tag_lookup.gperf +169 -0
- data/gumbo-parser/src/tag_lookup.h +13 -0
- data/gumbo-parser/src/token_buffer.c +79 -0
- data/gumbo-parser/src/token_buffer.h +71 -0
- data/gumbo-parser/src/token_type.h +17 -0
- data/gumbo-parser/src/tokenizer.c +3463 -0
- data/gumbo-parser/src/tokenizer.h +112 -0
- data/gumbo-parser/src/tokenizer_states.h +339 -0
- data/gumbo-parser/src/utf8.c +245 -0
- data/gumbo-parser/src/utf8.h +164 -0
- data/gumbo-parser/src/util.c +68 -0
- data/gumbo-parser/src/util.h +30 -0
- data/gumbo-parser/src/vector.c +111 -0
- data/gumbo-parser/src/vector.h +45 -0
- data/lib/nokogiri/css/node.rb +1 -0
- data/lib/nokogiri/css/parser.rb +64 -63
- data/lib/nokogiri/css/parser.y +3 -3
- data/lib/nokogiri/css/parser_extras.rb +39 -36
- data/lib/nokogiri/css/syntax_error.rb +2 -1
- data/lib/nokogiri/css/tokenizer.rb +1 -0
- data/lib/nokogiri/css/xpath_visitor.rb +73 -43
- data/lib/nokogiri/css.rb +15 -14
- data/lib/nokogiri/decorators/slop.rb +1 -0
- data/lib/nokogiri/extension.rb +31 -0
- data/lib/nokogiri/gumbo.rb +14 -0
- data/lib/nokogiri/html.rb +32 -27
- data/lib/nokogiri/{html → html4}/builder.rb +3 -2
- data/lib/nokogiri/{html → html4}/document.rb +17 -30
- data/lib/nokogiri/{html → html4}/document_fragment.rb +18 -17
- data/lib/nokogiri/{html → html4}/element_description.rb +2 -1
- data/lib/nokogiri/{html → html4}/element_description_defaults.rb +2 -1
- data/lib/nokogiri/{html → html4}/entity_lookup.rb +2 -1
- data/lib/nokogiri/{html → html4}/sax/parser.rb +12 -14
- data/lib/nokogiri/html4/sax/parser_context.rb +19 -0
- data/lib/nokogiri/{html → html4}/sax/push_parser.rb +6 -5
- data/lib/nokogiri/html4.rb +40 -0
- data/lib/nokogiri/html5/document.rb +74 -0
- data/lib/nokogiri/html5/document_fragment.rb +80 -0
- data/lib/nokogiri/html5/node.rb +93 -0
- data/lib/nokogiri/html5.rb +473 -0
- data/lib/nokogiri/jruby/dependencies.rb +20 -0
- data/lib/nokogiri/syntax_error.rb +1 -0
- data/lib/nokogiri/version/constant.rb +5 -0
- data/lib/nokogiri/version/info.rb +215 -0
- data/lib/nokogiri/version.rb +3 -109
- data/lib/nokogiri/xml/attr.rb +1 -0
- data/lib/nokogiri/xml/attribute_decl.rb +1 -0
- data/lib/nokogiri/xml/builder.rb +41 -2
- data/lib/nokogiri/xml/cdata.rb +1 -0
- data/lib/nokogiri/xml/character_data.rb +1 -0
- data/lib/nokogiri/xml/document.rb +138 -41
- data/lib/nokogiri/xml/document_fragment.rb +5 -6
- data/lib/nokogiri/xml/dtd.rb +1 -0
- data/lib/nokogiri/xml/element_content.rb +1 -0
- data/lib/nokogiri/xml/element_decl.rb +1 -0
- data/lib/nokogiri/xml/entity_decl.rb +1 -0
- data/lib/nokogiri/xml/entity_reference.rb +1 -0
- data/lib/nokogiri/xml/namespace.rb +1 -0
- data/lib/nokogiri/xml/node/save_options.rb +2 -1
- data/lib/nokogiri/xml/node.rb +629 -293
- data/lib/nokogiri/xml/node_set.rb +1 -0
- data/lib/nokogiri/xml/notation.rb +1 -0
- data/lib/nokogiri/xml/parse_options.rb +12 -3
- data/lib/nokogiri/xml/pp/character_data.rb +1 -0
- data/lib/nokogiri/xml/pp/node.rb +1 -0
- data/lib/nokogiri/xml/pp.rb +3 -2
- data/lib/nokogiri/xml/processing_instruction.rb +1 -0
- data/lib/nokogiri/xml/reader.rb +9 -12
- data/lib/nokogiri/xml/relax_ng.rb +7 -2
- data/lib/nokogiri/xml/sax/document.rb +25 -30
- data/lib/nokogiri/xml/sax/parser.rb +1 -0
- data/lib/nokogiri/xml/sax/parser_context.rb +1 -0
- data/lib/nokogiri/xml/sax/push_parser.rb +1 -0
- data/lib/nokogiri/xml/sax.rb +5 -4
- data/lib/nokogiri/xml/schema.rb +13 -4
- data/lib/nokogiri/xml/searchable.rb +25 -16
- data/lib/nokogiri/xml/syntax_error.rb +1 -0
- data/lib/nokogiri/xml/text.rb +1 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +2 -1
- data/lib/nokogiri/xml/xpath.rb +4 -5
- data/lib/nokogiri/xml/xpath_context.rb +1 -0
- data/lib/nokogiri/xml.rb +36 -36
- data/lib/nokogiri/xslt/stylesheet.rb +2 -1
- data/lib/nokogiri/xslt.rb +17 -16
- data/lib/nokogiri.rb +32 -51
- data/lib/xsd/xmlparser/nokogiri.rb +1 -0
- data/patches/libxml2/{0002-Remove-script-macro-support.patch → 0001-Remove-script-macro-support.patch} +0 -0
- data/patches/libxml2/{0003-Update-entities-to-remove-handling-of-ssi.patch → 0002-Update-entities-to-remove-handling-of-ssi.patch} +0 -0
- data/patches/libxml2/{0004-libxml2.la-is-in-top_builddir.patch → 0003-libxml2.la-is-in-top_builddir.patch} +1 -1
- data/patches/libxml2/0004-use-glibc-strlen.patch +53 -0
- data/patches/libxml2/0005-avoid-isnan-isinf.patch +81 -0
- data/patches/libxml2/0006-update-automake-files-for-arm64.patch +2511 -0
- data/patches/libxml2/0007-Fix-XPath-recursion-limit.patch +31 -0
- data/patches/libxslt/0001-update-automake-files-for-arm64.patch +2511 -0
- data/patches/libxslt/0002-Fix-xml2-config-check-in-configure-script.patch +19 -0
- data/ports/archives/libxml2-2.9.12.tar.gz +0 -0
- metadata +139 -161
- data/ext/nokogiri/html_document.c +0 -170
- data/ext/nokogiri/html_document.h +0 -10
- data/ext/nokogiri/html_element_description.c +0 -279
- data/ext/nokogiri/html_element_description.h +0 -10
- data/ext/nokogiri/html_entity_lookup.c +0 -32
- data/ext/nokogiri/html_entity_lookup.h +0 -8
- data/ext/nokogiri/html_sax_parser_context.c +0 -116
- data/ext/nokogiri/html_sax_parser_context.h +0 -11
- data/ext/nokogiri/html_sax_push_parser.c +0 -87
- data/ext/nokogiri/html_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_attr.h +0 -9
- data/ext/nokogiri/xml_attribute_decl.h +0 -9
- data/ext/nokogiri/xml_cdata.h +0 -9
- data/ext/nokogiri/xml_comment.h +0 -9
- data/ext/nokogiri/xml_document.h +0 -23
- data/ext/nokogiri/xml_document_fragment.h +0 -10
- data/ext/nokogiri/xml_dtd.h +0 -10
- data/ext/nokogiri/xml_element_content.h +0 -10
- data/ext/nokogiri/xml_element_decl.h +0 -9
- data/ext/nokogiri/xml_encoding_handler.h +0 -8
- data/ext/nokogiri/xml_entity_decl.h +0 -10
- data/ext/nokogiri/xml_entity_reference.h +0 -9
- data/ext/nokogiri/xml_io.c +0 -61
- data/ext/nokogiri/xml_io.h +0 -11
- data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
- data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
- data/ext/nokogiri/xml_namespace.h +0 -14
- data/ext/nokogiri/xml_node.h +0 -13
- data/ext/nokogiri/xml_node_set.h +0 -12
- data/ext/nokogiri/xml_processing_instruction.h +0 -9
- data/ext/nokogiri/xml_reader.h +0 -10
- data/ext/nokogiri/xml_relax_ng.h +0 -9
- data/ext/nokogiri/xml_sax_parser.h +0 -39
- data/ext/nokogiri/xml_sax_parser_context.h +0 -10
- data/ext/nokogiri/xml_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_schema.h +0 -9
- data/ext/nokogiri/xml_syntax_error.h +0 -13
- data/ext/nokogiri/xml_text.h +0 -9
- data/ext/nokogiri/xml_xpath_context.h +0 -10
- data/ext/nokogiri/xslt_stylesheet.h +0 -14
- data/lib/nokogiri/html/sax/parser_context.rb +0 -16
- data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
- data/patches/libxml2/0005-Fix-infinite-loop-in-xmlStringLenDecodeEntities.patch +0 -32
- data/ports/archives/libxml2-2.9.10.tar.gz +0 -0
@@ -0,0 +1,215 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
require "singleton"
|
3
|
+
require "shellwords"
|
4
|
+
|
5
|
+
module Nokogiri
|
6
|
+
class VersionInfo # :nodoc:
|
7
|
+
include Singleton
|
8
|
+
|
9
|
+
def jruby?
|
10
|
+
::JRUBY_VERSION if ::RUBY_PLATFORM == "java"
|
11
|
+
end
|
12
|
+
|
13
|
+
def windows?
|
14
|
+
::RUBY_PLATFORM =~ /mingw|mswin/
|
15
|
+
end
|
16
|
+
|
17
|
+
def ruby_minor
|
18
|
+
Gem::Version.new(::RUBY_VERSION).segments[0..1].join(".")
|
19
|
+
end
|
20
|
+
|
21
|
+
def engine
|
22
|
+
defined?(::RUBY_ENGINE) ? ::RUBY_ENGINE : "mri"
|
23
|
+
end
|
24
|
+
|
25
|
+
def loaded_libxml_version
|
26
|
+
Gem::Version.new(Nokogiri::LIBXML_LOADED_VERSION
|
27
|
+
.scan(/^(\d+)(\d\d)(\d\d)(?!\d)/).first
|
28
|
+
.collect(&:to_i)
|
29
|
+
.join("."))
|
30
|
+
end
|
31
|
+
|
32
|
+
def compiled_libxml_version
|
33
|
+
Gem::Version.new(Nokogiri::LIBXML_COMPILED_VERSION)
|
34
|
+
end
|
35
|
+
|
36
|
+
def loaded_libxslt_version
|
37
|
+
Gem::Version.new(Nokogiri::LIBXSLT_LOADED_VERSION
|
38
|
+
.scan(/^(\d+)(\d\d)(\d\d)(?!\d)/).first
|
39
|
+
.collect(&:to_i)
|
40
|
+
.join("."))
|
41
|
+
end
|
42
|
+
|
43
|
+
def compiled_libxslt_version
|
44
|
+
Gem::Version.new(Nokogiri::LIBXSLT_COMPILED_VERSION)
|
45
|
+
end
|
46
|
+
|
47
|
+
def libxml2?
|
48
|
+
defined?(Nokogiri::LIBXML_COMPILED_VERSION)
|
49
|
+
end
|
50
|
+
|
51
|
+
def libxml2_has_iconv?
|
52
|
+
defined?(Nokogiri::LIBXML_ICONV_ENABLED) && Nokogiri::LIBXML_ICONV_ENABLED
|
53
|
+
end
|
54
|
+
|
55
|
+
def libxslt_has_datetime?
|
56
|
+
defined?(Nokogiri::LIBXSLT_DATETIME_ENABLED) && Nokogiri::LIBXSLT_DATETIME_ENABLED
|
57
|
+
end
|
58
|
+
|
59
|
+
def libxml2_using_packaged?
|
60
|
+
libxml2? && Nokogiri::PACKAGED_LIBRARIES
|
61
|
+
end
|
62
|
+
|
63
|
+
def libxml2_using_system?
|
64
|
+
libxml2? && !libxml2_using_packaged?
|
65
|
+
end
|
66
|
+
|
67
|
+
def libxml2_precompiled?
|
68
|
+
libxml2_using_packaged? && Nokogiri::PRECOMPILED_LIBRARIES
|
69
|
+
end
|
70
|
+
|
71
|
+
def warnings
|
72
|
+
warnings = []
|
73
|
+
|
74
|
+
if libxml2?
|
75
|
+
if compiled_libxml_version != loaded_libxml_version
|
76
|
+
warnings << "Nokogiri was built against libxml version #{compiled_libxml_version}, but has dynamically loaded #{loaded_libxml_version}"
|
77
|
+
end
|
78
|
+
|
79
|
+
if compiled_libxslt_version != loaded_libxslt_version
|
80
|
+
warnings << "Nokogiri was built against libxslt version #{compiled_libxslt_version}, but has dynamically loaded #{loaded_libxslt_version}"
|
81
|
+
end
|
82
|
+
end
|
83
|
+
|
84
|
+
warnings
|
85
|
+
end
|
86
|
+
|
87
|
+
def to_hash
|
88
|
+
header_directory = File.expand_path(File.join(File.dirname(__FILE__), "../../../ext/nokogiri"))
|
89
|
+
|
90
|
+
{}.tap do |vi|
|
91
|
+
vi["warnings"] = []
|
92
|
+
vi["nokogiri"] = {}.tap do |nokogiri|
|
93
|
+
nokogiri["version"] = Nokogiri::VERSION
|
94
|
+
|
95
|
+
unless jruby?
|
96
|
+
# enable gems like nokogumbo to build with the following in their extconf.rb:
|
97
|
+
#
|
98
|
+
# append_cflags(Nokogiri::VERSION_INFO["nokogiri"]["cppflags"])
|
99
|
+
# append_ldflags(Nokogiri::VERSION_INFO["nokogiri"]["ldflags"])
|
100
|
+
#
|
101
|
+
cppflags = ["-I#{header_directory.shellescape}"]
|
102
|
+
ldflags = []
|
103
|
+
|
104
|
+
if libxml2_using_packaged?
|
105
|
+
cppflags << "-I#{File.join(header_directory, 'include').shellescape}"
|
106
|
+
cppflags << "-I#{File.join(header_directory, 'include/libxml2').shellescape}"
|
107
|
+
|
108
|
+
if windows?
|
109
|
+
# on windows, nokogumbo needs to link against nokogiri.so to resolve symbols. see #2167
|
110
|
+
lib_directory = File.expand_path(File.join(File.dirname(__FILE__), "../#{ruby_minor}"))
|
111
|
+
unless File.exist?(lib_directory)
|
112
|
+
lib_directory = File.expand_path(File.join(File.dirname(__FILE__), ".."))
|
113
|
+
end
|
114
|
+
ldflags << "-L#{lib_directory.shellescape}"
|
115
|
+
ldflags << "-l:nokogiri.so"
|
116
|
+
end
|
117
|
+
end
|
118
|
+
|
119
|
+
nokogiri["cppflags"] = cppflags
|
120
|
+
nokogiri["ldflags"] = ldflags
|
121
|
+
end
|
122
|
+
end
|
123
|
+
vi["ruby"] = {}.tap do |ruby|
|
124
|
+
ruby["version"] = ::RUBY_VERSION
|
125
|
+
ruby["platform"] = ::RUBY_PLATFORM
|
126
|
+
ruby["gem_platform"] = ::Gem::Platform.local.to_s
|
127
|
+
ruby["description"] = ::RUBY_DESCRIPTION
|
128
|
+
ruby["engine"] = engine
|
129
|
+
ruby["jruby"] = jruby? if jruby?
|
130
|
+
end
|
131
|
+
|
132
|
+
if libxml2?
|
133
|
+
vi["libxml"] = {}.tap do |libxml|
|
134
|
+
if libxml2_using_packaged?
|
135
|
+
libxml["source"] = "packaged"
|
136
|
+
libxml["precompiled"] = libxml2_precompiled?
|
137
|
+
libxml["patches"] = Nokogiri::LIBXML2_PATCHES
|
138
|
+
|
139
|
+
# this is for nokogumbo and shouldn't be forever
|
140
|
+
libxml["libxml2_path"] = header_directory
|
141
|
+
else
|
142
|
+
libxml["source"] = "system"
|
143
|
+
end
|
144
|
+
libxml["memory_management"] = Nokogiri::LIBXML_MEMORY_MANAGEMENT
|
145
|
+
libxml["iconv_enabled"] = libxml2_has_iconv?
|
146
|
+
libxml["compiled"] = compiled_libxml_version.to_s
|
147
|
+
libxml["loaded"] = loaded_libxml_version.to_s
|
148
|
+
end
|
149
|
+
|
150
|
+
vi["libxslt"] = {}.tap do |libxslt|
|
151
|
+
if libxml2_using_packaged?
|
152
|
+
libxslt["source"] = "packaged"
|
153
|
+
libxslt["precompiled"] = libxml2_precompiled?
|
154
|
+
libxslt["patches"] = Nokogiri::LIBXSLT_PATCHES
|
155
|
+
else
|
156
|
+
libxslt["source"] = "system"
|
157
|
+
end
|
158
|
+
libxslt["datetime_enabled"] = libxslt_has_datetime?
|
159
|
+
libxslt["compiled"] = compiled_libxslt_version.to_s
|
160
|
+
libxslt["loaded"] = loaded_libxslt_version.to_s
|
161
|
+
end
|
162
|
+
|
163
|
+
vi["warnings"] = warnings
|
164
|
+
end
|
165
|
+
|
166
|
+
if defined?(Nokogiri::OTHER_LIBRARY_VERSIONS)
|
167
|
+
# see extconf for how this string is assembled: "lib1name:lib1version,lib2name:lib2version"
|
168
|
+
vi["other_libraries"] = Hash[*Nokogiri::OTHER_LIBRARY_VERSIONS.split(/[,:]/)]
|
169
|
+
elsif jruby?
|
170
|
+
vi["other_libraries"] = {}.tap do |ol|
|
171
|
+
ol["xerces"] = Nokogiri::XERCES_VERSION
|
172
|
+
ol["nekohtml"] = Nokogiri::NEKO_VERSION
|
173
|
+
end
|
174
|
+
end
|
175
|
+
end
|
176
|
+
end
|
177
|
+
|
178
|
+
def to_markdown
|
179
|
+
begin
|
180
|
+
require "psych"
|
181
|
+
rescue LoadError
|
182
|
+
end
|
183
|
+
require "yaml"
|
184
|
+
"# Nokogiri (#{Nokogiri::VERSION})\n" +
|
185
|
+
YAML.dump(to_hash).each_line.map { |line| " #{line}" }.join
|
186
|
+
end
|
187
|
+
|
188
|
+
instance.warnings.each do |warning|
|
189
|
+
warn "WARNING: #{warning}"
|
190
|
+
end
|
191
|
+
end
|
192
|
+
|
193
|
+
def self.uses_libxml?(requirement = nil) # :nodoc:
|
194
|
+
return false unless VersionInfo.instance.libxml2?
|
195
|
+
return true unless requirement
|
196
|
+
Gem::Requirement.new(requirement).satisfied_by?(VersionInfo.instance.loaded_libxml_version)
|
197
|
+
end
|
198
|
+
|
199
|
+
def self.uses_gumbo?
|
200
|
+
uses_libxml? # TODO: replace with Gumbo functionality
|
201
|
+
end
|
202
|
+
|
203
|
+
def self.jruby? # :nodoc:
|
204
|
+
VersionInfo.instance.jruby?
|
205
|
+
end
|
206
|
+
|
207
|
+
# Ensure constants used in this file are loaded - see #1896
|
208
|
+
if Nokogiri.jruby?
|
209
|
+
require_relative "../jruby/dependencies"
|
210
|
+
end
|
211
|
+
require_relative "../extension"
|
212
|
+
|
213
|
+
# More complete version information about libxml
|
214
|
+
VERSION_INFO = VersionInfo.instance.to_hash
|
215
|
+
end
|
data/lib/nokogiri/version.rb
CHANGED
@@ -1,109 +1,3 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
class VersionInfo # :nodoc:
|
6
|
-
def jruby?
|
7
|
-
::JRUBY_VERSION if RUBY_PLATFORM == "java"
|
8
|
-
end
|
9
|
-
|
10
|
-
def engine
|
11
|
-
defined?(RUBY_ENGINE) ? RUBY_ENGINE : "mri"
|
12
|
-
end
|
13
|
-
|
14
|
-
def loaded_parser_version
|
15
|
-
LIBXML_PARSER_VERSION.
|
16
|
-
scan(/^(\d+)(\d\d)(\d\d)(?!\d)/).first.
|
17
|
-
collect(&:to_i).
|
18
|
-
join(".")
|
19
|
-
end
|
20
|
-
|
21
|
-
def compiled_parser_version
|
22
|
-
LIBXML_VERSION
|
23
|
-
end
|
24
|
-
|
25
|
-
def libxml2?
|
26
|
-
defined?(LIBXML_VERSION)
|
27
|
-
end
|
28
|
-
|
29
|
-
def libxml2_using_system?
|
30
|
-
!libxml2_using_packaged?
|
31
|
-
end
|
32
|
-
|
33
|
-
def libxml2_using_packaged?
|
34
|
-
NOKOGIRI_USE_PACKAGED_LIBRARIES
|
35
|
-
end
|
36
|
-
|
37
|
-
def warnings
|
38
|
-
return [] unless libxml2?
|
39
|
-
|
40
|
-
if compiled_parser_version != loaded_parser_version
|
41
|
-
["Nokogiri was built against LibXML version #{compiled_parser_version}, but has dynamically loaded #{loaded_parser_version}"]
|
42
|
-
else
|
43
|
-
[]
|
44
|
-
end
|
45
|
-
end
|
46
|
-
|
47
|
-
def to_hash
|
48
|
-
hash_info = {}
|
49
|
-
hash_info["warnings"] = []
|
50
|
-
hash_info["nokogiri"] = Nokogiri::VERSION
|
51
|
-
hash_info["ruby"] = {}
|
52
|
-
hash_info["ruby"]["version"] = ::RUBY_VERSION
|
53
|
-
hash_info["ruby"]["platform"] = ::RUBY_PLATFORM
|
54
|
-
hash_info["ruby"]["description"] = ::RUBY_DESCRIPTION
|
55
|
-
hash_info["ruby"]["engine"] = engine
|
56
|
-
hash_info["ruby"]["jruby"] = jruby? if jruby?
|
57
|
-
|
58
|
-
if libxml2?
|
59
|
-
hash_info["libxml"] = {}
|
60
|
-
hash_info["libxml"]["binding"] = "extension"
|
61
|
-
if libxml2_using_packaged?
|
62
|
-
hash_info["libxml"]["source"] = "packaged"
|
63
|
-
hash_info["libxml"]["libxml2_path"] = NOKOGIRI_LIBXML2_PATH
|
64
|
-
hash_info["libxml"]["libxslt_path"] = NOKOGIRI_LIBXSLT_PATH
|
65
|
-
hash_info["libxml"]["libxml2_patches"] = NOKOGIRI_LIBXML2_PATCHES
|
66
|
-
hash_info["libxml"]["libxslt_patches"] = NOKOGIRI_LIBXSLT_PATCHES
|
67
|
-
else
|
68
|
-
hash_info["libxml"]["source"] = "system"
|
69
|
-
end
|
70
|
-
hash_info["libxml"]["compiled"] = compiled_parser_version
|
71
|
-
hash_info["libxml"]["loaded"] = loaded_parser_version
|
72
|
-
hash_info["warnings"] = warnings
|
73
|
-
elsif jruby?
|
74
|
-
hash_info["xerces"] = Nokogiri::XERCES_VERSION
|
75
|
-
hash_info["nekohtml"] = Nokogiri::NEKO_VERSION
|
76
|
-
end
|
77
|
-
|
78
|
-
hash_info
|
79
|
-
end
|
80
|
-
|
81
|
-
def to_markdown
|
82
|
-
begin
|
83
|
-
require "psych"
|
84
|
-
rescue LoadError
|
85
|
-
end
|
86
|
-
require "yaml"
|
87
|
-
"# Nokogiri (#{Nokogiri::VERSION})\n" +
|
88
|
-
YAML.dump(to_hash).each_line.map { |line| " #{line}" }.join
|
89
|
-
end
|
90
|
-
|
91
|
-
# FIXME: maybe switch to singleton?
|
92
|
-
@@instance = new
|
93
|
-
@@instance.warnings.each do |warning|
|
94
|
-
warn "WARNING: #{warning}"
|
95
|
-
end
|
96
|
-
def self.instance; @@instance; end
|
97
|
-
end
|
98
|
-
|
99
|
-
# More complete version information about libxml
|
100
|
-
VERSION_INFO = VersionInfo.instance.to_hash
|
101
|
-
|
102
|
-
def self.uses_libxml? # :nodoc:
|
103
|
-
VersionInfo.instance.libxml2?
|
104
|
-
end
|
105
|
-
|
106
|
-
def self.jruby? # :nodoc:
|
107
|
-
VersionInfo.instance.jruby?
|
108
|
-
end
|
109
|
-
end
|
1
|
+
# frozen_string_literal: true
|
2
|
+
require_relative "version/constant"
|
3
|
+
require_relative "version/info"
|
data/lib/nokogiri/xml/attr.rb
CHANGED
data/lib/nokogiri/xml/builder.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
module Nokogiri
|
2
3
|
module XML
|
3
4
|
###
|
@@ -195,6 +196,41 @@ module Nokogiri
|
|
195
196
|
#
|
196
197
|
# Note the "foo:object" tag.
|
197
198
|
#
|
199
|
+
# === Namespace inheritance
|
200
|
+
#
|
201
|
+
# In the Builder context, children will inherit their parent's namespace. This is the same
|
202
|
+
# behavior as if the underlying {XML::Document} set +namespace_inheritance+ to +true+:
|
203
|
+
#
|
204
|
+
# result = Nokogiri::XML::Builder.new do |xml|
|
205
|
+
# xml["soapenv"].Envelope("xmlns:soapenv" => "http://schemas.xmlsoap.org/soap/envelope/") do
|
206
|
+
# xml.Header
|
207
|
+
# end
|
208
|
+
# end
|
209
|
+
# result.doc.to_xml
|
210
|
+
# # => <?xml version="1.0" encoding="utf-8"?>
|
211
|
+
# # <soapenv:Envelope xmlns:soapenv="http://schemas.xmlsoap.org/soap/envelope/">
|
212
|
+
# # <soapenv:Header/>
|
213
|
+
# # </soapenv:Envelope>
|
214
|
+
#
|
215
|
+
# Users may turn this behavior off by passing a keyword argument +namespace_inheritance:false+
|
216
|
+
# to the initializer:
|
217
|
+
#
|
218
|
+
# result = Nokogiri::XML::Builder.new(namespace_inheritance: false) do |xml|
|
219
|
+
# xml["soapenv"].Envelope("xmlns:soapenv" => "http://schemas.xmlsoap.org/soap/envelope/") do
|
220
|
+
# xml.Header
|
221
|
+
# xml["soapenv"].Body # users may explicitly opt into the namespace
|
222
|
+
# end
|
223
|
+
# end
|
224
|
+
# result.doc.to_xml
|
225
|
+
# # => <?xml version="1.0" encoding="utf-8"?>
|
226
|
+
# # <soapenv:Envelope xmlns:soapenv="http://schemas.xmlsoap.org/soap/envelope/">
|
227
|
+
# # <Header/>
|
228
|
+
# # <soapenv:Body/>
|
229
|
+
# # </soapenv:Envelope>
|
230
|
+
#
|
231
|
+
# For more information on namespace inheritance, please see {XML::Document#namespace_inheritance}
|
232
|
+
#
|
233
|
+
#
|
198
234
|
# == Document Types
|
199
235
|
#
|
200
236
|
# To create a document type (DTD), access use the Builder#doc method to get
|
@@ -225,6 +261,8 @@ module Nokogiri
|
|
225
261
|
# </root>
|
226
262
|
#
|
227
263
|
class Builder
|
264
|
+
DEFAULT_DOCUMENT_OPTIONS = {namespace_inheritance: true}
|
265
|
+
|
228
266
|
# The current Document object being built
|
229
267
|
attr_accessor :doc
|
230
268
|
|
@@ -244,8 +282,8 @@ module Nokogiri
|
|
244
282
|
#
|
245
283
|
# For example:
|
246
284
|
#
|
247
|
-
# doc = Nokogiri::XML(
|
248
|
-
# Nokogiri::XML::Builder.with(doc.
|
285
|
+
# doc = Nokogiri::XML(File.read('somedoc.xml'))
|
286
|
+
# Nokogiri::XML::Builder.with(doc.at_css('some_tag')) do |xml|
|
249
287
|
# # ... Use normal builder methods here ...
|
250
288
|
# xml.awesome # add the "awesome" tag below "some_tag"
|
251
289
|
# end
|
@@ -281,6 +319,7 @@ module Nokogiri
|
|
281
319
|
@arity = nil
|
282
320
|
@ns = nil
|
283
321
|
|
322
|
+
options = DEFAULT_DOCUMENT_OPTIONS.merge(options)
|
284
323
|
options.each do |k, v|
|
285
324
|
@doc.send(:"#{k}=", v)
|
286
325
|
end
|
data/lib/nokogiri/xml/cdata.rb
CHANGED
@@ -1,3 +1,8 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
require 'pathname'
|
5
|
+
|
1
6
|
module Nokogiri
|
2
7
|
module XML
|
3
8
|
##
|
@@ -9,11 +14,12 @@ module Nokogiri
|
|
9
14
|
# Nokogiri::XML::Searchable#xpath
|
10
15
|
#
|
11
16
|
class Document < Nokogiri::XML::Node
|
12
|
-
#
|
13
|
-
#
|
17
|
+
# See http://www.w3.org/TR/REC-xml-names/#ns-decl for more details. Note that we're not
|
18
|
+
# attempting to handle unicode characters partly because libxml2 doesn't handle unicode
|
19
|
+
# characters in NCNAMEs.
|
14
20
|
NCNAME_START_CHAR = "A-Za-z_"
|
15
|
-
NCNAME_CHAR = NCNAME_START_CHAR + "
|
16
|
-
NCNAME_RE = /^xmlns(
|
21
|
+
NCNAME_CHAR = NCNAME_START_CHAR + "\\-\\.0-9"
|
22
|
+
NCNAME_RE = /^xmlns(?::([#{NCNAME_START_CHAR}][#{NCNAME_CHAR}]*))?$/
|
17
23
|
|
18
24
|
##
|
19
25
|
# Parse an XML file.
|
@@ -43,9 +49,11 @@ module Nokogiri
|
|
43
49
|
#
|
44
50
|
def self.parse string_or_io, url = nil, encoding = nil, options = ParseOptions::DEFAULT_XML
|
45
51
|
options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
|
46
|
-
|
52
|
+
|
47
53
|
yield options if block_given?
|
48
54
|
|
55
|
+
url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
|
56
|
+
|
49
57
|
if empty_doc?(string_or_io)
|
50
58
|
if options.strict?
|
51
59
|
raise Nokogiri::XML::SyntaxError.new("Empty document")
|
@@ -55,12 +63,17 @@ module Nokogiri
|
|
55
63
|
end
|
56
64
|
|
57
65
|
doc = if string_or_io.respond_to?(:read)
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
66
|
+
if string_or_io.is_a?(Pathname)
|
67
|
+
# resolve the Pathname to the file and open it as an IO object, see #2110
|
68
|
+
string_or_io = string_or_io.expand_path.open
|
69
|
+
url ||= string_or_io.path
|
70
|
+
end
|
71
|
+
|
72
|
+
read_io(string_or_io, url, encoding, options.to_i)
|
73
|
+
else
|
74
|
+
# read_memory pukes on empty docs
|
75
|
+
read_memory(string_or_io, url, encoding, options.to_i)
|
76
|
+
end
|
64
77
|
|
65
78
|
# do xinclude processing
|
66
79
|
doc.do_xinclude(options) if options.xinclude?
|
@@ -68,42 +81,142 @@ module Nokogiri
|
|
68
81
|
return doc
|
69
82
|
end
|
70
83
|
|
84
|
+
##
|
85
|
+
# @!method wrap(java_document)
|
86
|
+
# @!scope class
|
87
|
+
#
|
88
|
+
# Create a {Document} using an existing Java DOM document object.
|
89
|
+
#
|
90
|
+
# The returned {Document} shares the same underlying data structure as the Java object, so
|
91
|
+
# changes in one are reflected in the other.
|
92
|
+
#
|
93
|
+
# @param java_document [Java::OrgW3cDom::Document]
|
94
|
+
# @return [Nokogiri::XML::Document]
|
95
|
+
# @note This method is only available when running JRuby.
|
96
|
+
# @note The class +Java::OrgW3cDom::Document+ is also accessible as +org.w3c.dom.Document+.
|
97
|
+
# @see #to_java
|
98
|
+
|
99
|
+
##
|
100
|
+
# @!method to_java()
|
101
|
+
#
|
102
|
+
# Returns the underlying Java DOM document object for the {Document}.
|
103
|
+
#
|
104
|
+
# The returned Java object shares the same underlying data structure as the {Document}, so
|
105
|
+
# changes in one are reflected in the other.
|
106
|
+
#
|
107
|
+
# @return [Java::OrgW3cDom::Document]
|
108
|
+
# @note This method is only available when running JRuby.
|
109
|
+
# @note The class +Java::OrgW3cDom::Document+ is also accessible as +org.w3c.dom.Document+.
|
110
|
+
# @see .wrap
|
111
|
+
|
112
|
+
|
71
113
|
# A list of Nokogiri::XML::SyntaxError found when parsing a document
|
72
114
|
attr_accessor :errors
|
73
115
|
|
116
|
+
# When true, reparented elements without a namespace will inherit their new parent's
|
117
|
+
# namespace (if one exists). Defaults to +false+.
|
118
|
+
#
|
119
|
+
# @example Default behavior of namespace inheritance
|
120
|
+
# xml = <<~EOF
|
121
|
+
# <root xmlns:foo="http://nokogiri.org/default_ns/test/foo">
|
122
|
+
# <foo:parent>
|
123
|
+
# </foo:parent>
|
124
|
+
# </root>
|
125
|
+
# EOF
|
126
|
+
# doc = Nokogiri::XML(xml)
|
127
|
+
# parent = doc.at_xpath("//foo:parent", "foo" => "http://nokogiri.org/default_ns/test/foo")
|
128
|
+
# parent.add_child("<child></child>")
|
129
|
+
# doc.to_xml
|
130
|
+
# # => <?xml version="1.0"?>
|
131
|
+
# # <root xmlns:foo="http://nokogiri.org/default_ns/test/foo">
|
132
|
+
# # <foo:parent>
|
133
|
+
# # <child/>
|
134
|
+
# # </foo:parent>
|
135
|
+
# # </root>
|
136
|
+
#
|
137
|
+
# @example Setting namespace inheritance to +true+
|
138
|
+
# xml = <<~EOF
|
139
|
+
# <root xmlns:foo="http://nokogiri.org/default_ns/test/foo">
|
140
|
+
# <foo:parent>
|
141
|
+
# </foo:parent>
|
142
|
+
# </root>
|
143
|
+
# EOF
|
144
|
+
# doc = Nokogiri::XML(xml)
|
145
|
+
# doc.namespace_inheritance = true
|
146
|
+
# parent = doc.at_xpath("//foo:parent", "foo" => "http://nokogiri.org/default_ns/test/foo")
|
147
|
+
# parent.add_child("<child></child>")
|
148
|
+
# doc.to_xml
|
149
|
+
# # => <?xml version="1.0"?>
|
150
|
+
# # <root xmlns:foo="http://nokogiri.org/default_ns/test/foo">
|
151
|
+
# # <foo:parent>
|
152
|
+
# # <foo:child/>
|
153
|
+
# # </foo:parent>
|
154
|
+
# # </root>
|
155
|
+
#
|
156
|
+
# @return [Boolean]
|
157
|
+
#
|
158
|
+
# @since v1.12.4
|
159
|
+
attr_accessor :namespace_inheritance
|
160
|
+
|
74
161
|
def initialize *args # :nodoc:
|
75
162
|
@errors = []
|
76
163
|
@decorators = nil
|
164
|
+
@namespace_inheritance = false
|
77
165
|
end
|
78
166
|
|
79
167
|
##
|
80
|
-
# Create
|
168
|
+
# Create a new +Element+ with +name+ sharing GC lifecycle with the document, optionally
|
169
|
+
# setting contents or attributes.
|
170
|
+
#
|
171
|
+
# Arguments may be passed to initialize the element:
|
172
|
+
# - a +Hash+ argument will be used to set attributes
|
173
|
+
# - a non-Hash object that responds to +#to_s+ will be used to set the new node's contents
|
174
|
+
#
|
175
|
+
# A block may be passed to mutate the node.
|
176
|
+
#
|
177
|
+
# @param name [String]
|
178
|
+
# @param contents_or_attrs [#to_s,Hash]
|
179
|
+
# @yieldparam node [Nokogiri::XML::Element]
|
180
|
+
# @return [Nokogiri::XML::Element]
|
181
|
+
#
|
182
|
+
# @example An empty element without attributes
|
183
|
+
# doc.create_element("div")
|
184
|
+
# # => <div></div>
|
185
|
+
#
|
186
|
+
# @example An element with contents
|
187
|
+
# doc.create_element("div", "contents")
|
188
|
+
# # => <div>contents</div>
|
81
189
|
#
|
82
|
-
#
|
83
|
-
# doc.create_element
|
84
|
-
#
|
85
|
-
# doc.create_element "div", "contents", :class => "container" # <div class='container'>contents</div>
|
86
|
-
# doc.create_element "div" { |node| node['class'] = "container" } # <div class='container'></div>
|
190
|
+
# @example An element with attributes
|
191
|
+
# doc.create_element("div", {"class" => "container"})
|
192
|
+
# # => <div class='container'></div>
|
87
193
|
#
|
88
|
-
|
194
|
+
# @example An element with contents and attributes
|
195
|
+
# doc.create_element("div", "contents", {"class" => "container"})
|
196
|
+
# # => <div class='container'>contents</div>
|
197
|
+
#
|
198
|
+
# @example Passing a block to mutate the element
|
199
|
+
# doc.create_element("div") { |node| node["class"] = "blue" if before_noon? }
|
200
|
+
#
|
201
|
+
def create_element(name, *contents_or_attrs, &block)
|
89
202
|
elm = Nokogiri::XML::Element.new(name, self, &block)
|
90
|
-
|
203
|
+
contents_or_attrs.each do |arg|
|
91
204
|
case arg
|
92
205
|
when Hash
|
93
|
-
arg.each
|
206
|
+
arg.each do |k, v|
|
94
207
|
key = k.to_s
|
95
208
|
if key =~ NCNAME_RE
|
96
|
-
ns_name =
|
97
|
-
elm.add_namespace_definition
|
209
|
+
ns_name = Regexp.last_match(1)
|
210
|
+
elm.add_namespace_definition(ns_name, v)
|
98
211
|
else
|
99
212
|
elm[k.to_s] = v.to_s
|
100
213
|
end
|
101
|
-
|
214
|
+
end
|
102
215
|
else
|
103
216
|
elm.content = arg
|
104
217
|
end
|
105
218
|
end
|
106
|
-
if ns = elm.namespace_definitions.find { |n| n.prefix.nil?
|
219
|
+
if ns = elm.namespace_definitions.find { |n| n.prefix.nil? || (n.prefix == '') }
|
107
220
|
elm.namespace = ns
|
108
221
|
end
|
109
222
|
elm
|
@@ -251,30 +364,14 @@ module Nokogiri
|
|
251
364
|
end
|
252
365
|
alias :<< :add_child
|
253
366
|
|
254
|
-
##
|
255
|
-
# +JRuby+
|
256
|
-
# Wraps Java's org.w3c.dom.document and returns Nokogiri::XML::Document
|
257
|
-
def self.wrap document
|
258
|
-
raise "JRuby only method" unless Nokogiri.jruby?
|
259
|
-
return wrapJavaDocument(document)
|
260
|
-
end
|
261
|
-
|
262
|
-
##
|
263
|
-
# +JRuby+
|
264
|
-
# Returns Java's org.w3c.dom.document of this Document.
|
265
|
-
def to_java
|
266
|
-
raise "JRuby only method" unless Nokogiri.jruby?
|
267
|
-
return toJavaDocument()
|
268
|
-
end
|
269
|
-
|
270
367
|
private
|
368
|
+
|
271
369
|
def self.empty_doc? string_or_io
|
272
370
|
string_or_io.nil? ||
|
273
371
|
(string_or_io.respond_to?(:empty?) && string_or_io.empty?) ||
|
274
372
|
(string_or_io.respond_to?(:eof?) && string_or_io.eof?)
|
275
373
|
end
|
276
374
|
|
277
|
-
# @private
|
278
375
|
IMPLIED_XPATH_CONTEXTS = [ '//'.freeze ].freeze # :nodoc:
|
279
376
|
|
280
377
|
def inspect_attributes
|