nokogiri 1.11.0.rc3 → 1.11.0.rc4
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/LICENSE-DEPENDENCIES.md +1015 -947
- data/README.md +1 -1
- data/ext/nokogiri/depend +476 -357
- data/ext/nokogiri/extconf.rb +441 -321
- data/ext/nokogiri/html_document.c +79 -78
- data/ext/nokogiri/html_sax_parser_context.c +2 -2
- data/ext/nokogiri/nokogiri.c +34 -46
- data/ext/nokogiri/nokogiri.h +22 -26
- data/ext/nokogiri/xml_document.c +2 -2
- data/ext/nokogiri/xml_node.c +1 -1
- data/ext/nokogiri/xml_node_set.c +1 -1
- data/ext/nokogiri/xml_relax_ng.c +29 -11
- data/ext/nokogiri/xml_sax_parser.c +2 -7
- data/ext/nokogiri/xml_sax_parser_context.c +2 -2
- data/ext/nokogiri/xml_schema.c +55 -13
- data/ext/nokogiri/xml_xpath_context.c +80 -4
- data/ext/nokogiri/xslt_stylesheet.c +1 -4
- data/lib/nokogiri.rb +1 -1
- data/lib/nokogiri/css/parser.rb +3 -3
- data/lib/nokogiri/css/parser.y +2 -2
- data/lib/nokogiri/css/xpath_visitor.rb +70 -42
- data/lib/nokogiri/html/document.rb +12 -26
- data/lib/nokogiri/version.rb +2 -149
- data/lib/nokogiri/version/constant.rb +5 -0
- data/lib/nokogiri/version/info.rb +182 -0
- data/lib/nokogiri/xml/document.rb +17 -7
- data/lib/nokogiri/xml/document_fragment.rb +4 -6
- data/lib/nokogiri/xml/node.rb +50 -27
- data/lib/nokogiri/xml/parse_options.rb +6 -0
- data/lib/nokogiri/xml/relax_ng.rb +6 -2
- data/lib/nokogiri/xml/schema.rb +12 -4
- data/lib/nokogiri/xml/searchable.rb +3 -1
- data/patches/libxml2/0006-htmlParseComment-treat-as-if-it-closed-the-comment.patch +73 -0
- data/patches/libxml2/0007-use-new-htmlParseLookupCommentEnd-to-find-comment-en.patch +103 -0
- data/patches/libxml2/0008-use-glibc-strlen.patch +53 -0
- metadata +34 -22
@@ -0,0 +1,182 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
require "singleton"
|
3
|
+
require "shellwords"
|
4
|
+
|
5
|
+
module Nokogiri
|
6
|
+
class VersionInfo # :nodoc:
|
7
|
+
include Singleton
|
8
|
+
|
9
|
+
def jruby?
|
10
|
+
::JRUBY_VERSION if ::RUBY_PLATFORM == "java"
|
11
|
+
end
|
12
|
+
|
13
|
+
def engine
|
14
|
+
defined?(::RUBY_ENGINE) ? ::RUBY_ENGINE : "mri"
|
15
|
+
end
|
16
|
+
|
17
|
+
def loaded_libxml_version
|
18
|
+
Gem::Version.new(Nokogiri::LIBXML_LOADED_VERSION
|
19
|
+
.scan(/^(\d+)(\d\d)(\d\d)(?!\d)/).first
|
20
|
+
.collect(&:to_i)
|
21
|
+
.join("."))
|
22
|
+
end
|
23
|
+
|
24
|
+
def compiled_libxml_version
|
25
|
+
Gem::Version.new(Nokogiri::LIBXML_COMPILED_VERSION)
|
26
|
+
end
|
27
|
+
|
28
|
+
def loaded_libxslt_version
|
29
|
+
Gem::Version.new(Nokogiri::LIBXSLT_LOADED_VERSION
|
30
|
+
.scan(/^(\d+)(\d\d)(\d\d)(?!\d)/).first
|
31
|
+
.collect(&:to_i)
|
32
|
+
.join("."))
|
33
|
+
end
|
34
|
+
|
35
|
+
def compiled_libxslt_version
|
36
|
+
Gem::Version.new(Nokogiri::LIBXSLT_COMPILED_VERSION)
|
37
|
+
end
|
38
|
+
|
39
|
+
def libxml2?
|
40
|
+
defined?(Nokogiri::LIBXML_COMPILED_VERSION)
|
41
|
+
end
|
42
|
+
|
43
|
+
def libxml2_has_iconv?
|
44
|
+
defined?(Nokogiri::LIBXML_ICONV_ENABLED) && Nokogiri::LIBXML_ICONV_ENABLED
|
45
|
+
end
|
46
|
+
|
47
|
+
def libxml2_using_packaged?
|
48
|
+
libxml2? && Nokogiri::PACKAGED_LIBRARIES
|
49
|
+
end
|
50
|
+
|
51
|
+
def libxml2_using_system?
|
52
|
+
libxml2? && !libxml2_using_packaged?
|
53
|
+
end
|
54
|
+
|
55
|
+
def libxml2_precompiled?
|
56
|
+
libxml2_using_packaged? && Nokogiri::PRECOMPILED_LIBRARIES
|
57
|
+
end
|
58
|
+
|
59
|
+
def warnings
|
60
|
+
warnings = []
|
61
|
+
|
62
|
+
if libxml2?
|
63
|
+
if compiled_libxml_version != loaded_libxml_version
|
64
|
+
warnings << "Nokogiri was built against libxml version #{compiled_libxml_version}, but has dynamically loaded #{loaded_libxml_version}"
|
65
|
+
end
|
66
|
+
|
67
|
+
if compiled_libxslt_version != loaded_libxslt_version
|
68
|
+
warnings << "Nokogiri was built against libxslt version #{compiled_libxslt_version}, but has dynamically loaded #{loaded_libxslt_version}"
|
69
|
+
end
|
70
|
+
end
|
71
|
+
|
72
|
+
warnings
|
73
|
+
end
|
74
|
+
|
75
|
+
def to_hash
|
76
|
+
header_directory = File.expand_path(File.join(File.dirname(__FILE__), "../../../ext/nokogiri"))
|
77
|
+
{}.tap do |vi|
|
78
|
+
vi["warnings"] = []
|
79
|
+
vi["nokogiri"] = {}.tap do |nokogiri|
|
80
|
+
nokogiri["version"] = Nokogiri::VERSION
|
81
|
+
|
82
|
+
unless jruby?
|
83
|
+
cppflags = ["-I#{header_directory.shellescape}"]
|
84
|
+
if libxml2_using_packaged?
|
85
|
+
cppflags << "-I#{File.join(header_directory, "include").shellescape}"
|
86
|
+
cppflags << "-I#{File.join(header_directory, "include/libxml2").shellescape}"
|
87
|
+
end
|
88
|
+
nokogiri["cppflags"] = cppflags
|
89
|
+
end
|
90
|
+
end
|
91
|
+
vi["ruby"] = {}.tap do |ruby|
|
92
|
+
ruby["version"] = ::RUBY_VERSION
|
93
|
+
ruby["platform"] = ::RUBY_PLATFORM
|
94
|
+
ruby["gem_platform"] = ::Gem::Platform.local.to_s
|
95
|
+
ruby["description"] = ::RUBY_DESCRIPTION
|
96
|
+
ruby["engine"] = engine
|
97
|
+
ruby["jruby"] = jruby? if jruby?
|
98
|
+
end
|
99
|
+
|
100
|
+
if libxml2?
|
101
|
+
vi["libxml"] = {}.tap do |libxml|
|
102
|
+
if libxml2_using_packaged?
|
103
|
+
libxml["source"] = "packaged"
|
104
|
+
libxml["precompiled"] = libxml2_precompiled?
|
105
|
+
libxml["patches"] = Nokogiri::LIBXML2_PATCHES
|
106
|
+
|
107
|
+
# this is for nokogumbo and shouldn't be forever
|
108
|
+
libxml["libxml2_path"] = header_directory
|
109
|
+
else
|
110
|
+
libxml["source"] = "system"
|
111
|
+
end
|
112
|
+
libxml["iconv_enabled"] = libxml2_has_iconv?
|
113
|
+
libxml["compiled"] = compiled_libxml_version.to_s
|
114
|
+
libxml["loaded"] = loaded_libxml_version.to_s
|
115
|
+
end
|
116
|
+
|
117
|
+
vi["libxslt"] = {}.tap do |libxslt|
|
118
|
+
if libxml2_using_packaged?
|
119
|
+
libxslt["source"] = "packaged"
|
120
|
+
libxslt["precompiled"] = libxml2_precompiled?
|
121
|
+
libxslt["patches"] = Nokogiri::LIBXSLT_PATCHES
|
122
|
+
else
|
123
|
+
libxslt["source"] = "system"
|
124
|
+
end
|
125
|
+
libxslt["compiled"] = compiled_libxslt_version.to_s
|
126
|
+
libxslt["loaded"] = loaded_libxslt_version.to_s
|
127
|
+
end
|
128
|
+
|
129
|
+
vi["warnings"] = warnings
|
130
|
+
end
|
131
|
+
|
132
|
+
if defined?(Nokogiri::OTHER_LIBRARY_VERSIONS)
|
133
|
+
# see extconf for how this string is assembled: "lib1name:lib1version,lib2name:lib2version"
|
134
|
+
vi["other_libraries"] = Hash[*Nokogiri::OTHER_LIBRARY_VERSIONS.split(/[,:]/)]
|
135
|
+
elsif jruby?
|
136
|
+
vi["other_libraries"] = {}.tap do |ol|
|
137
|
+
ol["xerces"] = Nokogiri::XERCES_VERSION
|
138
|
+
ol["nekohtml"] = Nokogiri::NEKO_VERSION
|
139
|
+
end
|
140
|
+
end
|
141
|
+
end
|
142
|
+
end
|
143
|
+
|
144
|
+
def to_markdown
|
145
|
+
begin
|
146
|
+
require "psych"
|
147
|
+
rescue LoadError
|
148
|
+
end
|
149
|
+
require "yaml"
|
150
|
+
"# Nokogiri (#{Nokogiri::VERSION})\n" +
|
151
|
+
YAML.dump(to_hash).each_line.map { |line| " #{line}" }.join
|
152
|
+
end
|
153
|
+
|
154
|
+
instance.warnings.each do |warning|
|
155
|
+
warn "WARNING: #{warning}"
|
156
|
+
end
|
157
|
+
end
|
158
|
+
|
159
|
+
def self.uses_libxml?(requirement = nil) # :nodoc:
|
160
|
+
return false unless VersionInfo.instance.libxml2?
|
161
|
+
return true unless requirement
|
162
|
+
Gem::Requirement.new(requirement).satisfied_by?(VersionInfo.instance.loaded_libxml_version)
|
163
|
+
end
|
164
|
+
|
165
|
+
def self.jruby? # :nodoc:
|
166
|
+
VersionInfo.instance.jruby?
|
167
|
+
end
|
168
|
+
|
169
|
+
# Ensure constants used in this file are loaded - see #1896
|
170
|
+
if Nokogiri.jruby?
|
171
|
+
require "nokogiri/jruby/dependencies"
|
172
|
+
end
|
173
|
+
begin
|
174
|
+
::RUBY_VERSION =~ /(\d+\.\d+)/
|
175
|
+
require "nokogiri/#{Regexp.last_match(1)}/nokogiri"
|
176
|
+
rescue LoadError
|
177
|
+
require "nokogiri/nokogiri"
|
178
|
+
end
|
179
|
+
|
180
|
+
# More complete version information about libxml
|
181
|
+
VERSION_INFO = VersionInfo.instance.to_hash
|
182
|
+
end
|
@@ -1,4 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'pathname'
|
4
|
+
|
2
5
|
module Nokogiri
|
3
6
|
module XML
|
4
7
|
##
|
@@ -44,9 +47,11 @@ module Nokogiri
|
|
44
47
|
#
|
45
48
|
def self.parse string_or_io, url = nil, encoding = nil, options = ParseOptions::DEFAULT_XML
|
46
49
|
options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
|
47
|
-
|
50
|
+
|
48
51
|
yield options if block_given?
|
49
52
|
|
53
|
+
url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
|
54
|
+
|
50
55
|
if empty_doc?(string_or_io)
|
51
56
|
if options.strict?
|
52
57
|
raise Nokogiri::XML::SyntaxError.new("Empty document")
|
@@ -56,12 +61,17 @@ module Nokogiri
|
|
56
61
|
end
|
57
62
|
|
58
63
|
doc = if string_or_io.respond_to?(:read)
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
64
|
+
if string_or_io.is_a?(Pathname)
|
65
|
+
# resolve the Pathname to the file and open it as an IO object, see #2110
|
66
|
+
string_or_io = string_or_io.expand_path.open
|
67
|
+
url ||= string_or_io.path
|
68
|
+
end
|
69
|
+
|
70
|
+
read_io(string_or_io, url, encoding, options.to_i)
|
71
|
+
else
|
72
|
+
# read_memory pukes on empty docs
|
73
|
+
read_memory(string_or_io, url, encoding, options.to_i)
|
74
|
+
end
|
65
75
|
|
66
76
|
# do xinclude processing
|
67
77
|
doc.do_xinclude(options) if options.xinclude?
|
@@ -141,6 +141,10 @@ module Nokogiri
|
|
141
141
|
document.errors = things
|
142
142
|
end
|
143
143
|
|
144
|
+
def fragment(data)
|
145
|
+
document.fragment(data)
|
146
|
+
end
|
147
|
+
|
144
148
|
private
|
145
149
|
|
146
150
|
# fix for issue 770
|
@@ -150,12 +154,6 @@ module Nokogiri
|
|
150
154
|
%Q{xmlns#{prefix}="#{namespace.href}"}
|
151
155
|
end.join ' '
|
152
156
|
end
|
153
|
-
|
154
|
-
def coerce data
|
155
|
-
return super unless String === data
|
156
|
-
|
157
|
-
document.fragment(data).children
|
158
|
-
end
|
159
157
|
end
|
160
158
|
end
|
161
159
|
end
|
data/lib/nokogiri/xml/node.rb
CHANGED
@@ -267,6 +267,8 @@ module Nokogiri
|
|
267
267
|
#
|
268
268
|
# Also see related method +swap+.
|
269
269
|
def replace(node_or_tags)
|
270
|
+
raise("Cannot replace a node with no parent") unless parent
|
271
|
+
|
270
272
|
# We cannot replace a text node directly, otherwise libxml will return
|
271
273
|
# an internal error at parser.c:13031, I don't know exactly why
|
272
274
|
# libxml is trying to find a parent node that is an element or document
|
@@ -278,7 +280,7 @@ module Nokogiri
|
|
278
280
|
return replacee.replace node_or_tags
|
279
281
|
end
|
280
282
|
|
281
|
-
node_or_tags = coerce(node_or_tags)
|
283
|
+
node_or_tags = parent.coerce(node_or_tags)
|
282
284
|
|
283
285
|
if node_or_tags.is_a?(XML::NodeSet)
|
284
286
|
node_or_tags.each { |n| add_previous_sibling n }
|
@@ -819,13 +821,30 @@ module Nokogiri
|
|
819
821
|
|
820
822
|
return Nokogiri::XML::NodeSet.new(document) if contents.empty?
|
821
823
|
|
822
|
-
|
823
|
-
#
|
824
|
+
# libxml2 does not obey the `recover` option after encountering errors during `in_context`
|
825
|
+
# parsing, and so this horrible hack is here to try to emulate recovery behavior.
|
826
|
+
#
|
827
|
+
# Unfortunately, this means we're no longer parsing "in context" and so namespaces that
|
828
|
+
# would have been inherited from the context node won't be handled correctly. This hack was
|
829
|
+
# written in 2010, and I regret it, because it's silently degrading functionality in a way
|
830
|
+
# that's not easily prevented (or even detected).
|
831
|
+
#
|
832
|
+
# I think preferable behavior would be to either:
|
833
|
+
#
|
834
|
+
# a. add an error noting that we "fell back" and pointing the user to turning off the `recover` option
|
835
|
+
# b. don't recover, but raise a sensible exception
|
836
|
+
#
|
837
|
+
# For context and background: https://github.com/sparklemotion/nokogiri/issues/313
|
838
|
+
# FIXME bug report: https://github.com/sparklemotion/nokogiri/issues/2092
|
824
839
|
error_count = document.errors.length
|
825
840
|
node_set = in_context(contents, options.to_i)
|
826
|
-
if node_set.empty?
|
827
|
-
|
828
|
-
|
841
|
+
if (node_set.empty? && (document.errors.length > error_count))
|
842
|
+
if options.recover?
|
843
|
+
fragment = Nokogiri::HTML::DocumentFragment.parse contents
|
844
|
+
node_set = fragment.children
|
845
|
+
else
|
846
|
+
raise document.errors[error_count]
|
847
|
+
end
|
829
848
|
end
|
830
849
|
node_set
|
831
850
|
end
|
@@ -1128,6 +1147,28 @@ module Nokogiri
|
|
1128
1147
|
|
1129
1148
|
# @!endgroup
|
1130
1149
|
|
1150
|
+
protected
|
1151
|
+
|
1152
|
+
def coerce(data)
|
1153
|
+
case data
|
1154
|
+
when XML::NodeSet
|
1155
|
+
return data
|
1156
|
+
when XML::DocumentFragment
|
1157
|
+
return data.children
|
1158
|
+
when String
|
1159
|
+
return fragment(data).children
|
1160
|
+
when Document, XML::Attr
|
1161
|
+
# unacceptable
|
1162
|
+
when XML::Node
|
1163
|
+
return data
|
1164
|
+
end
|
1165
|
+
|
1166
|
+
raise ArgumentError, <<-EOERR
|
1167
|
+
Requires a Node, NodeSet or String argument, and cannot accept a #{data.class}.
|
1168
|
+
(You probably want to select a node from the Document with at() or search(), or create a new Node via Node.new().)
|
1169
|
+
EOERR
|
1170
|
+
end
|
1171
|
+
|
1131
1172
|
private
|
1132
1173
|
|
1133
1174
|
def keywordify(keywords)
|
@@ -1142,10 +1183,12 @@ module Nokogiri
|
|
1142
1183
|
end
|
1143
1184
|
|
1144
1185
|
def add_sibling(next_or_previous, node_or_tags)
|
1186
|
+
raise("Cannot add sibling to a node with no parent") unless parent
|
1187
|
+
|
1145
1188
|
impl = (next_or_previous == :next) ? :add_next_sibling_node : :add_previous_sibling_node
|
1146
1189
|
iter = (next_or_previous == :next) ? :reverse_each : :each
|
1147
1190
|
|
1148
|
-
node_or_tags = coerce
|
1191
|
+
node_or_tags = parent.coerce(node_or_tags)
|
1149
1192
|
if node_or_tags.is_a?(XML::NodeSet)
|
1150
1193
|
if text?
|
1151
1194
|
pivot = Nokogiri::XML::Node.new "dummy", document
|
@@ -1182,26 +1225,6 @@ module Nokogiri
|
|
1182
1225
|
[:name, :namespace, :attribute_nodes, :children]
|
1183
1226
|
end
|
1184
1227
|
|
1185
|
-
def coerce(data)
|
1186
|
-
case data
|
1187
|
-
when XML::NodeSet
|
1188
|
-
return data
|
1189
|
-
when XML::DocumentFragment
|
1190
|
-
return data.children
|
1191
|
-
when String
|
1192
|
-
return fragment(data).children
|
1193
|
-
when Document, XML::Attr
|
1194
|
-
# unacceptable
|
1195
|
-
when XML::Node
|
1196
|
-
return data
|
1197
|
-
end
|
1198
|
-
|
1199
|
-
raise ArgumentError, <<-EOERR
|
1200
|
-
Requires a Node, NodeSet or String argument, and cannot accept a #{data.class}.
|
1201
|
-
(You probably want to select a node from the Document with at() or search(), or create a new Node via Node.new().)
|
1202
|
-
EOERR
|
1203
|
-
end
|
1204
|
-
|
1205
1228
|
# @private
|
1206
1229
|
IMPLIED_XPATH_CONTEXTS = [".//".freeze].freeze
|
1207
1230
|
|
@@ -73,6 +73,8 @@ module Nokogiri
|
|
73
73
|
DEFAULT_XML = RECOVER | NONET
|
74
74
|
# the default options used for parsing HTML documents
|
75
75
|
DEFAULT_HTML = RECOVER | NOERROR | NOWARNING | NONET
|
76
|
+
# the default options used for parsing XML schemas
|
77
|
+
DEFAULT_SCHEMA = NONET
|
76
78
|
|
77
79
|
attr_accessor :options
|
78
80
|
def initialize options = STRICT
|
@@ -107,6 +109,10 @@ module Nokogiri
|
|
107
109
|
@options & RECOVER == STRICT
|
108
110
|
end
|
109
111
|
|
112
|
+
def ==(other)
|
113
|
+
other.to_i == to_i
|
114
|
+
end
|
115
|
+
|
110
116
|
alias :to_i :options
|
111
117
|
|
112
118
|
def inspect
|
@@ -5,8 +5,8 @@ module Nokogiri
|
|
5
5
|
###
|
6
6
|
# Create a new Nokogiri::XML::RelaxNG document from +string_or_io+.
|
7
7
|
# See Nokogiri::XML::RelaxNG for an example.
|
8
|
-
def RelaxNG
|
9
|
-
RelaxNG.new(string_or_io)
|
8
|
+
def RelaxNG(string_or_io, options = ParseOptions::DEFAULT_SCHEMA)
|
9
|
+
RelaxNG.new(string_or_io, options)
|
10
10
|
end
|
11
11
|
end
|
12
12
|
|
@@ -27,6 +27,10 @@ module Nokogiri
|
|
27
27
|
# end
|
28
28
|
#
|
29
29
|
# The list of errors are Nokogiri::XML::SyntaxError objects.
|
30
|
+
#
|
31
|
+
# NOTE: RelaxNG input is always treated as TRUSTED documents, meaning that they will cause the
|
32
|
+
# underlying parsing libraries to access network resources. This is counter to Nokogiri's
|
33
|
+
# "untrusted by default" security policy, but is a limitation of the underlying libraries.
|
30
34
|
class RelaxNG < Nokogiri::XML::Schema
|
31
35
|
end
|
32
36
|
end
|
data/lib/nokogiri/xml/schema.rb
CHANGED
@@ -5,8 +5,8 @@ module Nokogiri
|
|
5
5
|
###
|
6
6
|
# Create a new Nokogiri::XML::Schema object using a +string_or_io+
|
7
7
|
# object.
|
8
|
-
def Schema
|
9
|
-
Schema.new(string_or_io)
|
8
|
+
def Schema(string_or_io, options = ParseOptions::DEFAULT_SCHEMA)
|
9
|
+
Schema.new(string_or_io, options)
|
10
10
|
end
|
11
11
|
end
|
12
12
|
|
@@ -27,15 +27,23 @@ module Nokogiri
|
|
27
27
|
# end
|
28
28
|
#
|
29
29
|
# The list of errors are Nokogiri::XML::SyntaxError objects.
|
30
|
+
#
|
31
|
+
# NOTE: As of v1.11.0, Schema treats inputs as UNTRUSTED by default, and so external entities
|
32
|
+
# are not resolved from the network (`http://` or `ftp://`). Previously, parsing treated
|
33
|
+
# documents as "trusted" by default which was counter to Nokogiri's "untrusted by default"
|
34
|
+
# security policy. If a document is trusted, then the caller may turn off the NONET option via
|
35
|
+
# the ParseOptions to re-enable external entity resolution over a network connection.
|
30
36
|
class Schema
|
31
37
|
# Errors while parsing the schema file
|
32
38
|
attr_accessor :errors
|
39
|
+
# The Nokogiri::XML::ParseOptions used to parse the schema
|
40
|
+
attr_accessor :parse_options
|
33
41
|
|
34
42
|
###
|
35
43
|
# Create a new Nokogiri::XML::Schema object using a +string_or_io+
|
36
44
|
# object.
|
37
|
-
def self.new string_or_io
|
38
|
-
from_document
|
45
|
+
def self.new string_or_io, options = ParseOptions::DEFAULT_SCHEMA
|
46
|
+
from_document(Nokogiri::XML(string_or_io), options)
|
39
47
|
end
|
40
48
|
|
41
49
|
###
|