nokogiri 1.11.0.rc3 → 1.11.0.rc4

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (37)
  1. checksums.yaml +4 -4
  2. data/LICENSE-DEPENDENCIES.md +1015 -947
  3. data/README.md +1 -1
  4. data/ext/nokogiri/depend +476 -357
  5. data/ext/nokogiri/extconf.rb +441 -321
  6. data/ext/nokogiri/html_document.c +79 -78
  7. data/ext/nokogiri/html_sax_parser_context.c +2 -2
  8. data/ext/nokogiri/nokogiri.c +34 -46
  9. data/ext/nokogiri/nokogiri.h +22 -26
  10. data/ext/nokogiri/xml_document.c +2 -2
  11. data/ext/nokogiri/xml_node.c +1 -1
  12. data/ext/nokogiri/xml_node_set.c +1 -1
  13. data/ext/nokogiri/xml_relax_ng.c +29 -11
  14. data/ext/nokogiri/xml_sax_parser.c +2 -7
  15. data/ext/nokogiri/xml_sax_parser_context.c +2 -2
  16. data/ext/nokogiri/xml_schema.c +55 -13
  17. data/ext/nokogiri/xml_xpath_context.c +80 -4
  18. data/ext/nokogiri/xslt_stylesheet.c +1 -4
  19. data/lib/nokogiri.rb +1 -1
  20. data/lib/nokogiri/css/parser.rb +3 -3
  21. data/lib/nokogiri/css/parser.y +2 -2
  22. data/lib/nokogiri/css/xpath_visitor.rb +70 -42
  23. data/lib/nokogiri/html/document.rb +12 -26
  24. data/lib/nokogiri/version.rb +2 -149
  25. data/lib/nokogiri/version/constant.rb +5 -0
  26. data/lib/nokogiri/version/info.rb +182 -0
  27. data/lib/nokogiri/xml/document.rb +17 -7
  28. data/lib/nokogiri/xml/document_fragment.rb +4 -6
  29. data/lib/nokogiri/xml/node.rb +50 -27
  30. data/lib/nokogiri/xml/parse_options.rb +6 -0
  31. data/lib/nokogiri/xml/relax_ng.rb +6 -2
  32. data/lib/nokogiri/xml/schema.rb +12 -4
  33. data/lib/nokogiri/xml/searchable.rb +3 -1
  34. data/patches/libxml2/0006-htmlParseComment-treat-as-if-it-closed-the-comment.patch +73 -0
  35. data/patches/libxml2/0007-use-new-htmlParseLookupCommentEnd-to-find-comment-en.patch +103 -0
  36. data/patches/libxml2/0008-use-glibc-strlen.patch +53 -0
  37. metadata +34 -22
data/lib/nokogiri/version/constant.rb +5 -0
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+ module Nokogiri
3
+ # The version of Nokogiri you are using
4
+ VERSION = "1.11.0.rc4"
5
+ end
data/lib/nokogiri/version/info.rb +182 -0
@@ -0,0 +1,182 @@
1
+ # frozen_string_literal: true
2
+ require "singleton"
3
+ require "shellwords"
4
+
5
+ module Nokogiri
6
+ class VersionInfo # :nodoc:
7
+ include Singleton
8
+
9
+ def jruby?
10
+ ::JRUBY_VERSION if ::RUBY_PLATFORM == "java"
11
+ end
12
+
13
+ def engine
14
+ defined?(::RUBY_ENGINE) ? ::RUBY_ENGINE : "mri"
15
+ end
16
+
17
+ def loaded_libxml_version
18
+ Gem::Version.new(Nokogiri::LIBXML_LOADED_VERSION
19
+ .scan(/^(\d+)(\d\d)(\d\d)(?!\d)/).first
20
+ .collect(&:to_i)
21
+ .join("."))
22
+ end
23
+
24
+ def compiled_libxml_version
25
+ Gem::Version.new(Nokogiri::LIBXML_COMPILED_VERSION)
26
+ end
27
+
28
+ def loaded_libxslt_version
29
+ Gem::Version.new(Nokogiri::LIBXSLT_LOADED_VERSION
30
+ .scan(/^(\d+)(\d\d)(\d\d)(?!\d)/).first
31
+ .collect(&:to_i)
32
+ .join("."))
33
+ end
34
+
35
+ def compiled_libxslt_version
36
+ Gem::Version.new(Nokogiri::LIBXSLT_COMPILED_VERSION)
37
+ end
38
+
39
+ def libxml2?
40
+ defined?(Nokogiri::LIBXML_COMPILED_VERSION)
41
+ end
42
+
43
+ def libxml2_has_iconv?
44
+ defined?(Nokogiri::LIBXML_ICONV_ENABLED) && Nokogiri::LIBXML_ICONV_ENABLED
45
+ end
46
+
47
+ def libxml2_using_packaged?
48
+ libxml2? && Nokogiri::PACKAGED_LIBRARIES
49
+ end
50
+
51
+ def libxml2_using_system?
52
+ libxml2? && !libxml2_using_packaged?
53
+ end
54
+
55
+ def libxml2_precompiled?
56
+ libxml2_using_packaged? && Nokogiri::PRECOMPILED_LIBRARIES
57
+ end
58
+
59
+ def warnings
60
+ warnings = []
61
+
62
+ if libxml2?
63
+ if compiled_libxml_version != loaded_libxml_version
64
+ warnings << "Nokogiri was built against libxml version #{compiled_libxml_version}, but has dynamically loaded #{loaded_libxml_version}"
65
+ end
66
+
67
+ if compiled_libxslt_version != loaded_libxslt_version
68
+ warnings << "Nokogiri was built against libxslt version #{compiled_libxslt_version}, but has dynamically loaded #{loaded_libxslt_version}"
69
+ end
70
+ end
71
+
72
+ warnings
73
+ end
74
+
75
+ def to_hash
76
+ header_directory = File.expand_path(File.join(File.dirname(__FILE__), "../../../ext/nokogiri"))
77
+ {}.tap do |vi|
78
+ vi["warnings"] = []
79
+ vi["nokogiri"] = {}.tap do |nokogiri|
80
+ nokogiri["version"] = Nokogiri::VERSION
81
+
82
+ unless jruby?
83
+ cppflags = ["-I#{header_directory.shellescape}"]
84
+ if libxml2_using_packaged?
85
+ cppflags << "-I#{File.join(header_directory, "include").shellescape}"
86
+ cppflags << "-I#{File.join(header_directory, "include/libxml2").shellescape}"
87
+ end
88
+ nokogiri["cppflags"] = cppflags
89
+ end
90
+ end
91
+ vi["ruby"] = {}.tap do |ruby|
92
+ ruby["version"] = ::RUBY_VERSION
93
+ ruby["platform"] = ::RUBY_PLATFORM
94
+ ruby["gem_platform"] = ::Gem::Platform.local.to_s
95
+ ruby["description"] = ::RUBY_DESCRIPTION
96
+ ruby["engine"] = engine
97
+ ruby["jruby"] = jruby? if jruby?
98
+ end
99
+
100
+ if libxml2?
101
+ vi["libxml"] = {}.tap do |libxml|
102
+ if libxml2_using_packaged?
103
+ libxml["source"] = "packaged"
104
+ libxml["precompiled"] = libxml2_precompiled?
105
+ libxml["patches"] = Nokogiri::LIBXML2_PATCHES
106
+
107
+ # this is for nokogumbo and shouldn't be forever
108
+ libxml["libxml2_path"] = header_directory
109
+ else
110
+ libxml["source"] = "system"
111
+ end
112
+ libxml["iconv_enabled"] = libxml2_has_iconv?
113
+ libxml["compiled"] = compiled_libxml_version.to_s
114
+ libxml["loaded"] = loaded_libxml_version.to_s
115
+ end
116
+
117
+ vi["libxslt"] = {}.tap do |libxslt|
118
+ if libxml2_using_packaged?
119
+ libxslt["source"] = "packaged"
120
+ libxslt["precompiled"] = libxml2_precompiled?
121
+ libxslt["patches"] = Nokogiri::LIBXSLT_PATCHES
122
+ else
123
+ libxslt["source"] = "system"
124
+ end
125
+ libxslt["compiled"] = compiled_libxslt_version.to_s
126
+ libxslt["loaded"] = loaded_libxslt_version.to_s
127
+ end
128
+
129
+ vi["warnings"] = warnings
130
+ end
131
+
132
+ if defined?(Nokogiri::OTHER_LIBRARY_VERSIONS)
133
+ # see extconf for how this string is assembled: "lib1name:lib1version,lib2name:lib2version"
134
+ vi["other_libraries"] = Hash[*Nokogiri::OTHER_LIBRARY_VERSIONS.split(/[,:]/)]
135
+ elsif jruby?
136
+ vi["other_libraries"] = {}.tap do |ol|
137
+ ol["xerces"] = Nokogiri::XERCES_VERSION
138
+ ol["nekohtml"] = Nokogiri::NEKO_VERSION
139
+ end
140
+ end
141
+ end
142
+ end
143
+
144
+ def to_markdown
145
+ begin
146
+ require "psych"
147
+ rescue LoadError
148
+ end
149
+ require "yaml"
150
+ "# Nokogiri (#{Nokogiri::VERSION})\n" +
151
+ YAML.dump(to_hash).each_line.map { |line| " #{line}" }.join
152
+ end
153
+
154
+ instance.warnings.each do |warning|
155
+ warn "WARNING: #{warning}"
156
+ end
157
+ end
158
+
159
+ def self.uses_libxml?(requirement = nil) # :nodoc:
160
+ return false unless VersionInfo.instance.libxml2?
161
+ return true unless requirement
162
+ Gem::Requirement.new(requirement).satisfied_by?(VersionInfo.instance.loaded_libxml_version)
163
+ end
164
+
165
+ def self.jruby? # :nodoc:
166
+ VersionInfo.instance.jruby?
167
+ end
168
+
169
+ # Ensure constants used in this file are loaded - see #1896
170
+ if Nokogiri.jruby?
171
+ require "nokogiri/jruby/dependencies"
172
+ end
173
+ begin
174
+ ::RUBY_VERSION =~ /(\d+\.\d+)/
175
+ require "nokogiri/#{Regexp.last_match(1)}/nokogiri"
176
+ rescue LoadError
177
+ require "nokogiri/nokogiri"
178
+ end
179
+
180
+ # More complete version information about libxml
181
+ VERSION_INFO = VersionInfo.instance.to_hash
182
+ end
data/lib/nokogiri/xml/document.rb +17 -7
@@ -1,4 +1,7 @@
1
1
  # frozen_string_literal: true
2
+
3
+ require 'pathname'
4
+
2
5
  module Nokogiri
3
6
  module XML
4
7
  ##
@@ -44,9 +47,11 @@ module Nokogiri
44
47
  #
45
48
  def self.parse string_or_io, url = nil, encoding = nil, options = ParseOptions::DEFAULT_XML
46
49
  options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
47
- # Give the options to the user
50
+
48
51
  yield options if block_given?
49
52
 
53
+ url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
54
+
50
55
  if empty_doc?(string_or_io)
51
56
  if options.strict?
52
57
  raise Nokogiri::XML::SyntaxError.new("Empty document")
@@ -56,12 +61,17 @@ module Nokogiri
56
61
  end
57
62
 
58
63
  doc = if string_or_io.respond_to?(:read)
59
- url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
60
- read_io(string_or_io, url, encoding, options.to_i)
61
- else
62
- # read_memory pukes on empty docs
63
- read_memory(string_or_io, url, encoding, options.to_i)
64
- end
64
+ if string_or_io.is_a?(Pathname)
65
+ # resolve the Pathname to the file and open it as an IO object, see #2110
66
+ string_or_io = string_or_io.expand_path.open
67
+ url ||= string_or_io.path
68
+ end
69
+
70
+ read_io(string_or_io, url, encoding, options.to_i)
71
+ else
72
+ # read_memory pukes on empty docs
73
+ read_memory(string_or_io, url, encoding, options.to_i)
74
+ end
65
75
 
66
76
  # do xinclude processing
67
77
  doc.do_xinclude(options) if options.xinclude?
data/lib/nokogiri/xml/document_fragment.rb +4 -6
@@ -141,6 +141,10 @@ module Nokogiri
141
141
  document.errors = things
142
142
  end
143
143
 
144
+ def fragment(data)
145
+ document.fragment(data)
146
+ end
147
+
144
148
  private
145
149
 
146
150
  # fix for issue 770
@@ -150,12 +154,6 @@ module Nokogiri
150
154
  %Q{xmlns#{prefix}="#{namespace.href}"}
151
155
  end.join ' '
152
156
  end
153
-
154
- def coerce data
155
- return super unless String === data
156
-
157
- document.fragment(data).children
158
- end
159
157
  end
160
158
  end
161
159
  end
data/lib/nokogiri/xml/node.rb +50 -27
@@ -267,6 +267,8 @@ module Nokogiri
267
267
  #
268
268
  # Also see related method +swap+.
269
269
  def replace(node_or_tags)
270
+ raise("Cannot replace a node with no parent") unless parent
271
+
270
272
  # We cannot replace a text node directly, otherwise libxml will return
271
273
  # an internal error at parser.c:13031, I don't know exactly why
272
274
  # libxml is trying to find a parent node that is an element or document
@@ -278,7 +280,7 @@ module Nokogiri
278
280
  return replacee.replace node_or_tags
279
281
  end
280
282
 
281
- node_or_tags = coerce(node_or_tags)
283
+ node_or_tags = parent.coerce(node_or_tags)
282
284
 
283
285
  if node_or_tags.is_a?(XML::NodeSet)
284
286
  node_or_tags.each { |n| add_previous_sibling n }
@@ -819,13 +821,30 @@ module Nokogiri
819
821
 
820
822
  return Nokogiri::XML::NodeSet.new(document) if contents.empty?
821
823
 
822
- ##
823
- # This is a horrible hack, but I don't care. See #313 for background.
824
+ # libxml2 does not obey the `recover` option after encountering errors during `in_context`
825
+ # parsing, and so this horrible hack is here to try to emulate recovery behavior.
826
+ #
827
+ # Unfortunately, this means we're no longer parsing "in context" and so namespaces that
828
+ # would have been inherited from the context node won't be handled correctly. This hack was
829
+ # written in 2010, and I regret it, because it's silently degrading functionality in a way
830
+ # that's not easily prevented (or even detected).
831
+ #
832
+ # I think preferable behavior would be to either:
833
+ #
834
+ # a. add an error noting that we "fell back" and pointing the user to turning off the `recover` option
835
+ # b. don't recover, but raise a sensible exception
836
+ #
837
+ # For context and background: https://github.com/sparklemotion/nokogiri/issues/313
838
+ # FIXME bug report: https://github.com/sparklemotion/nokogiri/issues/2092
824
839
  error_count = document.errors.length
825
840
  node_set = in_context(contents, options.to_i)
826
- if node_set.empty? and document.errors.length > error_count and options.recover?
827
- fragment = Nokogiri::HTML::DocumentFragment.parse contents
828
- node_set = fragment.children
841
+ if (node_set.empty? && (document.errors.length > error_count))
842
+ if options.recover?
843
+ fragment = Nokogiri::HTML::DocumentFragment.parse contents
844
+ node_set = fragment.children
845
+ else
846
+ raise document.errors[error_count]
847
+ end
829
848
  end
830
849
  node_set
831
850
  end
@@ -1128,6 +1147,28 @@ module Nokogiri
1128
1147
 
1129
1148
  # @!endgroup
1130
1149
 
1150
+ protected
1151
+
1152
+ def coerce(data)
1153
+ case data
1154
+ when XML::NodeSet
1155
+ return data
1156
+ when XML::DocumentFragment
1157
+ return data.children
1158
+ when String
1159
+ return fragment(data).children
1160
+ when Document, XML::Attr
1161
+ # unacceptable
1162
+ when XML::Node
1163
+ return data
1164
+ end
1165
+
1166
+ raise ArgumentError, <<-EOERR
1167
+ Requires a Node, NodeSet or String argument, and cannot accept a #{data.class}.
1168
+ (You probably want to select a node from the Document with at() or search(), or create a new Node via Node.new().)
1169
+ EOERR
1170
+ end
1171
+
1131
1172
  private
1132
1173
 
1133
1174
  def keywordify(keywords)
@@ -1142,10 +1183,12 @@ module Nokogiri
1142
1183
  end
1143
1184
 
1144
1185
  def add_sibling(next_or_previous, node_or_tags)
1186
+ raise("Cannot add sibling to a node with no parent") unless parent
1187
+
1145
1188
  impl = (next_or_previous == :next) ? :add_next_sibling_node : :add_previous_sibling_node
1146
1189
  iter = (next_or_previous == :next) ? :reverse_each : :each
1147
1190
 
1148
- node_or_tags = coerce node_or_tags
1191
+ node_or_tags = parent.coerce(node_or_tags)
1149
1192
  if node_or_tags.is_a?(XML::NodeSet)
1150
1193
  if text?
1151
1194
  pivot = Nokogiri::XML::Node.new "dummy", document
@@ -1182,26 +1225,6 @@ module Nokogiri
1182
1225
  [:name, :namespace, :attribute_nodes, :children]
1183
1226
  end
1184
1227
 
1185
- def coerce(data)
1186
- case data
1187
- when XML::NodeSet
1188
- return data
1189
- when XML::DocumentFragment
1190
- return data.children
1191
- when String
1192
- return fragment(data).children
1193
- when Document, XML::Attr
1194
- # unacceptable
1195
- when XML::Node
1196
- return data
1197
- end
1198
-
1199
- raise ArgumentError, <<-EOERR
1200
- Requires a Node, NodeSet or String argument, and cannot accept a #{data.class}.
1201
- (You probably want to select a node from the Document with at() or search(), or create a new Node via Node.new().)
1202
- EOERR
1203
- end
1204
-
1205
1228
  # @private
1206
1229
  IMPLIED_XPATH_CONTEXTS = [".//".freeze].freeze
1207
1230
 
data/lib/nokogiri/xml/parse_options.rb +6 -0
@@ -73,6 +73,8 @@ module Nokogiri
73
73
  DEFAULT_XML = RECOVER | NONET
74
74
  # the default options used for parsing HTML documents
75
75
  DEFAULT_HTML = RECOVER | NOERROR | NOWARNING | NONET
76
+ # the default options used for parsing XML schemas
77
+ DEFAULT_SCHEMA = NONET
76
78
 
77
79
  attr_accessor :options
78
80
  def initialize options = STRICT
@@ -107,6 +109,10 @@ module Nokogiri
107
109
  @options & RECOVER == STRICT
108
110
  end
109
111
 
112
+ def ==(other)
113
+ other.to_i == to_i
114
+ end
115
+
110
116
  alias :to_i :options
111
117
 
112
118
  def inspect
data/lib/nokogiri/xml/relax_ng.rb +6 -2
@@ -5,8 +5,8 @@ module Nokogiri
5
5
  ###
6
6
  # Create a new Nokogiri::XML::RelaxNG document from +string_or_io+.
7
7
  # See Nokogiri::XML::RelaxNG for an example.
8
- def RelaxNG string_or_io
9
- RelaxNG.new(string_or_io)
8
+ def RelaxNG(string_or_io, options = ParseOptions::DEFAULT_SCHEMA)
9
+ RelaxNG.new(string_or_io, options)
10
10
  end
11
11
  end
12
12
 
@@ -27,6 +27,10 @@ module Nokogiri
27
27
  # end
28
28
  #
29
29
  # The list of errors are Nokogiri::XML::SyntaxError objects.
30
+ #
31
+ # NOTE: RelaxNG input is always treated as TRUSTED documents, meaning that they will cause the
32
+ # underlying parsing libraries to access network resources. This is counter to Nokogiri's
33
+ # "untrusted by default" security policy, but is a limitation of the underlying libraries.
30
34
  class RelaxNG < Nokogiri::XML::Schema
31
35
  end
32
36
  end
data/lib/nokogiri/xml/schema.rb +12 -4
@@ -5,8 +5,8 @@ module Nokogiri
5
5
  ###
6
6
  # Create a new Nokogiri::XML::Schema object using a +string_or_io+
7
7
  # object.
8
- def Schema string_or_io
9
- Schema.new(string_or_io)
8
+ def Schema(string_or_io, options = ParseOptions::DEFAULT_SCHEMA)
9
+ Schema.new(string_or_io, options)
10
10
  end
11
11
  end
12
12
 
@@ -27,15 +27,23 @@ module Nokogiri
27
27
  # end
28
28
  #
29
29
  # The list of errors are Nokogiri::XML::SyntaxError objects.
30
+ #
31
+ # NOTE: As of v1.11.0, Schema treats inputs as UNTRUSTED by default, and so external entities
32
+ # are not resolved from the network (`http://` or `ftp://`). Previously, parsing treated
33
+ # documents as "trusted" by default which was counter to Nokogiri's "untrusted by default"
34
+ # security policy. If a document is trusted, then the caller may turn off the NONET option via
35
+ # the ParseOptions to re-enable external entity resolution over a network connection.
30
36
  class Schema
31
37
  # Errors while parsing the schema file
32
38
  attr_accessor :errors
39
+ # The Nokogiri::XML::ParseOptions used to parse the schema
40
+ attr_accessor :parse_options
33
41
 
34
42
  ###
35
43
  # Create a new Nokogiri::XML::Schema object using a +string_or_io+
36
44
  # object.
37
- def self.new string_or_io
38
- from_document Nokogiri::XML(string_or_io)
45
+ def self.new string_or_io, options = ParseOptions::DEFAULT_SCHEMA
46
+ from_document(Nokogiri::XML(string_or_io), options)
39
47
  end
40
48
 
41
49
  ###