nokogiri 1.10.10 → 1.11.0.rc1
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/README.md +19 -17
- data/ext/nokogiri/extconf.rb +44 -35
- data/ext/nokogiri/nokogiri.c +12 -6
- data/ext/nokogiri/nokogiri.h +1 -0
- data/ext/nokogiri/xml_document.c +10 -1
- data/ext/nokogiri/xml_node.c +20 -0
- data/ext/nokogiri/xml_schema.c +0 -29
- data/ext/nokogiri/xslt_stylesheet.c +0 -4
- data/lib/nokogiri.rb +2 -19
- data/lib/nokogiri/css.rb +1 -0
- data/lib/nokogiri/css/node.rb +1 -0
- data/lib/nokogiri/css/parser.rb +1 -0
- data/lib/nokogiri/css/parser_extras.rb +1 -0
- data/lib/nokogiri/css/syntax_error.rb +1 -0
- data/lib/nokogiri/css/tokenizer.rb +1 -0
- data/lib/nokogiri/css/xpath_visitor.rb +3 -1
- data/lib/nokogiri/decorators/slop.rb +1 -0
- data/lib/nokogiri/html.rb +1 -0
- data/lib/nokogiri/html/builder.rb +1 -0
- data/lib/nokogiri/html/document.rb +1 -0
- data/lib/nokogiri/html/document_fragment.rb +1 -0
- data/lib/nokogiri/html/element_description.rb +1 -0
- data/lib/nokogiri/html/element_description_defaults.rb +1 -0
- data/lib/nokogiri/html/entity_lookup.rb +1 -0
- data/lib/nokogiri/html/sax/parser.rb +1 -0
- data/lib/nokogiri/html/sax/parser_context.rb +1 -0
- data/lib/nokogiri/html/sax/push_parser.rb +1 -0
- data/lib/nokogiri/jruby/dependencies.rb +20 -0
- data/lib/nokogiri/syntax_error.rb +1 -0
- data/lib/nokogiri/version.rb +85 -45
- data/lib/nokogiri/xml.rb +1 -0
- data/lib/nokogiri/xml/attr.rb +1 -0
- data/lib/nokogiri/xml/attribute_decl.rb +1 -0
- data/lib/nokogiri/xml/builder.rb +1 -0
- data/lib/nokogiri/xml/cdata.rb +1 -0
- data/lib/nokogiri/xml/character_data.rb +1 -0
- data/lib/nokogiri/xml/document.rb +3 -8
- data/lib/nokogiri/xml/document_fragment.rb +1 -0
- data/lib/nokogiri/xml/dtd.rb +1 -0
- data/lib/nokogiri/xml/element_content.rb +1 -0
- data/lib/nokogiri/xml/element_decl.rb +1 -0
- data/lib/nokogiri/xml/entity_decl.rb +1 -0
- data/lib/nokogiri/xml/entity_reference.rb +1 -0
- data/lib/nokogiri/xml/namespace.rb +1 -0
- data/lib/nokogiri/xml/node.rb +26 -12
- data/lib/nokogiri/xml/node/save_options.rb +1 -0
- data/lib/nokogiri/xml/node_set.rb +1 -0
- data/lib/nokogiri/xml/notation.rb +1 -0
- data/lib/nokogiri/xml/parse_options.rb +4 -3
- data/lib/nokogiri/xml/pp.rb +1 -0
- data/lib/nokogiri/xml/pp/character_data.rb +1 -0
- data/lib/nokogiri/xml/pp/node.rb +1 -0
- data/lib/nokogiri/xml/processing_instruction.rb +1 -0
- data/lib/nokogiri/xml/reader.rb +7 -3
- data/lib/nokogiri/xml/relax_ng.rb +1 -0
- data/lib/nokogiri/xml/sax.rb +1 -0
- data/lib/nokogiri/xml/sax/document.rb +1 -0
- data/lib/nokogiri/xml/sax/parser.rb +1 -0
- data/lib/nokogiri/xml/sax/parser_context.rb +1 -0
- data/lib/nokogiri/xml/sax/push_parser.rb +1 -0
- data/lib/nokogiri/xml/schema.rb +1 -0
- data/lib/nokogiri/xml/searchable.rb +1 -0
- data/lib/nokogiri/xml/syntax_error.rb +1 -0
- data/lib/nokogiri/xml/text.rb +1 -0
- data/lib/nokogiri/xml/xpath.rb +1 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +1 -0
- data/lib/nokogiri/xml/xpath_context.rb +1 -0
- data/lib/nokogiri/xslt.rb +1 -0
- data/lib/nokogiri/xslt/stylesheet.rb +1 -0
- data/lib/xsd/xmlparser/nokogiri.rb +1 -0
- metadata +27 -27
- data/patches/libxml2/0005-Fix-infinite-loop-in-xmlStringLenDecodeEntities.patch +0 -32
@@ -0,0 +1,20 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
# The line below caused a problem in non-GAE Rack environments.
|
3
|
+
# unless defined?(JRuby::Rack::VERSION) || defined?(AppEngine::ApiProxy)
|
4
|
+
#
|
5
|
+
# However, simply cutting defined?(JRuby::Rack::VERSION) off resulted in
|
6
|
+
# an unable-to-load-nokogiri problem. Thus, now, Nokogiri checks the presence
|
7
|
+
# of appengine-rack.jar in $LOAD_PATH. If Nokogiri is on GAE, Nokogiri
|
8
|
+
# should skip loading xml jars. This is because those are in WEB-INF/lib and
|
9
|
+
# already set in the classpath.
|
10
|
+
unless $LOAD_PATH.to_s.include?("appengine-rack")
|
11
|
+
require 'stringio'
|
12
|
+
require 'isorelax.jar'
|
13
|
+
require 'jing.jar'
|
14
|
+
require 'nekohtml.jar'
|
15
|
+
require 'nekodtd.jar'
|
16
|
+
require 'xercesImpl.jar'
|
17
|
+
require 'serializer.jar'
|
18
|
+
require 'xalan.jar'
|
19
|
+
require 'xml-apis.jar'
|
20
|
+
end
|
data/lib/nokogiri/version.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
module Nokogiri
|
2
3
|
# The version of Nokogiri you are using
|
3
|
-
VERSION = "1.10.10"
|
4
|
+
VERSION = "1.11.0.rc1"
|
4
5
|
|
5
6
|
class VersionInfo # :nodoc:
|
6
7
|
def jruby?
|
@@ -11,19 +12,30 @@ module Nokogiri
|
|
11
12
|
defined?(RUBY_ENGINE) ? RUBY_ENGINE : "mri"
|
12
13
|
end
|
13
14
|
|
14
|
-
def
|
15
|
-
|
15
|
+
def loaded_libxml_version
|
16
|
+
Gem::Version.new(LIBXML_LOADED_VERSION.
|
16
17
|
scan(/^(\d+)(\d\d)(\d\d)(?!\d)/).first.
|
17
18
|
collect(&:to_i).
|
18
|
-
join(".")
|
19
|
+
join("."))
|
19
20
|
end
|
20
21
|
|
21
|
-
def
|
22
|
-
|
22
|
+
def compiled_libxml_version
|
23
|
+
Gem::Version.new LIBXML_COMPILED_VERSION
|
24
|
+
end
|
25
|
+
|
26
|
+
def loaded_libxslt_version
|
27
|
+
Gem::Version.new(LIBXSLT_LOADED_VERSION.
|
28
|
+
scan(/^(\d+)(\d\d)(\d\d)(?!\d)/).first.
|
29
|
+
collect(&:to_i).
|
30
|
+
join("."))
|
31
|
+
end
|
32
|
+
|
33
|
+
def compiled_libxslt_version
|
34
|
+
Gem::Version.new LIBXSLT_COMPILED_VERSION
|
23
35
|
end
|
24
36
|
|
25
37
|
def libxml2?
|
26
|
-
defined?(
|
38
|
+
defined?(LIBXML_COMPILED_VERSION)
|
27
39
|
end
|
28
40
|
|
29
41
|
def libxml2_using_system?
|
@@ -35,47 +47,62 @@ module Nokogiri
|
|
35
47
|
end
|
36
48
|
|
37
49
|
def warnings
|
38
|
-
|
50
|
+
warnings = []
|
39
51
|
|
40
|
-
if
|
41
|
-
|
42
|
-
|
43
|
-
|
52
|
+
if libxml2?
|
53
|
+
if compiled_libxml_version != loaded_libxml_version
|
54
|
+
warnings << "Nokogiri was built against libxml version #{compiled_libxml_version}, but has dynamically loaded #{loaded_libxml_version}"
|
55
|
+
end
|
56
|
+
|
57
|
+
if compiled_libxslt_version != loaded_libxslt_version
|
58
|
+
warnings << "Nokogiri was built against libxslt version #{compiled_libxslt_version}, but has dynamically loaded #{loaded_libxslt_version}"
|
59
|
+
end
|
44
60
|
end
|
61
|
+
|
62
|
+
warnings
|
45
63
|
end
|
46
64
|
|
47
65
|
def to_hash
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
66
|
+
{}.tap do |vi|
|
67
|
+
vi["warnings"] = []
|
68
|
+
vi["nokogiri"] = Nokogiri::VERSION
|
69
|
+
vi["ruby"] = {}.tap do |ruby|
|
70
|
+
ruby["version"] = ::RUBY_VERSION
|
71
|
+
ruby["platform"] = ::RUBY_PLATFORM
|
72
|
+
ruby["description"] = ::RUBY_DESCRIPTION
|
73
|
+
ruby["engine"] = engine
|
74
|
+
ruby["jruby"] = jruby? if jruby?
|
75
|
+
end
|
57
76
|
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
77
|
+
if libxml2?
|
78
|
+
vi["libxml"] = {}.tap do |libxml|
|
79
|
+
if libxml2_using_packaged?
|
80
|
+
libxml["source"] = "packaged"
|
81
|
+
libxml["patches"] = NOKOGIRI_LIBXML2_PATCHES
|
82
|
+
else
|
83
|
+
libxml["source"] = "system"
|
84
|
+
end
|
85
|
+
libxml["compiled"] = compiled_libxml_version.to_s
|
86
|
+
libxml["loaded"] = loaded_libxml_version.to_s
|
87
|
+
end
|
88
|
+
|
89
|
+
vi["libxslt"] = {}.tap do |libxslt|
|
90
|
+
if libxml2_using_packaged?
|
91
|
+
libxslt["source"] = "packaged"
|
92
|
+
libxslt["patches"] = NOKOGIRI_LIBXSLT_PATCHES
|
93
|
+
else
|
94
|
+
libxslt["source"] = "system"
|
95
|
+
end
|
96
|
+
libxslt["compiled"] = compiled_libxslt_version.to_s
|
97
|
+
libxslt["loaded"] = loaded_libxslt_version.to_s
|
98
|
+
end
|
99
|
+
|
100
|
+
vi["warnings"] = warnings
|
101
|
+
elsif jruby?
|
102
|
+
vi["xerces"] = Nokogiri::XERCES_VERSION
|
103
|
+
vi["nekohtml"] = Nokogiri::NEKO_VERSION
|
69
104
|
end
|
70
|
-
hash_info["libxml"]["compiled"] = compiled_parser_version
|
71
|
-
hash_info["libxml"]["loaded"] = loaded_parser_version
|
72
|
-
hash_info["warnings"] = warnings
|
73
|
-
elsif jruby?
|
74
|
-
hash_info["xerces"] = Nokogiri::XERCES_VERSION
|
75
|
-
hash_info["nekohtml"] = Nokogiri::NEKO_VERSION
|
76
105
|
end
|
77
|
-
|
78
|
-
hash_info
|
79
106
|
end
|
80
107
|
|
81
108
|
def to_markdown
|
@@ -96,14 +123,27 @@ module Nokogiri
|
|
96
123
|
def self.instance; @@instance; end
|
97
124
|
end
|
98
125
|
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
VersionInfo.instance.libxml2?
|
126
|
+
def self.uses_libxml?(requirement = nil) # :nodoc:
|
127
|
+
return false unless VersionInfo.instance.libxml2?
|
128
|
+
return true unless requirement
|
129
|
+
return Gem::Requirement.new(requirement).satisfied_by?(VersionInfo.instance.loaded_libxml_version)
|
104
130
|
end
|
105
131
|
|
106
132
|
def self.jruby? # :nodoc:
|
107
133
|
VersionInfo.instance.jruby?
|
108
134
|
end
|
135
|
+
|
136
|
+
# Ensure constants used in this file are loaded
|
137
|
+
if Nokogiri.jruby?
|
138
|
+
require "nokogiri/jruby/dependencies"
|
139
|
+
end
|
140
|
+
begin
|
141
|
+
RUBY_VERSION =~ /(\d+\.\d+)/
|
142
|
+
require "nokogiri/#{$1}/nokogiri"
|
143
|
+
rescue LoadError
|
144
|
+
require "nokogiri/nokogiri"
|
145
|
+
end
|
146
|
+
|
147
|
+
# More complete version information about libxml
|
148
|
+
VERSION_INFO = VersionInfo.instance.to_hash
|
109
149
|
end
|
data/lib/nokogiri/xml.rb
CHANGED
data/lib/nokogiri/xml/attr.rb
CHANGED
data/lib/nokogiri/xml/builder.rb
CHANGED
data/lib/nokogiri/xml/cdata.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
module Nokogiri
|
2
3
|
module XML
|
3
4
|
##
|
@@ -254,18 +255,12 @@ module Nokogiri
|
|
254
255
|
##
|
255
256
|
# +JRuby+
|
256
257
|
# Wraps Java's org.w3c.dom.document and returns Nokogiri::XML::Document
|
257
|
-
def self.wrap
|
258
|
-
raise "JRuby only method" unless Nokogiri.jruby?
|
259
|
-
return wrapJavaDocument(document)
|
260
|
-
end
|
258
|
+
def self.wrap(document) end if false # native-ext provides Document.wrap
|
261
259
|
|
262
260
|
##
|
263
261
|
# +JRuby+
|
264
262
|
# Returns Java's org.w3c.dom.document of this Document.
|
265
|
-
def to_java
|
266
|
-
raise "JRuby only method" unless Nokogiri.jruby?
|
267
|
-
return toJavaDocument()
|
268
|
-
end
|
263
|
+
def to_java; end if false # JRuby provides #to_java
|
269
264
|
|
270
265
|
private
|
271
266
|
def self.empty_doc? string_or_io
|
data/lib/nokogiri/xml/dtd.rb
CHANGED
data/lib/nokogiri/xml/node.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# encoding: UTF-8
|
2
|
+
# frozen_string_literal: true
|
2
3
|
require 'stringio'
|
3
4
|
require 'nokogiri/xml/node/save_options'
|
4
5
|
|
@@ -337,9 +338,9 @@ module Nokogiri
|
|
337
338
|
# If you need to distinguish attributes with the same name, with different namespaces
|
338
339
|
# use #attribute_nodes instead.
|
339
340
|
def attributes
|
340
|
-
|
341
|
-
[node.node_name
|
342
|
-
|
341
|
+
attribute_nodes.each_with_object({}) do |node, hash|
|
342
|
+
hash[node.node_name] = node
|
343
|
+
end
|
343
344
|
end
|
344
345
|
|
345
346
|
###
|
@@ -348,6 +349,12 @@ module Nokogiri
|
|
348
349
|
attribute_nodes.map(&:value)
|
349
350
|
end
|
350
351
|
|
352
|
+
###
|
353
|
+
# Do this Node's attributes include <value>?
|
354
|
+
def value?(value)
|
355
|
+
values.include? value
|
356
|
+
end
|
357
|
+
|
351
358
|
###
|
352
359
|
# Get the attribute names for this Node.
|
353
360
|
def keys
|
@@ -509,10 +516,11 @@ module Nokogiri
|
|
509
516
|
# default namespaces set on ancestor will NOT be, even if self
|
510
517
|
# has no explicit default namespace.
|
511
518
|
def namespaces
|
512
|
-
|
513
|
-
|
514
|
-
|
515
|
-
|
519
|
+
namespace_scopes.each_with_object({}) do |ns, hash|
|
520
|
+
prefix = ns.prefix
|
521
|
+
key = prefix ? "xmlns:#{prefix}" : "xmlns"
|
522
|
+
hash[key] = ns.href
|
523
|
+
end
|
516
524
|
end
|
517
525
|
|
518
526
|
# Returns true if this is a Comment
|
@@ -761,10 +769,15 @@ module Nokogiri
|
|
761
769
|
end
|
762
770
|
indent_text = options[:indent_text] || ' '
|
763
771
|
|
772
|
+
# Any string times 0 returns an empty string. Therefore, use the same
|
773
|
+
# string instead of generating a new empty string for every node with
|
774
|
+
# zero indentation.
|
775
|
+
indentation = indent_times.zero? ? '' : (indent_text * indent_times)
|
776
|
+
|
764
777
|
config = SaveOptions.new(save_options.to_i)
|
765
778
|
yield config if block_given?
|
766
779
|
|
767
|
-
native_write_to(io, encoding,
|
780
|
+
native_write_to(io, encoding, indentation, config.options)
|
768
781
|
end
|
769
782
|
|
770
783
|
###
|
@@ -847,17 +860,18 @@ module Nokogiri
|
|
847
860
|
node_or_tags
|
848
861
|
end
|
849
862
|
|
863
|
+
USING_LIBXML_WITH_BROKEN_SERIALIZATION = Nokogiri.uses_libxml?("~> 2.6.0").freeze
|
864
|
+
private_constant :USING_LIBXML_WITH_BROKEN_SERIALIZATION
|
865
|
+
|
850
866
|
def to_format save_option, options
|
851
|
-
|
852
|
-
return dump_html if Nokogiri.uses_libxml? && %w[2 6] === LIBXML_VERSION.split('.')[0..1]
|
867
|
+
return dump_html if USING_LIBXML_WITH_BROKEN_SERIALIZATION
|
853
868
|
|
854
869
|
options[:save_with] = save_option unless options[:save_with]
|
855
870
|
serialize(options)
|
856
871
|
end
|
857
872
|
|
858
873
|
def write_format_to save_option, io, options
|
859
|
-
|
860
|
-
return (io << dump_html) if Nokogiri.uses_libxml? && %w[2 6] === LIBXML_VERSION.split('.')[0..1]
|
874
|
+
return (io << dump_html) if USING_LIBXML_WITH_BROKEN_SERIALIZATION
|
861
875
|
|
862
876
|
options[:save_with] ||= save_option
|
863
877
|
write_to io, options
|
@@ -1,3 +1,4 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
module Nokogiri
|
2
3
|
module XML
|
3
4
|
###
|
@@ -5,11 +6,11 @@ module Nokogiri
|
|
5
6
|
#
|
6
7
|
# == Building combinations of parse options
|
7
8
|
# You can build your own combinations of these parse options by using any of the following methods:
|
8
|
-
# *Note*: All examples attempt to set the +RECOVER+ & +NOENT+ options.
|
9
|
+
# *Note*: All examples attempt to set the +RECOVER+ & +NOENT+ options.
|
9
10
|
# [Ruby's bitwise operators] You can use the Ruby bitwise operators to set various combinations.
|
10
|
-
# Nokogiri.XML('<content>Chapter 1</content',
|
11
|
+
# Nokogiri.XML('<content>Chapter 1</content', nil, nil, Nokogiri::XML::ParseOptions.new((1 << 0) | (1 << 1)))
|
11
12
|
# [Method chaining] Every option has an equivalent method in lowercase. You can chain these methods together to set various combinations.
|
12
|
-
# Nokogiri.XML('<content>Chapter 1</content',
|
13
|
+
# Nokogiri.XML('<content>Chapter 1</content', nil, nil, Nokogiri::XML::ParseOptions.new.recover.noent)
|
13
14
|
# [Using Ruby Blocks] You can also setup parse combinations in the block passed to Nokogiri.XML or Nokogiri.HTML
|
14
15
|
# Nokogiri.XML('<content>Chapter 1</content') {|config| config.recover.noent}
|
15
16
|
#
|