nokogiri 1.11.0.rc3-java → 1.11.0.rc4-java

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (83)
  1. checksums.yaml +4 -4
  2. data/LICENSE-DEPENDENCIES.md +1015 -947
  3. data/README.md +1 -1
  4. data/ext/java/nokogiri/HtmlSaxParserContext.java +1 -1
  5. data/ext/java/nokogiri/HtmlSaxPushParser.java +1 -1
  6. data/ext/java/nokogiri/XmlNode.java +4 -6
  7. data/ext/java/nokogiri/XmlReader.java +2 -2
  8. data/ext/java/nokogiri/XmlRelaxng.java +10 -3
  9. data/ext/java/nokogiri/XmlSaxParserContext.java +8 -3
  10. data/ext/java/nokogiri/XmlSchema.java +84 -24
  11. data/ext/java/nokogiri/XmlSyntaxError.java +2 -6
  12. data/ext/java/nokogiri/XmlXpathContext.java +48 -18
  13. data/ext/java/nokogiri/internals/HtmlDomParserContext.java +51 -39
  14. data/ext/java/nokogiri/internals/NokogiriHandler.java +1 -1
  15. data/ext/java/nokogiri/internals/NokogiriNamespaceContext.java +9 -2
  16. data/ext/java/nokogiri/internals/NokogiriXPathFunction.java +62 -6
  17. data/ext/java/nokogiri/internals/NokogiriXPathFunctionResolver.java +4 -2
  18. data/ext/java/nokogiri/internals/XmlDomParserContext.java +2 -2
  19. data/ext/nokogiri/depend +476 -357
  20. data/ext/nokogiri/extconf.rb +441 -321
  21. data/ext/nokogiri/html_document.c +79 -78
  22. data/ext/nokogiri/html_sax_parser_context.c +2 -2
  23. data/ext/nokogiri/nokogiri.c +34 -46
  24. data/ext/nokogiri/xml_document.c +2 -2
  25. data/ext/nokogiri/xml_node.c +1 -1
  26. data/ext/nokogiri/xml_node_set.c +1 -1
  27. data/ext/nokogiri/xml_relax_ng.c +29 -11
  28. data/ext/nokogiri/xml_sax_parser.c +2 -7
  29. data/ext/nokogiri/xml_sax_parser_context.c +2 -2
  30. data/ext/nokogiri/xml_schema.c +55 -13
  31. data/ext/nokogiri/xml_xpath_context.c +80 -4
  32. data/ext/nokogiri/xslt_stylesheet.c +1 -4
  33. data/lib/nokogiri.rb +1 -1
  34. data/lib/nokogiri/css/parser.rb +3 -3
  35. data/lib/nokogiri/css/parser.y +2 -2
  36. data/lib/nokogiri/css/xpath_visitor.rb +70 -42
  37. data/lib/nokogiri/html/document.rb +12 -26
  38. data/lib/nokogiri/nokogiri.jar +0 -0
  39. data/lib/nokogiri/version.rb +2 -149
  40. data/lib/nokogiri/version/constant.rb +5 -0
  41. data/lib/nokogiri/version/info.rb +182 -0
  42. data/lib/nokogiri/xml/document.rb +17 -7
  43. data/lib/nokogiri/xml/document_fragment.rb +4 -6
  44. data/lib/nokogiri/xml/node.rb +50 -27
  45. data/lib/nokogiri/xml/parse_options.rb +6 -0
  46. data/lib/nokogiri/xml/relax_ng.rb +6 -2
  47. data/lib/nokogiri/xml/schema.rb +12 -4
  48. data/lib/nokogiri/xml/searchable.rb +3 -1
  49. metadata +47 -73
  50. data/ext/nokogiri/html_document.h +0 -10
  51. data/ext/nokogiri/html_element_description.h +0 -10
  52. data/ext/nokogiri/html_entity_lookup.h +0 -8
  53. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  54. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  55. data/ext/nokogiri/nokogiri.h +0 -134
  56. data/ext/nokogiri/xml_attr.h +0 -9
  57. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  58. data/ext/nokogiri/xml_cdata.h +0 -9
  59. data/ext/nokogiri/xml_comment.h +0 -9
  60. data/ext/nokogiri/xml_document.h +0 -23
  61. data/ext/nokogiri/xml_document_fragment.h +0 -10
  62. data/ext/nokogiri/xml_dtd.h +0 -10
  63. data/ext/nokogiri/xml_element_content.h +0 -10
  64. data/ext/nokogiri/xml_element_decl.h +0 -9
  65. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  66. data/ext/nokogiri/xml_entity_decl.h +0 -10
  67. data/ext/nokogiri/xml_entity_reference.h +0 -9
  68. data/ext/nokogiri/xml_io.h +0 -11
  69. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  70. data/ext/nokogiri/xml_namespace.h +0 -14
  71. data/ext/nokogiri/xml_node.h +0 -13
  72. data/ext/nokogiri/xml_node_set.h +0 -12
  73. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  74. data/ext/nokogiri/xml_reader.h +0 -10
  75. data/ext/nokogiri/xml_relax_ng.h +0 -9
  76. data/ext/nokogiri/xml_sax_parser.h +0 -39
  77. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  78. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  79. data/ext/nokogiri/xml_schema.h +0 -9
  80. data/ext/nokogiri/xml_syntax_error.h +0 -13
  81. data/ext/nokogiri/xml_text.h +0 -9
  82. data/ext/nokogiri/xml_xpath_context.h +0 -10
  83. data/ext/nokogiri/xslt_stylesheet.h +0 -14
@@ -1,4 +1,7 @@
1
1
  # frozen_string_literal: true
2
+
3
+ require 'pathname'
4
+
2
5
  module Nokogiri
3
6
  module HTML
4
7
  class Document < Nokogiri::XML::Document
@@ -161,11 +164,12 @@ module Nokogiri
161
164
  # Nokogiri::XML::ParseOptions::RECOVER. See the constants in
162
165
  # Nokogiri::XML::ParseOptions.
163
166
  def parse string_or_io, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML
164
-
165
167
  options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
166
- # Give the options to the user
168
+
167
169
  yield options if block_given?
168
170
 
171
+ url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
172
+
169
173
  if string_or_io.respond_to?(:encoding)
170
174
  unless string_or_io.encoding.name == "ASCII-8BIT"
171
175
  encoding ||= string_or_io.encoding.name
@@ -173,7 +177,12 @@ module Nokogiri
173
177
  end
174
178
 
175
179
  if string_or_io.respond_to?(:read)
176
- url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
180
+ if string_or_io.is_a?(Pathname)
181
+ # resolve the Pathname to the file and open it as an IO object, see #2110
182
+ string_or_io = string_or_io.expand_path.open
183
+ url ||= string_or_io.path
184
+ end
185
+
177
186
  unless encoding
178
187
  # Libxml2's parser has poor support for encoding
179
188
  # detection. First, it does not recognize the HTML5
@@ -252,9 +261,6 @@ module Nokogiri
252
261
  end
253
262
 
254
263
  def self.detect_encoding(chunk)
255
- if Nokogiri.jruby? && EncodingReader.is_jruby_without_fix?
256
- return EncodingReader.detect_encoding_for_jruby_without_fix(chunk)
257
- end
258
264
  m = chunk.match(/\A(<\?xml[ \t\r\n]+[^>]*>)/) and
259
265
  return Nokogiri.XML(m[1]).encoding
260
266
 
@@ -273,26 +279,6 @@ module Nokogiri
273
279
  end
274
280
  end
275
281
 
276
- def self.is_jruby_without_fix?
277
- JRUBY_VERSION.split('.').join.to_i < 165
278
- end
279
-
280
- def self.detect_encoding_for_jruby_without_fix(chunk)
281
- m = chunk.match(/\A(<\?xml[ \t\r\n]+[^>]*>)/) and
282
- return Nokogiri.XML(m[1]).encoding
283
-
284
- m = chunk.match(/(<meta\s)(.*)(charset\s*=\s*([\w-]+))(.*)/i) and
285
- return m[4]
286
-
287
- catch(:encoding_found) {
288
- Nokogiri::HTML::SAX::Parser.new(JumpSAXHandler.new(:encoding_found.to_s)).parse(chunk)
289
- nil
290
- }
291
- rescue Nokogiri::SyntaxError, RuntimeError
292
- # Ignore parser errors that nokogiri may raise
293
- nil
294
- end
295
-
296
282
  def initialize(io)
297
283
  @io = io
298
284
  @firstchunk = nil
Binary file (data/lib/nokogiri/nokogiri.jar) — no textual diff shown
@@ -1,150 +1,3 @@
1
1
  # frozen_string_literal: true
2
- module Nokogiri
3
- # The version of Nokogiri you are using
4
- VERSION = "1.11.0.rc3"
5
-
6
- class VersionInfo # :nodoc:
7
- def jruby?
8
- ::JRUBY_VERSION if RUBY_PLATFORM == "java"
9
- end
10
-
11
- def engine
12
- defined?(RUBY_ENGINE) ? RUBY_ENGINE : "mri"
13
- end
14
-
15
- def loaded_libxml_version
16
- Gem::Version.new(LIBXML_LOADED_VERSION.
17
- scan(/^(\d+)(\d\d)(\d\d)(?!\d)/).first.
18
- collect(&:to_i).
19
- join("."))
20
- end
21
-
22
- def compiled_libxml_version
23
- Gem::Version.new LIBXML_COMPILED_VERSION
24
- end
25
-
26
- def loaded_libxslt_version
27
- Gem::Version.new(LIBXSLT_LOADED_VERSION.
28
- scan(/^(\d+)(\d\d)(\d\d)(?!\d)/).first.
29
- collect(&:to_i).
30
- join("."))
31
- end
32
-
33
- def compiled_libxslt_version
34
- Gem::Version.new LIBXSLT_COMPILED_VERSION
35
- end
36
-
37
- def libxml2?
38
- defined?(LIBXML_COMPILED_VERSION)
39
- end
40
-
41
- def libxml2_using_system?
42
- !libxml2_using_packaged?
43
- end
44
-
45
- def libxml2_using_packaged?
46
- NOKOGIRI_USE_PACKAGED_LIBRARIES
47
- end
48
-
49
- def warnings
50
- warnings = []
51
-
52
- if libxml2?
53
- if compiled_libxml_version != loaded_libxml_version
54
- warnings << "Nokogiri was built against libxml version #{compiled_libxml_version}, but has dynamically loaded #{loaded_libxml_version}"
55
- end
56
-
57
- if compiled_libxslt_version != loaded_libxslt_version
58
- warnings << "Nokogiri was built against libxslt version #{compiled_libxslt_version}, but has dynamically loaded #{loaded_libxslt_version}"
59
- end
60
- end
61
-
62
- warnings
63
- end
64
-
65
- def to_hash
66
- {}.tap do |vi|
67
- vi["warnings"] = []
68
- vi["nokogiri"] = Nokogiri::VERSION
69
- vi["ruby"] = {}.tap do |ruby|
70
- ruby["version"] = ::RUBY_VERSION
71
- ruby["platform"] = ::RUBY_PLATFORM
72
- ruby["gem_platform"] = ::Gem::Platform.local.to_s
73
- ruby["description"] = ::RUBY_DESCRIPTION
74
- ruby["engine"] = engine
75
- ruby["jruby"] = jruby? if jruby?
76
- end
77
-
78
- if libxml2?
79
- vi["libxml"] = {}.tap do |libxml|
80
- if libxml2_using_packaged?
81
- libxml["source"] = "packaged"
82
- libxml["patches"] = NOKOGIRI_LIBXML2_PATCHES
83
- else
84
- libxml["source"] = "system"
85
- end
86
- libxml["compiled"] = compiled_libxml_version.to_s
87
- libxml["loaded"] = loaded_libxml_version.to_s
88
- end
89
-
90
- vi["libxslt"] = {}.tap do |libxslt|
91
- if libxml2_using_packaged?
92
- libxslt["source"] = "packaged"
93
- libxslt["patches"] = NOKOGIRI_LIBXSLT_PATCHES
94
- else
95
- libxslt["source"] = "system"
96
- end
97
- libxslt["compiled"] = compiled_libxslt_version.to_s
98
- libxslt["loaded"] = loaded_libxslt_version.to_s
99
- end
100
-
101
- vi["warnings"] = warnings
102
- elsif jruby?
103
- vi["xerces"] = Nokogiri::XERCES_VERSION
104
- vi["nekohtml"] = Nokogiri::NEKO_VERSION
105
- end
106
- end
107
- end
108
-
109
- def to_markdown
110
- begin
111
- require "psych"
112
- rescue LoadError
113
- end
114
- require "yaml"
115
- "# Nokogiri (#{Nokogiri::VERSION})\n" +
116
- YAML.dump(to_hash).each_line.map { |line| " #{line}" }.join
117
- end
118
-
119
- # FIXME: maybe switch to singleton?
120
- @@instance = new
121
- @@instance.warnings.each do |warning|
122
- warn "WARNING: #{warning}"
123
- end
124
- def self.instance; @@instance; end
125
- end
126
-
127
- def self.uses_libxml?(requirement = nil) # :nodoc:
128
- return false unless VersionInfo.instance.libxml2?
129
- return true unless requirement
130
- return Gem::Requirement.new(requirement).satisfied_by?(VersionInfo.instance.loaded_libxml_version)
131
- end
132
-
133
- def self.jruby? # :nodoc:
134
- VersionInfo.instance.jruby?
135
- end
136
-
137
- # Ensure constants used in this file are loaded - see #1896
138
- if Nokogiri.jruby?
139
- require "nokogiri/jruby/dependencies"
140
- end
141
- begin
142
- RUBY_VERSION =~ /(\d+\.\d+)/
143
- require "nokogiri/#{$1}/nokogiri"
144
- rescue LoadError
145
- require "nokogiri/nokogiri"
146
- end
147
-
148
- # More complete version information about libxml
149
- VERSION_INFO = VersionInfo.instance.to_hash
150
- end
2
+ require_relative "version/constant"
3
+ require_relative "version/info"
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+ module Nokogiri
3
+ # The version of Nokogiri you are using
4
+ VERSION = "1.11.0.rc4"
5
+ end
@@ -0,0 +1,182 @@
1
+ # frozen_string_literal: true
2
+ require "singleton"
3
+ require "shellwords"
4
+
5
+ module Nokogiri
6
+ class VersionInfo # :nodoc:
7
+ include Singleton
8
+
9
+ def jruby?
10
+ ::JRUBY_VERSION if ::RUBY_PLATFORM == "java"
11
+ end
12
+
13
+ def engine
14
+ defined?(::RUBY_ENGINE) ? ::RUBY_ENGINE : "mri"
15
+ end
16
+
17
+ def loaded_libxml_version
18
+ Gem::Version.new(Nokogiri::LIBXML_LOADED_VERSION
19
+ .scan(/^(\d+)(\d\d)(\d\d)(?!\d)/).first
20
+ .collect(&:to_i)
21
+ .join("."))
22
+ end
23
+
24
+ def compiled_libxml_version
25
+ Gem::Version.new(Nokogiri::LIBXML_COMPILED_VERSION)
26
+ end
27
+
28
+ def loaded_libxslt_version
29
+ Gem::Version.new(Nokogiri::LIBXSLT_LOADED_VERSION
30
+ .scan(/^(\d+)(\d\d)(\d\d)(?!\d)/).first
31
+ .collect(&:to_i)
32
+ .join("."))
33
+ end
34
+
35
+ def compiled_libxslt_version
36
+ Gem::Version.new(Nokogiri::LIBXSLT_COMPILED_VERSION)
37
+ end
38
+
39
+ def libxml2?
40
+ defined?(Nokogiri::LIBXML_COMPILED_VERSION)
41
+ end
42
+
43
+ def libxml2_has_iconv?
44
+ defined?(Nokogiri::LIBXML_ICONV_ENABLED) && Nokogiri::LIBXML_ICONV_ENABLED
45
+ end
46
+
47
+ def libxml2_using_packaged?
48
+ libxml2? && Nokogiri::PACKAGED_LIBRARIES
49
+ end
50
+
51
+ def libxml2_using_system?
52
+ libxml2? && !libxml2_using_packaged?
53
+ end
54
+
55
+ def libxml2_precompiled?
56
+ libxml2_using_packaged? && Nokogiri::PRECOMPILED_LIBRARIES
57
+ end
58
+
59
+ def warnings
60
+ warnings = []
61
+
62
+ if libxml2?
63
+ if compiled_libxml_version != loaded_libxml_version
64
+ warnings << "Nokogiri was built against libxml version #{compiled_libxml_version}, but has dynamically loaded #{loaded_libxml_version}"
65
+ end
66
+
67
+ if compiled_libxslt_version != loaded_libxslt_version
68
+ warnings << "Nokogiri was built against libxslt version #{compiled_libxslt_version}, but has dynamically loaded #{loaded_libxslt_version}"
69
+ end
70
+ end
71
+
72
+ warnings
73
+ end
74
+
75
+ def to_hash
76
+ header_directory = File.expand_path(File.join(File.dirname(__FILE__), "../../../ext/nokogiri"))
77
+ {}.tap do |vi|
78
+ vi["warnings"] = []
79
+ vi["nokogiri"] = {}.tap do |nokogiri|
80
+ nokogiri["version"] = Nokogiri::VERSION
81
+
82
+ unless jruby?
83
+ cppflags = ["-I#{header_directory.shellescape}"]
84
+ if libxml2_using_packaged?
85
+ cppflags << "-I#{File.join(header_directory, "include").shellescape}"
86
+ cppflags << "-I#{File.join(header_directory, "include/libxml2").shellescape}"
87
+ end
88
+ nokogiri["cppflags"] = cppflags
89
+ end
90
+ end
91
+ vi["ruby"] = {}.tap do |ruby|
92
+ ruby["version"] = ::RUBY_VERSION
93
+ ruby["platform"] = ::RUBY_PLATFORM
94
+ ruby["gem_platform"] = ::Gem::Platform.local.to_s
95
+ ruby["description"] = ::RUBY_DESCRIPTION
96
+ ruby["engine"] = engine
97
+ ruby["jruby"] = jruby? if jruby?
98
+ end
99
+
100
+ if libxml2?
101
+ vi["libxml"] = {}.tap do |libxml|
102
+ if libxml2_using_packaged?
103
+ libxml["source"] = "packaged"
104
+ libxml["precompiled"] = libxml2_precompiled?
105
+ libxml["patches"] = Nokogiri::LIBXML2_PATCHES
106
+
107
+ # this is for nokogumbo and shouldn't be forever
108
+ libxml["libxml2_path"] = header_directory
109
+ else
110
+ libxml["source"] = "system"
111
+ end
112
+ libxml["iconv_enabled"] = libxml2_has_iconv?
113
+ libxml["compiled"] = compiled_libxml_version.to_s
114
+ libxml["loaded"] = loaded_libxml_version.to_s
115
+ end
116
+
117
+ vi["libxslt"] = {}.tap do |libxslt|
118
+ if libxml2_using_packaged?
119
+ libxslt["source"] = "packaged"
120
+ libxslt["precompiled"] = libxml2_precompiled?
121
+ libxslt["patches"] = Nokogiri::LIBXSLT_PATCHES
122
+ else
123
+ libxslt["source"] = "system"
124
+ end
125
+ libxslt["compiled"] = compiled_libxslt_version.to_s
126
+ libxslt["loaded"] = loaded_libxslt_version.to_s
127
+ end
128
+
129
+ vi["warnings"] = warnings
130
+ end
131
+
132
+ if defined?(Nokogiri::OTHER_LIBRARY_VERSIONS)
133
+ # see extconf for how this string is assembled: "lib1name:lib1version,lib2name:lib2version"
134
+ vi["other_libraries"] = Hash[*Nokogiri::OTHER_LIBRARY_VERSIONS.split(/[,:]/)]
135
+ elsif jruby?
136
+ vi["other_libraries"] = {}.tap do |ol|
137
+ ol["xerces"] = Nokogiri::XERCES_VERSION
138
+ ol["nekohtml"] = Nokogiri::NEKO_VERSION
139
+ end
140
+ end
141
+ end
142
+ end
143
+
144
+ def to_markdown
145
+ begin
146
+ require "psych"
147
+ rescue LoadError
148
+ end
149
+ require "yaml"
150
+ "# Nokogiri (#{Nokogiri::VERSION})\n" +
151
+ YAML.dump(to_hash).each_line.map { |line| " #{line}" }.join
152
+ end
153
+
154
+ instance.warnings.each do |warning|
155
+ warn "WARNING: #{warning}"
156
+ end
157
+ end
158
+
159
+ def self.uses_libxml?(requirement = nil) # :nodoc:
160
+ return false unless VersionInfo.instance.libxml2?
161
+ return true unless requirement
162
+ Gem::Requirement.new(requirement).satisfied_by?(VersionInfo.instance.loaded_libxml_version)
163
+ end
164
+
165
+ def self.jruby? # :nodoc:
166
+ VersionInfo.instance.jruby?
167
+ end
168
+
169
+ # Ensure constants used in this file are loaded - see #1896
170
+ if Nokogiri.jruby?
171
+ require "nokogiri/jruby/dependencies"
172
+ end
173
+ begin
174
+ ::RUBY_VERSION =~ /(\d+\.\d+)/
175
+ require "nokogiri/#{Regexp.last_match(1)}/nokogiri"
176
+ rescue LoadError
177
+ require "nokogiri/nokogiri"
178
+ end
179
+
180
+ # More complete version information about libxml
181
+ VERSION_INFO = VersionInfo.instance.to_hash
182
+ end
@@ -1,4 +1,7 @@
1
1
  # frozen_string_literal: true
2
+
3
+ require 'pathname'
4
+
2
5
  module Nokogiri
3
6
  module XML
4
7
  ##
@@ -44,9 +47,11 @@ module Nokogiri
44
47
  #
45
48
  def self.parse string_or_io, url = nil, encoding = nil, options = ParseOptions::DEFAULT_XML
46
49
  options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
47
- # Give the options to the user
50
+
48
51
  yield options if block_given?
49
52
 
53
+ url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
54
+
50
55
  if empty_doc?(string_or_io)
51
56
  if options.strict?
52
57
  raise Nokogiri::XML::SyntaxError.new("Empty document")
@@ -56,12 +61,17 @@ module Nokogiri
56
61
  end
57
62
 
58
63
  doc = if string_or_io.respond_to?(:read)
59
- url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
60
- read_io(string_or_io, url, encoding, options.to_i)
61
- else
62
- # read_memory pukes on empty docs
63
- read_memory(string_or_io, url, encoding, options.to_i)
64
- end
64
+ if string_or_io.is_a?(Pathname)
65
+ # resolve the Pathname to the file and open it as an IO object, see #2110
66
+ string_or_io = string_or_io.expand_path.open
67
+ url ||= string_or_io.path
68
+ end
69
+
70
+ read_io(string_or_io, url, encoding, options.to_i)
71
+ else
72
+ # read_memory pukes on empty docs
73
+ read_memory(string_or_io, url, encoding, options.to_i)
74
+ end
65
75
 
66
76
  # do xinclude processing
67
77
  doc.do_xinclude(options) if options.xinclude?