nokogiri 1.10.10 → 1.11.0.rc1

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (73) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +19 -17
  3. data/ext/nokogiri/extconf.rb +44 -35
  4. data/ext/nokogiri/nokogiri.c +12 -6
  5. data/ext/nokogiri/nokogiri.h +1 -0
  6. data/ext/nokogiri/xml_document.c +10 -1
  7. data/ext/nokogiri/xml_node.c +20 -0
  8. data/ext/nokogiri/xml_schema.c +0 -29
  9. data/ext/nokogiri/xslt_stylesheet.c +0 -4
  10. data/lib/nokogiri.rb +2 -19
  11. data/lib/nokogiri/css.rb +1 -0
  12. data/lib/nokogiri/css/node.rb +1 -0
  13. data/lib/nokogiri/css/parser.rb +1 -0
  14. data/lib/nokogiri/css/parser_extras.rb +1 -0
  15. data/lib/nokogiri/css/syntax_error.rb +1 -0
  16. data/lib/nokogiri/css/tokenizer.rb +1 -0
  17. data/lib/nokogiri/css/xpath_visitor.rb +3 -1
  18. data/lib/nokogiri/decorators/slop.rb +1 -0
  19. data/lib/nokogiri/html.rb +1 -0
  20. data/lib/nokogiri/html/builder.rb +1 -0
  21. data/lib/nokogiri/html/document.rb +1 -0
  22. data/lib/nokogiri/html/document_fragment.rb +1 -0
  23. data/lib/nokogiri/html/element_description.rb +1 -0
  24. data/lib/nokogiri/html/element_description_defaults.rb +1 -0
  25. data/lib/nokogiri/html/entity_lookup.rb +1 -0
  26. data/lib/nokogiri/html/sax/parser.rb +1 -0
  27. data/lib/nokogiri/html/sax/parser_context.rb +1 -0
  28. data/lib/nokogiri/html/sax/push_parser.rb +1 -0
  29. data/lib/nokogiri/jruby/dependencies.rb +20 -0
  30. data/lib/nokogiri/syntax_error.rb +1 -0
  31. data/lib/nokogiri/version.rb +85 -45
  32. data/lib/nokogiri/xml.rb +1 -0
  33. data/lib/nokogiri/xml/attr.rb +1 -0
  34. data/lib/nokogiri/xml/attribute_decl.rb +1 -0
  35. data/lib/nokogiri/xml/builder.rb +1 -0
  36. data/lib/nokogiri/xml/cdata.rb +1 -0
  37. data/lib/nokogiri/xml/character_data.rb +1 -0
  38. data/lib/nokogiri/xml/document.rb +3 -8
  39. data/lib/nokogiri/xml/document_fragment.rb +1 -0
  40. data/lib/nokogiri/xml/dtd.rb +1 -0
  41. data/lib/nokogiri/xml/element_content.rb +1 -0
  42. data/lib/nokogiri/xml/element_decl.rb +1 -0
  43. data/lib/nokogiri/xml/entity_decl.rb +1 -0
  44. data/lib/nokogiri/xml/entity_reference.rb +1 -0
  45. data/lib/nokogiri/xml/namespace.rb +1 -0
  46. data/lib/nokogiri/xml/node.rb +26 -12
  47. data/lib/nokogiri/xml/node/save_options.rb +1 -0
  48. data/lib/nokogiri/xml/node_set.rb +1 -0
  49. data/lib/nokogiri/xml/notation.rb +1 -0
  50. data/lib/nokogiri/xml/parse_options.rb +4 -3
  51. data/lib/nokogiri/xml/pp.rb +1 -0
  52. data/lib/nokogiri/xml/pp/character_data.rb +1 -0
  53. data/lib/nokogiri/xml/pp/node.rb +1 -0
  54. data/lib/nokogiri/xml/processing_instruction.rb +1 -0
  55. data/lib/nokogiri/xml/reader.rb +7 -3
  56. data/lib/nokogiri/xml/relax_ng.rb +1 -0
  57. data/lib/nokogiri/xml/sax.rb +1 -0
  58. data/lib/nokogiri/xml/sax/document.rb +1 -0
  59. data/lib/nokogiri/xml/sax/parser.rb +1 -0
  60. data/lib/nokogiri/xml/sax/parser_context.rb +1 -0
  61. data/lib/nokogiri/xml/sax/push_parser.rb +1 -0
  62. data/lib/nokogiri/xml/schema.rb +1 -0
  63. data/lib/nokogiri/xml/searchable.rb +1 -0
  64. data/lib/nokogiri/xml/syntax_error.rb +1 -0
  65. data/lib/nokogiri/xml/text.rb +1 -0
  66. data/lib/nokogiri/xml/xpath.rb +1 -0
  67. data/lib/nokogiri/xml/xpath/syntax_error.rb +1 -0
  68. data/lib/nokogiri/xml/xpath_context.rb +1 -0
  69. data/lib/nokogiri/xslt.rb +1 -0
  70. data/lib/nokogiri/xslt/stylesheet.rb +1 -0
  71. data/lib/xsd/xmlparser/nokogiri.rb +1 -0
  72. metadata +27 -27
  73. data/patches/libxml2/0005-Fix-infinite-loop-in-xmlStringLenDecodeEntities.patch +0 -32
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module HTML
3
4
  class ElementDescription
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module HTML
3
4
  class ElementDescription
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module HTML
3
4
  class EntityDescription < Struct.new(:value, :name, :description); end
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module HTML
3
4
  ###
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module HTML
3
4
  module SAX
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module HTML
3
4
  module SAX
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+ # The line below caused a problem in non-GAE Rack environments.
3
+ # unless defined?(JRuby::Rack::VERSION) || defined?(AppEngine::ApiProxy)
4
+ #
5
+ # However, simply cutting defined?(JRuby::Rack::VERSION) off resulted in
6
+ # an unable-to-load-nokogiri problem. Thus, now, Nokogiri checks the presence
7
+ # of appengine-rack.jar in $LOAD_PATH. If Nokogiri is on GAE, Nokogiri
8
+ # should skip loading xml jars. This is because those are in WEB-INF/lib and
9
+ # already set in the classpath.
10
+ unless $LOAD_PATH.to_s.include?("appengine-rack")
11
+ require 'stringio'
12
+ require 'isorelax.jar'
13
+ require 'jing.jar'
14
+ require 'nekohtml.jar'
15
+ require 'nekodtd.jar'
16
+ require 'xercesImpl.jar'
17
+ require 'serializer.jar'
18
+ require 'xalan.jar'
19
+ require 'xml-apis.jar'
20
+ end
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  class SyntaxError < ::StandardError
3
4
  end
@@ -1,6 +1,7 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  # The version of Nokogiri you are using
3
- VERSION = "1.10.10"
4
+ VERSION = "1.11.0.rc1"
4
5
 
5
6
  class VersionInfo # :nodoc:
6
7
  def jruby?
@@ -11,19 +12,30 @@ module Nokogiri
11
12
  defined?(RUBY_ENGINE) ? RUBY_ENGINE : "mri"
12
13
  end
13
14
 
14
- def loaded_parser_version
15
- LIBXML_PARSER_VERSION.
15
+ def loaded_libxml_version
16
+ Gem::Version.new(LIBXML_LOADED_VERSION.
16
17
  scan(/^(\d+)(\d\d)(\d\d)(?!\d)/).first.
17
18
  collect(&:to_i).
18
- join(".")
19
+ join("."))
19
20
  end
20
21
 
21
- def compiled_parser_version
22
- LIBXML_VERSION
22
+ def compiled_libxml_version
23
+ Gem::Version.new LIBXML_COMPILED_VERSION
24
+ end
25
+
26
+ def loaded_libxslt_version
27
+ Gem::Version.new(LIBXSLT_LOADED_VERSION.
28
+ scan(/^(\d+)(\d\d)(\d\d)(?!\d)/).first.
29
+ collect(&:to_i).
30
+ join("."))
31
+ end
32
+
33
+ def compiled_libxslt_version
34
+ Gem::Version.new LIBXSLT_COMPILED_VERSION
23
35
  end
24
36
 
25
37
  def libxml2?
26
- defined?(LIBXML_VERSION)
38
+ defined?(LIBXML_COMPILED_VERSION)
27
39
  end
28
40
 
29
41
  def libxml2_using_system?
@@ -35,47 +47,62 @@ module Nokogiri
35
47
  end
36
48
 
37
49
  def warnings
38
- return [] unless libxml2?
50
+ warnings = []
39
51
 
40
- if compiled_parser_version != loaded_parser_version
41
- ["Nokogiri was built against LibXML version #{compiled_parser_version}, but has dynamically loaded #{loaded_parser_version}"]
42
- else
43
- []
52
+ if libxml2?
53
+ if compiled_libxml_version != loaded_libxml_version
54
+ warnings << "Nokogiri was built against libxml version #{compiled_libxml_version}, but has dynamically loaded #{loaded_libxml_version}"
55
+ end
56
+
57
+ if compiled_libxslt_version != loaded_libxslt_version
58
+ warnings << "Nokogiri was built against libxslt version #{compiled_libxslt_version}, but has dynamically loaded #{loaded_libxslt_version}"
59
+ end
44
60
  end
61
+
62
+ warnings
45
63
  end
46
64
 
47
65
  def to_hash
48
- hash_info = {}
49
- hash_info["warnings"] = []
50
- hash_info["nokogiri"] = Nokogiri::VERSION
51
- hash_info["ruby"] = {}
52
- hash_info["ruby"]["version"] = ::RUBY_VERSION
53
- hash_info["ruby"]["platform"] = ::RUBY_PLATFORM
54
- hash_info["ruby"]["description"] = ::RUBY_DESCRIPTION
55
- hash_info["ruby"]["engine"] = engine
56
- hash_info["ruby"]["jruby"] = jruby? if jruby?
66
+ {}.tap do |vi|
67
+ vi["warnings"] = []
68
+ vi["nokogiri"] = Nokogiri::VERSION
69
+ vi["ruby"] = {}.tap do |ruby|
70
+ ruby["version"] = ::RUBY_VERSION
71
+ ruby["platform"] = ::RUBY_PLATFORM
72
+ ruby["description"] = ::RUBY_DESCRIPTION
73
+ ruby["engine"] = engine
74
+ ruby["jruby"] = jruby? if jruby?
75
+ end
57
76
 
58
- if libxml2?
59
- hash_info["libxml"] = {}
60
- hash_info["libxml"]["binding"] = "extension"
61
- if libxml2_using_packaged?
62
- hash_info["libxml"]["source"] = "packaged"
63
- hash_info["libxml"]["libxml2_path"] = NOKOGIRI_LIBXML2_PATH
64
- hash_info["libxml"]["libxslt_path"] = NOKOGIRI_LIBXSLT_PATH
65
- hash_info["libxml"]["libxml2_patches"] = NOKOGIRI_LIBXML2_PATCHES
66
- hash_info["libxml"]["libxslt_patches"] = NOKOGIRI_LIBXSLT_PATCHES
67
- else
68
- hash_info["libxml"]["source"] = "system"
77
+ if libxml2?
78
+ vi["libxml"] = {}.tap do |libxml|
79
+ if libxml2_using_packaged?
80
+ libxml["source"] = "packaged"
81
+ libxml["patches"] = NOKOGIRI_LIBXML2_PATCHES
82
+ else
83
+ libxml["source"] = "system"
84
+ end
85
+ libxml["compiled"] = compiled_libxml_version.to_s
86
+ libxml["loaded"] = loaded_libxml_version.to_s
87
+ end
88
+
89
+ vi["libxslt"] = {}.tap do |libxslt|
90
+ if libxml2_using_packaged?
91
+ libxslt["source"] = "packaged"
92
+ libxslt["patches"] = NOKOGIRI_LIBXSLT_PATCHES
93
+ else
94
+ libxslt["source"] = "system"
95
+ end
96
+ libxslt["compiled"] = compiled_libxslt_version.to_s
97
+ libxslt["loaded"] = loaded_libxslt_version.to_s
98
+ end
99
+
100
+ vi["warnings"] = warnings
101
+ elsif jruby?
102
+ vi["xerces"] = Nokogiri::XERCES_VERSION
103
+ vi["nekohtml"] = Nokogiri::NEKO_VERSION
69
104
  end
70
- hash_info["libxml"]["compiled"] = compiled_parser_version
71
- hash_info["libxml"]["loaded"] = loaded_parser_version
72
- hash_info["warnings"] = warnings
73
- elsif jruby?
74
- hash_info["xerces"] = Nokogiri::XERCES_VERSION
75
- hash_info["nekohtml"] = Nokogiri::NEKO_VERSION
76
105
  end
77
-
78
- hash_info
79
106
  end
80
107
 
81
108
  def to_markdown
@@ -96,14 +123,27 @@ module Nokogiri
96
123
  def self.instance; @@instance; end
97
124
  end
98
125
 
99
- # More complete version information about libxml
100
- VERSION_INFO = VersionInfo.instance.to_hash
101
-
102
- def self.uses_libxml? # :nodoc:
103
- VersionInfo.instance.libxml2?
126
+ def self.uses_libxml?(requirement = nil) # :nodoc:
127
+ return false unless VersionInfo.instance.libxml2?
128
+ return true unless requirement
129
+ return Gem::Requirement.new(requirement).satisfied_by?(VersionInfo.instance.loaded_libxml_version)
104
130
  end
105
131
 
106
132
  def self.jruby? # :nodoc:
107
133
  VersionInfo.instance.jruby?
108
134
  end
135
+
136
+ # Ensure constants used in this file are loaded
137
+ if Nokogiri.jruby?
138
+ require "nokogiri/jruby/dependencies"
139
+ end
140
+ begin
141
+ RUBY_VERSION =~ /(\d+\.\d+)/
142
+ require "nokogiri/#{$1}/nokogiri"
143
+ rescue LoadError
144
+ require "nokogiri/nokogiri"
145
+ end
146
+
147
+ # More complete version information about libxml
148
+ VERSION_INFO = VersionInfo.instance.to_hash
109
149
  end
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  require 'nokogiri/xml/pp'
2
3
  require 'nokogiri/xml/parse_options'
3
4
  require 'nokogiri/xml/sax'
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module XML
3
4
  class Attr < Node
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module XML
3
4
  ###
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module XML
3
4
  ###
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module XML
3
4
  class CDATA < Nokogiri::XML::Text
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module XML
3
4
  class CharacterData < Nokogiri::XML::Node
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module XML
3
4
  ##
@@ -254,18 +255,12 @@ module Nokogiri
254
255
  ##
255
256
  # +JRuby+
256
257
  # Wraps Java's org.w3c.dom.document and returns Nokogiri::XML::Document
257
- def self.wrap document
258
- raise "JRuby only method" unless Nokogiri.jruby?
259
- return wrapJavaDocument(document)
260
- end
258
+ def self.wrap(document) end if false # native-ext provides Document.wrap
261
259
 
262
260
  ##
263
261
  # +JRuby+
264
262
  # Returns Java's org.w3c.dom.document of this Document.
265
- def to_java
266
- raise "JRuby only method" unless Nokogiri.jruby?
267
- return toJavaDocument()
268
- end
263
+ def to_java; end if false # JRuby provides #to_java
269
264
 
270
265
  private
271
266
  def self.empty_doc? string_or_io
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module XML
3
4
  class DocumentFragment < Nokogiri::XML::Node
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module XML
3
4
  class DTD < Nokogiri::XML::Node
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module XML
3
4
  ###
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module XML
3
4
  class ElementDecl < Nokogiri::XML::Node
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module XML
3
4
  class EntityDecl < Nokogiri::XML::Node
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module XML
3
4
  class EntityReference < Nokogiri::XML::Node
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module XML
3
4
  class Namespace
@@ -1,4 +1,5 @@
1
1
  # encoding: UTF-8
2
+ # frozen_string_literal: true
2
3
  require 'stringio'
3
4
  require 'nokogiri/xml/node/save_options'
4
5
 
@@ -337,9 +338,9 @@ module Nokogiri
337
338
  # If you need to distinguish attributes with the same name, with different namespaces
338
339
  # use #attribute_nodes instead.
339
340
  def attributes
340
- Hash[attribute_nodes.map { |node|
341
- [node.node_name, node]
342
- }]
341
+ attribute_nodes.each_with_object({}) do |node, hash|
342
+ hash[node.node_name] = node
343
+ end
343
344
  end
344
345
 
345
346
  ###
@@ -348,6 +349,12 @@ module Nokogiri
348
349
  attribute_nodes.map(&:value)
349
350
  end
350
351
 
352
+ ###
353
+ # Do this Node's attributes include <value>?
354
+ def value?(value)
355
+ values.include? value
356
+ end
357
+
351
358
  ###
352
359
  # Get the attribute names for this Node.
353
360
  def keys
@@ -509,10 +516,11 @@ module Nokogiri
509
516
  # default namespaces set on ancestor will NOT be, even if self
510
517
  # has no explicit default namespace.
511
518
  def namespaces
512
- Hash[namespace_scopes.map { |nd|
513
- key = ['xmlns', nd.prefix].compact.join(':')
514
- [key, nd.href]
515
- }]
519
+ namespace_scopes.each_with_object({}) do |ns, hash|
520
+ prefix = ns.prefix
521
+ key = prefix ? "xmlns:#{prefix}" : "xmlns"
522
+ hash[key] = ns.href
523
+ end
516
524
  end
517
525
 
518
526
  # Returns true if this is a Comment
@@ -761,10 +769,15 @@ module Nokogiri
761
769
  end
762
770
  indent_text = options[:indent_text] || ' '
763
771
 
772
+ # Any string times 0 returns an empty string. Therefore, use the same
773
+ # string instead of generating a new empty string for every node with
774
+ # zero indentation.
775
+ indentation = indent_times.zero? ? '' : (indent_text * indent_times)
776
+
764
777
  config = SaveOptions.new(save_options.to_i)
765
778
  yield config if block_given?
766
779
 
767
- native_write_to(io, encoding, indent_text * indent_times, config.options)
780
+ native_write_to(io, encoding, indentation, config.options)
768
781
  end
769
782
 
770
783
  ###
@@ -847,17 +860,18 @@ module Nokogiri
847
860
  node_or_tags
848
861
  end
849
862
 
863
+ USING_LIBXML_WITH_BROKEN_SERIALIZATION = Nokogiri.uses_libxml?("~> 2.6.0").freeze
864
+ private_constant :USING_LIBXML_WITH_BROKEN_SERIALIZATION
865
+
850
866
  def to_format save_option, options
851
- # FIXME: this is a hack around broken libxml versions
852
- return dump_html if Nokogiri.uses_libxml? && %w[2 6] === LIBXML_VERSION.split('.')[0..1]
867
+ return dump_html if USING_LIBXML_WITH_BROKEN_SERIALIZATION
853
868
 
854
869
  options[:save_with] = save_option unless options[:save_with]
855
870
  serialize(options)
856
871
  end
857
872
 
858
873
  def write_format_to save_option, io, options
859
- # FIXME: this is a hack around broken libxml versions
860
- return (io << dump_html) if Nokogiri.uses_libxml? && %w[2 6] === LIBXML_VERSION.split('.')[0..1]
874
+ return (io << dump_html) if USING_LIBXML_WITH_BROKEN_SERIALIZATION
861
875
 
862
876
  options[:save_with] ||= save_option
863
877
  write_to io, options
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module XML
3
4
  class Node
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module XML
3
4
  ####
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module XML
3
4
  class Notation < Struct.new(:name, :public_id, :system_id)
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module XML
3
4
  ###
@@ -5,11 +6,11 @@ module Nokogiri
5
6
  #
6
7
  # == Building combinations of parse options
7
8
  # You can build your own combinations of these parse options by using any of the following methods:
8
- # *Note*: All examples attempt to set the +RECOVER+ & +NOENT+ options. All examples use Ruby 2 optional parameter syntax.
9
+ # *Note*: All examples attempt to set the +RECOVER+ & +NOENT+ options.
9
10
  # [Ruby's bitwise operators] You can use the Ruby bitwise operators to set various combinations.
10
- # Nokogiri.XML('<content>Chapter 1</content', options: Nokogiri::XML::ParseOptions.new((1 << 0) | (1 << 1)))
11
+ # Nokogiri.XML('<content>Chapter 1</content', nil, nil, Nokogiri::XML::ParseOptions.new((1 << 0) | (1 << 1)))
11
12
  # [Method chaining] Every option has an equivalent method in lowercase. You can chain these methods together to set various combinations.
12
- # Nokogiri.XML('<content>Chapter 1</content', options: Nokogiri::XML::ParseOptions.new.recover.noent)
13
+ # Nokogiri.XML('<content>Chapter 1</content', nil, nil, Nokogiri::XML::ParseOptions.new.recover.noent)
13
14
  # [Using Ruby Blocks] You can also setup parse combinations in the block passed to Nokogiri.XML or Nokogiri.HTML
14
15
  # Nokogiri.XML('<content>Chapter 1</content') {|config| config.recover.noent}
15
16
  #