nokogiri 1.11.0.rc3 → 1.11.3

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (105)
  1. checksums.yaml +4 -4
  2. data/Gemfile +3 -0
  3. data/LICENSE-DEPENDENCIES.md +1015 -947
  4. data/LICENSE.md +1 -1
  5. data/README.md +168 -91
  6. data/ext/nokogiri/depend +37 -358
  7. data/ext/nokogiri/extconf.rb +581 -374
  8. data/ext/nokogiri/html_document.c +78 -82
  9. data/ext/nokogiri/html_element_description.c +84 -71
  10. data/ext/nokogiri/html_entity_lookup.c +21 -16
  11. data/ext/nokogiri/html_sax_parser_context.c +69 -66
  12. data/ext/nokogiri/html_sax_push_parser.c +42 -34
  13. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  14. data/ext/nokogiri/nokogiri.c +192 -93
  15. data/ext/nokogiri/nokogiri.h +177 -98
  16. data/ext/nokogiri/test_global_handlers.c +40 -0
  17. data/ext/nokogiri/xml_attr.c +15 -15
  18. data/ext/nokogiri/xml_attribute_decl.c +18 -18
  19. data/ext/nokogiri/xml_cdata.c +13 -18
  20. data/ext/nokogiri/xml_comment.c +19 -26
  21. data/ext/nokogiri/xml_document.c +246 -188
  22. data/ext/nokogiri/xml_document_fragment.c +13 -15
  23. data/ext/nokogiri/xml_dtd.c +54 -48
  24. data/ext/nokogiri/xml_element_content.c +30 -27
  25. data/ext/nokogiri/xml_element_decl.c +22 -22
  26. data/ext/nokogiri/xml_encoding_handler.c +17 -11
  27. data/ext/nokogiri/xml_entity_decl.c +32 -30
  28. data/ext/nokogiri/xml_entity_reference.c +16 -18
  29. data/ext/nokogiri/xml_namespace.c +56 -49
  30. data/ext/nokogiri/xml_node.c +371 -320
  31. data/ext/nokogiri/xml_node_set.c +168 -156
  32. data/ext/nokogiri/xml_processing_instruction.c +17 -19
  33. data/ext/nokogiri/xml_reader.c +191 -157
  34. data/ext/nokogiri/xml_relax_ng.c +52 -28
  35. data/ext/nokogiri/xml_sax_parser.c +118 -118
  36. data/ext/nokogiri/xml_sax_parser_context.c +103 -86
  37. data/ext/nokogiri/xml_sax_push_parser.c +36 -27
  38. data/ext/nokogiri/xml_schema.c +95 -47
  39. data/ext/nokogiri/xml_syntax_error.c +42 -21
  40. data/ext/nokogiri/xml_text.c +13 -17
  41. data/ext/nokogiri/xml_xpath_context.c +206 -123
  42. data/ext/nokogiri/xslt_stylesheet.c +158 -161
  43. data/lib/nokogiri.rb +3 -7
  44. data/lib/nokogiri/css/parser.rb +3 -3
  45. data/lib/nokogiri/css/parser.y +2 -2
  46. data/lib/nokogiri/css/xpath_visitor.rb +70 -42
  47. data/lib/nokogiri/extension.rb +26 -0
  48. data/lib/nokogiri/html/document.rb +12 -26
  49. data/lib/nokogiri/html/document_fragment.rb +15 -15
  50. data/lib/nokogiri/version.rb +2 -149
  51. data/lib/nokogiri/version/constant.rb +5 -0
  52. data/lib/nokogiri/version/info.rb +205 -0
  53. data/lib/nokogiri/xml/document.rb +91 -35
  54. data/lib/nokogiri/xml/document_fragment.rb +4 -6
  55. data/lib/nokogiri/xml/node.rb +89 -69
  56. data/lib/nokogiri/xml/parse_options.rb +6 -0
  57. data/lib/nokogiri/xml/reader.rb +2 -9
  58. data/lib/nokogiri/xml/relax_ng.rb +6 -2
  59. data/lib/nokogiri/xml/schema.rb +12 -4
  60. data/lib/nokogiri/xml/searchable.rb +3 -1
  61. data/lib/nokogiri/xml/xpath.rb +1 -3
  62. data/lib/nokogiri/xml/xpath/syntax_error.rb +1 -1
  63. data/patches/libxml2/0006-htmlParseComment-treat-as-if-it-closed-the-comment.patch +73 -0
  64. data/patches/libxml2/0007-use-new-htmlParseLookupCommentEnd-to-find-comment-en.patch +103 -0
  65. data/patches/libxml2/0008-use-glibc-strlen.patch +53 -0
  66. data/patches/libxml2/0009-avoid-isnan-isinf.patch +81 -0
  67. data/patches/libxml2/0010-parser.c-shrink-the-input-buffer-when-appropriate.patch +70 -0
  68. data/patches/libxml2/0011-update-automake-files-for-arm64.patch +2511 -0
  69. data/patches/libxslt/0001-update-automake-files-for-arm64.patch +2511 -0
  70. metadata +86 -168
  71. data/ext/nokogiri/html_document.h +0 -10
  72. data/ext/nokogiri/html_element_description.h +0 -10
  73. data/ext/nokogiri/html_entity_lookup.h +0 -8
  74. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  75. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  76. data/ext/nokogiri/xml_attr.h +0 -9
  77. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  78. data/ext/nokogiri/xml_cdata.h +0 -9
  79. data/ext/nokogiri/xml_comment.h +0 -9
  80. data/ext/nokogiri/xml_document.h +0 -23
  81. data/ext/nokogiri/xml_document_fragment.h +0 -10
  82. data/ext/nokogiri/xml_dtd.h +0 -10
  83. data/ext/nokogiri/xml_element_content.h +0 -10
  84. data/ext/nokogiri/xml_element_decl.h +0 -9
  85. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  86. data/ext/nokogiri/xml_entity_decl.h +0 -10
  87. data/ext/nokogiri/xml_entity_reference.h +0 -9
  88. data/ext/nokogiri/xml_io.c +0 -63
  89. data/ext/nokogiri/xml_io.h +0 -11
  90. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  91. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  92. data/ext/nokogiri/xml_namespace.h +0 -14
  93. data/ext/nokogiri/xml_node.h +0 -13
  94. data/ext/nokogiri/xml_node_set.h +0 -12
  95. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  96. data/ext/nokogiri/xml_reader.h +0 -10
  97. data/ext/nokogiri/xml_relax_ng.h +0 -9
  98. data/ext/nokogiri/xml_sax_parser.h +0 -39
  99. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  100. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  101. data/ext/nokogiri/xml_schema.h +0 -9
  102. data/ext/nokogiri/xml_syntax_error.h +0 -13
  103. data/ext/nokogiri/xml_text.h +0 -9
  104. data/ext/nokogiri/xml_xpath_context.h +0 -10
  105. data/ext/nokogiri/xslt_stylesheet.h +0 -14
data/lib/nokogiri/version/constant.rb
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+ module Nokogiri
3
+ # The version of Nokogiri you are using
4
+ VERSION = "1.11.3"
5
+ end
data/lib/nokogiri/version/info.rb
@@ -0,0 +1,205 @@
1
+ # frozen_string_literal: true
2
+ require "singleton"
3
+ require "shellwords"
4
+
5
+ module Nokogiri
6
+ class VersionInfo # :nodoc:
7
+ include Singleton
8
+
9
+ def jruby?
10
+ ::JRUBY_VERSION if ::RUBY_PLATFORM == "java"
11
+ end
12
+
13
+ def windows?
14
+ ::RUBY_PLATFORM =~ /mingw|mswin/
15
+ end
16
+
17
+ def ruby_minor
18
+ Gem::Version.new(::RUBY_VERSION).segments[0..1].join(".")
19
+ end
20
+
21
+ def engine
22
+ defined?(::RUBY_ENGINE) ? ::RUBY_ENGINE : "mri"
23
+ end
24
+
25
+ def loaded_libxml_version
26
+ Gem::Version.new(Nokogiri::LIBXML_LOADED_VERSION
27
+ .scan(/^(\d+)(\d\d)(\d\d)(?!\d)/).first
28
+ .collect(&:to_i)
29
+ .join("."))
30
+ end
31
+
32
+ def compiled_libxml_version
33
+ Gem::Version.new(Nokogiri::LIBXML_COMPILED_VERSION)
34
+ end
35
+
36
+ def loaded_libxslt_version
37
+ Gem::Version.new(Nokogiri::LIBXSLT_LOADED_VERSION
38
+ .scan(/^(\d+)(\d\d)(\d\d)(?!\d)/).first
39
+ .collect(&:to_i)
40
+ .join("."))
41
+ end
42
+
43
+ def compiled_libxslt_version
44
+ Gem::Version.new(Nokogiri::LIBXSLT_COMPILED_VERSION)
45
+ end
46
+
47
+ def libxml2?
48
+ defined?(Nokogiri::LIBXML_COMPILED_VERSION)
49
+ end
50
+
51
+ def libxml2_has_iconv?
52
+ defined?(Nokogiri::LIBXML_ICONV_ENABLED) && Nokogiri::LIBXML_ICONV_ENABLED
53
+ end
54
+
55
+ def libxml2_using_packaged?
56
+ libxml2? && Nokogiri::PACKAGED_LIBRARIES
57
+ end
58
+
59
+ def libxml2_using_system?
60
+ libxml2? && !libxml2_using_packaged?
61
+ end
62
+
63
+ def libxml2_precompiled?
64
+ libxml2_using_packaged? && Nokogiri::PRECOMPILED_LIBRARIES
65
+ end
66
+
67
+ def warnings
68
+ warnings = []
69
+
70
+ if libxml2?
71
+ if compiled_libxml_version != loaded_libxml_version
72
+ warnings << "Nokogiri was built against libxml version #{compiled_libxml_version}, but has dynamically loaded #{loaded_libxml_version}"
73
+ end
74
+
75
+ if compiled_libxslt_version != loaded_libxslt_version
76
+ warnings << "Nokogiri was built against libxslt version #{compiled_libxslt_version}, but has dynamically loaded #{loaded_libxslt_version}"
77
+ end
78
+ end
79
+
80
+ warnings
81
+ end
82
+
83
+ def to_hash
84
+ header_directory = File.expand_path(File.join(File.dirname(__FILE__), "../../../ext/nokogiri"))
85
+
86
+ {}.tap do |vi|
87
+ vi["warnings"] = []
88
+ vi["nokogiri"] = {}.tap do |nokogiri|
89
+ nokogiri["version"] = Nokogiri::VERSION
90
+
91
+ unless jruby?
92
+ # enable gems like nokogumbo to build with the following in their extconf.rb:
93
+ #
94
+ # append_cflags(Nokogiri::VERSION_INFO["nokogiri"]["cppflags"])
95
+ # append_ldflags(Nokogiri::VERSION_INFO["nokogiri"]["ldflags"])
96
+ #
97
+ cppflags = ["-I#{header_directory.shellescape}"]
98
+ ldflags = []
99
+
100
+ if libxml2_using_packaged?
101
+ cppflags << "-I#{File.join(header_directory, 'include').shellescape}"
102
+ cppflags << "-I#{File.join(header_directory, 'include/libxml2').shellescape}"
103
+
104
+ if windows?
105
+ # on windows, nokogumbo needs to link against nokogiri.so to resolve symbols. see #2167
106
+ lib_directory = File.expand_path(File.join(File.dirname(__FILE__), "../#{ruby_minor}"))
107
+ unless File.exist?(lib_directory)
108
+ lib_directory = File.expand_path(File.join(File.dirname(__FILE__), ".."))
109
+ end
110
+ ldflags << "-L#{lib_directory.shellescape}"
111
+ ldflags << "-l:nokogiri.so"
112
+ end
113
+ end
114
+
115
+ nokogiri["cppflags"] = cppflags
116
+ nokogiri["ldflags"] = ldflags
117
+ end
118
+ end
119
+ vi["ruby"] = {}.tap do |ruby|
120
+ ruby["version"] = ::RUBY_VERSION
121
+ ruby["platform"] = ::RUBY_PLATFORM
122
+ ruby["gem_platform"] = ::Gem::Platform.local.to_s
123
+ ruby["description"] = ::RUBY_DESCRIPTION
124
+ ruby["engine"] = engine
125
+ ruby["jruby"] = jruby? if jruby?
126
+ end
127
+
128
+ if libxml2?
129
+ vi["libxml"] = {}.tap do |libxml|
130
+ if libxml2_using_packaged?
131
+ libxml["source"] = "packaged"
132
+ libxml["precompiled"] = libxml2_precompiled?
133
+ libxml["patches"] = Nokogiri::LIBXML2_PATCHES
134
+
135
+ # this is for nokogumbo and shouldn't be forever
136
+ libxml["libxml2_path"] = header_directory
137
+ else
138
+ libxml["source"] = "system"
139
+ end
140
+ libxml["iconv_enabled"] = libxml2_has_iconv?
141
+ libxml["compiled"] = compiled_libxml_version.to_s
142
+ libxml["loaded"] = loaded_libxml_version.to_s
143
+ end
144
+
145
+ vi["libxslt"] = {}.tap do |libxslt|
146
+ if libxml2_using_packaged?
147
+ libxslt["source"] = "packaged"
148
+ libxslt["precompiled"] = libxml2_precompiled?
149
+ libxslt["patches"] = Nokogiri::LIBXSLT_PATCHES
150
+ else
151
+ libxslt["source"] = "system"
152
+ end
153
+ libxslt["compiled"] = compiled_libxslt_version.to_s
154
+ libxslt["loaded"] = loaded_libxslt_version.to_s
155
+ end
156
+
157
+ vi["warnings"] = warnings
158
+ end
159
+
160
+ if defined?(Nokogiri::OTHER_LIBRARY_VERSIONS)
161
+ # see extconf for how this string is assembled: "lib1name:lib1version,lib2name:lib2version"
162
+ vi["other_libraries"] = Hash[*Nokogiri::OTHER_LIBRARY_VERSIONS.split(/[,:]/)]
163
+ elsif jruby?
164
+ vi["other_libraries"] = {}.tap do |ol|
165
+ ol["xerces"] = Nokogiri::XERCES_VERSION
166
+ ol["nekohtml"] = Nokogiri::NEKO_VERSION
167
+ end
168
+ end
169
+ end
170
+ end
171
+
172
+ def to_markdown
173
+ begin
174
+ require "psych"
175
+ rescue LoadError
176
+ end
177
+ require "yaml"
178
+ "# Nokogiri (#{Nokogiri::VERSION})\n" +
179
+ YAML.dump(to_hash).each_line.map { |line| " #{line}" }.join
180
+ end
181
+
182
+ instance.warnings.each do |warning|
183
+ warn "WARNING: #{warning}"
184
+ end
185
+ end
186
+
187
+ def self.uses_libxml?(requirement = nil) # :nodoc:
188
+ return false unless VersionInfo.instance.libxml2?
189
+ return true unless requirement
190
+ Gem::Requirement.new(requirement).satisfied_by?(VersionInfo.instance.loaded_libxml_version)
191
+ end
192
+
193
+ def self.jruby? # :nodoc:
194
+ VersionInfo.instance.jruby?
195
+ end
196
+
197
+ # Ensure constants used in this file are loaded - see #1896
198
+ if Nokogiri.jruby?
199
+ require "nokogiri/jruby/dependencies"
200
+ end
201
+ require "nokogiri/extension"
202
+
203
+ # More complete version information about libxml
204
+ VERSION_INFO = VersionInfo.instance.to_hash
205
+ end
data/lib/nokogiri/xml/document.rb
@@ -1,4 +1,8 @@
1
+ # coding: utf-8
1
2
  # frozen_string_literal: true
3
+
4
+ require 'pathname'
5
+
2
6
  module Nokogiri
3
7
  module XML
4
8
  ##
@@ -10,11 +14,12 @@ module Nokogiri
10
14
  # Nokogiri::XML::Searchable#xpath
11
15
  #
12
16
  class Document < Nokogiri::XML::Node
13
- # I'm ignoring unicode characters here.
14
- # See http://www.w3.org/TR/REC-xml-names/#ns-decl for more details.
17
+ # See http://www.w3.org/TR/REC-xml-names/#ns-decl for more details. Note that we're not
18
+ # attempting to handle unicode characters partly because libxml2 doesn't handle unicode
19
+ # characters in NCNAMEs.
15
20
  NCNAME_START_CHAR = "A-Za-z_"
16
- NCNAME_CHAR = NCNAME_START_CHAR + "\\-.0-9"
17
- NCNAME_RE = /^xmlns(:[#{NCNAME_START_CHAR}][#{NCNAME_CHAR}]*)?$/
21
+ NCNAME_CHAR = NCNAME_START_CHAR + "\\-\\.0-9"
22
+ NCNAME_RE = /^xmlns(?::([#{NCNAME_START_CHAR}][#{NCNAME_CHAR}]*))?$/
18
23
 
19
24
  ##
20
25
  # Parse an XML file.
@@ -44,9 +49,11 @@ module Nokogiri
44
49
  #
45
50
  def self.parse string_or_io, url = nil, encoding = nil, options = ParseOptions::DEFAULT_XML
46
51
  options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
47
- # Give the options to the user
52
+
48
53
  yield options if block_given?
49
54
 
55
+ url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
56
+
50
57
  if empty_doc?(string_or_io)
51
58
  if options.strict?
52
59
  raise Nokogiri::XML::SyntaxError.new("Empty document")
@@ -56,12 +63,17 @@ module Nokogiri
56
63
  end
57
64
 
58
65
  doc = if string_or_io.respond_to?(:read)
59
- url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
60
- read_io(string_or_io, url, encoding, options.to_i)
61
- else
62
- # read_memory pukes on empty docs
63
- read_memory(string_or_io, url, encoding, options.to_i)
64
- end
66
+ if string_or_io.is_a?(Pathname)
67
+ # resolve the Pathname to the file and open it as an IO object, see #2110
68
+ string_or_io = string_or_io.expand_path.open
69
+ url ||= string_or_io.path
70
+ end
71
+
72
+ read_io(string_or_io, url, encoding, options.to_i)
73
+ else
74
+ # read_memory pukes on empty docs
75
+ read_memory(string_or_io, url, encoding, options.to_i)
76
+ end
65
77
 
66
78
  # do xinclude processing
67
79
  doc.do_xinclude(options) if options.xinclude?
@@ -69,6 +81,35 @@ module Nokogiri
69
81
  return doc
70
82
  end
71
83
 
84
+ ##
85
+ # @!method wrap(java_document)
86
+ # @!scope class
87
+ #
88
+ # Create a {Document} using an existing Java DOM document object.
89
+ #
90
+ # The returned {Document} shares the same underlying data structure as the Java object, so
91
+ # changes in one are reflected in the other.
92
+ #
93
+ # @param java_document [Java::OrgW3cDom::Document]
94
+ # @return [Nokogiri::XML::Document]
95
+ # @note This method is only available when running JRuby.
96
+ # @note The class +Java::OrgW3cDom::Document+ is also accessible as +org.w3c.dom.Document+.
97
+ # @see #to_java
98
+
99
+ ##
100
+ # @!method to_java()
101
+ #
102
+ # Returns the underlying Java DOM document object for the {Document}.
103
+ #
104
+ # The returned Java object shares the same underlying data structure as the {Document}, so
105
+ # changes in one are reflected in the other.
106
+ #
107
+ # @return [Java::OrgW3cDom::Document]
108
+ # @note This method is only available when running JRuby.
109
+ # @note The class +Java::OrgW3cDom::Document+ is also accessible as +org.w3c.dom.Document+.
110
+ # @see .wrap
111
+
112
+
72
113
  # A list of Nokogiri::XML::SyntaxError found when parsing a document
73
114
  attr_accessor :errors
74
115
 
@@ -78,33 +119,58 @@ module Nokogiri
78
119
  end
79
120
 
80
121
  ##
81
- # Create an element with +name+, and optionally setting the content and attributes.
122
+ # Create a new +Element+ with +name+ sharing GC lifecycle with the document, optionally
123
+ # setting contents or attributes.
124
+ #
125
+ # Arguments may be passed to initialize the element:
126
+ # - a +Hash+ argument will be used to set attributes
127
+ # - a non-Hash object that responds to +#to_s+ will be used to set the new node's contents
128
+ #
129
+ # A block may be passed to mutate the node.
130
+ #
131
+ # @param name [String]
132
+ # @param contents_or_attrs [#to_s,Hash]
133
+ # @yieldparam node [Nokogiri::XML::Element]
134
+ # @return [Nokogiri::XML::Element]
135
+ #
136
+ # @example An empty element without attributes
137
+ # doc.create_element("div")
138
+ # # => <div></div>
82
139
  #
83
- # doc.create_element "div" # <div></div>
84
- # doc.create_element "div", :class => "container" # <div class='container'></div>
85
- # doc.create_element "div", "contents" # <div>contents</div>
86
- # doc.create_element "div", "contents", :class => "container" # <div class='container'>contents</div>
87
- # doc.create_element "div" { |node| node['class'] = "container" } # <div class='container'></div>
140
+ # @example An element with contents
141
+ # doc.create_element("div", "contents")
142
+ # # => <div>contents</div>
88
143
  #
89
- def create_element name, *args, &block
144
+ # @example An element with attributes
145
+ # doc.create_element("div", {"class" => "container"})
146
+ # # => <div class='container'></div>
147
+ #
148
+ # @example An element with contents and attributes
149
+ # doc.create_element("div", "contents", {"class" => "container"})
150
+ # # => <div class='container'>contents</div>
151
+ #
152
+ # @example Passing a block to mutate the element
153
+ # doc.create_element("div") { |node| node["class"] = "blue" if before_noon? }
154
+ #
155
+ def create_element(name, *contents_or_attrs, &block)
90
156
  elm = Nokogiri::XML::Element.new(name, self, &block)
91
- args.each do |arg|
157
+ contents_or_attrs.each do |arg|
92
158
  case arg
93
159
  when Hash
94
- arg.each { |k,v|
160
+ arg.each do |k, v|
95
161
  key = k.to_s
96
162
  if key =~ NCNAME_RE
97
- ns_name = key.split(":", 2)[1]
98
- elm.add_namespace_definition ns_name, v
163
+ ns_name = Regexp.last_match(1)
164
+ elm.add_namespace_definition(ns_name, v)
99
165
  else
100
166
  elm[k.to_s] = v.to_s
101
167
  end
102
- }
168
+ end
103
169
  else
104
170
  elm.content = arg
105
171
  end
106
172
  end
107
- if ns = elm.namespace_definitions.find { |n| n.prefix.nil? or n.prefix == '' }
173
+ if ns = elm.namespace_definitions.find { |n| n.prefix.nil? || (n.prefix == '') }
108
174
  elm.namespace = ns
109
175
  end
110
176
  elm
@@ -252,24 +318,14 @@ module Nokogiri
252
318
  end
253
319
  alias :<< :add_child
254
320
 
255
- ##
256
- # +JRuby+
257
- # Wraps Java's org.w3c.dom.document and returns Nokogiri::XML::Document
258
- def self.wrap(document) end if false # native-ext provides Document.wrap
259
-
260
- ##
261
- # +JRuby+
262
- # Returns Java's org.w3c.dom.document of this Document.
263
- def to_java; end if false # JRuby provides #to_java
264
-
265
321
  private
322
+
266
323
  def self.empty_doc? string_or_io
267
324
  string_or_io.nil? ||
268
325
  (string_or_io.respond_to?(:empty?) && string_or_io.empty?) ||
269
326
  (string_or_io.respond_to?(:eof?) && string_or_io.eof?)
270
327
  end
271
328
 
272
- # @private
273
329
  IMPLIED_XPATH_CONTEXTS = [ '//'.freeze ].freeze # :nodoc:
274
330
 
275
331
  def inspect_attributes
data/lib/nokogiri/xml/document_fragment.rb
@@ -141,6 +141,10 @@ module Nokogiri
141
141
  document.errors = things
142
142
  end
143
143
 
144
+ def fragment(data)
145
+ document.fragment(data)
146
+ end
147
+
144
148
  private
145
149
 
146
150
  # fix for issue 770
@@ -150,12 +154,6 @@ module Nokogiri
150
154
  %Q{xmlns#{prefix}="#{namespace.href}"}
151
155
  end.join ' '
152
156
  end
153
-
154
- def coerce data
155
- return super unless String === data
156
-
157
- document.fragment(data).children
158
- end
159
157
  end
160
158
  end
161
159
  end
data/lib/nokogiri/xml/node.rb
@@ -5,64 +5,54 @@ require "nokogiri/xml/node/save_options"
5
5
 
6
6
  module Nokogiri
7
7
  module XML
8
- ####
9
- # Nokogiri::XML::Node is your window to the fun filled world of dealing
10
- # with XML and HTML tags. A Nokogiri::XML::Node may be treated similarly
11
- # to a hash with regard to attributes. For example (from irb):
8
+ ##
9
+ # {Nokogiri::XML::Node} is your window to the fun filled world of dealing with XML and HTML
10
+ # tags. A {Nokogiri::XML::Node} may be treated similarly to a hash with regard to attributes. For
11
+ # example:
12
12
  #
13
- # irb(main):004:0> node
14
- # => <a href="#foo" id="link">link</a>
15
- # irb(main):005:0> node['href']
16
- # => "#foo"
17
- # irb(main):006:0> node.keys
18
- # => ["href", "id"]
19
- # irb(main):007:0> node.values
20
- # => ["#foo", "link"]
21
- # irb(main):008:0> node['class'] = 'green'
22
- # => "green"
23
- # irb(main):009:0> node
24
- # => <a href="#foo" id="link" class="green">link</a>
25
- # irb(main):010:0>
13
+ # node = Nokogiri::XML::DocumentFragment.parse("<a href='#foo' id='link'>link</a>").at_css("a")
14
+ # node.to_html # => "<a href=\"#foo\" id=\"link\">link</a>"
15
+ # node['href'] # => "#foo"
16
+ # node.keys # => ["href", "id"]
17
+ # node.values # => ["#foo", "link"]
18
+ # node['class'] = 'green' # => "green"
19
+ # node.to_html # => "<a href=\"#foo\" id=\"link\" class=\"green\">link</a>"
26
20
  #
27
- # See Nokogiri::XML::Node#[] and Nokogiri::XML#[]= for more information.
21
+ # See the method group entitled "Working With Node Attributes" for the full set of methods.
28
22
  #
29
- # Nokogiri::XML::Node also has methods that let you move around your
23
+ # {Nokogiri::XML::Node} also has methods that let you move around your
30
24
  # tree. For navigating your tree, see:
31
25
  #
32
- # * Nokogiri::XML::Node#parent
33
- # * Nokogiri::XML::Node#children
34
- # * Nokogiri::XML::Node#next
35
- # * Nokogiri::XML::Node#previous
36
- #
26
+ # * {#parent}
27
+ # * {#children}
28
+ # * {#next}
29
+ # * {#previous}
37
30
  #
38
31
  # When printing or otherwise emitting a document or a node (and
39
32
  # its subtree), there are a few methods you might want to use:
40
33
  #
41
- # * content, text, inner_text, to_str: emit plaintext
42
- #
43
- # These methods will all emit the plaintext version of your
44
- # document, meaning that entities will be replaced (e.g., "&lt;"
45
- # will be replaced with "<"), meaning that any sanitizing will
46
- # likely be un-done in the output.
34
+ # * {#content}, {#text}, {#inner_text}, {#to_str}: These methods will all <b>emit plaintext</b>,
35
+ # meaning that entities will be replaced (e.g., "&lt;" will be replaced with "<"), meaning
36
+ # that any sanitizing will likely be un-done in the output.
47
37
  #
48
- # * to_s, to_xml, to_html, inner_html: emit well-formed markup
38
+ # * {#to_s}, {#to_xml}, {#to_html}, {#inner_html}: These methods will all <b>emit
39
+ # properly-escaped markup</b>, meaning that it's suitable for consumption by browsers,
40
+ # parsers, etc.
49
41
  #
50
- # These methods will all emit properly-escaped markup, meaning
51
- # that it's suitable for consumption by browsers, parsers, etc.
42
+ # You may search this node's subtree using {#xpath} and {#css}
52
43
  #
53
- # You may search this node's subtree using Searchable#xpath and Searchable#css
54
44
  class Node
55
45
  include Nokogiri::XML::PP::Node
56
46
  include Nokogiri::XML::Searchable
57
47
  include Enumerable
58
48
 
59
- # Element node type, see Nokogiri::XML::Node#element?
49
+ # Element node type, see {Nokogiri::XML::Node#element?}
60
50
  ELEMENT_NODE = 1
61
51
  # Attribute node type
62
52
  ATTRIBUTE_NODE = 2
63
- # Text node type, see Nokogiri::XML::Node#text?
53
+ # Text node type, see {Nokogiri::XML::Node#text?}
64
54
  TEXT_NODE = 3
65
- # CDATA node type, see Nokogiri::XML::Node#cdata?
55
+ # CDATA node type, see {Nokogiri::XML::Node#cdata?}
66
56
  CDATA_SECTION_NODE = 4
67
57
  # Entity reference node type
68
58
  ENTITY_REF_NODE = 5
@@ -70,9 +60,9 @@ module Nokogiri
70
60
  ENTITY_NODE = 6
71
61
  # PI node type
72
62
  PI_NODE = 7
73
- # Comment node type, see Nokogiri::XML::Node#comment?
63
+ # Comment node type, see {Nokogiri::XML::Node#comment?}
74
64
  COMMENT_NODE = 8
75
- # Document node type, see Nokogiri::XML::Node#xml?
65
+ # Document node type, see {Nokogiri::XML::Node#xml?}
76
66
  DOCUMENT_NODE = 9
77
67
  # Document type node type
78
68
  DOCUMENT_TYPE_NODE = 10
@@ -80,7 +70,7 @@ module Nokogiri
80
70
  DOCUMENT_FRAG_NODE = 11
81
71
  # Notation node type
82
72
  NOTATION_NODE = 12
83
- # HTML document node type, see Nokogiri::XML::Node#html?
73
+ # HTML document node type, see {Nokogiri::XML::Node#html?}
84
74
  HTML_DOCUMENT_NODE = 13
85
75
  # DTD node type
86
76
  DTD_NODE = 14
@@ -99,8 +89,15 @@ module Nokogiri
99
89
  # DOCB document node type
100
90
  DOCB_DOCUMENT_NODE = 21
101
91
 
102
- def initialize(name, document) # :nodoc:
103
- # ... Ya. This is empty on purpose.
92
+ ##
93
+ # Create a new node with +name+ sharing GC lifecycle with +document+.
94
+ # @param name [String]
95
+ # @param document [Nokogiri::XML::Document]
96
+ # @yieldparam node [Nokogiri::XML::Node]
97
+ # @return [Nokogiri::XML::Node]
98
+ # @see Nokogiri::XML::Node.new
99
+ def initialize(name, document)
100
+ # This is intentionally empty.
104
101
  end
105
102
 
106
103
  ###
@@ -267,6 +264,8 @@ module Nokogiri
267
264
  #
268
265
  # Also see related method +swap+.
269
266
  def replace(node_or_tags)
267
+ raise("Cannot replace a node with no parent") unless parent
268
+
270
269
  # We cannot replace a text node directly, otherwise libxml will return
271
270
  # an internal error at parser.c:13031, I don't know exactly why
272
271
  # libxml is trying to find a parent node that is an element or document
@@ -278,7 +277,7 @@ module Nokogiri
278
277
  return replacee.replace node_or_tags
279
278
  end
280
279
 
281
- node_or_tags = coerce(node_or_tags)
280
+ node_or_tags = parent.coerce(node_or_tags)
282
281
 
283
282
  if node_or_tags.is_a?(XML::NodeSet)
284
283
  node_or_tags.each { |n| add_previous_sibling n }
@@ -819,13 +818,30 @@ module Nokogiri
819
818
 
820
819
  return Nokogiri::XML::NodeSet.new(document) if contents.empty?
821
820
 
822
- ##
823
- # This is a horrible hack, but I don't care. See #313 for background.
821
+ # libxml2 does not obey the `recover` option after encountering errors during `in_context`
822
+ # parsing, and so this horrible hack is here to try to emulate recovery behavior.
823
+ #
824
+ # Unfortunately, this means we're no longer parsing "in context" and so namespaces that
825
+ # would have been inherited from the context node won't be handled correctly. This hack was
826
+ # written in 2010, and I regret it, because it's silently degrading functionality in a way
827
+ # that's not easily prevented (or even detected).
828
+ #
829
+ # I think preferable behavior would be to either:
830
+ #
831
+ # a. add an error noting that we "fell back" and pointing the user to turning off the `recover` option
832
+ # b. don't recover, but raise a sensible exception
833
+ #
834
+ # For context and background: https://github.com/sparklemotion/nokogiri/issues/313
835
+ # FIXME bug report: https://github.com/sparklemotion/nokogiri/issues/2092
824
836
  error_count = document.errors.length
825
837
  node_set = in_context(contents, options.to_i)
826
- if node_set.empty? and document.errors.length > error_count and options.recover?
827
- fragment = Nokogiri::HTML::DocumentFragment.parse contents
828
- node_set = fragment.children
838
+ if (node_set.empty? && (document.errors.length > error_count))
839
+ if options.recover?
840
+ fragment = Nokogiri::HTML::DocumentFragment.parse contents
841
+ node_set = fragment.children
842
+ else
843
+ raise document.errors[error_count]
844
+ end
829
845
  end
830
846
  node_set
831
847
  end
@@ -1128,6 +1144,28 @@ module Nokogiri
1128
1144
 
1129
1145
  # @!endgroup
1130
1146
 
1147
+ protected
1148
+
1149
+ def coerce(data)
1150
+ case data
1151
+ when XML::NodeSet
1152
+ return data
1153
+ when XML::DocumentFragment
1154
+ return data.children
1155
+ when String
1156
+ return fragment(data).children
1157
+ when Document, XML::Attr
1158
+ # unacceptable
1159
+ when XML::Node
1160
+ return data
1161
+ end
1162
+
1163
+ raise ArgumentError, <<-EOERR
1164
+ Requires a Node, NodeSet or String argument, and cannot accept a #{data.class}.
1165
+ (You probably want to select a node from the Document with at() or search(), or create a new Node via Node.new().)
1166
+ EOERR
1167
+ end
1168
+
1131
1169
  private
1132
1170
 
1133
1171
  def keywordify(keywords)
@@ -1142,10 +1180,12 @@ module Nokogiri
1142
1180
  end
1143
1181
 
1144
1182
  def add_sibling(next_or_previous, node_or_tags)
1183
+ raise("Cannot add sibling to a node with no parent") unless parent
1184
+
1145
1185
  impl = (next_or_previous == :next) ? :add_next_sibling_node : :add_previous_sibling_node
1146
1186
  iter = (next_or_previous == :next) ? :reverse_each : :each
1147
1187
 
1148
- node_or_tags = coerce node_or_tags
1188
+ node_or_tags = parent.coerce(node_or_tags)
1149
1189
  if node_or_tags.is_a?(XML::NodeSet)
1150
1190
  if text?
1151
1191
  pivot = Nokogiri::XML::Node.new "dummy", document
@@ -1182,26 +1222,6 @@ module Nokogiri
1182
1222
  [:name, :namespace, :attribute_nodes, :children]
1183
1223
  end
1184
1224
 
1185
- def coerce(data)
1186
- case data
1187
- when XML::NodeSet
1188
- return data
1189
- when XML::DocumentFragment
1190
- return data.children
1191
- when String
1192
- return fragment(data).children
1193
- when Document, XML::Attr
1194
- # unacceptable
1195
- when XML::Node
1196
- return data
1197
- end
1198
-
1199
- raise ArgumentError, <<-EOERR
1200
- Requires a Node, NodeSet or String argument, and cannot accept a #{data.class}.
1201
- (You probably want to select a node from the Document with at() or search(), or create a new Node via Node.new().)
1202
- EOERR
1203
- end
1204
-
1205
1225
  # @private
1206
1226
  IMPLIED_XPATH_CONTEXTS = [".//".freeze].freeze
1207
1227