nokogiri 1.14.3 → 1.16.2

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (85) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +19 -15
  3. data/README.md +4 -1
  4. data/dependencies.yml +9 -8
  5. data/ext/nokogiri/extconf.rb +69 -26
  6. data/ext/nokogiri/html4_document.c +1 -2
  7. data/ext/nokogiri/html4_element_description.c +19 -14
  8. data/ext/nokogiri/html4_sax_parser_context.c +10 -16
  9. data/ext/nokogiri/html4_sax_push_parser.c +3 -3
  10. data/ext/nokogiri/nokogiri.c +46 -24
  11. data/ext/nokogiri/nokogiri.h +23 -5
  12. data/ext/nokogiri/test_global_handlers.c +1 -1
  13. data/ext/nokogiri/xml_attr.c +1 -1
  14. data/ext/nokogiri/xml_cdata.c +30 -17
  15. data/ext/nokogiri/xml_comment.c +1 -1
  16. data/ext/nokogiri/xml_document.c +113 -25
  17. data/ext/nokogiri/xml_document_fragment.c +1 -1
  18. data/ext/nokogiri/xml_dtd.c +1 -1
  19. data/ext/nokogiri/xml_element_content.c +32 -29
  20. data/ext/nokogiri/xml_element_decl.c +5 -5
  21. data/ext/nokogiri/xml_encoding_handler.c +12 -4
  22. data/ext/nokogiri/xml_entity_reference.c +1 -1
  23. data/ext/nokogiri/xml_namespace.c +11 -16
  24. data/ext/nokogiri/xml_node.c +13 -16
  25. data/ext/nokogiri/xml_node_set.c +125 -105
  26. data/ext/nokogiri/xml_processing_instruction.c +1 -1
  27. data/ext/nokogiri/xml_reader.c +60 -74
  28. data/ext/nokogiri/xml_relax_ng.c +66 -79
  29. data/ext/nokogiri/xml_sax_parser.c +24 -5
  30. data/ext/nokogiri/xml_sax_parser_context.c +50 -25
  31. data/ext/nokogiri/xml_sax_push_parser.c +30 -9
  32. data/ext/nokogiri/xml_schema.c +94 -115
  33. data/ext/nokogiri/xml_syntax_error.c +3 -3
  34. data/ext/nokogiri/xml_text.c +26 -13
  35. data/ext/nokogiri/xml_xpath_context.c +153 -83
  36. data/ext/nokogiri/xslt_stylesheet.c +111 -53
  37. data/gumbo-parser/Makefile +18 -0
  38. data/gumbo-parser/src/error.c +8 -4
  39. data/gumbo-parser/src/foreign_attrs.c +13 -14
  40. data/gumbo-parser/src/foreign_attrs.gperf +1 -1
  41. data/gumbo-parser/src/parser.c +21 -5
  42. data/gumbo-parser/src/tokenizer.c +1 -0
  43. data/lib/nokogiri/css/parser_extras.rb +1 -1
  44. data/lib/nokogiri/css/xpath_visitor.rb +3 -23
  45. data/lib/nokogiri/extension.rb +1 -1
  46. data/lib/nokogiri/html4/document.rb +1 -1
  47. data/lib/nokogiri/html4/document_fragment.rb +1 -1
  48. data/lib/nokogiri/html4/element_description_defaults.rb +1821 -353
  49. data/lib/nokogiri/html4/encoding_reader.rb +1 -1
  50. data/lib/nokogiri/html5/document_fragment.rb +1 -1
  51. data/lib/nokogiri/html5/node.rb +5 -0
  52. data/lib/nokogiri/html5.rb +0 -63
  53. data/lib/nokogiri/jruby/nokogiri_jars.rb +9 -9
  54. data/lib/nokogiri/version/constant.rb +1 -1
  55. data/lib/nokogiri/version/info.rb +6 -5
  56. data/lib/nokogiri/xml/attr.rb +2 -2
  57. data/lib/nokogiri/xml/attribute_decl.rb +4 -2
  58. data/lib/nokogiri/xml/document.rb +4 -5
  59. data/lib/nokogiri/xml/document_fragment.rb +3 -3
  60. data/lib/nokogiri/xml/element_content.rb +10 -2
  61. data/lib/nokogiri/xml/element_decl.rb +4 -2
  62. data/lib/nokogiri/xml/entity_decl.rb +4 -2
  63. data/lib/nokogiri/xml/namespace.rb +1 -2
  64. data/lib/nokogiri/xml/node/save_options.rb +8 -0
  65. data/lib/nokogiri/xml/node.rb +53 -37
  66. data/lib/nokogiri/xml/node_set.rb +3 -3
  67. data/lib/nokogiri/xml/pp/node.rb +23 -12
  68. data/lib/nokogiri/xml/reader.rb +10 -9
  69. data/lib/nokogiri/xml/sax/document.rb +1 -1
  70. data/lib/nokogiri/xml/searchable.rb +21 -13
  71. data/lib/nokogiri/xml/syntax_error.rb +1 -1
  72. data/lib/nokogiri/xml.rb +1 -1
  73. data/lib/nokogiri/xslt/stylesheet.rb +29 -7
  74. data/lib/nokogiri/xslt.rb +74 -4
  75. data/lib/nokogiri.rb +13 -5
  76. data/lib/xsd/xmlparser/nokogiri.rb +1 -1
  77. data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
  78. data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
  79. data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
  80. data/ports/archives/libxml2-2.12.5.tar.xz +0 -0
  81. data/ports/archives/libxslt-1.1.39.tar.xz +0 -0
  82. metadata +11 -9
  83. data/patches/libxslt/0001-update-automake-files-for-arm64.patch +0 -3037
  84. data/ports/archives/libxml2-2.10.4.tar.xz +0 -0
  85. data/ports/archives/libxslt-1.1.37.tar.xz +0 -0
@@ -94,7 +94,7 @@ module Nokogiri
94
94
  # no support for a call without len
95
95
 
96
96
  unless @firstchunk
97
- (@firstchunk = @io.read(len)) || (return nil)
97
+ (@firstchunk = @io.read(len)) || return
98
98
 
99
99
  # This implementation expects that the first call from
100
100
  # htmlReadIO() is made with a length long enough (~1KB) to
@@ -36,7 +36,7 @@ module Nokogiri
36
36
  attr_reader :quirks_mode
37
37
 
38
38
  # Create a document fragment.
39
- def initialize(doc, tags = nil, ctx = nil, options = {})
39
+ def initialize(doc, tags = nil, ctx = nil, options = {}) # rubocop:disable Lint/MissingSuper
40
40
  self.document = doc
41
41
  self.errors = []
42
42
  return self unless tags
@@ -17,6 +17,9 @@
17
17
  # limitations under the License.
18
18
  #
19
19
 
20
+ #
21
+ # TODO: this whole file should go away. maybe make it a decorator?
22
+ #
20
23
  require_relative "../xml/node"
21
24
 
22
25
  module Nokogiri
@@ -50,6 +53,8 @@ module Nokogiri
50
53
  config = XML::Node::SaveOptions.new(save_options.to_i)
51
54
  yield config if block_given?
52
55
 
56
+ encoding = encoding.is_a?(Encoding) ? encoding.name : encoding
57
+
53
58
  config_options = config.options
54
59
  if config_options & (XML::Node::SaveOptions::AS_XML | XML::Node::SaveOptions::AS_XHTML) != 0
55
60
  # Use Nokogiri's serializing code.
@@ -239,20 +239,6 @@ module Nokogiri
239
239
  DocumentFragment.parse(string, encoding, options)
240
240
  end
241
241
 
242
- # Fetch and parse a HTML document from the web, following redirects,
243
- # handling https, and determining the character encoding using HTML5
244
- # rules. +uri+ may be a +String+ or a +URI+. +options+ contains
245
- # http headers and special options. Everything which is not a
246
- # special option is considered a header. Special options include:
247
- # * :follow_limit => number of redirects which are followed
248
- # * :basic_auth => [username, password]
249
- def get(uri, options = {})
250
- # TODO: deprecate
251
- warn("Nokogiri::HTML5.get is deprecated and will be removed in a future version of Nokogiri.",
252
- uplevel: 1, category: :deprecated)
253
- get_impl(uri, options)
254
- end
255
-
256
242
  # :nodoc:
257
243
  def read_and_encode(string, encoding)
258
244
  # Read the string with the given encoding.
@@ -280,55 +266,6 @@ module Nokogiri
280
266
 
281
267
  private
282
268
 
283
- def get_impl(uri, options = {})
284
- headers = options.clone
285
- headers = { follow_limit: headers } if Numeric === headers # deprecated
286
- limit = headers[:follow_limit] ? headers.delete(:follow_limit).to_i : 10
287
-
288
- require "net/http"
289
- uri = URI(uri) unless URI === uri
290
-
291
- http = Net::HTTP.new(uri.host, uri.port)
292
-
293
- # TLS / SSL support
294
- http.use_ssl = true if uri.scheme == "https"
295
-
296
- # Pass through Net::HTTP override values, which currently include:
297
- # :ca_file, :ca_path, :cert, :cert_store, :ciphers,
298
- # :close_on_empty_response, :continue_timeout, :key, :open_timeout,
299
- # :read_timeout, :ssl_timeout, :ssl_version, :use_ssl,
300
- # :verify_callback, :verify_depth, :verify_mode
301
- options.each do |key, _value|
302
- http.send("#{key}=", headers.delete(key)) if http.respond_to?("#{key}=")
303
- end
304
-
305
- request = Net::HTTP::Get.new(uri.request_uri)
306
-
307
- # basic authentication
308
- auth = headers.delete(:basic_auth)
309
- auth ||= [uri.user, uri.password] if uri.user && uri.password
310
- request.basic_auth(auth.first, auth.last) if auth
311
-
312
- # remaining options are treated as headers
313
- headers.each { |key, value| request[key.to_s] = value.to_s }
314
-
315
- response = http.request(request)
316
-
317
- case response
318
- when Net::HTTPSuccess
319
- doc = parse(reencode(response.body, response["content-type"]), options)
320
- doc.instance_variable_set(:@response, response)
321
- doc.class.send(:attr_reader, :response)
322
- doc
323
- when Net::HTTPRedirection
324
- response.value if limit <= 1
325
- location = URI.join(uri, response["location"])
326
- get_impl(location, options.merge(follow_limit: limit - 1))
327
- else
328
- response.value
329
- end
330
- end
331
-
332
269
  # Charset sniffing is a complex and controversial topic that understandably isn't done _by
333
270
  # default_ by the Ruby Net::HTTP library. This being said, it is a very real problem for
334
271
  # consumers of HTML as the default for HTML is iso-8859-1, most "good" producers use utf-8, and
@@ -2,26 +2,26 @@
2
2
  begin
3
3
  require 'jar_dependencies'
4
4
  rescue LoadError
5
- require 'xalan/xalan/2.7.2/xalan-2.7.2.jar'
5
+ require 'xalan/serializer/2.7.3/serializer-2.7.3.jar'
6
6
  require 'net/sourceforge/htmlunit/neko-htmlunit/2.63.0/neko-htmlunit-2.63.0.jar'
7
7
  require 'nu/validator/jing/20200702VNU/jing-20200702VNU.jar'
8
8
  require 'xerces/xercesImpl/2.12.2/xercesImpl-2.12.2.jar'
9
- require 'org/nokogiri/nekodtd/0.1.11.noko1/nekodtd-0.1.11.noko1.jar'
10
9
  require 'net/sf/saxon/Saxon-HE/9.6.0-4/Saxon-HE-9.6.0-4.jar'
10
+ require 'xalan/xalan/2.7.3/xalan-2.7.3.jar'
11
11
  require 'xml-apis/xml-apis/1.4.01/xml-apis-1.4.01.jar'
12
- require 'xalan/serializer/2.7.2/serializer-2.7.2.jar'
12
+ require 'org/nokogiri/nekodtd/0.1.11.noko2/nekodtd-0.1.11.noko2.jar'
13
13
  require 'isorelax/isorelax/20030108/isorelax-20030108.jar'
14
14
  end
15
15
 
16
16
  if defined? Jars
17
- require_jar 'xalan', 'xalan', '2.7.2'
17
+ require_jar 'xalan', 'serializer', '2.7.3'
18
18
  require_jar 'net.sourceforge.htmlunit', 'neko-htmlunit', '2.63.0'
19
19
  require_jar 'nu.validator', 'jing', '20200702VNU'
20
20
  require_jar 'xerces', 'xercesImpl', '2.12.2'
21
- require_jar 'org.nokogiri', 'nekodtd', '0.1.11.noko1'
22
21
  require_jar 'net.sf.saxon', 'Saxon-HE', '9.6.0-4'
22
+ require_jar 'xalan', 'xalan', '2.7.3'
23
23
  require_jar 'xml-apis', 'xml-apis', '1.4.01'
24
- require_jar 'xalan', 'serializer', '2.7.2'
24
+ require_jar 'org.nokogiri', 'nekodtd', '0.1.11.noko2'
25
25
  require_jar 'isorelax', 'isorelax', '20030108'
26
26
  end
27
27
 
@@ -32,9 +32,9 @@ module Nokogiri
32
32
  "net.sf.saxon:Saxon-HE" => "9.6.0-4",
33
33
  "net.sourceforge.htmlunit:neko-htmlunit" => "2.63.0",
34
34
  "nu.validator:jing" => "20200702VNU",
35
- "org.nokogiri:nekodtd" => "0.1.11.noko1",
36
- "xalan:serializer" => "2.7.2",
37
- "xalan:xalan" => "2.7.2",
35
+ "org.nokogiri:nekodtd" => "0.1.11.noko2",
36
+ "xalan:serializer" => "2.7.3",
37
+ "xalan:xalan" => "2.7.3",
38
38
  "xerces:xercesImpl" => "2.12.2",
39
39
  "xml-apis:xml-apis" => "1.4.01",
40
40
  }.freeze
@@ -2,5 +2,5 @@
2
2
 
3
3
  module Nokogiri
4
4
  # The version of Nokogiri you are using
5
- VERSION = "1.14.3"
5
+ VERSION = "1.16.2"
6
6
  end
@@ -94,11 +94,14 @@ module Nokogiri
94
94
  nokogiri["version"] = Nokogiri::VERSION
95
95
 
96
96
  unless jruby?
97
- # enable gems like nokogumbo to build with the following in their extconf.rb:
97
+ # enable gems to build against Nokogiri with the following in their extconf.rb:
98
98
  #
99
99
  # append_cflags(Nokogiri::VERSION_INFO["nokogiri"]["cppflags"])
100
100
  # append_ldflags(Nokogiri::VERSION_INFO["nokogiri"]["ldflags"])
101
101
  #
102
+ # though, this won't work on all platforms and versions of Ruby, and won't be supported
103
+ # forever, see https://github.com/sparklemotion/nokogiri/discussions/2746 for context.
104
+ #
102
105
  cppflags = ["-I#{header_directory.shellescape}"]
103
106
  ldflags = []
104
107
 
@@ -108,7 +111,8 @@ module Nokogiri
108
111
  end
109
112
 
110
113
  if windows?
111
- # on windows, nokogumbo needs to link against nokogiri.so to resolve symbols. see #2167
114
+ # on windows, third party libraries that wish to link against nokogiri
115
+ # should link against nokogiri.so to resolve symbols. see #2167
112
116
  lib_directory = File.expand_path(File.join(File.dirname(__FILE__), "../#{ruby_minor}"))
113
117
  unless File.exist?(lib_directory)
114
118
  lib_directory = File.expand_path(File.join(File.dirname(__FILE__), ".."))
@@ -136,9 +140,6 @@ module Nokogiri
136
140
  libxml["source"] = "packaged"
137
141
  libxml["precompiled"] = libxml2_precompiled?
138
142
  libxml["patches"] = Nokogiri::LIBXML2_PATCHES
139
-
140
- # this is for nokogumbo and shouldn't be forever
141
- libxml["libxml2_path"] = header_directory
142
143
  else
143
144
  libxml["source"] = "system"
144
145
  end
@@ -18,8 +18,6 @@ module Nokogiri
18
18
  # - +value+ → (String) The value of the attribute.
19
19
  # - +namespace+ → (Namespace, nil) The Namespace of the attribute, or +nil+ if there is no namespace.
20
20
  #
21
- # ⚡ This is an experimental feature, available since v1.14.0
22
- #
23
21
  # *Example*
24
22
  #
25
23
  # doc = Nokogiri::XML.parse(<<~XML)
@@ -52,6 +50,8 @@ module Nokogiri
52
50
  # # href = "http://nokogiri.org/ns/noko"
53
51
  # # })}
54
52
  #
53
+ # Since v1.14.0
54
+ #
55
55
  def deconstruct_keys(keys)
56
56
  { name: name, value: value, namespace: namespace }
57
57
  end
@@ -12,8 +12,10 @@ module Nokogiri
12
12
  undef_method :namespace_definitions
13
13
  undef_method :line if method_defined?(:line)
14
14
 
15
- def inspect
16
- "#<#{self.class.name}:#{format("0x%x", object_id)} #{to_s.inspect}>"
15
+ private
16
+
17
+ def inspect_attributes
18
+ [:to_s]
17
19
  end
18
20
  end
19
21
  end
@@ -174,8 +174,7 @@ module Nokogiri
174
174
  # Since v1.12.4
175
175
  attr_accessor :namespace_inheritance
176
176
 
177
- # :nodoc:
178
- def initialize(*args) # rubocop:disable Lint/MissingSuper
177
+ def initialize(*args) # :nodoc: # rubocop:disable Lint/MissingSuper
179
178
  @errors = []
180
179
  @decorators = nil
181
180
  @namespace_inheritance = false
@@ -330,7 +329,7 @@ module Nokogiri
330
329
  # Validate this Document against its DTD. Returns a list of errors on
331
330
  # the document or +nil+ when there is no DTD.
332
331
  def validate
333
- return nil unless internal_subset
332
+ return unless internal_subset
334
333
 
335
334
  internal_subset.validate(self)
336
335
  end
@@ -427,8 +426,6 @@ module Nokogiri
427
426
  # instructions. If you have a use case and would like this functionality, please let us know
428
427
  # by opening an issue or a discussion on the github project.
429
428
  #
430
- # ⚡ This is an experimental feature, available since v1.14.0
431
- #
432
429
  # *Example*
433
430
  #
434
431
  # doc = Nokogiri::XML.parse(<<~XML)
@@ -455,6 +452,8 @@ module Nokogiri
455
452
  # doc.deconstruct_keys([:root])
456
453
  # # => {:root=>nil}
457
454
  #
455
+ # Since v1.14.0
456
+ #
458
457
  def deconstruct_keys(keys)
459
458
  { root: root }
460
459
  end
@@ -16,7 +16,7 @@ module Nokogiri
16
16
  # If +ctx+ is present, it is used as a context node for the
17
17
  # subtree created, e.g., namespaces will be resolved relative
18
18
  # to +ctx+.
19
- def initialize(document, tags = nil, ctx = nil, options = ParseOptions::DEFAULT_XML)
19
+ def initialize(document, tags = nil, ctx = nil, options = ParseOptions::DEFAULT_XML) # rubocop:disable Lint/MissingSuper
20
20
  return self unless tags
21
21
 
22
22
  options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
@@ -154,8 +154,6 @@ module Nokogiri
154
154
  # root elements, you should deconstruct the array returned by
155
155
  # <tt>DocumentFragment#elements</tt>.
156
156
  #
157
- # ⚡ This is an experimental feature, available since v1.14.0
158
- #
159
157
  # *Example*
160
158
  #
161
159
  # frag = Nokogiri::HTML5.fragment(<<~HTML)
@@ -187,6 +185,8 @@ module Nokogiri
187
185
  # # }),
188
186
  # # #(Element:0x398 { name = "div", children = [ #(Text "End")] })]
189
187
  #
188
+ # Since v1.14.0
189
+ #
190
190
  def deconstruct
191
191
  children.to_a
192
192
  end
@@ -11,9 +11,11 @@ module Nokogiri
11
11
  # ]>
12
12
  # </root>
13
13
  #
14
- # ElementContent represents the tree inside the <!ELEMENT> tag shown above
15
- # that lists the possible content for the div1 tag.
14
+ # ElementContent represents the binary tree inside the <!ELEMENT> tag shown above that lists the
15
+ # possible content for the div1 tag.
16
16
  class ElementContent
17
+ include Nokogiri::XML::PP::Node
18
+
17
19
  # Possible definitions of type
18
20
  PCDATA = 1
19
21
  ELEMENT = 2
@@ -33,6 +35,12 @@ module Nokogiri
33
35
  def children
34
36
  [c1, c2].compact
35
37
  end
38
+
39
+ private
40
+
41
+ def inspect_attributes
42
+ [:prefix, :name, :type, :occur, :children]
43
+ end
36
44
  end
37
45
  end
38
46
  end
@@ -7,8 +7,10 @@ module Nokogiri
7
7
  undef_method :namespace_definitions
8
8
  undef_method :line if method_defined?(:line)
9
9
 
10
- def inspect
11
- "#<#{self.class.name}:#{format("0x%x", object_id)} #{to_s.inspect}>"
10
+ private
11
+
12
+ def inspect_attributes
13
+ [:to_s]
12
14
  end
13
15
  end
14
16
  end
@@ -13,8 +13,10 @@ module Nokogiri
13
13
  doc.create_entity(name, *args)
14
14
  end
15
15
 
16
- def inspect
17
- "#<#{self.class.name}:#{format("0x%x", object_id)} #{to_s.inspect}>"
16
+ private
17
+
18
+ def inspect_attributes
19
+ [:to_s]
18
20
  end
19
21
  end
20
22
  end
@@ -16,8 +16,6 @@ module Nokogiri
16
16
  # - +prefix+ → (String, nil) The namespace's prefix, or +nil+ if there is no prefix (e.g., default namespace).
17
17
  # - +href+ → (String) The namespace's URI
18
18
  #
19
- # ⚡ This is an experimental feature, available since v1.14.0
20
- #
21
19
  # *Example*
22
20
  #
23
21
  # doc = Nokogiri::XML.parse(<<~XML)
@@ -43,6 +41,7 @@ module Nokogiri
43
41
  # doc.root.elements.last.namespace.deconstruct_keys([:prefix, :href])
44
42
  # # => {:prefix=>"noko", :href=>"http://nokogiri.org/ns/noko"}
45
43
  #
44
+ # Since v1.14.0
46
45
  #
47
46
  def deconstruct_keys(keys)
48
47
  { prefix: prefix, href: href }
@@ -62,6 +62,14 @@ module Nokogiri
62
62
  end
63
63
 
64
64
  alias_method :to_i, :options
65
+
66
+ def inspect
67
+ options = []
68
+ self.class.constants.each do |k|
69
+ options << k.downcase if send(:"#{k.downcase}?")
70
+ end
71
+ super.sub(/>$/, " " + options.join(", ") + ">")
72
+ end
65
73
  end
66
74
  end
67
75
  end
@@ -1049,29 +1049,35 @@ module Nokogiri
1049
1049
 
1050
1050
  return Nokogiri::XML::NodeSet.new(document) if contents.empty?
1051
1051
 
1052
- # libxml2 does not obey the +recover+ option after encountering errors during +in_context+
1053
- # parsing, and so this horrible hack is here to try to emulate recovery behavior.
1054
- #
1055
- # Unfortunately, this means we're no longer parsing "in context" and so namespaces that
1056
- # would have been inherited from the context node won't be handled correctly. This hack was
1057
- # written in 2010, and I regret it, because it's silently degrading functionality in a way
1058
- # that's not easily prevented (or even detected).
1059
- #
1060
- # I think preferable behavior would be to either:
1061
- #
1062
- # a. add an error noting that we "fell back" and pointing the user to turning off the +recover+ option
1063
- # b. don't recover, but raise a sensible exception
1064
- #
1065
- # For context and background: https://github.com/sparklemotion/nokogiri/issues/313
1066
- # FIXME bug report: https://github.com/sparklemotion/nokogiri/issues/2092
1067
1052
  error_count = document.errors.length
1068
1053
  node_set = in_context(contents, options.to_i)
1069
- if node_set.empty? && (document.errors.length > error_count)
1070
- if options.recover?
1054
+ if document.errors.length > error_count
1055
+ raise document.errors[error_count] unless options.recover?
1056
+
1057
+ if node_set.empty?
1058
+ # libxml2 < 2.13 does not obey the +recover+ option after encountering errors during
1059
+ # +in_context+ parsing, and so this horrible hack is here to try to emulate recovery
1060
+ # behavior.
1061
+ #
1062
+ # (Note that HTML4 fragment parsing seems to have been fixed in abd74186, and XML
1063
+ # fragment parsing is fixed in 1c106edf. Both are in 2.13.)
1064
+ #
1065
+ # Unfortunately, this means we're no longer parsing "in context" and so namespaces that
1066
+ # would have been inherited from the context node won't be handled correctly. This hack
1067
+ # was written in 2010, and I regret it, because it's silently degrading functionality in
1068
+ # a way that's not easily prevented (or even detected).
1069
+ #
1070
+ # I think preferable behavior would be to either:
1071
+ #
1072
+ # a. add an error noting that we "fell back" and pointing the user to turning off the
1073
+ # +recover+ option
1074
+ # b. don't recover, but raise a sensible exception
1075
+ #
1076
+ # For context and background:
1077
+ # - https://github.com/sparklemotion/nokogiri/issues/313
1078
+ # - https://github.com/sparklemotion/nokogiri/issues/2092
1071
1079
  fragment = document.related_class("DocumentFragment").parse(contents)
1072
1080
  node_set = fragment.children
1073
- else
1074
- raise document.errors[error_count]
1075
1081
  end
1076
1082
  end
1077
1083
  node_set
@@ -1165,7 +1171,7 @@ module Nokogiri
1165
1171
  # Fetch the Nokogiri::HTML4::ElementDescription for this node. Returns
1166
1172
  # nil on XML documents and on unknown tags.
1167
1173
  def description
1168
- return nil if document.xml?
1174
+ return if document.xml?
1169
1175
 
1170
1176
  Nokogiri::HTML4::ElementDescription[name]
1171
1177
  end
@@ -1254,8 +1260,8 @@ module Nokogiri
1254
1260
  # Compare two Node objects with respect to their Document. Nodes from
1255
1261
  # different documents cannot be compared.
1256
1262
  def <=>(other)
1257
- return nil unless other.is_a?(Nokogiri::XML::Node)
1258
- return nil unless document == other.document
1263
+ return unless other.is_a?(Nokogiri::XML::Node)
1264
+ return unless document == other.document
1259
1265
 
1260
1266
  compare(other)
1261
1267
  end
@@ -1269,15 +1275,16 @@ module Nokogiri
1269
1275
  #
1270
1276
  # These two statements are equivalent:
1271
1277
  #
1272
- # node.serialize(:encoding => 'UTF-8', :save_with => FORMAT | AS_XML)
1278
+ # node.serialize(encoding: 'UTF-8', save_with: FORMAT | AS_XML)
1273
1279
  #
1274
1280
  # or
1275
1281
  #
1276
- # node.serialize(:encoding => 'UTF-8') do |config|
1282
+ # node.serialize(encoding: 'UTF-8') do |config|
1277
1283
  # config.format.as_xml
1278
1284
  # end
1279
1285
  #
1280
1286
  def serialize(*args, &block)
1287
+ # TODO: deprecate non-hash options, see 46c68ed 2009-06-20 for context
1281
1288
  options = if args.first.is_a?(Hash)
1282
1289
  args.shift
1283
1290
  else
@@ -1310,7 +1317,7 @@ module Nokogiri
1310
1317
  ###
1311
1318
  # Serialize this Node to XML using +options+
1312
1319
  #
1313
- # doc.to_xml(:indent => 5, :encoding => 'UTF-8')
1320
+ # doc.to_xml(indent: 5, encoding: 'UTF-8')
1314
1321
  #
1315
1322
  # See Node#write_to for a list of +options+
1316
1323
  def to_xml(options = {})
@@ -1321,7 +1328,7 @@ module Nokogiri
1321
1328
  ###
1322
1329
  # Serialize this Node to XHTML using +options+
1323
1330
  #
1324
- # doc.to_xhtml(:indent => 5, :encoding => 'UTF-8')
1331
+ # doc.to_xhtml(indent: 5, encoding: 'UTF-8')
1325
1332
  #
1326
1333
  # See Node#write_to for a list of +options+
1327
1334
  def to_xhtml(options = {})
@@ -1329,25 +1336,32 @@ module Nokogiri
1329
1336
  end
1330
1337
 
1331
1338
  ###
1332
- # Write Node to +io+ with +options+. +options+ modify the output of
1333
- # this method. Valid options are:
1339
+ # :call-seq:
1340
+ # write_to(io, *options)
1341
+ #
1342
+ # Serialize this node or document to +io+.
1343
+ #
1344
+ # [Parameters]
1345
+ # - +io+ (IO) An IO-like object to which the serialized content will be written.
1346
+ # - +options+ (Hash) See below
1334
1347
  #
1335
- # * +:encoding+ for changing the encoding
1336
- # * +:indent_text+ the indentation text, defaults to one space
1337
- # * +:indent+ the number of +:indent_text+ to use, defaults to 2
1338
- # * +:save_with+ a combination of SaveOptions constants.
1348
+ # [Options]
1349
+ # * +:encoding+ (String or Encoding) specify the encoding of the output (defaults to document encoding)
1350
+ # * +:indent_text+ (String) the indentation text (defaults to <code>" "</code>)
1351
+ # * +:indent+ (Integer) the number of +:indent_text+ to use (defaults to +2+)
1352
+ # * +:save_with+ (Integer) a combination of SaveOptions constants
1339
1353
  #
1340
1354
  # To save with UTF-8 indented twice:
1341
1355
  #
1342
- # node.write_to(io, :encoding => 'UTF-8', :indent => 2)
1356
+ # node.write_to(io, encoding: 'UTF-8', indent: 2)
1343
1357
  #
1344
1358
  # To save indented with two dashes:
1345
1359
  #
1346
- # node.write_to(io, :indent_text => '-', :indent => 2)
1360
+ # node.write_to(io, indent_text: '-', indent: 2)
1347
1361
  #
1348
1362
  def write_to(io, *options)
1349
1363
  options = options.first.is_a?(Hash) ? options.shift : {}
1350
- encoding = options[:encoding] || options[0]
1364
+ encoding = options[:encoding] || options[0] || document.encoding
1351
1365
  if Nokogiri.jruby?
1352
1366
  save_options = options[:save_with] || options[1]
1353
1367
  indent_times = options[:indent] || 0
@@ -1365,6 +1379,8 @@ module Nokogiri
1365
1379
  config = SaveOptions.new(save_options.to_i)
1366
1380
  yield config if block_given?
1367
1381
 
1382
+ encoding = encoding.is_a?(Encoding) ? encoding.name : encoding
1383
+
1368
1384
  native_write_to(io, encoding, indentation, config.options)
1369
1385
  end
1370
1386
 
@@ -1420,8 +1436,6 @@ module Nokogiri
1420
1436
  # - +content+ → (String) The contents of all the text nodes in this node's subtree. See #content.
1421
1437
  # - +inner_html+ → (String) The inner markup for the children of this node. See #inner_html.
1422
1438
  #
1423
- # ⚡ This is an experimental feature, available since v1.14.0
1424
- #
1425
1439
  # *Example*
1426
1440
  #
1427
1441
  # doc = Nokogiri::XML.parse(<<~XML)
@@ -1456,6 +1470,8 @@ module Nokogiri
1456
1470
  # # value = "def"
1457
1471
  # # })]}
1458
1472
  #
1473
+ # Since v1.14.0
1474
+ #
1459
1475
  def deconstruct_keys(keys)
1460
1476
  requested_keys = DECONSTRUCT_KEYS & keys
1461
1477
  {}.tap do |values|
@@ -372,7 +372,7 @@ module Nokogiri
372
372
  # Removes the last element from set and returns it, or +nil+ if
373
373
  # the set is empty
374
374
  def pop
375
- return nil if length == 0
375
+ return if length == 0
376
376
 
377
377
  delete(last)
378
378
  end
@@ -381,7 +381,7 @@ module Nokogiri
381
381
  # Returns the first element of the NodeSet and removes it. Returns
382
382
  # +nil+ if the set is empty.
383
383
  def shift
384
- return nil if length == 0
384
+ return if length == 0
385
385
 
386
386
  delete(first)
387
387
  end
@@ -435,7 +435,7 @@ module Nokogiri
435
435
  #
436
436
  # Returns the members of this NodeSet as an array, to use in pattern matching.
437
437
  #
438
- # This is an experimental feature, available since v1.14.0
438
+ # Since v1.14.0
439
439
  #
440
440
  def deconstruct
441
441
  to_a
@@ -13,9 +13,14 @@ module Nokogiri
13
13
  !attribute || (attribute.respond_to?(:empty?) && attribute.empty?)
14
14
  rescue NoMethodError
15
15
  true
16
- end.map do |attribute|
17
- "#{attribute.to_s.sub(/_\w+/, "s")}=#{send(attribute).inspect}"
18
- end.join(" ")
16
+ end
17
+ attributes = if inspect_attributes.length == 1
18
+ send(attributes.first).inspect
19
+ else
20
+ attributes.map do |attribute|
21
+ "#{attribute}=#{send(attribute).inspect}"
22
+ end.join(" ")
23
+ end
19
24
  "#<#{self.class.name}:#{format("0x%x", object_id)} #{attributes}>"
20
25
  end
21
26
 
@@ -23,6 +28,7 @@ module Nokogiri
23
28
  nice_name = self.class.name.split("::").last
24
29
  pp.group(2, "#(#{nice_name}:#{format("0x%x", object_id)} {", "})") do
25
30
  pp.breakable
31
+
26
32
  attrs = inspect_attributes.filter_map do |t|
27
33
  [t, send(t)] if respond_to?(t)
28
34
  end.find_all do |x|
@@ -35,19 +41,24 @@ module Nokogiri
35
41
  end
36
42
  end
37
43
 
38
- pp.seplist(attrs) do |v|
39
- if COLLECTIONS.include?(v.first)
40
- pp.group(2, "#{v.first.to_s.sub(/_\w+$/, "s")} = [", "]") do
41
- pp.breakable
42
- pp.seplist(v.last) do |item|
43
- pp.pp(item)
44
+ if inspect_attributes.length == 1
45
+ pp.pp(attrs.first.last)
46
+ else
47
+ pp.seplist(attrs) do |v|
48
+ if COLLECTIONS.include?(v.first)
49
+ pp.group(2, "#{v.first} = [", "]") do
50
+ pp.breakable
51
+ pp.seplist(v.last) do |item|
52
+ pp.pp(item)
53
+ end
44
54
  end
55
+ else
56
+ pp.text("#{v.first} = ")
57
+ pp.pp(v.last)
45
58
  end
46
- else
47
- pp.text("#{v.first} = ")
48
- pp.pp(v.last)
49
59
  end
50
60
  end
61
+
51
62
  pp.breakable
52
63
  end
53
64
  end