nokogiri 1.14.2 → 1.16.6

Sign up to get free protection for your applications and to get access to all the features.
Files changed (85) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +19 -15
  3. data/README.md +4 -1
  4. data/dependencies.yml +9 -8
  5. data/ext/nokogiri/extconf.rb +69 -26
  6. data/ext/nokogiri/html4_document.c +1 -2
  7. data/ext/nokogiri/html4_element_description.c +19 -14
  8. data/ext/nokogiri/html4_sax_parser_context.c +10 -16
  9. data/ext/nokogiri/html4_sax_push_parser.c +3 -3
  10. data/ext/nokogiri/nokogiri.c +46 -24
  11. data/ext/nokogiri/nokogiri.h +23 -5
  12. data/ext/nokogiri/test_global_handlers.c +1 -1
  13. data/ext/nokogiri/xml_attr.c +1 -1
  14. data/ext/nokogiri/xml_cdata.c +30 -17
  15. data/ext/nokogiri/xml_comment.c +1 -1
  16. data/ext/nokogiri/xml_document.c +113 -25
  17. data/ext/nokogiri/xml_document_fragment.c +1 -1
  18. data/ext/nokogiri/xml_dtd.c +1 -1
  19. data/ext/nokogiri/xml_element_content.c +32 -29
  20. data/ext/nokogiri/xml_element_decl.c +5 -5
  21. data/ext/nokogiri/xml_encoding_handler.c +12 -4
  22. data/ext/nokogiri/xml_entity_reference.c +1 -1
  23. data/ext/nokogiri/xml_namespace.c +11 -16
  24. data/ext/nokogiri/xml_node.c +13 -16
  25. data/ext/nokogiri/xml_node_set.c +125 -105
  26. data/ext/nokogiri/xml_processing_instruction.c +1 -1
  27. data/ext/nokogiri/xml_reader.c +61 -74
  28. data/ext/nokogiri/xml_relax_ng.c +66 -79
  29. data/ext/nokogiri/xml_sax_parser.c +24 -5
  30. data/ext/nokogiri/xml_sax_parser_context.c +50 -25
  31. data/ext/nokogiri/xml_sax_push_parser.c +30 -9
  32. data/ext/nokogiri/xml_schema.c +94 -115
  33. data/ext/nokogiri/xml_syntax_error.c +3 -3
  34. data/ext/nokogiri/xml_text.c +26 -13
  35. data/ext/nokogiri/xml_xpath_context.c +153 -83
  36. data/ext/nokogiri/xslt_stylesheet.c +111 -53
  37. data/gumbo-parser/Makefile +18 -0
  38. data/gumbo-parser/src/error.c +8 -4
  39. data/gumbo-parser/src/foreign_attrs.c +13 -14
  40. data/gumbo-parser/src/foreign_attrs.gperf +1 -1
  41. data/gumbo-parser/src/parser.c +21 -5
  42. data/gumbo-parser/src/tokenizer.c +1 -0
  43. data/lib/nokogiri/css/parser_extras.rb +1 -1
  44. data/lib/nokogiri/css/xpath_visitor.rb +3 -23
  45. data/lib/nokogiri/extension.rb +1 -1
  46. data/lib/nokogiri/html4/document.rb +1 -1
  47. data/lib/nokogiri/html4/document_fragment.rb +1 -1
  48. data/lib/nokogiri/html4/element_description_defaults.rb +1821 -353
  49. data/lib/nokogiri/html4/encoding_reader.rb +1 -1
  50. data/lib/nokogiri/html5/document_fragment.rb +1 -1
  51. data/lib/nokogiri/html5/node.rb +5 -0
  52. data/lib/nokogiri/html5.rb +0 -63
  53. data/lib/nokogiri/jruby/nokogiri_jars.rb +9 -9
  54. data/lib/nokogiri/version/constant.rb +1 -1
  55. data/lib/nokogiri/version/info.rb +6 -5
  56. data/lib/nokogiri/xml/attr.rb +2 -2
  57. data/lib/nokogiri/xml/attribute_decl.rb +4 -2
  58. data/lib/nokogiri/xml/document.rb +4 -5
  59. data/lib/nokogiri/xml/document_fragment.rb +3 -3
  60. data/lib/nokogiri/xml/element_content.rb +10 -2
  61. data/lib/nokogiri/xml/element_decl.rb +4 -2
  62. data/lib/nokogiri/xml/entity_decl.rb +4 -2
  63. data/lib/nokogiri/xml/namespace.rb +1 -2
  64. data/lib/nokogiri/xml/node/save_options.rb +8 -0
  65. data/lib/nokogiri/xml/node.rb +53 -37
  66. data/lib/nokogiri/xml/node_set.rb +3 -3
  67. data/lib/nokogiri/xml/pp/node.rb +23 -12
  68. data/lib/nokogiri/xml/reader.rb +10 -9
  69. data/lib/nokogiri/xml/sax/document.rb +1 -1
  70. data/lib/nokogiri/xml/searchable.rb +21 -13
  71. data/lib/nokogiri/xml/syntax_error.rb +1 -1
  72. data/lib/nokogiri/xml.rb +1 -1
  73. data/lib/nokogiri/xslt/stylesheet.rb +29 -7
  74. data/lib/nokogiri/xslt.rb +74 -4
  75. data/lib/nokogiri.rb +13 -5
  76. data/lib/xsd/xmlparser/nokogiri.rb +1 -1
  77. data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
  78. data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
  79. data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
  80. data/ports/archives/libxml2-2.12.8.tar.xz +0 -0
  81. data/ports/archives/libxslt-1.1.39.tar.xz +0 -0
  82. metadata +16 -12
  83. data/patches/libxslt/0001-update-automake-files-for-arm64.patch +0 -3037
  84. data/ports/archives/libxml2-2.10.3.tar.xz +0 -0
  85. data/ports/archives/libxslt-1.1.37.tar.xz +0 -0
@@ -94,7 +94,7 @@ module Nokogiri
94
94
  # no support for a call without len
95
95
 
96
96
  unless @firstchunk
97
- (@firstchunk = @io.read(len)) || (return nil)
97
+ (@firstchunk = @io.read(len)) || return
98
98
 
99
99
  # This implementation expects that the first call from
100
100
  # htmlReadIO() is made with a length long enough (~1KB) to
@@ -36,7 +36,7 @@ module Nokogiri
36
36
  attr_reader :quirks_mode
37
37
 
38
38
  # Create a document fragment.
39
- def initialize(doc, tags = nil, ctx = nil, options = {})
39
+ def initialize(doc, tags = nil, ctx = nil, options = {}) # rubocop:disable Lint/MissingSuper
40
40
  self.document = doc
41
41
  self.errors = []
42
42
  return self unless tags
@@ -17,6 +17,9 @@
17
17
  # limitations under the License.
18
18
  #
19
19
 
20
+ #
21
+ # TODO: this whole file should go away. maybe make it a decorator?
22
+ #
20
23
  require_relative "../xml/node"
21
24
 
22
25
  module Nokogiri
@@ -50,6 +53,8 @@ module Nokogiri
50
53
  config = XML::Node::SaveOptions.new(save_options.to_i)
51
54
  yield config if block_given?
52
55
 
56
+ encoding = encoding.is_a?(Encoding) ? encoding.name : encoding
57
+
53
58
  config_options = config.options
54
59
  if config_options & (XML::Node::SaveOptions::AS_XML | XML::Node::SaveOptions::AS_XHTML) != 0
55
60
  # Use Nokogiri's serializing code.
@@ -239,20 +239,6 @@ module Nokogiri
239
239
  DocumentFragment.parse(string, encoding, options)
240
240
  end
241
241
 
242
- # Fetch and parse a HTML document from the web, following redirects,
243
- # handling https, and determining the character encoding using HTML5
244
- # rules. +uri+ may be a +String+ or a +URI+. +options+ contains
245
- # http headers and special options. Everything which is not a
246
- # special option is considered a header. Special options include:
247
- # * :follow_limit => number of redirects which are followed
248
- # * :basic_auth => [username, password]
249
- def get(uri, options = {})
250
- # TODO: deprecate
251
- warn("Nokogiri::HTML5.get is deprecated and will be removed in a future version of Nokogiri.",
252
- uplevel: 1, category: :deprecated)
253
- get_impl(uri, options)
254
- end
255
-
256
242
  # :nodoc:
257
243
  def read_and_encode(string, encoding)
258
244
  # Read the string with the given encoding.
@@ -280,55 +266,6 @@ module Nokogiri
280
266
 
281
267
  private
282
268
 
283
- def get_impl(uri, options = {})
284
- headers = options.clone
285
- headers = { follow_limit: headers } if Numeric === headers # deprecated
286
- limit = headers[:follow_limit] ? headers.delete(:follow_limit).to_i : 10
287
-
288
- require "net/http"
289
- uri = URI(uri) unless URI === uri
290
-
291
- http = Net::HTTP.new(uri.host, uri.port)
292
-
293
- # TLS / SSL support
294
- http.use_ssl = true if uri.scheme == "https"
295
-
296
- # Pass through Net::HTTP override values, which currently include:
297
- # :ca_file, :ca_path, :cert, :cert_store, :ciphers,
298
- # :close_on_empty_response, :continue_timeout, :key, :open_timeout,
299
- # :read_timeout, :ssl_timeout, :ssl_version, :use_ssl,
300
- # :verify_callback, :verify_depth, :verify_mode
301
- options.each do |key, _value|
302
- http.send("#{key}=", headers.delete(key)) if http.respond_to?("#{key}=")
303
- end
304
-
305
- request = Net::HTTP::Get.new(uri.request_uri)
306
-
307
- # basic authentication
308
- auth = headers.delete(:basic_auth)
309
- auth ||= [uri.user, uri.password] if uri.user && uri.password
310
- request.basic_auth(auth.first, auth.last) if auth
311
-
312
- # remaining options are treated as headers
313
- headers.each { |key, value| request[key.to_s] = value.to_s }
314
-
315
- response = http.request(request)
316
-
317
- case response
318
- when Net::HTTPSuccess
319
- doc = parse(reencode(response.body, response["content-type"]), options)
320
- doc.instance_variable_set(:@response, response)
321
- doc.class.send(:attr_reader, :response)
322
- doc
323
- when Net::HTTPRedirection
324
- response.value if limit <= 1
325
- location = URI.join(uri, response["location"])
326
- get_impl(location, options.merge(follow_limit: limit - 1))
327
- else
328
- response.value
329
- end
330
- end
331
-
332
269
  # Charset sniffing is a complex and controversial topic that understandably isn't done _by
333
270
  # default_ by the Ruby Net::HTTP library. This being said, it is a very real problem for
334
271
  # consumers of HTML as the default for HTML is iso-8859-1, most "good" producers use utf-8, and
@@ -2,26 +2,26 @@
2
2
  begin
3
3
  require 'jar_dependencies'
4
4
  rescue LoadError
5
- require 'xalan/xalan/2.7.2/xalan-2.7.2.jar'
5
+ require 'xalan/serializer/2.7.3/serializer-2.7.3.jar'
6
6
  require 'net/sourceforge/htmlunit/neko-htmlunit/2.63.0/neko-htmlunit-2.63.0.jar'
7
7
  require 'nu/validator/jing/20200702VNU/jing-20200702VNU.jar'
8
8
  require 'xerces/xercesImpl/2.12.2/xercesImpl-2.12.2.jar'
9
- require 'org/nokogiri/nekodtd/0.1.11.noko1/nekodtd-0.1.11.noko1.jar'
10
9
  require 'net/sf/saxon/Saxon-HE/9.6.0-4/Saxon-HE-9.6.0-4.jar'
10
+ require 'xalan/xalan/2.7.3/xalan-2.7.3.jar'
11
11
  require 'xml-apis/xml-apis/1.4.01/xml-apis-1.4.01.jar'
12
- require 'xalan/serializer/2.7.2/serializer-2.7.2.jar'
12
+ require 'org/nokogiri/nekodtd/0.1.11.noko2/nekodtd-0.1.11.noko2.jar'
13
13
  require 'isorelax/isorelax/20030108/isorelax-20030108.jar'
14
14
  end
15
15
 
16
16
  if defined? Jars
17
- require_jar 'xalan', 'xalan', '2.7.2'
17
+ require_jar 'xalan', 'serializer', '2.7.3'
18
18
  require_jar 'net.sourceforge.htmlunit', 'neko-htmlunit', '2.63.0'
19
19
  require_jar 'nu.validator', 'jing', '20200702VNU'
20
20
  require_jar 'xerces', 'xercesImpl', '2.12.2'
21
- require_jar 'org.nokogiri', 'nekodtd', '0.1.11.noko1'
22
21
  require_jar 'net.sf.saxon', 'Saxon-HE', '9.6.0-4'
22
+ require_jar 'xalan', 'xalan', '2.7.3'
23
23
  require_jar 'xml-apis', 'xml-apis', '1.4.01'
24
- require_jar 'xalan', 'serializer', '2.7.2'
24
+ require_jar 'org.nokogiri', 'nekodtd', '0.1.11.noko2'
25
25
  require_jar 'isorelax', 'isorelax', '20030108'
26
26
  end
27
27
 
@@ -32,9 +32,9 @@ module Nokogiri
32
32
  "net.sf.saxon:Saxon-HE" => "9.6.0-4",
33
33
  "net.sourceforge.htmlunit:neko-htmlunit" => "2.63.0",
34
34
  "nu.validator:jing" => "20200702VNU",
35
- "org.nokogiri:nekodtd" => "0.1.11.noko1",
36
- "xalan:serializer" => "2.7.2",
37
- "xalan:xalan" => "2.7.2",
35
+ "org.nokogiri:nekodtd" => "0.1.11.noko2",
36
+ "xalan:serializer" => "2.7.3",
37
+ "xalan:xalan" => "2.7.3",
38
38
  "xerces:xercesImpl" => "2.12.2",
39
39
  "xml-apis:xml-apis" => "1.4.01",
40
40
  }.freeze
@@ -2,5 +2,5 @@
2
2
 
3
3
  module Nokogiri
4
4
  # The version of Nokogiri you are using
5
- VERSION = "1.14.2"
5
+ VERSION = "1.16.6"
6
6
  end
@@ -94,11 +94,14 @@ module Nokogiri
94
94
  nokogiri["version"] = Nokogiri::VERSION
95
95
 
96
96
  unless jruby?
97
- # enable gems like nokogumbo to build with the following in their extconf.rb:
97
+ # enable gems to build against Nokogiri with the following in their extconf.rb:
98
98
  #
99
99
  # append_cflags(Nokogiri::VERSION_INFO["nokogiri"]["cppflags"])
100
100
  # append_ldflags(Nokogiri::VERSION_INFO["nokogiri"]["ldflags"])
101
101
  #
102
+ # though, this won't work on all platforms and versions of Ruby, and won't be supported
103
+ # forever, see https://github.com/sparklemotion/nokogiri/discussions/2746 for context.
104
+ #
102
105
  cppflags = ["-I#{header_directory.shellescape}"]
103
106
  ldflags = []
104
107
 
@@ -108,7 +111,8 @@ module Nokogiri
108
111
  end
109
112
 
110
113
  if windows?
111
- # on windows, nokogumbo needs to link against nokogiri.so to resolve symbols. see #2167
114
+ # on windows, third party libraries that wish to link against nokogiri
115
+ # should link against nokogiri.so to resolve symbols. see #2167
112
116
  lib_directory = File.expand_path(File.join(File.dirname(__FILE__), "../#{ruby_minor}"))
113
117
  unless File.exist?(lib_directory)
114
118
  lib_directory = File.expand_path(File.join(File.dirname(__FILE__), ".."))
@@ -136,9 +140,6 @@ module Nokogiri
136
140
  libxml["source"] = "packaged"
137
141
  libxml["precompiled"] = libxml2_precompiled?
138
142
  libxml["patches"] = Nokogiri::LIBXML2_PATCHES
139
-
140
- # this is for nokogumbo and shouldn't be forever
141
- libxml["libxml2_path"] = header_directory
142
143
  else
143
144
  libxml["source"] = "system"
144
145
  end
@@ -18,8 +18,6 @@ module Nokogiri
18
18
  # - +value+ → (String) The value of the attribute.
19
19
  # - +namespace+ → (Namespace, nil) The Namespace of the attribute, or +nil+ if there is no namespace.
20
20
  #
21
- # ⚡ This is an experimental feature, available since v1.14.0
22
- #
23
21
  # *Example*
24
22
  #
25
23
  # doc = Nokogiri::XML.parse(<<~XML)
@@ -52,6 +50,8 @@ module Nokogiri
52
50
  # # href = "http://nokogiri.org/ns/noko"
53
51
  # # })}
54
52
  #
53
+ # Since v1.14.0
54
+ #
55
55
  def deconstruct_keys(keys)
56
56
  { name: name, value: value, namespace: namespace }
57
57
  end
@@ -12,8 +12,10 @@ module Nokogiri
12
12
  undef_method :namespace_definitions
13
13
  undef_method :line if method_defined?(:line)
14
14
 
15
- def inspect
16
- "#<#{self.class.name}:#{format("0x%x", object_id)} #{to_s.inspect}>"
15
+ private
16
+
17
+ def inspect_attributes
18
+ [:to_s]
17
19
  end
18
20
  end
19
21
  end
@@ -174,8 +174,7 @@ module Nokogiri
174
174
  # Since v1.12.4
175
175
  attr_accessor :namespace_inheritance
176
176
 
177
- # :nodoc:
178
- def initialize(*args) # rubocop:disable Lint/MissingSuper
177
+ def initialize(*args) # :nodoc: # rubocop:disable Lint/MissingSuper
179
178
  @errors = []
180
179
  @decorators = nil
181
180
  @namespace_inheritance = false
@@ -330,7 +329,7 @@ module Nokogiri
330
329
  # Validate this Document against its DTD. Returns a list of errors on
331
330
  # the document or +nil+ when there is no DTD.
332
331
  def validate
333
- return nil unless internal_subset
332
+ return unless internal_subset
334
333
 
335
334
  internal_subset.validate(self)
336
335
  end
@@ -427,8 +426,6 @@ module Nokogiri
427
426
  # instructions. If you have a use case and would like this functionality, please let us know
428
427
  # by opening an issue or a discussion on the github project.
429
428
  #
430
- # ⚡ This is an experimental feature, available since v1.14.0
431
- #
432
429
  # *Example*
433
430
  #
434
431
  # doc = Nokogiri::XML.parse(<<~XML)
@@ -455,6 +452,8 @@ module Nokogiri
455
452
  # doc.deconstruct_keys([:root])
456
453
  # # => {:root=>nil}
457
454
  #
455
+ # Since v1.14.0
456
+ #
458
457
  def deconstruct_keys(keys)
459
458
  { root: root }
460
459
  end
@@ -16,7 +16,7 @@ module Nokogiri
16
16
  # If +ctx+ is present, it is used as a context node for the
17
17
  # subtree created, e.g., namespaces will be resolved relative
18
18
  # to +ctx+.
19
- def initialize(document, tags = nil, ctx = nil, options = ParseOptions::DEFAULT_XML)
19
+ def initialize(document, tags = nil, ctx = nil, options = ParseOptions::DEFAULT_XML) # rubocop:disable Lint/MissingSuper
20
20
  return self unless tags
21
21
 
22
22
  options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
@@ -154,8 +154,6 @@ module Nokogiri
154
154
  # root elements, you should deconstruct the array returned by
155
155
  # <tt>DocumentFragment#elements</tt>.
156
156
  #
157
- # ⚡ This is an experimental feature, available since v1.14.0
158
- #
159
157
  # *Example*
160
158
  #
161
159
  # frag = Nokogiri::HTML5.fragment(<<~HTML)
@@ -187,6 +185,8 @@ module Nokogiri
187
185
  # # }),
188
186
  # # #(Element:0x398 { name = "div", children = [ #(Text "End")] })]
189
187
  #
188
+ # Since v1.14.0
189
+ #
190
190
  def deconstruct
191
191
  children.to_a
192
192
  end
@@ -11,9 +11,11 @@ module Nokogiri
11
11
  # ]>
12
12
  # </root>
13
13
  #
14
- # ElementContent represents the tree inside the <!ELEMENT> tag shown above
15
- # that lists the possible content for the div1 tag.
14
+ # ElementContent represents the binary tree inside the <!ELEMENT> tag shown above that lists the
15
+ # possible content for the div1 tag.
16
16
  class ElementContent
17
+ include Nokogiri::XML::PP::Node
18
+
17
19
  # Possible definitions of type
18
20
  PCDATA = 1
19
21
  ELEMENT = 2
@@ -33,6 +35,12 @@ module Nokogiri
33
35
  def children
34
36
  [c1, c2].compact
35
37
  end
38
+
39
+ private
40
+
41
+ def inspect_attributes
42
+ [:prefix, :name, :type, :occur, :children]
43
+ end
36
44
  end
37
45
  end
38
46
  end
@@ -7,8 +7,10 @@ module Nokogiri
7
7
  undef_method :namespace_definitions
8
8
  undef_method :line if method_defined?(:line)
9
9
 
10
- def inspect
11
- "#<#{self.class.name}:#{format("0x%x", object_id)} #{to_s.inspect}>"
10
+ private
11
+
12
+ def inspect_attributes
13
+ [:to_s]
12
14
  end
13
15
  end
14
16
  end
@@ -13,8 +13,10 @@ module Nokogiri
13
13
  doc.create_entity(name, *args)
14
14
  end
15
15
 
16
- def inspect
17
- "#<#{self.class.name}:#{format("0x%x", object_id)} #{to_s.inspect}>"
16
+ private
17
+
18
+ def inspect_attributes
19
+ [:to_s]
18
20
  end
19
21
  end
20
22
  end
@@ -16,8 +16,6 @@ module Nokogiri
16
16
  # - +prefix+ → (String, nil) The namespace's prefix, or +nil+ if there is no prefix (e.g., default namespace).
17
17
  # - +href+ → (String) The namespace's URI
18
18
  #
19
- # ⚡ This is an experimental feature, available since v1.14.0
20
- #
21
19
  # *Example*
22
20
  #
23
21
  # doc = Nokogiri::XML.parse(<<~XML)
@@ -43,6 +41,7 @@ module Nokogiri
43
41
  # doc.root.elements.last.namespace.deconstruct_keys([:prefix, :href])
44
42
  # # => {:prefix=>"noko", :href=>"http://nokogiri.org/ns/noko"}
45
43
  #
44
+ # Since v1.14.0
46
45
  #
47
46
  def deconstruct_keys(keys)
48
47
  { prefix: prefix, href: href }
@@ -62,6 +62,14 @@ module Nokogiri
62
62
  end
63
63
 
64
64
  alias_method :to_i, :options
65
+
66
+ def inspect
67
+ options = []
68
+ self.class.constants.each do |k|
69
+ options << k.downcase if send(:"#{k.downcase}?")
70
+ end
71
+ super.sub(/>$/, " " + options.join(", ") + ">")
72
+ end
65
73
  end
66
74
  end
67
75
  end
@@ -1049,29 +1049,35 @@ module Nokogiri
1049
1049
 
1050
1050
  return Nokogiri::XML::NodeSet.new(document) if contents.empty?
1051
1051
 
1052
- # libxml2 does not obey the +recover+ option after encountering errors during +in_context+
1053
- # parsing, and so this horrible hack is here to try to emulate recovery behavior.
1054
- #
1055
- # Unfortunately, this means we're no longer parsing "in context" and so namespaces that
1056
- # would have been inherited from the context node won't be handled correctly. This hack was
1057
- # written in 2010, and I regret it, because it's silently degrading functionality in a way
1058
- # that's not easily prevented (or even detected).
1059
- #
1060
- # I think preferable behavior would be to either:
1061
- #
1062
- # a. add an error noting that we "fell back" and pointing the user to turning off the +recover+ option
1063
- # b. don't recover, but raise a sensible exception
1064
- #
1065
- # For context and background: https://github.com/sparklemotion/nokogiri/issues/313
1066
- # FIXME bug report: https://github.com/sparklemotion/nokogiri/issues/2092
1067
1052
  error_count = document.errors.length
1068
1053
  node_set = in_context(contents, options.to_i)
1069
- if node_set.empty? && (document.errors.length > error_count)
1070
- if options.recover?
1054
+ if document.errors.length > error_count
1055
+ raise document.errors[error_count] unless options.recover?
1056
+
1057
+ if node_set.empty?
1058
+ # libxml2 < 2.13 does not obey the +recover+ option after encountering errors during
1059
+ # +in_context+ parsing, and so this horrible hack is here to try to emulate recovery
1060
+ # behavior.
1061
+ #
1062
+ # (Note that HTML4 fragment parsing seems to have been fixed in abd74186, and XML
1063
+ # fragment parsing is fixed in 1c106edf. Both are in 2.13.)
1064
+ #
1065
+ # Unfortunately, this means we're no longer parsing "in context" and so namespaces that
1066
+ # would have been inherited from the context node won't be handled correctly. This hack
1067
+ # was written in 2010, and I regret it, because it's silently degrading functionality in
1068
+ # a way that's not easily prevented (or even detected).
1069
+ #
1070
+ # I think preferable behavior would be to either:
1071
+ #
1072
+ # a. add an error noting that we "fell back" and pointing the user to turning off the
1073
+ # +recover+ option
1074
+ # b. don't recover, but raise a sensible exception
1075
+ #
1076
+ # For context and background:
1077
+ # - https://github.com/sparklemotion/nokogiri/issues/313
1078
+ # - https://github.com/sparklemotion/nokogiri/issues/2092
1071
1079
  fragment = document.related_class("DocumentFragment").parse(contents)
1072
1080
  node_set = fragment.children
1073
- else
1074
- raise document.errors[error_count]
1075
1081
  end
1076
1082
  end
1077
1083
  node_set
@@ -1165,7 +1171,7 @@ module Nokogiri
1165
1171
  # Fetch the Nokogiri::HTML4::ElementDescription for this node. Returns
1166
1172
  # nil on XML documents and on unknown tags.
1167
1173
  def description
1168
- return nil if document.xml?
1174
+ return if document.xml?
1169
1175
 
1170
1176
  Nokogiri::HTML4::ElementDescription[name]
1171
1177
  end
@@ -1254,8 +1260,8 @@ module Nokogiri
1254
1260
  # Compare two Node objects with respect to their Document. Nodes from
1255
1261
  # different documents cannot be compared.
1256
1262
  def <=>(other)
1257
- return nil unless other.is_a?(Nokogiri::XML::Node)
1258
- return nil unless document == other.document
1263
+ return unless other.is_a?(Nokogiri::XML::Node)
1264
+ return unless document == other.document
1259
1265
 
1260
1266
  compare(other)
1261
1267
  end
@@ -1269,15 +1275,16 @@ module Nokogiri
1269
1275
  #
1270
1276
  # These two statements are equivalent:
1271
1277
  #
1272
- # node.serialize(:encoding => 'UTF-8', :save_with => FORMAT | AS_XML)
1278
+ # node.serialize(encoding: 'UTF-8', save_with: FORMAT | AS_XML)
1273
1279
  #
1274
1280
  # or
1275
1281
  #
1276
- # node.serialize(:encoding => 'UTF-8') do |config|
1282
+ # node.serialize(encoding: 'UTF-8') do |config|
1277
1283
  # config.format.as_xml
1278
1284
  # end
1279
1285
  #
1280
1286
  def serialize(*args, &block)
1287
+ # TODO: deprecate non-hash options, see 46c68ed 2009-06-20 for context
1281
1288
  options = if args.first.is_a?(Hash)
1282
1289
  args.shift
1283
1290
  else
@@ -1310,7 +1317,7 @@ module Nokogiri
1310
1317
  ###
1311
1318
  # Serialize this Node to XML using +options+
1312
1319
  #
1313
- # doc.to_xml(:indent => 5, :encoding => 'UTF-8')
1320
+ # doc.to_xml(indent: 5, encoding: 'UTF-8')
1314
1321
  #
1315
1322
  # See Node#write_to for a list of +options+
1316
1323
  def to_xml(options = {})
@@ -1321,7 +1328,7 @@ module Nokogiri
1321
1328
  ###
1322
1329
  # Serialize this Node to XHTML using +options+
1323
1330
  #
1324
- # doc.to_xhtml(:indent => 5, :encoding => 'UTF-8')
1331
+ # doc.to_xhtml(indent: 5, encoding: 'UTF-8')
1325
1332
  #
1326
1333
  # See Node#write_to for a list of +options+
1327
1334
  def to_xhtml(options = {})
@@ -1329,25 +1336,32 @@ module Nokogiri
1329
1336
  end
1330
1337
 
1331
1338
  ###
1332
- # Write Node to +io+ with +options+. +options+ modify the output of
1333
- # this method. Valid options are:
1339
+ # :call-seq:
1340
+ # write_to(io, *options)
1341
+ #
1342
+ # Serialize this node or document to +io+.
1343
+ #
1344
+ # [Parameters]
1345
+ # - +io+ (IO) An IO-like object to which the serialized content will be written.
1346
+ # - +options+ (Hash) See below
1334
1347
  #
1335
- # * +:encoding+ for changing the encoding
1336
- # * +:indent_text+ the indentation text, defaults to one space
1337
- # * +:indent+ the number of +:indent_text+ to use, defaults to 2
1338
- # * +:save_with+ a combination of SaveOptions constants.
1348
+ # [Options]
1349
+ # * +:encoding+ (String or Encoding) specify the encoding of the output (defaults to document encoding)
1350
+ # * +:indent_text+ (String) the indentation text (defaults to <code>" "</code>)
1351
+ # * +:indent+ (Integer) the number of +:indent_text+ to use (defaults to +2+)
1352
+ # * +:save_with+ (Integer) a combination of SaveOptions constants
1339
1353
  #
1340
1354
  # To save with UTF-8 indented twice:
1341
1355
  #
1342
- # node.write_to(io, :encoding => 'UTF-8', :indent => 2)
1356
+ # node.write_to(io, encoding: 'UTF-8', indent: 2)
1343
1357
  #
1344
1358
  # To save indented with two dashes:
1345
1359
  #
1346
- # node.write_to(io, :indent_text => '-', :indent => 2)
1360
+ # node.write_to(io, indent_text: '-', indent: 2)
1347
1361
  #
1348
1362
  def write_to(io, *options)
1349
1363
  options = options.first.is_a?(Hash) ? options.shift : {}
1350
- encoding = options[:encoding] || options[0]
1364
+ encoding = options[:encoding] || options[0] || document.encoding
1351
1365
  if Nokogiri.jruby?
1352
1366
  save_options = options[:save_with] || options[1]
1353
1367
  indent_times = options[:indent] || 0
@@ -1365,6 +1379,8 @@ module Nokogiri
1365
1379
  config = SaveOptions.new(save_options.to_i)
1366
1380
  yield config if block_given?
1367
1381
 
1382
+ encoding = encoding.is_a?(Encoding) ? encoding.name : encoding
1383
+
1368
1384
  native_write_to(io, encoding, indentation, config.options)
1369
1385
  end
1370
1386
 
@@ -1420,8 +1436,6 @@ module Nokogiri
1420
1436
  # - +content+ → (String) The contents of all the text nodes in this node's subtree. See #content.
1421
1437
  # - +inner_html+ → (String) The inner markup for the children of this node. See #inner_html.
1422
1438
  #
1423
- # ⚡ This is an experimental feature, available since v1.14.0
1424
- #
1425
1439
  # *Example*
1426
1440
  #
1427
1441
  # doc = Nokogiri::XML.parse(<<~XML)
@@ -1456,6 +1470,8 @@ module Nokogiri
1456
1470
  # # value = "def"
1457
1471
  # # })]}
1458
1472
  #
1473
+ # Since v1.14.0
1474
+ #
1459
1475
  def deconstruct_keys(keys)
1460
1476
  requested_keys = DECONSTRUCT_KEYS & keys
1461
1477
  {}.tap do |values|
@@ -372,7 +372,7 @@ module Nokogiri
372
372
  # Removes the last element from set and returns it, or +nil+ if
373
373
  # the set is empty
374
374
  def pop
375
- return nil if length == 0
375
+ return if length == 0
376
376
 
377
377
  delete(last)
378
378
  end
@@ -381,7 +381,7 @@ module Nokogiri
381
381
  # Returns the first element of the NodeSet and removes it. Returns
382
382
  # +nil+ if the set is empty.
383
383
  def shift
384
- return nil if length == 0
384
+ return if length == 0
385
385
 
386
386
  delete(first)
387
387
  end
@@ -435,7 +435,7 @@ module Nokogiri
435
435
  #
436
436
  # Returns the members of this NodeSet as an array, to use in pattern matching.
437
437
  #
438
- # This is an experimental feature, available since v1.14.0
438
+ # Since v1.14.0
439
439
  #
440
440
  def deconstruct
441
441
  to_a
@@ -13,9 +13,14 @@ module Nokogiri
13
13
  !attribute || (attribute.respond_to?(:empty?) && attribute.empty?)
14
14
  rescue NoMethodError
15
15
  true
16
- end.map do |attribute|
17
- "#{attribute.to_s.sub(/_\w+/, "s")}=#{send(attribute).inspect}"
18
- end.join(" ")
16
+ end
17
+ attributes = if inspect_attributes.length == 1
18
+ send(attributes.first).inspect
19
+ else
20
+ attributes.map do |attribute|
21
+ "#{attribute}=#{send(attribute).inspect}"
22
+ end.join(" ")
23
+ end
19
24
  "#<#{self.class.name}:#{format("0x%x", object_id)} #{attributes}>"
20
25
  end
21
26
 
@@ -23,6 +28,7 @@ module Nokogiri
23
28
  nice_name = self.class.name.split("::").last
24
29
  pp.group(2, "#(#{nice_name}:#{format("0x%x", object_id)} {", "})") do
25
30
  pp.breakable
31
+
26
32
  attrs = inspect_attributes.filter_map do |t|
27
33
  [t, send(t)] if respond_to?(t)
28
34
  end.find_all do |x|
@@ -35,19 +41,24 @@ module Nokogiri
35
41
  end
36
42
  end
37
43
 
38
- pp.seplist(attrs) do |v|
39
- if COLLECTIONS.include?(v.first)
40
- pp.group(2, "#{v.first.to_s.sub(/_\w+$/, "s")} = [", "]") do
41
- pp.breakable
42
- pp.seplist(v.last) do |item|
43
- pp.pp(item)
44
+ if inspect_attributes.length == 1
45
+ pp.pp(attrs.first.last)
46
+ else
47
+ pp.seplist(attrs) do |v|
48
+ if COLLECTIONS.include?(v.first)
49
+ pp.group(2, "#{v.first} = [", "]") do
50
+ pp.breakable
51
+ pp.seplist(v.last) do |item|
52
+ pp.pp(item)
53
+ end
44
54
  end
55
+ else
56
+ pp.text("#{v.first} = ")
57
+ pp.pp(v.last)
45
58
  end
46
- else
47
- pp.text("#{v.first} = ")
48
- pp.pp(v.last)
49
59
  end
50
60
  end
61
+
51
62
  pp.breakable
52
63
  end
53
64
  end