nokogiri 1.15.2 → 1.16.2

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (50) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +17 -14
  3. data/README.md +4 -1
  4. data/dependencies.yml +9 -8
  5. data/ext/nokogiri/extconf.rb +3 -4
  6. data/ext/nokogiri/html4_sax_push_parser.c +1 -1
  7. data/ext/nokogiri/nokogiri.h +10 -3
  8. data/ext/nokogiri/test_global_handlers.c +1 -1
  9. data/ext/nokogiri/xml_cdata.c +28 -23
  10. data/ext/nokogiri/xml_document.c +13 -5
  11. data/ext/nokogiri/xml_namespace.c +0 -4
  12. data/ext/nokogiri/xml_node.c +6 -9
  13. data/ext/nokogiri/xml_reader.c +25 -48
  14. data/ext/nokogiri/xml_relax_ng.c +1 -1
  15. data/ext/nokogiri/xml_sax_parser_context.c +4 -0
  16. data/ext/nokogiri/xml_sax_push_parser.c +1 -1
  17. data/ext/nokogiri/xml_schema.c +13 -8
  18. data/ext/nokogiri/xml_syntax_error.c +3 -3
  19. data/ext/nokogiri/xml_text.c +24 -19
  20. data/ext/nokogiri/xml_xpath_context.c +2 -5
  21. data/ext/nokogiri/xslt_stylesheet.c +8 -3
  22. data/gumbo-parser/Makefile +18 -0
  23. data/gumbo-parser/src/error.c +1 -1
  24. data/gumbo-parser/src/parser.c +8 -5
  25. data/gumbo-parser/src/tokenizer.c +1 -0
  26. data/lib/nokogiri/css/parser_extras.rb +1 -1
  27. data/lib/nokogiri/css/xpath_visitor.rb +1 -21
  28. data/lib/nokogiri/html4/document.rb +1 -1
  29. data/lib/nokogiri/html4/encoding_reader.rb +1 -1
  30. data/lib/nokogiri/html5.rb +0 -66
  31. data/lib/nokogiri/version/constant.rb +1 -1
  32. data/lib/nokogiri/version/info.rb +6 -5
  33. data/lib/nokogiri/xml/attr.rb +2 -2
  34. data/lib/nokogiri/xml/document.rb +4 -5
  35. data/lib/nokogiri/xml/document_fragment.rb +2 -2
  36. data/lib/nokogiri/xml/namespace.rb +1 -2
  37. data/lib/nokogiri/xml/node.rb +31 -24
  38. data/lib/nokogiri/xml/node_set.rb +3 -3
  39. data/lib/nokogiri/xml/reader.rb +10 -9
  40. data/lib/nokogiri/xml/searchable.rb +3 -3
  41. data/lib/nokogiri/xml/syntax_error.rb +1 -1
  42. data/lib/nokogiri/xml.rb +1 -1
  43. data/lib/nokogiri/xslt/stylesheet.rb +29 -7
  44. data/lib/nokogiri/xslt.rb +1 -1
  45. data/lib/nokogiri.rb +1 -1
  46. data/ports/archives/libxml2-2.12.5.tar.xz +0 -0
  47. data/ports/archives/libxslt-1.1.39.tar.xz +0 -0
  48. metadata +6 -6
  49. data/ports/archives/libxml2-2.11.4.tar.xz +0 -0
  50. data/ports/archives/libxslt-1.1.38.tar.xz +0 -0
@@ -9,33 +9,38 @@ VALUE cNokogiriXmlText ;
9
9
  * Create a new Text element on the +document+ with +content+
10
10
  */
11
11
  static VALUE
12
- new (int argc, VALUE *argv, VALUE klass)
12
+ rb_xml_text_s_new(int argc, VALUE *argv, VALUE klass)
13
13
  {
14
- xmlDocPtr doc;
15
- xmlNodePtr node;
16
- VALUE string;
17
- VALUE document;
18
- VALUE rest;
14
+ xmlDocPtr c_document;
15
+ xmlNodePtr c_node;
16
+ VALUE rb_string;
17
+ VALUE rb_document;
18
+ VALUE rb_rest;
19
19
  VALUE rb_node;
20
20
 
21
- rb_scan_args(argc, argv, "2*", &string, &document, &rest);
21
+ rb_scan_args(argc, argv, "2*", &rb_string, &rb_document, &rb_rest);
22
22
 
23
- if (rb_obj_is_kind_of(document, cNokogiriXmlDocument)) {
24
- doc = noko_xml_document_unwrap(document);
25
- } else {
23
+ if (!rb_obj_is_kind_of(rb_document, cNokogiriXmlNode)) {
24
+ rb_raise(rb_eTypeError,
25
+ "expected second parameter to be a Nokogiri::XML::Document, received %"PRIsVALUE,
26
+ rb_obj_class(rb_document));
27
+ }
28
+
29
+ if (!rb_obj_is_kind_of(rb_document, cNokogiriXmlDocument)) {
26
30
  xmlNodePtr deprecated_node_type_arg;
27
- // TODO: deprecate allowing Node
28
- NOKO_WARN_DEPRECATION("Passing a Node as the second parameter to Text.new is deprecated. Please pass a Document instead. This will become an error in a future release of Nokogiri.");
29
- Noko_Node_Get_Struct(document, xmlNode, deprecated_node_type_arg);
30
- doc = deprecated_node_type_arg->doc;
31
+ NOKO_WARN_DEPRECATION("Passing a Node as the second parameter to Text.new is deprecated. Please pass a Document instead. This will become an error in Nokogiri v1.17.0."); // TODO: deprecated in v1.15.3, remove in v1.17.0
32
+ Noko_Node_Get_Struct(rb_document, xmlNode, deprecated_node_type_arg);
33
+ c_document = deprecated_node_type_arg->doc;
34
+ } else {
35
+ c_document = noko_xml_document_unwrap(rb_document);
31
36
  }
32
37
 
33
- node = xmlNewText((xmlChar *)StringValueCStr(string));
34
- node->doc = doc;
38
+ c_node = xmlNewText((xmlChar *)StringValueCStr(rb_string));
39
+ c_node->doc = c_document;
35
40
 
36
- noko_xml_document_pin_node(node);
41
+ noko_xml_document_pin_node(c_node);
37
42
 
38
- rb_node = noko_xml_node_wrap(klass, node) ;
43
+ rb_node = noko_xml_node_wrap(klass, c_node) ;
39
44
  rb_obj_call_init(rb_node, argc, argv);
40
45
 
41
46
  if (rb_block_given_p()) { rb_yield(rb_node); }
@@ -52,5 +57,5 @@ noko_init_xml_text(void)
52
57
  */
53
58
  cNokogiriXmlText = rb_define_class_under(mNokogiriXml, "Text", cNokogiriXmlCharacterData);
54
59
 
55
- rb_define_singleton_method(cNokogiriXmlText, "new", new, -1);
60
+ rb_define_singleton_method(cNokogiriXmlText, "new", rb_xml_text_s_new, -1);
56
61
  }
@@ -321,11 +321,8 @@ handler_lookup(void *data, const xmlChar *c_name, const xmlChar *c_ns_uri)
321
321
  VALUE rb_handler = (VALUE)data;
322
322
  if (rb_respond_to(rb_handler, rb_intern((const char *)c_name))) {
323
323
  if (c_ns_uri == NULL) {
324
- NOKO_WARN_DEPRECATION(
325
- "A custom XPath or CSS handler function named '%s' is being invoked without a namespace."
326
- " Please update your query to reference this function as 'nokogiri:%s'."
327
- " Invoking custom handler functions without a namespace is deprecated and support will be removed in a future release of Nokogiri.",
328
- c_name, c_name);
324
+ NOKO_WARN_DEPRECATION("A custom XPath or CSS handler function named '%s' is being invoked without a namespace. Please update your query to reference this function as 'nokogiri:%s'. Invoking custom handler functions without a namespace is deprecated and will become an error in Nokogiri v1.17.0.",
325
+ c_name, c_name); // deprecated in v1.15.0, remove in v1.17.0
329
326
  }
330
327
  return method_caller;
331
328
  }
@@ -71,7 +71,12 @@ Nokogiri_wrap_xslt_stylesheet(xsltStylesheetPtr ss)
71
71
  * call-seq:
72
72
  * parse_stylesheet_doc(document)
73
73
  *
74
- * Parse a stylesheet from +document+.
74
+ * Parse an XSLT::Stylesheet from +document+.
75
+ *
76
+ * [Parameters]
77
+ * - +document+ (Nokogiri::XML::Document) the document to be parsed.
78
+ *
79
+ * [Returns] Nokogiri::XSLT::Stylesheet
75
80
  */
76
81
  static VALUE
77
82
  parse_stylesheet_doc(VALUE klass, VALUE xmldocobj)
@@ -104,7 +109,7 @@ parse_stylesheet_doc(VALUE klass, VALUE xmldocobj)
104
109
  * call-seq:
105
110
  * serialize(document)
106
111
  *
107
- * Serialize +document+ to an xml string.
112
+ * Serialize +document+ to an xml string, as specified by the +method+ parameter in the Stylesheet.
108
113
  */
109
114
  static VALUE
110
115
  rb_xslt_stylesheet_serialize(VALUE self, VALUE xmlobj)
@@ -133,7 +138,7 @@ rb_xslt_stylesheet_serialize(VALUE self, VALUE xmlobj)
133
138
  * transform(document)
134
139
  * transform(document, params = {})
135
140
  *
136
- * Apply an XSLT stylesheet to an XML::Document.
141
+ * Transform an XML::Document as defined by an XSLT::Stylesheet.
137
142
  *
138
143
  * [Parameters]
139
144
  * - +document+ (Nokogiri::XML::Document) the document to be transformed.
@@ -13,6 +13,23 @@ LDFLAGS := -pthread
13
13
 
14
14
  all: check
15
15
 
16
+ oss-fuzz:
17
+ ./fuzzer/build-ossfuzz.sh
18
+
19
+ fuzzers: fuzzer-normal fuzzer-asan fuzzer-ubsan fuzzer-msan
20
+
21
+ fuzzer-normal:
22
+ ./fuzzer/build.sh
23
+
24
+ fuzzer-asan:
25
+ SANITIZER=asan ./fuzzer/build.sh
26
+
27
+ fuzzer-ubsan:
28
+ SANITIZER=ubsan ./fuzzer/build.sh
29
+
30
+ fuzzer-msan:
31
+ SANITIZER=msan ./fuzzer/build.sh
32
+
16
33
  # don't try to regenerate ragel or gperf files in CI, that should be a development-only action and
17
34
  # the generated files should be committed to SCM
18
35
  ifneq ($(CI),true)
@@ -81,6 +98,7 @@ coverage:
81
98
 
82
99
  clean:
83
100
  $(RM) -r build
101
+ $(RM) -r fuzzer/build fuzzer/src-* fuzzer/gumbo_corpus
84
102
 
85
103
  build/src/flags: | build/src
86
104
  @echo 'old_CC := $(CC)' > $@
@@ -357,7 +357,7 @@ static void handle_parser_error (
357
357
  print_tag_stack(error, output);
358
358
  return;
359
359
  case GUMBO_TOKEN_END_TAG:
360
- print_message(output, "Eng tag '%s' isn't allowed here.",
360
+ print_message(output, "End tag '%s' isn't allowed here.",
361
361
  gumbo_normalized_tagname(error->input_tag));
362
362
  print_tag_stack(error, output);
363
363
  return;
@@ -4826,14 +4826,17 @@ GumboOutput* gumbo_parse_with_options (
4826
4826
  // to a token.
4827
4827
  if (token.type == GUMBO_TOKEN_END_TAG &&
4828
4828
  token.v.end_tag.tag == GUMBO_TAG_UNKNOWN)
4829
+ {
4829
4830
  gumbo_free(token.v.end_tag.name);
4831
+ token.v.end_tag.name = NULL;
4832
+ }
4833
+ if (unlikely(state->_open_elements.length > max_tree_depth)) {
4834
+ parser._output->status = GUMBO_STATUS_TREE_TOO_DEEP;
4835
+ gumbo_debug("Tree depth limit exceeded.\n");
4836
+ break;
4837
+ }
4830
4838
  }
4831
4839
 
4832
- if (unlikely(state->_open_elements.length > max_tree_depth)) {
4833
- parser._output->status = GUMBO_STATUS_TREE_TOO_DEEP;
4834
- gumbo_debug("Tree depth limit exceeded.\n");
4835
- break;
4836
- }
4837
4840
 
4838
4841
  ++loop_count;
4839
4842
  assert(loop_count < 1000000000UL);
@@ -506,6 +506,7 @@ static void abandon_current_tag(GumboParser* parser) {
506
506
  for (unsigned int i = 0; i < tag_state->_attributes.length; ++i) {
507
507
  gumbo_destroy_attribute(tag_state->_attributes.data[i]);
508
508
  }
509
+ gumbo_free(tag_state->_name);
509
510
  gumbo_free(tag_state->_attributes.data);
510
511
  mark_tag_state_as_empty(tag_state);
511
512
  gumbo_string_buffer_destroy(&tag_state->_buffer);
@@ -23,7 +23,7 @@ module Nokogiri
23
23
 
24
24
  # Get the css selector in +string+ from the cache
25
25
  def [](string)
26
- return nil unless cache_on?
26
+ return unless cache_on?
27
27
 
28
28
  @mutex.synchronize { @cache[string] }
29
29
  end
@@ -302,7 +302,7 @@ module Nokogiri
302
302
  end
303
303
 
304
304
  def read_a_and_positive_b(values)
305
- op = values[2]
305
+ op = values[2].strip
306
306
  if op == "+"
307
307
  a = values[0].to_i
308
308
  b = values[3].to_i
@@ -335,25 +335,5 @@ module Nokogiri
335
335
  end
336
336
  end
337
337
  end
338
-
339
- module XPathVisitorAlwaysUseBuiltins # :nodoc:
340
- def self.new
341
- warn(
342
- "Nokogiri::CSS::XPathVisitorAlwaysUseBuiltins is deprecated and will be removed in a future version of Nokogiri",
343
- { uplevel: 1 },
344
- )
345
- XPathVisitor.new(builtins: :always)
346
- end
347
- end
348
-
349
- module XPathVisitorOptimallyUseBuiltins # :nodoc:
350
- def self.new
351
- warn(
352
- "Nokogiri::CSS::XPathVisitorOptimallyUseBuiltins is deprecated and will be removed in a future version of Nokogiri",
353
- { uplevel: 1 },
354
- )
355
- XPathVisitor.new(builtins: :optimal)
356
- end
357
- end
358
338
  end
359
339
  end
@@ -92,7 +92,7 @@ module Nokogiri
92
92
  title = XML::Node.new("title", self) << tnode
93
93
  if (head = at_xpath("//head"))
94
94
  head << title
95
- elsif (meta = (at_xpath("//meta[@charset]") || meta_content_type))
95
+ elsif (meta = at_xpath("//meta[@charset]") || meta_content_type)
96
96
  # better put after charset declaration
97
97
  meta.add_next_sibling(title)
98
98
  else
@@ -94,7 +94,7 @@ module Nokogiri
94
94
  # no support for a call without len
95
95
 
96
96
  unless @firstchunk
97
- (@firstchunk = @io.read(len)) || (return nil)
97
+ (@firstchunk = @io.read(len)) || return
98
98
 
99
99
  # This implementation expects that the first call from
100
100
  # htmlReadIO() is made with a length long enough (~1KB) to
@@ -239,23 +239,6 @@ module Nokogiri
239
239
  DocumentFragment.parse(string, encoding, options)
240
240
  end
241
241
 
242
- # Fetch and parse a HTML document from the web, following redirects,
243
- # handling https, and determining the character encoding using HTML5
244
- # rules. +uri+ may be a +String+ or a +URI+. +options+ contains
245
- # http headers and special options. Everything which is not a
246
- # special option is considered a header. Special options include:
247
- # * :follow_limit => number of redirects which are followed
248
- # * :basic_auth => [username, password]
249
- def get(uri, options = {})
250
- # TODO: deprecate
251
- warn(
252
- "Nokogiri::HTML5.get is deprecated and will be removed in a future version of Nokogiri.",
253
- uplevel: 1,
254
- category: :deprecated,
255
- )
256
- get_impl(uri, options)
257
- end
258
-
259
242
  # :nodoc:
260
243
  def read_and_encode(string, encoding)
261
244
  # Read the string with the given encoding.
@@ -283,55 +266,6 @@ module Nokogiri
283
266
 
284
267
  private
285
268
 
286
- def get_impl(uri, options = {})
287
- headers = options.clone
288
- headers = { follow_limit: headers } if Numeric === headers # deprecated
289
- limit = headers[:follow_limit] ? headers.delete(:follow_limit).to_i : 10
290
-
291
- require "net/http"
292
- uri = URI(uri) unless URI === uri
293
-
294
- http = Net::HTTP.new(uri.host, uri.port)
295
-
296
- # TLS / SSL support
297
- http.use_ssl = true if uri.scheme == "https"
298
-
299
- # Pass through Net::HTTP override values, which currently include:
300
- # :ca_file, :ca_path, :cert, :cert_store, :ciphers,
301
- # :close_on_empty_response, :continue_timeout, :key, :open_timeout,
302
- # :read_timeout, :ssl_timeout, :ssl_version, :use_ssl,
303
- # :verify_callback, :verify_depth, :verify_mode
304
- options.each do |key, _value|
305
- http.send("#{key}=", headers.delete(key)) if http.respond_to?("#{key}=")
306
- end
307
-
308
- request = Net::HTTP::Get.new(uri.request_uri)
309
-
310
- # basic authentication
311
- auth = headers.delete(:basic_auth)
312
- auth ||= [uri.user, uri.password] if uri.user && uri.password
313
- request.basic_auth(auth.first, auth.last) if auth
314
-
315
- # remaining options are treated as headers
316
- headers.each { |key, value| request[key.to_s] = value.to_s }
317
-
318
- response = http.request(request)
319
-
320
- case response
321
- when Net::HTTPSuccess
322
- doc = parse(reencode(response.body, response["content-type"]), options)
323
- doc.instance_variable_set(:@response, response)
324
- doc.class.send(:attr_reader, :response)
325
- doc
326
- when Net::HTTPRedirection
327
- response.value if limit <= 1
328
- location = URI.join(uri, response["location"])
329
- get_impl(location, options.merge(follow_limit: limit - 1))
330
- else
331
- response.value
332
- end
333
- end
334
-
335
269
  # Charset sniffing is a complex and controversial topic that understandably isn't done _by
336
270
  # default_ by the Ruby Net::HTTP library. This being said, it is a very real problem for
337
271
  # consumers of HTML as the default for HTML is iso-8859-1, most "good" producers use utf-8, and
@@ -2,5 +2,5 @@
2
2
 
3
3
  module Nokogiri
4
4
  # The version of Nokogiri you are using
5
- VERSION = "1.15.2"
5
+ VERSION = "1.16.2"
6
6
  end
@@ -94,11 +94,14 @@ module Nokogiri
94
94
  nokogiri["version"] = Nokogiri::VERSION
95
95
 
96
96
  unless jruby?
97
- # enable gems like nokogumbo to build with the following in their extconf.rb:
97
+ # enable gems to build against Nokogiri with the following in their extconf.rb:
98
98
  #
99
99
  # append_cflags(Nokogiri::VERSION_INFO["nokogiri"]["cppflags"])
100
100
  # append_ldflags(Nokogiri::VERSION_INFO["nokogiri"]["ldflags"])
101
101
  #
102
+ # though, this won't work on all platform and versions of Ruby, and won't be supported
103
+ # forever, see https://github.com/sparklemotion/nokogiri/discussions/2746 for context.
104
+ #
102
105
  cppflags = ["-I#{header_directory.shellescape}"]
103
106
  ldflags = []
104
107
 
@@ -108,7 +111,8 @@ module Nokogiri
108
111
  end
109
112
 
110
113
  if windows?
111
- # on windows, nokogumbo needs to link against nokogiri.so to resolve symbols. see #2167
114
+ # on windows, third party libraries that wish to link against nokogiri
115
+ # should link against nokogiri.so to resolve symbols. see #2167
112
116
  lib_directory = File.expand_path(File.join(File.dirname(__FILE__), "../#{ruby_minor}"))
113
117
  unless File.exist?(lib_directory)
114
118
  lib_directory = File.expand_path(File.join(File.dirname(__FILE__), ".."))
@@ -136,9 +140,6 @@ module Nokogiri
136
140
  libxml["source"] = "packaged"
137
141
  libxml["precompiled"] = libxml2_precompiled?
138
142
  libxml["patches"] = Nokogiri::LIBXML2_PATCHES
139
-
140
- # this is for nokogumbo and shouldn't be forever
141
- libxml["libxml2_path"] = header_directory
142
143
  else
143
144
  libxml["source"] = "system"
144
145
  end
@@ -18,8 +18,6 @@ module Nokogiri
18
18
  # - +value+ → (String) The value of the attribute.
19
19
  # - +namespace+ → (Namespace, nil) The Namespace of the attribute, or +nil+ if there is no namespace.
20
20
  #
21
- # ⚡ This is an experimental feature, available since v1.14.0
22
- #
23
21
  # *Example*
24
22
  #
25
23
  # doc = Nokogiri::XML.parse(<<~XML)
@@ -52,6 +50,8 @@ module Nokogiri
52
50
  # # href = "http://nokogiri.org/ns/noko"
53
51
  # # })}
54
52
  #
53
+ # Since v1.14.0
54
+ #
55
55
  def deconstruct_keys(keys)
56
56
  { name: name, value: value, namespace: namespace }
57
57
  end
@@ -174,8 +174,7 @@ module Nokogiri
174
174
  # Since v1.12.4
175
175
  attr_accessor :namespace_inheritance
176
176
 
177
- # :nodoc:
178
- def initialize(*args) # rubocop:disable Lint/MissingSuper
177
+ def initialize(*args) # :nodoc: # rubocop:disable Lint/MissingSuper
179
178
  @errors = []
180
179
  @decorators = nil
181
180
  @namespace_inheritance = false
@@ -330,7 +329,7 @@ module Nokogiri
330
329
  # Validate this Document against it's DTD. Returns a list of errors on
331
330
  # the document or +nil+ when there is no DTD.
332
331
  def validate
333
- return nil unless internal_subset
332
+ return unless internal_subset
334
333
 
335
334
  internal_subset.validate(self)
336
335
  end
@@ -427,8 +426,6 @@ module Nokogiri
427
426
  # instructions. If you have a use case and would like this functionality, please let us know
428
427
  # by opening an issue or a discussion on the github project.
429
428
  #
430
- # ⚡ This is an experimental feature, available since v1.14.0
431
- #
432
429
  # *Example*
433
430
  #
434
431
  # doc = Nokogiri::XML.parse(<<~XML)
@@ -455,6 +452,8 @@ module Nokogiri
455
452
  # doc.deconstruct_keys([:root])
456
453
  # # => {:root=>nil}
457
454
  #
455
+ # Since v1.14.0
456
+ #
458
457
  def deconstruct_keys(keys)
459
458
  { root: root }
460
459
  end
@@ -154,8 +154,6 @@ module Nokogiri
154
154
  # root elements, you should deconstruct the array returned by
155
155
  # <tt>DocumentFragment#elements</tt>.
156
156
  #
157
- # ⚡ This is an experimental feature, available since v1.14.0
158
- #
159
157
  # *Example*
160
158
  #
161
159
  # frag = Nokogiri::HTML5.fragment(<<~HTML)
@@ -187,6 +185,8 @@ module Nokogiri
187
185
  # # }),
188
186
  # # #(Element:0x398 { name = "div", children = [ #(Text "End")] })]
189
187
  #
188
+ # Since v1.14.0
189
+ #
190
190
  def deconstruct
191
191
  children.to_a
192
192
  end
@@ -16,8 +16,6 @@ module Nokogiri
16
16
  # - +prefix+ → (String, nil) The namespace's prefix, or +nil+ if there is no prefix (e.g., default namespace).
17
17
  # - +href+ → (String) The namespace's URI
18
18
  #
19
- # ⚡ This is an experimental feature, available since v1.14.0
20
- #
21
19
  # *Example*
22
20
  #
23
21
  # doc = Nokogiri::XML.parse(<<~XML)
@@ -43,6 +41,7 @@ module Nokogiri
43
41
  # doc.root.elements.last.namespace.deconstruct_keys([:prefix, :href])
44
42
  # # => {:prefix=>"noko", :href=>"http://nokogiri.org/ns/noko"}
45
43
  #
44
+ # Since v1.14.0
46
45
  #
47
46
  def deconstruct_keys(keys)
48
47
  { prefix: prefix, href: href }
@@ -1049,29 +1049,35 @@ module Nokogiri
1049
1049
 
1050
1050
  return Nokogiri::XML::NodeSet.new(document) if contents.empty?
1051
1051
 
1052
- # libxml2 does not obey the +recover+ option after encountering errors during +in_context+
1053
- # parsing, and so this horrible hack is here to try to emulate recovery behavior.
1054
- #
1055
- # Unfortunately, this means we're no longer parsing "in context" and so namespaces that
1056
- # would have been inherited from the context node won't be handled correctly. This hack was
1057
- # written in 2010, and I regret it, because it's silently degrading functionality in a way
1058
- # that's not easily prevented (or even detected).
1059
- #
1060
- # I think preferable behavior would be to either:
1061
- #
1062
- # a. add an error noting that we "fell back" and pointing the user to turning off the +recover+ option
1063
- # b. don't recover, but raise a sensible exception
1064
- #
1065
- # For context and background: https://github.com/sparklemotion/nokogiri/issues/313
1066
- # FIXME bug report: https://github.com/sparklemotion/nokogiri/issues/2092
1067
1052
  error_count = document.errors.length
1068
1053
  node_set = in_context(contents, options.to_i)
1069
- if node_set.empty? && (document.errors.length > error_count)
1070
- if options.recover?
1054
+ if document.errors.length > error_count
1055
+ raise document.errors[error_count] unless options.recover?
1056
+
1057
+ if node_set.empty?
1058
+ # libxml2 < 2.13 does not obey the +recover+ option after encountering errors during
1059
+ # +in_context+ parsing, and so this horrible hack is here to try to emulate recovery
1060
+ # behavior.
1061
+ #
1062
+ # (Note that HTML4 fragment parsing seems to have been fixed in abd74186, and XML
1063
+ # fragment parsing is fixed in 1c106edf. Both are in 2.13.)
1064
+ #
1065
+ # Unfortunately, this means we're no longer parsing "in context" and so namespaces that
1066
+ # would have been inherited from the context node won't be handled correctly. This hack
1067
+ # was written in 2010, and I regret it, because it's silently degrading functionality in
1068
+ # a way that's not easily prevented (or even detected).
1069
+ #
1070
+ # I think preferable behavior would be to either:
1071
+ #
1072
+ # a. add an error noting that we "fell back" and pointing the user to turning off the
1073
+ # +recover+ option
1074
+ # b. don't recover, but raise a sensible exception
1075
+ #
1076
+ # For context and background:
1077
+ # - https://github.com/sparklemotion/nokogiri/issues/313
1078
+ # - https://github.com/sparklemotion/nokogiri/issues/2092
1071
1079
  fragment = document.related_class("DocumentFragment").parse(contents)
1072
1080
  node_set = fragment.children
1073
- else
1074
- raise document.errors[error_count]
1075
1081
  end
1076
1082
  end
1077
1083
  node_set
@@ -1165,7 +1171,7 @@ module Nokogiri
1165
1171
  # Fetch the Nokogiri::HTML4::ElementDescription for this node. Returns
1166
1172
  # nil on XML documents and on unknown tags.
1167
1173
  def description
1168
- return nil if document.xml?
1174
+ return if document.xml?
1169
1175
 
1170
1176
  Nokogiri::HTML4::ElementDescription[name]
1171
1177
  end
@@ -1254,8 +1260,8 @@ module Nokogiri
1254
1260
  # Compare two Node objects with respect to their Document. Nodes from
1255
1261
  # different documents cannot be compared.
1256
1262
  def <=>(other)
1257
- return nil unless other.is_a?(Nokogiri::XML::Node)
1258
- return nil unless document == other.document
1263
+ return unless other.is_a?(Nokogiri::XML::Node)
1264
+ return unless document == other.document
1259
1265
 
1260
1266
  compare(other)
1261
1267
  end
@@ -1278,6 +1284,7 @@ module Nokogiri
1278
1284
  # end
1279
1285
  #
1280
1286
  def serialize(*args, &block)
1287
+ # TODO: deprecate non-hash options, see 46c68ed 2009-06-20 for context
1281
1288
  options = if args.first.is_a?(Hash)
1282
1289
  args.shift
1283
1290
  else
@@ -1429,8 +1436,6 @@ module Nokogiri
1429
1436
  # - +content+ → (String) The contents of all the text nodes in this node's subtree. See #content.
1430
1437
  # - +inner_html+ → (String) The inner markup for the children of this node. See #inner_html.
1431
1438
  #
1432
- # ⚡ This is an experimental feature, available since v1.14.0
1433
- #
1434
1439
  # *Example*
1435
1440
  #
1436
1441
  # doc = Nokogiri::XML.parse(<<~XML)
@@ -1465,6 +1470,8 @@ module Nokogiri
1465
1470
  # # value = "def"
1466
1471
  # # })]}
1467
1472
  #
1473
+ # Since v1.14.0
1474
+ #
1468
1475
  def deconstruct_keys(keys)
1469
1476
  requested_keys = DECONSTRUCT_KEYS & keys
1470
1477
  {}.tap do |values|
@@ -372,7 +372,7 @@ module Nokogiri
372
372
  # Removes the last element from set and returns it, or +nil+ if
373
373
  # the set is empty
374
374
  def pop
375
- return nil if length == 0
375
+ return if length == 0
376
376
 
377
377
  delete(last)
378
378
  end
@@ -381,7 +381,7 @@ module Nokogiri
381
381
  # Returns the first element of the NodeSet and removes it. Returns
382
382
  # +nil+ if the set is empty.
383
383
  def shift
384
- return nil if length == 0
384
+ return if length == 0
385
385
 
386
386
  delete(first)
387
387
  end
@@ -435,7 +435,7 @@ module Nokogiri
435
435
  #
436
436
  # Returns the members of this NodeSet as an array, to use in pattern matching.
437
437
  #
438
- # This is an experimental feature, available since v1.14.0
438
+ # Since v1.14.0
439
439
  #
440
440
  def deconstruct
441
441
  to_a
@@ -3,9 +3,11 @@
3
3
  module Nokogiri
4
4
  module XML
5
5
  ###
6
- # Nokogiri::XML::Reader parses an XML document similar to the way a cursor
7
- # would move. The Reader is given an XML document, and yields nodes
8
- # to an each block.
6
+ # Nokogiri::XML::Reader parses an XML document similar to the way a cursor would move. The
7
+ # Reader is given an XML document, and yields nodes to an each block.
8
+ #
9
+ # The Reader parser might be good for when you need the speed and low memory usage of the SAX
10
+ # parser, but do not want to write a Document handler.
9
11
  #
10
12
  # Here is an example of usage:
11
13
  #
@@ -22,13 +24,12 @@ module Nokogiri
22
24
  #
23
25
  # end
24
26
  #
25
- # Note that Nokogiri::XML::Reader#each can only be called once!! Once
26
- # the cursor moves through the entire document, you must parse the
27
- # document again. So make sure that you capture any information you
28
- # need during the first iteration.
27
+ # Nokogiri::XML::Reader#each can only be called once! Once the cursor moves through the entire
28
+ # document, you must parse the document again. It may be better to capture all information you
29
+ # need during a single iteration.
29
30
  #
30
- # The Reader parser is good for when you need the speed of a SAX parser,
31
- # but do not want to write a Document handler.
31
+ # libxml2 does not support error recovery in the Reader parser. The `RECOVER` ParseOption is
32
+ # ignored. If a syntax error is encountered during parsing, an exception will be raised.
32
33
  class Reader
33
34
  include Enumerable
34
35
 
@@ -199,7 +199,7 @@ module Nokogiri
199
199
  #
200
200
  # Search this node's immediate children using CSS selector +selector+
201
201
  def >(selector) # rubocop:disable Naming/BinaryOperatorParameterName
202
- ns = (document.root&.namespaces || {})
202
+ ns = document.root&.namespaces || {}
203
203
  xpath(CSS.xpath_for(selector, prefix: "./", ns: ns).first)
204
204
  end
205
205
 
@@ -229,7 +229,7 @@ module Nokogiri
229
229
  def xpath_impl(node, path, handler, ns, binds)
230
230
  ctx = XPathContext.new(node)
231
231
  ctx.register_namespaces(ns)
232
- path = path.gsub(/xmlns:/, " :") unless Nokogiri.uses_libxml?
232
+ path = path.gsub("xmlns:", " :") unless Nokogiri.uses_libxml?
233
233
 
234
234
  binds&.each do |key, value|
235
235
  ctx.register_variable(key.to_s, value)
@@ -269,7 +269,7 @@ module Nokogiri
269
269
  end
270
270
  ns, binds = hashes.reverse
271
271
 
272
- ns ||= (document.root&.namespaces || {})
272
+ ns ||= document.root&.namespaces || {}
273
273
 
274
274
  [params, handler, ns, binds]
275
275
  end