nokogiri 1.15.0 → 1.16.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (51) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +17 -14
  3. data/README.md +4 -1
  4. data/dependencies.yml +9 -8
  5. data/ext/nokogiri/extconf.rb +3 -4
  6. data/ext/nokogiri/html4_sax_push_parser.c +1 -1
  7. data/ext/nokogiri/nokogiri.h +10 -3
  8. data/ext/nokogiri/test_global_handlers.c +1 -1
  9. data/ext/nokogiri/xml_cdata.c +28 -23
  10. data/ext/nokogiri/xml_document.c +13 -5
  11. data/ext/nokogiri/xml_namespace.c +0 -4
  12. data/ext/nokogiri/xml_node.c +6 -9
  13. data/ext/nokogiri/xml_reader.c +26 -48
  14. data/ext/nokogiri/xml_relax_ng.c +1 -1
  15. data/ext/nokogiri/xml_sax_parser_context.c +4 -0
  16. data/ext/nokogiri/xml_sax_push_parser.c +1 -1
  17. data/ext/nokogiri/xml_schema.c +13 -8
  18. data/ext/nokogiri/xml_syntax_error.c +3 -3
  19. data/ext/nokogiri/xml_text.c +24 -19
  20. data/ext/nokogiri/xml_xpath_context.c +2 -5
  21. data/ext/nokogiri/xslt_stylesheet.c +8 -3
  22. data/gumbo-parser/Makefile +18 -0
  23. data/gumbo-parser/src/error.c +1 -1
  24. data/gumbo-parser/src/parser.c +8 -5
  25. data/gumbo-parser/src/tokenizer.c +1 -0
  26. data/lib/nokogiri/css/parser_extras.rb +1 -1
  27. data/lib/nokogiri/css/xpath_visitor.rb +1 -21
  28. data/lib/nokogiri/html4/document.rb +1 -1
  29. data/lib/nokogiri/html4/encoding_reader.rb +1 -1
  30. data/lib/nokogiri/html5.rb +0 -66
  31. data/lib/nokogiri/jruby/nokogiri_jars.rb +3 -3
  32. data/lib/nokogiri/version/constant.rb +1 -1
  33. data/lib/nokogiri/version/info.rb +6 -5
  34. data/lib/nokogiri/xml/attr.rb +2 -2
  35. data/lib/nokogiri/xml/document.rb +4 -5
  36. data/lib/nokogiri/xml/document_fragment.rb +2 -2
  37. data/lib/nokogiri/xml/namespace.rb +1 -2
  38. data/lib/nokogiri/xml/node.rb +31 -24
  39. data/lib/nokogiri/xml/node_set.rb +3 -3
  40. data/lib/nokogiri/xml/reader.rb +10 -9
  41. data/lib/nokogiri/xml/searchable.rb +3 -3
  42. data/lib/nokogiri/xml/syntax_error.rb +1 -1
  43. data/lib/nokogiri/xml.rb +1 -1
  44. data/lib/nokogiri/xslt/stylesheet.rb +29 -7
  45. data/lib/nokogiri/xslt.rb +1 -1
  46. data/lib/nokogiri.rb +1 -1
  47. data/ports/archives/libxml2-2.12.7.tar.xz +0 -0
  48. data/ports/archives/libxslt-1.1.39.tar.xz +0 -0
  49. metadata +9 -9
  50. data/ports/archives/libxml2-2.11.3.tar.xz +0 -0
  51. data/ports/archives/libxslt-1.1.38.tar.xz +0 -0
@@ -9,33 +9,38 @@ VALUE cNokogiriXmlText ;
9
9
  * Create a new Text element on the +document+ with +content+
10
10
  */
11
11
  static VALUE
12
- new (int argc, VALUE *argv, VALUE klass)
12
+ rb_xml_text_s_new(int argc, VALUE *argv, VALUE klass)
13
13
  {
14
- xmlDocPtr doc;
15
- xmlNodePtr node;
16
- VALUE string;
17
- VALUE document;
18
- VALUE rest;
14
+ xmlDocPtr c_document;
15
+ xmlNodePtr c_node;
16
+ VALUE rb_string;
17
+ VALUE rb_document;
18
+ VALUE rb_rest;
19
19
  VALUE rb_node;
20
20
 
21
- rb_scan_args(argc, argv, "2*", &string, &document, &rest);
21
+ rb_scan_args(argc, argv, "2*", &rb_string, &rb_document, &rb_rest);
22
22
 
23
- if (rb_obj_is_kind_of(document, cNokogiriXmlDocument)) {
24
- doc = noko_xml_document_unwrap(document);
25
- } else {
23
+ if (!rb_obj_is_kind_of(rb_document, cNokogiriXmlNode)) {
24
+ rb_raise(rb_eTypeError,
25
+ "expected second parameter to be a Nokogiri::XML::Document, received %"PRIsVALUE,
26
+ rb_obj_class(rb_document));
27
+ }
28
+
29
+ if (!rb_obj_is_kind_of(rb_document, cNokogiriXmlDocument)) {
26
30
  xmlNodePtr deprecated_node_type_arg;
27
- // TODO: deprecate allowing Node
28
- NOKO_WARN_DEPRECATION("Passing a Node as the second parameter to Text.new is deprecated. Please pass a Document instead. This will become an error in a future release of Nokogiri.");
29
- Noko_Node_Get_Struct(document, xmlNode, deprecated_node_type_arg);
30
- doc = deprecated_node_type_arg->doc;
31
+ NOKO_WARN_DEPRECATION("Passing a Node as the second parameter to Text.new is deprecated. Please pass a Document instead. This will become an error in Nokogiri v1.17.0."); // TODO: deprecated in v1.15.3, remove in v1.17.0
32
+ Noko_Node_Get_Struct(rb_document, xmlNode, deprecated_node_type_arg);
33
+ c_document = deprecated_node_type_arg->doc;
34
+ } else {
35
+ c_document = noko_xml_document_unwrap(rb_document);
31
36
  }
32
37
 
33
- node = xmlNewText((xmlChar *)StringValueCStr(string));
34
- node->doc = doc;
38
+ c_node = xmlNewText((xmlChar *)StringValueCStr(rb_string));
39
+ c_node->doc = c_document;
35
40
 
36
- noko_xml_document_pin_node(node);
41
+ noko_xml_document_pin_node(c_node);
37
42
 
38
- rb_node = noko_xml_node_wrap(klass, node) ;
43
+ rb_node = noko_xml_node_wrap(klass, c_node) ;
39
44
  rb_obj_call_init(rb_node, argc, argv);
40
45
 
41
46
  if (rb_block_given_p()) { rb_yield(rb_node); }
@@ -52,5 +57,5 @@ noko_init_xml_text(void)
52
57
  */
53
58
  cNokogiriXmlText = rb_define_class_under(mNokogiriXml, "Text", cNokogiriXmlCharacterData);
54
59
 
55
- rb_define_singleton_method(cNokogiriXmlText, "new", new, -1);
60
+ rb_define_singleton_method(cNokogiriXmlText, "new", rb_xml_text_s_new, -1);
56
61
  }
@@ -321,11 +321,8 @@ handler_lookup(void *data, const xmlChar *c_name, const xmlChar *c_ns_uri)
321
321
  VALUE rb_handler = (VALUE)data;
322
322
  if (rb_respond_to(rb_handler, rb_intern((const char *)c_name))) {
323
323
  if (c_ns_uri == NULL) {
324
- NOKO_WARN_DEPRECATION(
325
- "A custom XPath or CSS handler function named '%s' is being invoked without a namespace."
326
- " Please update your query to reference this function as 'nokogiri:%s'."
327
- " Invoking custom handler functions without a namespace is deprecated and support will be removed in a future release of Nokogiri.",
328
- c_name, c_name);
324
+ NOKO_WARN_DEPRECATION("A custom XPath or CSS handler function named '%s' is being invoked without a namespace. Please update your query to reference this function as 'nokogiri:%s'. Invoking custom handler functions without a namespace is deprecated and will become an error in Nokogiri v1.17.0.",
325
+ c_name, c_name); // deprecated in v1.15.0, remove in v1.17.0
329
326
  }
330
327
  return method_caller;
331
328
  }
@@ -71,7 +71,12 @@ Nokogiri_wrap_xslt_stylesheet(xsltStylesheetPtr ss)
71
71
  * call-seq:
72
72
  * parse_stylesheet_doc(document)
73
73
  *
74
- * Parse a stylesheet from +document+.
74
+ * Parse an XSLT::Stylesheet from +document+.
75
+ *
76
+ * [Parameters]
77
+ * - +document+ (Nokogiri::XML::Document) the document to be parsed.
78
+ *
79
+ * [Returns] Nokogiri::XSLT::Stylesheet
75
80
  */
76
81
  static VALUE
77
82
  parse_stylesheet_doc(VALUE klass, VALUE xmldocobj)
@@ -104,7 +109,7 @@ parse_stylesheet_doc(VALUE klass, VALUE xmldocobj)
104
109
  * call-seq:
105
110
  * serialize(document)
106
111
  *
107
- * Serialize +document+ to an xml string.
112
+ * Serialize +document+ to an xml string, as specified by the +method+ parameter in the Stylesheet.
108
113
  */
109
114
  static VALUE
110
115
  rb_xslt_stylesheet_serialize(VALUE self, VALUE xmlobj)
@@ -133,7 +138,7 @@ rb_xslt_stylesheet_serialize(VALUE self, VALUE xmlobj)
133
138
  * transform(document)
134
139
  * transform(document, params = {})
135
140
  *
136
- * Apply an XSLT stylesheet to an XML::Document.
141
+ * Transform an XML::Document as defined by an XSLT::Stylesheet.
137
142
  *
138
143
  * [Parameters]
139
144
  * - +document+ (Nokogiri::XML::Document) the document to be transformed.
@@ -13,6 +13,23 @@ LDFLAGS := -pthread
13
13
 
14
14
  all: check
15
15
 
16
+ oss-fuzz:
17
+ ./fuzzer/build-ossfuzz.sh
18
+
19
+ fuzzers: fuzzer-normal fuzzer-asan fuzzer-ubsan fuzzer-msan
20
+
21
+ fuzzer-normal:
22
+ ./fuzzer/build.sh
23
+
24
+ fuzzer-asan:
25
+ SANITIZER=asan ./fuzzer/build.sh
26
+
27
+ fuzzer-ubsan:
28
+ SANITIZER=ubsan ./fuzzer/build.sh
29
+
30
+ fuzzer-msan:
31
+ SANITIZER=msan ./fuzzer/build.sh
32
+
16
33
  # don't try to regenerate ragel or gperf files in CI, that should be a development-only action and
17
34
  # the generated files should be committed to SCM
18
35
  ifneq ($(CI),true)
@@ -81,6 +98,7 @@ coverage:
81
98
 
82
99
  clean:
83
100
  $(RM) -r build
101
+ $(RM) -r fuzzer/build fuzzer/src-* fuzzer/gumbo_corpus
84
102
 
85
103
  build/src/flags: | build/src
86
104
  @echo 'old_CC := $(CC)' > $@
@@ -357,7 +357,7 @@ static void handle_parser_error (
357
357
  print_tag_stack(error, output);
358
358
  return;
359
359
  case GUMBO_TOKEN_END_TAG:
360
- print_message(output, "Eng tag '%s' isn't allowed here.",
360
+ print_message(output, "End tag '%s' isn't allowed here.",
361
361
  gumbo_normalized_tagname(error->input_tag));
362
362
  print_tag_stack(error, output);
363
363
  return;
@@ -4826,14 +4826,17 @@ GumboOutput* gumbo_parse_with_options (
4826
4826
  // to a token.
4827
4827
  if (token.type == GUMBO_TOKEN_END_TAG &&
4828
4828
  token.v.end_tag.tag == GUMBO_TAG_UNKNOWN)
4829
+ {
4829
4830
  gumbo_free(token.v.end_tag.name);
4831
+ token.v.end_tag.name = NULL;
4832
+ }
4833
+ if (unlikely(state->_open_elements.length > max_tree_depth)) {
4834
+ parser._output->status = GUMBO_STATUS_TREE_TOO_DEEP;
4835
+ gumbo_debug("Tree depth limit exceeded.\n");
4836
+ break;
4837
+ }
4830
4838
  }
4831
4839
 
4832
- if (unlikely(state->_open_elements.length > max_tree_depth)) {
4833
- parser._output->status = GUMBO_STATUS_TREE_TOO_DEEP;
4834
- gumbo_debug("Tree depth limit exceeded.\n");
4835
- break;
4836
- }
4837
4840
 
4838
4841
  ++loop_count;
4839
4842
  assert(loop_count < 1000000000UL);
@@ -506,6 +506,7 @@ static void abandon_current_tag(GumboParser* parser) {
506
506
  for (unsigned int i = 0; i < tag_state->_attributes.length; ++i) {
507
507
  gumbo_destroy_attribute(tag_state->_attributes.data[i]);
508
508
  }
509
+ gumbo_free(tag_state->_name);
509
510
  gumbo_free(tag_state->_attributes.data);
510
511
  mark_tag_state_as_empty(tag_state);
511
512
  gumbo_string_buffer_destroy(&tag_state->_buffer);
@@ -23,7 +23,7 @@ module Nokogiri
23
23
 
24
24
  # Get the css selector in +string+ from the cache
25
25
  def [](string)
26
- return nil unless cache_on?
26
+ return unless cache_on?
27
27
 
28
28
  @mutex.synchronize { @cache[string] }
29
29
  end
@@ -302,7 +302,7 @@ module Nokogiri
302
302
  end
303
303
 
304
304
  def read_a_and_positive_b(values)
305
- op = values[2]
305
+ op = values[2].strip
306
306
  if op == "+"
307
307
  a = values[0].to_i
308
308
  b = values[3].to_i
@@ -335,25 +335,5 @@ module Nokogiri
335
335
  end
336
336
  end
337
337
  end
338
-
339
- module XPathVisitorAlwaysUseBuiltins # :nodoc:
340
- def self.new
341
- warn(
342
- "Nokogiri::CSS::XPathVisitorAlwaysUseBuiltins is deprecated and will be removed in a future version of Nokogiri",
343
- { uplevel: 1 },
344
- )
345
- XPathVisitor.new(builtins: :always)
346
- end
347
- end
348
-
349
- module XPathVisitorOptimallyUseBuiltins # :nodoc:
350
- def self.new
351
- warn(
352
- "Nokogiri::CSS::XPathVisitorOptimallyUseBuiltins is deprecated and will be removed in a future version of Nokogiri",
353
- { uplevel: 1 },
354
- )
355
- XPathVisitor.new(builtins: :optimal)
356
- end
357
- end
358
338
  end
359
339
  end
@@ -92,7 +92,7 @@ module Nokogiri
92
92
  title = XML::Node.new("title", self) << tnode
93
93
  if (head = at_xpath("//head"))
94
94
  head << title
95
- elsif (meta = (at_xpath("//meta[@charset]") || meta_content_type))
95
+ elsif (meta = at_xpath("//meta[@charset]") || meta_content_type)
96
96
  # better put after charset declaration
97
97
  meta.add_next_sibling(title)
98
98
  else
@@ -94,7 +94,7 @@ module Nokogiri
94
94
  # no support for a call without len
95
95
 
96
96
  unless @firstchunk
97
- (@firstchunk = @io.read(len)) || (return nil)
97
+ (@firstchunk = @io.read(len)) || return
98
98
 
99
99
  # This implementation expects that the first call from
100
100
  # htmlReadIO() is made with a length long enough (~1KB) to
@@ -239,23 +239,6 @@ module Nokogiri
239
239
  DocumentFragment.parse(string, encoding, options)
240
240
  end
241
241
 
242
- # Fetch and parse a HTML document from the web, following redirects,
243
- # handling https, and determining the character encoding using HTML5
244
- # rules. +uri+ may be a +String+ or a +URI+. +options+ contains
245
- # http headers and special options. Everything which is not a
246
- # special option is considered a header. Special options include:
247
- # * :follow_limit => number of redirects which are followed
248
- # * :basic_auth => [username, password]
249
- def get(uri, options = {})
250
- # TODO: deprecate
251
- warn(
252
- "Nokogiri::HTML5.get is deprecated and will be removed in a future version of Nokogiri.",
253
- uplevel: 1,
254
- category: :deprecated,
255
- )
256
- get_impl(uri, options)
257
- end
258
-
259
242
  # :nodoc:
260
243
  def read_and_encode(string, encoding)
261
244
  # Read the string with the given encoding.
@@ -283,55 +266,6 @@ module Nokogiri
283
266
 
284
267
  private
285
268
 
286
- def get_impl(uri, options = {})
287
- headers = options.clone
288
- headers = { follow_limit: headers } if Numeric === headers # deprecated
289
- limit = headers[:follow_limit] ? headers.delete(:follow_limit).to_i : 10
290
-
291
- require "net/http"
292
- uri = URI(uri) unless URI === uri
293
-
294
- http = Net::HTTP.new(uri.host, uri.port)
295
-
296
- # TLS / SSL support
297
- http.use_ssl = true if uri.scheme == "https"
298
-
299
- # Pass through Net::HTTP override values, which currently include:
300
- # :ca_file, :ca_path, :cert, :cert_store, :ciphers,
301
- # :close_on_empty_response, :continue_timeout, :key, :open_timeout,
302
- # :read_timeout, :ssl_timeout, :ssl_version, :use_ssl,
303
- # :verify_callback, :verify_depth, :verify_mode
304
- options.each do |key, _value|
305
- http.send("#{key}=", headers.delete(key)) if http.respond_to?("#{key}=")
306
- end
307
-
308
- request = Net::HTTP::Get.new(uri.request_uri)
309
-
310
- # basic authentication
311
- auth = headers.delete(:basic_auth)
312
- auth ||= [uri.user, uri.password] if uri.user && uri.password
313
- request.basic_auth(auth.first, auth.last) if auth
314
-
315
- # remaining options are treated as headers
316
- headers.each { |key, value| request[key.to_s] = value.to_s }
317
-
318
- response = http.request(request)
319
-
320
- case response
321
- when Net::HTTPSuccess
322
- doc = parse(reencode(response.body, response["content-type"]), options)
323
- doc.instance_variable_set(:@response, response)
324
- doc.class.send(:attr_reader, :response)
325
- doc
326
- when Net::HTTPRedirection
327
- response.value if limit <= 1
328
- location = URI.join(uri, response["location"])
329
- get_impl(location, options.merge(follow_limit: limit - 1))
330
- else
331
- response.value
332
- end
333
- end
334
-
335
269
  # Charset sniffing is a complex and controversial topic that understandably isn't done _by
336
270
  # default_ by the Ruby Net::HTTP library. This being said, it is a very real problem for
337
271
  # consumers of HTML as the default for HTML is iso-8859-1, most "good" producers use utf-8, and
@@ -6,10 +6,10 @@ rescue LoadError
6
6
  require 'net/sourceforge/htmlunit/neko-htmlunit/2.63.0/neko-htmlunit-2.63.0.jar'
7
7
  require 'nu/validator/jing/20200702VNU/jing-20200702VNU.jar'
8
8
  require 'xerces/xercesImpl/2.12.2/xercesImpl-2.12.2.jar'
9
- require 'org/nokogiri/nekodtd/0.1.11.noko1/nekodtd-0.1.11.noko1.jar'
10
9
  require 'net/sf/saxon/Saxon-HE/9.6.0-4/Saxon-HE-9.6.0-4.jar'
11
10
  require 'xalan/xalan/2.7.3/xalan-2.7.3.jar'
12
11
  require 'xml-apis/xml-apis/1.4.01/xml-apis-1.4.01.jar'
12
+ require 'org/nokogiri/nekodtd/0.1.11.noko2/nekodtd-0.1.11.noko2.jar'
13
13
  require 'isorelax/isorelax/20030108/isorelax-20030108.jar'
14
14
  end
15
15
 
@@ -18,10 +18,10 @@ if defined? Jars
18
18
  require_jar 'net.sourceforge.htmlunit', 'neko-htmlunit', '2.63.0'
19
19
  require_jar 'nu.validator', 'jing', '20200702VNU'
20
20
  require_jar 'xerces', 'xercesImpl', '2.12.2'
21
- require_jar 'org.nokogiri', 'nekodtd', '0.1.11.noko1'
22
21
  require_jar 'net.sf.saxon', 'Saxon-HE', '9.6.0-4'
23
22
  require_jar 'xalan', 'xalan', '2.7.3'
24
23
  require_jar 'xml-apis', 'xml-apis', '1.4.01'
24
+ require_jar 'org.nokogiri', 'nekodtd', '0.1.11.noko2'
25
25
  require_jar 'isorelax', 'isorelax', '20030108'
26
26
  end
27
27
 
@@ -32,7 +32,7 @@ module Nokogiri
32
32
  "net.sf.saxon:Saxon-HE" => "9.6.0-4",
33
33
  "net.sourceforge.htmlunit:neko-htmlunit" => "2.63.0",
34
34
  "nu.validator:jing" => "20200702VNU",
35
- "org.nokogiri:nekodtd" => "0.1.11.noko1",
35
+ "org.nokogiri:nekodtd" => "0.1.11.noko2",
36
36
  "xalan:serializer" => "2.7.3",
37
37
  "xalan:xalan" => "2.7.3",
38
38
  "xerces:xercesImpl" => "2.12.2",
@@ -2,5 +2,5 @@
2
2
 
3
3
  module Nokogiri
4
4
  # The version of Nokogiri you are using
5
- VERSION = "1.15.0"
5
+ VERSION = "1.16.5"
6
6
  end
@@ -94,11 +94,14 @@ module Nokogiri
94
94
  nokogiri["version"] = Nokogiri::VERSION
95
95
 
96
96
  unless jruby?
97
- # enable gems like nokogumbo to build with the following in their extconf.rb:
97
+ # enable gems to build against Nokogiri with the following in their extconf.rb:
98
98
  #
99
99
  # append_cflags(Nokogiri::VERSION_INFO["nokogiri"]["cppflags"])
100
100
  # append_ldflags(Nokogiri::VERSION_INFO["nokogiri"]["ldflags"])
101
101
  #
102
+ # though, this won't work on all platform and versions of Ruby, and won't be supported
103
+ # forever, see https://github.com/sparklemotion/nokogiri/discussions/2746 for context.
104
+ #
102
105
  cppflags = ["-I#{header_directory.shellescape}"]
103
106
  ldflags = []
104
107
 
@@ -108,7 +111,8 @@ module Nokogiri
108
111
  end
109
112
 
110
113
  if windows?
111
- # on windows, nokogumbo needs to link against nokogiri.so to resolve symbols. see #2167
114
+ # on windows, third party libraries that wish to link against nokogiri
115
+ # should link against nokogiri.so to resolve symbols. see #2167
112
116
  lib_directory = File.expand_path(File.join(File.dirname(__FILE__), "../#{ruby_minor}"))
113
117
  unless File.exist?(lib_directory)
114
118
  lib_directory = File.expand_path(File.join(File.dirname(__FILE__), ".."))
@@ -136,9 +140,6 @@ module Nokogiri
136
140
  libxml["source"] = "packaged"
137
141
  libxml["precompiled"] = libxml2_precompiled?
138
142
  libxml["patches"] = Nokogiri::LIBXML2_PATCHES
139
-
140
- # this is for nokogumbo and shouldn't be forever
141
- libxml["libxml2_path"] = header_directory
142
143
  else
143
144
  libxml["source"] = "system"
144
145
  end
@@ -18,8 +18,6 @@ module Nokogiri
18
18
  # - +value+ → (String) The value of the attribute.
19
19
  # - +namespace+ → (Namespace, nil) The Namespace of the attribute, or +nil+ if there is no namespace.
20
20
  #
21
- # ⚡ This is an experimental feature, available since v1.14.0
22
- #
23
21
  # *Example*
24
22
  #
25
23
  # doc = Nokogiri::XML.parse(<<~XML)
@@ -52,6 +50,8 @@ module Nokogiri
52
50
  # # href = "http://nokogiri.org/ns/noko"
53
51
  # # })}
54
52
  #
53
+ # Since v1.14.0
54
+ #
55
55
  def deconstruct_keys(keys)
56
56
  { name: name, value: value, namespace: namespace }
57
57
  end
@@ -174,8 +174,7 @@ module Nokogiri
174
174
  # Since v1.12.4
175
175
  attr_accessor :namespace_inheritance
176
176
 
177
- # :nodoc:
178
- def initialize(*args) # rubocop:disable Lint/MissingSuper
177
+ def initialize(*args) # :nodoc: # rubocop:disable Lint/MissingSuper
179
178
  @errors = []
180
179
  @decorators = nil
181
180
  @namespace_inheritance = false
@@ -330,7 +329,7 @@ module Nokogiri
330
329
  # Validate this Document against it's DTD. Returns a list of errors on
331
330
  # the document or +nil+ when there is no DTD.
332
331
  def validate
333
- return nil unless internal_subset
332
+ return unless internal_subset
334
333
 
335
334
  internal_subset.validate(self)
336
335
  end
@@ -427,8 +426,6 @@ module Nokogiri
427
426
  # instructions. If you have a use case and would like this functionality, please let us know
428
427
  # by opening an issue or a discussion on the github project.
429
428
  #
430
- # ⚡ This is an experimental feature, available since v1.14.0
431
- #
432
429
  # *Example*
433
430
  #
434
431
  # doc = Nokogiri::XML.parse(<<~XML)
@@ -455,6 +452,8 @@ module Nokogiri
455
452
  # doc.deconstruct_keys([:root])
456
453
  # # => {:root=>nil}
457
454
  #
455
+ # Since v1.14.0
456
+ #
458
457
  def deconstruct_keys(keys)
459
458
  { root: root }
460
459
  end
@@ -154,8 +154,6 @@ module Nokogiri
154
154
  # root elements, you should deconstruct the array returned by
155
155
  # <tt>DocumentFragment#elements</tt>.
156
156
  #
157
- # ⚡ This is an experimental feature, available since v1.14.0
158
- #
159
157
  # *Example*
160
158
  #
161
159
  # frag = Nokogiri::HTML5.fragment(<<~HTML)
@@ -187,6 +185,8 @@ module Nokogiri
187
185
  # # }),
188
186
  # # #(Element:0x398 { name = "div", children = [ #(Text "End")] })]
189
187
  #
188
+ # Since v1.14.0
189
+ #
190
190
  def deconstruct
191
191
  children.to_a
192
192
  end
@@ -16,8 +16,6 @@ module Nokogiri
16
16
  # - +prefix+ → (String, nil) The namespace's prefix, or +nil+ if there is no prefix (e.g., default namespace).
17
17
  # - +href+ → (String) The namespace's URI
18
18
  #
19
- # ⚡ This is an experimental feature, available since v1.14.0
20
- #
21
19
  # *Example*
22
20
  #
23
21
  # doc = Nokogiri::XML.parse(<<~XML)
@@ -43,6 +41,7 @@ module Nokogiri
43
41
  # doc.root.elements.last.namespace.deconstruct_keys([:prefix, :href])
44
42
  # # => {:prefix=>"noko", :href=>"http://nokogiri.org/ns/noko"}
45
43
  #
44
+ # Since v1.14.0
46
45
  #
47
46
  def deconstruct_keys(keys)
48
47
  { prefix: prefix, href: href }
@@ -1049,29 +1049,35 @@ module Nokogiri
1049
1049
 
1050
1050
  return Nokogiri::XML::NodeSet.new(document) if contents.empty?
1051
1051
 
1052
- # libxml2 does not obey the +recover+ option after encountering errors during +in_context+
1053
- # parsing, and so this horrible hack is here to try to emulate recovery behavior.
1054
- #
1055
- # Unfortunately, this means we're no longer parsing "in context" and so namespaces that
1056
- # would have been inherited from the context node won't be handled correctly. This hack was
1057
- # written in 2010, and I regret it, because it's silently degrading functionality in a way
1058
- # that's not easily prevented (or even detected).
1059
- #
1060
- # I think preferable behavior would be to either:
1061
- #
1062
- # a. add an error noting that we "fell back" and pointing the user to turning off the +recover+ option
1063
- # b. don't recover, but raise a sensible exception
1064
- #
1065
- # For context and background: https://github.com/sparklemotion/nokogiri/issues/313
1066
- # FIXME bug report: https://github.com/sparklemotion/nokogiri/issues/2092
1067
1052
  error_count = document.errors.length
1068
1053
  node_set = in_context(contents, options.to_i)
1069
- if node_set.empty? && (document.errors.length > error_count)
1070
- if options.recover?
1054
+ if document.errors.length > error_count
1055
+ raise document.errors[error_count] unless options.recover?
1056
+
1057
+ if node_set.empty?
1058
+ # libxml2 < 2.13 does not obey the +recover+ option after encountering errors during
1059
+ # +in_context+ parsing, and so this horrible hack is here to try to emulate recovery
1060
+ # behavior.
1061
+ #
1062
+ # (Note that HTML4 fragment parsing seems to have been fixed in abd74186, and XML
1063
+ # fragment parsing is fixed in 1c106edf. Both are in 2.13.)
1064
+ #
1065
+ # Unfortunately, this means we're no longer parsing "in context" and so namespaces that
1066
+ # would have been inherited from the context node won't be handled correctly. This hack
1067
+ # was written in 2010, and I regret it, because it's silently degrading functionality in
1068
+ # a way that's not easily prevented (or even detected).
1069
+ #
1070
+ # I think preferable behavior would be to either:
1071
+ #
1072
+ # a. add an error noting that we "fell back" and pointing the user to turning off the
1073
+ # +recover+ option
1074
+ # b. don't recover, but raise a sensible exception
1075
+ #
1076
+ # For context and background:
1077
+ # - https://github.com/sparklemotion/nokogiri/issues/313
1078
+ # - https://github.com/sparklemotion/nokogiri/issues/2092
1071
1079
  fragment = document.related_class("DocumentFragment").parse(contents)
1072
1080
  node_set = fragment.children
1073
- else
1074
- raise document.errors[error_count]
1075
1081
  end
1076
1082
  end
1077
1083
  node_set
@@ -1165,7 +1171,7 @@ module Nokogiri
1165
1171
  # Fetch the Nokogiri::HTML4::ElementDescription for this node. Returns
1166
1172
  # nil on XML documents and on unknown tags.
1167
1173
  def description
1168
- return nil if document.xml?
1174
+ return if document.xml?
1169
1175
 
1170
1176
  Nokogiri::HTML4::ElementDescription[name]
1171
1177
  end
@@ -1254,8 +1260,8 @@ module Nokogiri
1254
1260
  # Compare two Node objects with respect to their Document. Nodes from
1255
1261
  # different documents cannot be compared.
1256
1262
  def <=>(other)
1257
- return nil unless other.is_a?(Nokogiri::XML::Node)
1258
- return nil unless document == other.document
1263
+ return unless other.is_a?(Nokogiri::XML::Node)
1264
+ return unless document == other.document
1259
1265
 
1260
1266
  compare(other)
1261
1267
  end
@@ -1278,6 +1284,7 @@ module Nokogiri
1278
1284
  # end
1279
1285
  #
1280
1286
  def serialize(*args, &block)
1287
+ # TODO: deprecate non-hash options, see 46c68ed 2009-06-20 for context
1281
1288
  options = if args.first.is_a?(Hash)
1282
1289
  args.shift
1283
1290
  else
@@ -1429,8 +1436,6 @@ module Nokogiri
1429
1436
  # - +content+ → (String) The contents of all the text nodes in this node's subtree. See #content.
1430
1437
  # - +inner_html+ → (String) The inner markup for the children of this node. See #inner_html.
1431
1438
  #
1432
- # ⚡ This is an experimental feature, available since v1.14.0
1433
- #
1434
1439
  # *Example*
1435
1440
  #
1436
1441
  # doc = Nokogiri::XML.parse(<<~XML)
@@ -1465,6 +1470,8 @@ module Nokogiri
1465
1470
  # # value = "def"
1466
1471
  # # })]}
1467
1472
  #
1473
+ # Since v1.14.0
1474
+ #
1468
1475
  def deconstruct_keys(keys)
1469
1476
  requested_keys = DECONSTRUCT_KEYS & keys
1470
1477
  {}.tap do |values|
@@ -372,7 +372,7 @@ module Nokogiri
372
372
  # Removes the last element from set and returns it, or +nil+ if
373
373
  # the set is empty
374
374
  def pop
375
- return nil if length == 0
375
+ return if length == 0
376
376
 
377
377
  delete(last)
378
378
  end
@@ -381,7 +381,7 @@ module Nokogiri
381
381
  # Returns the first element of the NodeSet and removes it. Returns
382
382
  # +nil+ if the set is empty.
383
383
  def shift
384
- return nil if length == 0
384
+ return if length == 0
385
385
 
386
386
  delete(first)
387
387
  end
@@ -435,7 +435,7 @@ module Nokogiri
435
435
  #
436
436
  # Returns the members of this NodeSet as an array, to use in pattern matching.
437
437
  #
438
- # This is an experimental feature, available since v1.14.0
438
+ # Since v1.14.0
439
439
  #
440
440
  def deconstruct
441
441
  to_a
@@ -3,9 +3,11 @@
3
3
  module Nokogiri
4
4
  module XML
5
5
  ###
6
- # Nokogiri::XML::Reader parses an XML document similar to the way a cursor
7
- # would move. The Reader is given an XML document, and yields nodes
8
- # to an each block.
6
+ # Nokogiri::XML::Reader parses an XML document similar to the way a cursor would move. The
7
+ # Reader is given an XML document, and yields nodes to an each block.
8
+ #
9
+ # The Reader parser might be good for when you need the speed and low memory usage of the SAX
10
+ # parser, but do not want to write a Document handler.
9
11
  #
10
12
  # Here is an example of usage:
11
13
  #
@@ -22,13 +24,12 @@ module Nokogiri
22
24
  #
23
25
  # end
24
26
  #
25
- # Note that Nokogiri::XML::Reader#each can only be called once!! Once
26
- # the cursor moves through the entire document, you must parse the
27
- # document again. So make sure that you capture any information you
28
- # need during the first iteration.
27
+ # Nokogiri::XML::Reader#each can only be called once! Once the cursor moves through the entire
28
+ # document, you must parse the document again. It may be better to capture all information you
29
+ # need during a single iteration.
29
30
  #
30
- # The Reader parser is good for when you need the speed of a SAX parser,
31
- # but do not want to write a Document handler.
31
+ # libxml2 does not support error recovery in the Reader parser. The `RECOVER` ParseOption is
32
+ # ignored. If a syntax error is encountered during parsing, an exception will be raised.
32
33
  class Reader
33
34
  include Enumerable
34
35