nokogiri 1.13.6 → 1.14.2

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (109) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +39 -0
  3. data/LICENSE-DEPENDENCIES.md +830 -509
  4. data/LICENSE.md +1 -1
  5. data/README.md +18 -11
  6. data/dependencies.yml +33 -15
  7. data/ext/nokogiri/extconf.rb +100 -24
  8. data/ext/nokogiri/gumbo.c +21 -11
  9. data/ext/nokogiri/html4_document.c +2 -2
  10. data/ext/nokogiri/html4_element_description.c +1 -1
  11. data/ext/nokogiri/html4_entity_lookup.c +2 -2
  12. data/ext/nokogiri/html4_sax_parser_context.c +1 -6
  13. data/ext/nokogiri/html4_sax_push_parser.c +1 -1
  14. data/ext/nokogiri/nokogiri.c +38 -51
  15. data/ext/nokogiri/nokogiri.h +26 -14
  16. data/ext/nokogiri/test_global_handlers.c +1 -1
  17. data/ext/nokogiri/xml_attr.c +3 -3
  18. data/ext/nokogiri/xml_attribute_decl.c +5 -5
  19. data/ext/nokogiri/xml_cdata.c +3 -3
  20. data/ext/nokogiri/xml_comment.c +1 -1
  21. data/ext/nokogiri/xml_document.c +23 -14
  22. data/ext/nokogiri/xml_document_fragment.c +1 -1
  23. data/ext/nokogiri/xml_dtd.c +9 -9
  24. data/ext/nokogiri/xml_element_content.c +3 -3
  25. data/ext/nokogiri/xml_element_decl.c +5 -5
  26. data/ext/nokogiri/xml_encoding_handler.c +3 -3
  27. data/ext/nokogiri/xml_entity_decl.c +6 -6
  28. data/ext/nokogiri/xml_entity_reference.c +1 -1
  29. data/ext/nokogiri/xml_namespace.c +80 -14
  30. data/ext/nokogiri/xml_node.c +363 -82
  31. data/ext/nokogiri/xml_node_set.c +4 -6
  32. data/ext/nokogiri/xml_processing_instruction.c +1 -1
  33. data/ext/nokogiri/xml_reader.c +97 -22
  34. data/ext/nokogiri/xml_relax_ng.c +1 -3
  35. data/ext/nokogiri/xml_sax_parser.c +23 -17
  36. data/ext/nokogiri/xml_sax_parser_context.c +1 -6
  37. data/ext/nokogiri/xml_sax_push_parser.c +1 -3
  38. data/ext/nokogiri/xml_schema.c +4 -6
  39. data/ext/nokogiri/xml_syntax_error.c +1 -1
  40. data/ext/nokogiri/xml_text.c +2 -2
  41. data/ext/nokogiri/xml_xpath_context.c +91 -84
  42. data/ext/nokogiri/xslt_stylesheet.c +15 -14
  43. data/gumbo-parser/Makefile +10 -0
  44. data/gumbo-parser/src/attribute.h +1 -1
  45. data/gumbo-parser/src/error.c +2 -2
  46. data/gumbo-parser/src/error.h +1 -1
  47. data/gumbo-parser/src/foreign_attrs.c +2 -2
  48. data/gumbo-parser/src/{gumbo.h → nokogiri_gumbo.h} +1 -0
  49. data/gumbo-parser/src/parser.c +8 -5
  50. data/gumbo-parser/src/replacement.h +1 -1
  51. data/gumbo-parser/src/string_buffer.h +1 -1
  52. data/gumbo-parser/src/string_piece.c +1 -1
  53. data/gumbo-parser/src/svg_attrs.c +2 -2
  54. data/gumbo-parser/src/svg_tags.c +2 -2
  55. data/gumbo-parser/src/tag.c +2 -1
  56. data/gumbo-parser/src/tag_lookup.c +7 -7
  57. data/gumbo-parser/src/tag_lookup.gperf +1 -0
  58. data/gumbo-parser/src/tag_lookup.h +1 -1
  59. data/gumbo-parser/src/token_buffer.h +1 -1
  60. data/gumbo-parser/src/tokenizer.c +1 -1
  61. data/gumbo-parser/src/tokenizer.h +1 -1
  62. data/gumbo-parser/src/utf8.c +1 -1
  63. data/gumbo-parser/src/utf8.h +1 -1
  64. data/gumbo-parser/src/util.c +1 -3
  65. data/gumbo-parser/src/util.h +4 -0
  66. data/gumbo-parser/src/vector.h +1 -1
  67. data/lib/nokogiri/css/node.rb +2 -2
  68. data/lib/nokogiri/css/xpath_visitor.rb +5 -3
  69. data/lib/nokogiri/css.rb +6 -0
  70. data/lib/nokogiri/decorators/slop.rb +1 -1
  71. data/lib/nokogiri/encoding_handler.rb +57 -0
  72. data/lib/nokogiri/extension.rb +3 -2
  73. data/lib/nokogiri/html4/document.rb +2 -121
  74. data/lib/nokogiri/html4/element_description_defaults.rb +6 -12
  75. data/lib/nokogiri/html4/encoding_reader.rb +121 -0
  76. data/lib/nokogiri/html4.rb +1 -0
  77. data/lib/nokogiri/html5/document.rb +113 -36
  78. data/lib/nokogiri/html5/document_fragment.rb +9 -2
  79. data/lib/nokogiri/html5/node.rb +3 -5
  80. data/lib/nokogiri/html5.rb +127 -216
  81. data/lib/nokogiri/jruby/dependencies.rb +1 -19
  82. data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
  83. data/lib/nokogiri/version/constant.rb +1 -1
  84. data/lib/nokogiri/version/info.rb +11 -10
  85. data/lib/nokogiri/xml/attr.rb +49 -0
  86. data/lib/nokogiri/xml/builder.rb +1 -1
  87. data/lib/nokogiri/xml/document.rb +103 -55
  88. data/lib/nokogiri/xml/document_fragment.rb +49 -6
  89. data/lib/nokogiri/xml/namespace.rb +42 -0
  90. data/lib/nokogiri/xml/node/save_options.rb +6 -4
  91. data/lib/nokogiri/xml/node.rb +190 -35
  92. data/lib/nokogiri/xml/node_set.rb +88 -9
  93. data/lib/nokogiri/xml/parse_options.rb +129 -50
  94. data/lib/nokogiri/xml/pp/node.rb +6 -4
  95. data/lib/nokogiri/xml/processing_instruction.rb +2 -1
  96. data/lib/nokogiri/xml/reader.rb +6 -8
  97. data/lib/nokogiri/xml/sax/parser.rb +2 -3
  98. data/lib/nokogiri/xslt.rb +1 -1
  99. data/lib/nokogiri.rb +3 -11
  100. data/lib/xsd/xmlparser/nokogiri.rb +3 -1
  101. data/ports/archives/libxml2-2.10.3.tar.xz +0 -0
  102. data/ports/archives/libxslt-1.1.37.tar.xz +0 -0
  103. metadata +11 -242
  104. data/patches/libxml2/0004-use-glibc-strlen.patch +0 -53
  105. data/patches/libxml2/0005-avoid-isnan-isinf.patch +0 -81
  106. data/patches/libxml2/0006-update-automake-files-for-arm64.patch +0 -3040
  107. data/patches/libxml2/0008-htmlParseComment-handle-abruptly-closed-comments.patch +0 -61
  108. data/ports/archives/libxml2-2.9.14.tar.xz +0 -0
  109. data/ports/archives/libxslt-1.1.35.tar.xz +0 -0
@@ -0,0 +1,43 @@
1
+ # this is a generated file, to avoid over-writing it just delete this comment
2
+ begin
3
+ require 'jar_dependencies'
4
+ rescue LoadError
5
+ require 'xalan/xalan/2.7.2/xalan-2.7.2.jar'
6
+ require 'net/sourceforge/htmlunit/neko-htmlunit/2.63.0/neko-htmlunit-2.63.0.jar'
7
+ require 'nu/validator/jing/20200702VNU/jing-20200702VNU.jar'
8
+ require 'xerces/xercesImpl/2.12.2/xercesImpl-2.12.2.jar'
9
+ require 'org/nokogiri/nekodtd/0.1.11.noko1/nekodtd-0.1.11.noko1.jar'
10
+ require 'net/sf/saxon/Saxon-HE/9.6.0-4/Saxon-HE-9.6.0-4.jar'
11
+ require 'xml-apis/xml-apis/1.4.01/xml-apis-1.4.01.jar'
12
+ require 'xalan/serializer/2.7.2/serializer-2.7.2.jar'
13
+ require 'isorelax/isorelax/20030108/isorelax-20030108.jar'
14
+ end
15
+
16
+ if defined? Jars
17
+ require_jar 'xalan', 'xalan', '2.7.2'
18
+ require_jar 'net.sourceforge.htmlunit', 'neko-htmlunit', '2.63.0'
19
+ require_jar 'nu.validator', 'jing', '20200702VNU'
20
+ require_jar 'xerces', 'xercesImpl', '2.12.2'
21
+ require_jar 'org.nokogiri', 'nekodtd', '0.1.11.noko1'
22
+ require_jar 'net.sf.saxon', 'Saxon-HE', '9.6.0-4'
23
+ require_jar 'xml-apis', 'xml-apis', '1.4.01'
24
+ require_jar 'xalan', 'serializer', '2.7.2'
25
+ require_jar 'isorelax', 'isorelax', '20030108'
26
+ end
27
+
28
+ module Nokogiri
29
+ # generated by the :vendor_jars rake task
30
+ JAR_DEPENDENCIES = {
31
+ "isorelax:isorelax" => "20030108",
32
+ "net.sf.saxon:Saxon-HE" => "9.6.0-4",
33
+ "net.sourceforge.htmlunit:neko-htmlunit" => "2.63.0",
34
+ "nu.validator:jing" => "20200702VNU",
35
+ "org.nokogiri:nekodtd" => "0.1.11.noko1",
36
+ "xalan:serializer" => "2.7.2",
37
+ "xalan:xalan" => "2.7.2",
38
+ "xerces:xercesImpl" => "2.12.2",
39
+ "xml-apis:xml-apis" => "1.4.01",
40
+ }.freeze
41
+ XERCES_VERSION = JAR_DEPENDENCIES["xerces:xercesImpl"]
42
+ NEKO_VERSION = JAR_DEPENDENCIES["net.sourceforge.htmlunit:neko-htmlunit"]
43
+ end
@@ -2,5 +2,5 @@
2
2
 
3
3
  module Nokogiri
4
4
  # The version of Nokogiri you are using
5
- VERSION = "1.13.6"
5
+ VERSION = "1.14.2"
6
6
  end
@@ -105,16 +105,16 @@ module Nokogiri
105
105
  if libxml2_using_packaged?
106
106
  cppflags << "-I#{File.join(header_directory, "include").shellescape}"
107
107
  cppflags << "-I#{File.join(header_directory, "include/libxml2").shellescape}"
108
+ end
108
109
 
109
- if windows?
110
- # on windows, nokogumbo needs to link against nokogiri.so to resolve symbols. see #2167
111
- lib_directory = File.expand_path(File.join(File.dirname(__FILE__), "../#{ruby_minor}"))
112
- unless File.exist?(lib_directory)
113
- lib_directory = File.expand_path(File.join(File.dirname(__FILE__), ".."))
114
- end
115
- ldflags << "-L#{lib_directory.shellescape}"
116
- ldflags << "-l:nokogiri.so"
110
+ if windows?
111
+ # on windows, nokogumbo needs to link against nokogiri.so to resolve symbols. see #2167
112
+ lib_directory = File.expand_path(File.join(File.dirname(__FILE__), "../#{ruby_minor}"))
113
+ unless File.exist?(lib_directory)
114
+ lib_directory = File.expand_path(File.join(File.dirname(__FILE__), ".."))
117
115
  end
116
+ ldflags << "-L#{lib_directory.shellescape}"
117
+ ldflags << "-l:nokogiri.so"
118
118
  end
119
119
 
120
120
  nokogiri["cppflags"] = cppflags
@@ -169,8 +169,9 @@ module Nokogiri
169
169
  vi["other_libraries"] = Hash[*Nokogiri::OTHER_LIBRARY_VERSIONS.split(/[,:]/)]
170
170
  elsif jruby?
171
171
  vi["other_libraries"] = {}.tap do |ol|
172
- ol["xerces"] = Nokogiri::XERCES_VERSION
173
- ol["nekohtml"] = Nokogiri::NEKO_VERSION
172
+ Nokogiri::JAR_DEPENDENCIES.each do |k, v|
173
+ ol[k] = v
174
+ end
174
175
  end
175
176
  end
176
177
  end
@@ -1,3 +1,4 @@
1
+ # coding: utf-8
1
2
  # frozen_string_literal: true
2
3
 
3
4
  module Nokogiri
@@ -7,6 +8,54 @@ module Nokogiri
7
8
  alias_method :to_s, :content
8
9
  alias_method :content=, :value=
9
10
 
11
+ #
12
+ # :call-seq: deconstruct_keys(array_of_names) → Hash
13
+ #
14
+ # Returns a hash describing the Attr, to use in pattern matching.
15
+ #
16
+ # Valid keys and their values:
17
+ # - +name+ → (String) The name of the attribute.
18
+ # - +value+ → (String) The value of the attribute.
19
+ # - +namespace+ → (Namespace, nil) The Namespace of the attribute, or +nil+ if there is no namespace.
20
+ #
21
+ # ⚡ This is an experimental feature, available since v1.14.0
22
+ #
23
+ # *Example*
24
+ #
25
+ # doc = Nokogiri::XML.parse(<<~XML)
26
+ # <?xml version="1.0"?>
27
+ # <root xmlns="http://nokogiri.org/ns/default" xmlns:noko="http://nokogiri.org/ns/noko">
28
+ # <child1 foo="abc" noko:bar="def"/>
29
+ # </root>
30
+ # XML
31
+ #
32
+ # attributes = doc.root.elements.first.attribute_nodes
33
+ # # => [#(Attr:0x35c { name = "foo", value = "abc" }),
34
+ # # #(Attr:0x370 {
35
+ # # name = "bar",
36
+ # # namespace = #(Namespace:0x384 {
37
+ # # prefix = "noko",
38
+ # # href = "http://nokogiri.org/ns/noko"
39
+ # # }),
40
+ # # value = "def"
41
+ # # })]
42
+ #
43
+ # attributes.first.deconstruct_keys([:name, :value, :namespace])
44
+ # # => {:name=>"foo", :value=>"abc", :namespace=>nil}
45
+ #
46
+ # attributes.last.deconstruct_keys([:name, :value, :namespace])
47
+ # # => {:name=>"bar",
48
+ # # :value=>"def",
49
+ # # :namespace=>
50
+ # # #(Namespace:0x384 {
51
+ # # prefix = "noko",
52
+ # # href = "http://nokogiri.org/ns/noko"
53
+ # # })}
54
+ #
55
+ def deconstruct_keys(keys)
56
+ { name: name, value: value, namespace: namespace }
57
+ end
58
+
10
59
  private
11
60
 
12
61
  def inspect_attributes
@@ -234,7 +234,7 @@ module Nokogiri
234
234
  #
235
235
  # == Document Types
236
236
  #
237
- # To create a document type (DTD), access use the Builder#doc method to get
237
+ # To create a document type (DTD), use the Builder#doc method to get
238
238
  # the current context document. Then call Node#create_internal_subset to
239
239
  # create the DTD node.
240
240
  #
@@ -19,63 +19,72 @@ module Nokogiri
19
19
  NCNAME_CHAR = NCNAME_START_CHAR + "\\-\\.0-9"
20
20
  NCNAME_RE = /^xmlns(?::([#{NCNAME_START_CHAR}][#{NCNAME_CHAR}]*))?$/
21
21
 
22
- ##
23
- # Parse an XML file.
24
- #
25
- # +string_or_io+ may be a String, or any object that responds to
26
- # _read_ and _close_ such as an IO, or StringIO.
27
- #
28
- # +url+ (optional) is the URI where this document is located.
29
- #
30
- # +encoding+ (optional) is the encoding that should be used when processing
31
- # the document.
32
- #
33
- # +options+ (optional) is a configuration object that sets options during
34
- # parsing, such as Nokogiri::XML::ParseOptions::RECOVER. See the
35
- # Nokogiri::XML::ParseOptions for more information.
36
- #
37
- # +block+ (optional) is passed a configuration object on which
38
- # parse options may be set.
39
- #
40
- # By default, Nokogiri treats documents as untrusted, and so
41
- # does not attempt to load DTDs or access the network. See
42
- # Nokogiri::XML::ParseOptions for a complete list of options;
43
- # and that module's DEFAULT_XML constant for what's set (and not
44
- # set) by default.
45
- #
46
- # Nokogiri.XML() is a convenience method which will call this method.
47
- #
48
- def self.parse(string_or_io, url = nil, encoding = nil, options = ParseOptions::DEFAULT_XML)
49
- options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
50
- yield options if block_given?
22
+ class << self
23
+ # Parse an XML file.
24
+ #
25
+ # +string_or_io+ may be a String, or any object that responds to
26
+ # _read_ and _close_ such as an IO, or StringIO.
27
+ #
28
+ # +url+ (optional) is the URI where this document is located.
29
+ #
30
+ # +encoding+ (optional) is the encoding that should be used when processing
31
+ # the document.
32
+ #
33
+ # +options+ (optional) is a configuration object that sets options during
34
+ # parsing, such as Nokogiri::XML::ParseOptions::RECOVER. See the
35
+ # Nokogiri::XML::ParseOptions for more information.
36
+ #
37
+ # +block+ (optional) is passed a configuration object on which
38
+ # parse options may be set.
39
+ #
40
+ # By default, Nokogiri treats documents as untrusted, and so
41
+ # does not attempt to load DTDs or access the network. See
42
+ # Nokogiri::XML::ParseOptions for a complete list of options;
43
+ # and that module's DEFAULT_XML constant for what's set (and not
44
+ # set) by default.
45
+ #
46
+ # Nokogiri.XML() is a convenience method which will call this method.
47
+ #
48
+ def parse(string_or_io, url = nil, encoding = nil, options = ParseOptions::DEFAULT_XML)
49
+ options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
50
+ yield options if block_given?
51
+
52
+ url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
53
+
54
+ if empty_doc?(string_or_io)
55
+ if options.strict?
56
+ raise Nokogiri::XML::SyntaxError, "Empty document"
57
+ else
58
+ return encoding ? new.tap { |i| i.encoding = encoding } : new
59
+ end
60
+ end
51
61
 
52
- url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
62
+ doc = if string_or_io.respond_to?(:read)
63
+ if string_or_io.is_a?(Pathname)
64
+ # resolve the Pathname to the file and open it as an IO object, see #2110
65
+ string_or_io = string_or_io.expand_path.open
66
+ url ||= string_or_io.path
67
+ end
53
68
 
54
- if empty_doc?(string_or_io)
55
- if options.strict?
56
- raise Nokogiri::XML::SyntaxError, "Empty document"
69
+ read_io(string_or_io, url, encoding, options.to_i)
57
70
  else
58
- return encoding ? new.tap { |i| i.encoding = encoding } : new
71
+ # read_memory pukes on empty docs
72
+ read_memory(string_or_io, url, encoding, options.to_i)
59
73
  end
60
- end
61
74
 
62
- doc = if string_or_io.respond_to?(:read)
63
- if string_or_io.is_a?(Pathname)
64
- # resolve the Pathname to the file and open it as an IO object, see #2110
65
- string_or_io = string_or_io.expand_path.open
66
- url ||= string_or_io.path
67
- end
75
+ # do xinclude processing
76
+ doc.do_xinclude(options) if options.xinclude?
68
77
 
69
- read_io(string_or_io, url, encoding, options.to_i)
70
- else
71
- # read_memory pukes on empty docs
72
- read_memory(string_or_io, url, encoding, options.to_i)
78
+ doc
73
79
  end
74
80
 
75
- # do xinclude processing
76
- doc.do_xinclude(options) if options.xinclude?
81
+ private
77
82
 
78
- doc
83
+ def empty_doc?(string_or_io)
84
+ string_or_io.nil? ||
85
+ (string_or_io.respond_to?(:empty?) && string_or_io.empty?) ||
86
+ (string_or_io.respond_to?(:eof?) && string_or_io.eof?)
87
+ end
79
88
  end
80
89
 
81
90
  ##
@@ -165,7 +174,8 @@ module Nokogiri
165
174
  # Since v1.12.4
166
175
  attr_accessor :namespace_inheritance
167
176
 
168
- def initialize(*args) # :nodoc:
177
+ # :nodoc:
178
+ def initialize(*args) # rubocop:disable Lint/MissingSuper
169
179
  @errors = []
170
180
  @decorators = nil
171
181
  @namespace_inheritance = false
@@ -405,14 +415,52 @@ module Nokogiri
405
415
  Nokogiri::CSS::XPathVisitor::DoctypeConfig::XML
406
416
  end
407
417
 
408
- private
409
-
410
- def self.empty_doc?(string_or_io)
411
- string_or_io.nil? ||
412
- (string_or_io.respond_to?(:empty?) && string_or_io.empty?) ||
413
- (string_or_io.respond_to?(:eof?) && string_or_io.eof?)
418
+ #
419
+ # :call-seq: deconstruct_keys(array_of_names) → Hash
420
+ #
421
+ # Returns a hash describing the Document, to use in pattern matching.
422
+ #
423
+ # Valid keys and their values:
424
+ # - +root+ → (Node, nil) The root node of the Document, or +nil+ if the document is empty.
425
+ #
426
+ # In the future, other keys may allow accessing things like doctype and processing
427
+ # instructions. If you have a use case and would like this functionality, please let us know
428
+ # by opening an issue or a discussion on the github project.
429
+ #
430
+ # ⚡ This is an experimental feature, available since v1.14.0
431
+ #
432
+ # *Example*
433
+ #
434
+ # doc = Nokogiri::XML.parse(<<~XML)
435
+ # <?xml version="1.0"?>
436
+ # <root>
437
+ # <child>
438
+ # </root>
439
+ # XML
440
+ #
441
+ # doc.deconstruct_keys([:root])
442
+ # # => {:root=>
443
+ # # #(Element:0x35c {
444
+ # # name = "root",
445
+ # # children = [
446
+ # # #(Text "\n" + " "),
447
+ # # #(Element:0x370 { name = "child", children = [ #(Text "\n")] }),
448
+ # # #(Text "\n")]
449
+ # # })}
450
+ #
451
+ # *Example* of an empty document
452
+ #
453
+ # doc = Nokogiri::XML::Document.new
454
+ #
455
+ # doc.deconstruct_keys([:root])
456
+ # # => {:root=>nil}
457
+ #
458
+ def deconstruct_keys(keys)
459
+ { root: root }
414
460
  end
415
461
 
462
+ private
463
+
416
464
  IMPLIED_XPATH_CONTEXTS = ["//"].freeze # :nodoc:
417
465
 
418
466
  def inspect_attributes
@@ -1,3 +1,4 @@
1
+ # coding: utf-8
1
2
  # frozen_string_literal: true
2
3
 
3
4
  module Nokogiri
@@ -66,9 +67,7 @@ module Nokogiri
66
67
  def to_html(*args)
67
68
  if Nokogiri.jruby?
68
69
  options = args.first.is_a?(Hash) ? args.shift : {}
69
- unless options[:save_with]
70
- options[:save_with] = Node::SaveOptions::NO_DECLARATION | Node::SaveOptions::NO_EMPTY_TAGS | Node::SaveOptions::AS_HTML
71
- end
70
+ options[:save_with] ||= Node::SaveOptions::DEFAULT_HTML
72
71
  args.insert(0, options)
73
72
  end
74
73
  children.to_html(*args)
@@ -80,9 +79,7 @@ module Nokogiri
80
79
  def to_xhtml(*args)
81
80
  if Nokogiri.jruby?
82
81
  options = args.first.is_a?(Hash) ? args.shift : {}
83
- unless options[:save_with]
84
- options[:save_with] = Node::SaveOptions::NO_DECLARATION | Node::SaveOptions::NO_EMPTY_TAGS | Node::SaveOptions::AS_XHTML
85
- end
82
+ options[:save_with] ||= Node::SaveOptions::DEFAULT_XHTML
86
83
  args.insert(0, options)
87
84
  end
88
85
  children.to_xhtml(*args)
@@ -148,6 +145,52 @@ module Nokogiri
148
145
  document.fragment(data)
149
146
  end
150
147
 
148
+ #
149
+ # :call-seq: deconstruct() → Array
150
+ #
151
+ # Returns the root nodes of this document fragment as an array, to use in pattern matching.
152
+ #
153
+ # 💡 Note that text nodes are returned as well as elements. If you wish to operate only on
154
+ # root elements, you should deconstruct the array returned by
155
+ # <tt>DocumentFragment#elements</tt>.
156
+ #
157
+ # ⚡ This is an experimental feature, available since v1.14.0
158
+ #
159
+ # *Example*
160
+ #
161
+ # frag = Nokogiri::HTML5.fragment(<<~HTML)
162
+ # <div>Start</div>
163
+ # This is a <a href="#jump">shortcut</a> for you.
164
+ # <div>End</div>
165
+ # HTML
166
+ #
167
+ # frag.deconstruct
168
+ # # => [#(Element:0x35c { name = "div", children = [ #(Text "Start")] }),
169
+ # # #(Text "\n" + "This is a "),
170
+ # # #(Element:0x370 {
171
+ # # name = "a",
172
+ # # attributes = [ #(Attr:0x384 { name = "href", value = "#jump" })],
173
+ # # children = [ #(Text "shortcut")]
174
+ # # }),
175
+ # # #(Text " for you.\n"),
176
+ # # #(Element:0x398 { name = "div", children = [ #(Text "End")] }),
177
+ # # #(Text "\n")]
178
+ #
179
+ # *Example* only the elements, not the text nodes.
180
+ #
181
+ # frag.elements.deconstruct
182
+ # # => [#(Element:0x35c { name = "div", children = [ #(Text "Start")] }),
183
+ # # #(Element:0x370 {
184
+ # # name = "a",
185
+ # # attributes = [ #(Attr:0x384 { name = "href", value = "#jump" })],
186
+ # # children = [ #(Text "shortcut")]
187
+ # # }),
188
+ # # #(Element:0x398 { name = "div", children = [ #(Text "End")] })]
189
+ #
190
+ def deconstruct
191
+ children.to_a
192
+ end
193
+
151
194
  private
152
195
 
153
196
  # fix for issue 770
@@ -1,3 +1,4 @@
1
+ # coding: utf-8
1
2
  # frozen_string_literal: true
2
3
 
3
4
  module Nokogiri
@@ -6,6 +7,47 @@ module Nokogiri
6
7
  include Nokogiri::XML::PP::Node
7
8
  attr_reader :document
8
9
 
10
+ #
11
+ # :call-seq: deconstruct_keys(array_of_names) → Hash
12
+ #
13
+ # Returns a hash describing the Namespace, to use in pattern matching.
14
+ #
15
+ # Valid keys and their values:
16
+ # - +prefix+ → (String, nil) The namespace's prefix, or +nil+ if there is no prefix (e.g., default namespace).
17
+ # - +href+ → (String) The namespace's URI
18
+ #
19
+ # ⚡ This is an experimental feature, available since v1.14.0
20
+ #
21
+ # *Example*
22
+ #
23
+ # doc = Nokogiri::XML.parse(<<~XML)
24
+ # <?xml version="1.0"?>
25
+ # <root xmlns="http://nokogiri.org/ns/default" xmlns:noko="http://nokogiri.org/ns/noko">
26
+ # <child1 foo="abc" noko:bar="def"/>
27
+ # <noko:child2 foo="qwe" noko:bar="rty"/>
28
+ # </root>
29
+ # XML
30
+ #
31
+ # doc.root.elements.first.namespace
32
+ # # => #(Namespace:0x35c { href = "http://nokogiri.org/ns/default" })
33
+ #
34
+ # doc.root.elements.first.namespace.deconstruct_keys([:prefix, :href])
35
+ # # => {:prefix=>nil, :href=>"http://nokogiri.org/ns/default"}
36
+ #
37
+ # doc.root.elements.last.namespace
38
+ # # => #(Namespace:0x370 {
39
+ # # prefix = "noko",
40
+ # # href = "http://nokogiri.org/ns/noko"
41
+ # # })
42
+ #
43
+ # doc.root.elements.last.namespace.deconstruct_keys([:prefix, :href])
44
+ # # => {:prefix=>"noko", :href=>"http://nokogiri.org/ns/noko"}
45
+ #
46
+ #
47
+ def deconstruct_keys(keys)
48
+ { prefix: prefix, href: href }
49
+ end
50
+
9
51
  private
10
52
 
11
53
  def inspect_attributes
@@ -29,14 +29,16 @@ module Nokogiri
29
29
  DEFAULT_XML = AS_XML # https://github.com/sparklemotion/nokogiri/issues/#issue/415
30
30
  # the default for HTML document
31
31
  DEFAULT_HTML = NO_DECLARATION | NO_EMPTY_TAGS | AS_HTML
32
+ # the default for XHTML document
33
+ DEFAULT_XHTML = NO_DECLARATION | AS_XHTML
32
34
  else
33
35
  # the default for XML documents
34
36
  DEFAULT_XML = FORMAT | AS_XML
35
37
  # the default for HTML document
36
38
  DEFAULT_HTML = FORMAT | NO_DECLARATION | NO_EMPTY_TAGS | AS_HTML
39
+ # the default for XHTML document
40
+ DEFAULT_XHTML = FORMAT | NO_DECLARATION | AS_XHTML
37
41
  end
38
- # the default for XHTML document
39
- DEFAULT_XHTML = FORMAT | NO_DECLARATION | AS_XHTML
40
42
 
41
43
  # Integer representation of the SaveOptions
42
44
  attr_reader :options
@@ -47,7 +49,7 @@ module Nokogiri
47
49
  end
48
50
 
49
51
  constants.each do |constant|
50
- class_eval %{
52
+ class_eval <<~RUBY, __FILE__, __LINE__ + 1
51
53
  def #{constant.downcase}
52
54
  @options |= #{constant}
53
55
  self
@@ -56,7 +58,7 @@ module Nokogiri
56
58
  def #{constant.downcase}?
57
59
  #{constant} & @options == #{constant}
58
60
  end
59
- }
61
+ RUBY
60
62
  end
61
63
 
62
64
  alias_method :to_i, :options