nokogiri 1.16.8 → 1.17.2

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (91) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +11 -21
  3. data/README.md +4 -0
  4. data/dependencies.yml +6 -6
  5. data/ext/nokogiri/extconf.rb +191 -137
  6. data/ext/nokogiri/gumbo.c +69 -53
  7. data/ext/nokogiri/html4_document.c +10 -4
  8. data/ext/nokogiri/html4_element_description.c +18 -18
  9. data/ext/nokogiri/html4_sax_parser.c +40 -0
  10. data/ext/nokogiri/html4_sax_parser_context.c +48 -58
  11. data/ext/nokogiri/html4_sax_push_parser.c +25 -24
  12. data/ext/nokogiri/libxml2_polyfill.c +114 -0
  13. data/ext/nokogiri/nokogiri.c +9 -2
  14. data/ext/nokogiri/nokogiri.h +18 -33
  15. data/ext/nokogiri/xml_attr.c +1 -1
  16. data/ext/nokogiri/xml_cdata.c +2 -10
  17. data/ext/nokogiri/xml_comment.c +3 -8
  18. data/ext/nokogiri/xml_document.c +163 -156
  19. data/ext/nokogiri/xml_document_fragment.c +10 -25
  20. data/ext/nokogiri/xml_dtd.c +1 -1
  21. data/ext/nokogiri/xml_element_content.c +9 -9
  22. data/ext/nokogiri/xml_encoding_handler.c +4 -4
  23. data/ext/nokogiri/xml_namespace.c +6 -6
  24. data/ext/nokogiri/xml_node.c +134 -103
  25. data/ext/nokogiri/xml_node_set.c +46 -44
  26. data/ext/nokogiri/xml_reader.c +54 -58
  27. data/ext/nokogiri/xml_relax_ng.c +35 -56
  28. data/ext/nokogiri/xml_sax_parser.c +156 -88
  29. data/ext/nokogiri/xml_sax_parser_context.c +213 -131
  30. data/ext/nokogiri/xml_sax_push_parser.c +68 -49
  31. data/ext/nokogiri/xml_schema.c +50 -85
  32. data/ext/nokogiri/xml_syntax_error.c +19 -11
  33. data/ext/nokogiri/xml_text.c +2 -4
  34. data/ext/nokogiri/xml_xpath_context.c +2 -2
  35. data/ext/nokogiri/xslt_stylesheet.c +8 -8
  36. data/gumbo-parser/src/error.c +76 -48
  37. data/gumbo-parser/src/error.h +5 -1
  38. data/gumbo-parser/src/nokogiri_gumbo.h +11 -2
  39. data/gumbo-parser/src/parser.c +61 -23
  40. data/gumbo-parser/src/tokenizer.c +6 -6
  41. data/lib/nokogiri/class_resolver.rb +1 -1
  42. data/lib/nokogiri/css/node.rb +6 -2
  43. data/lib/nokogiri/css/parser.rb +6 -4
  44. data/lib/nokogiri/css/parser.y +2 -2
  45. data/lib/nokogiri/css/parser_extras.rb +6 -66
  46. data/lib/nokogiri/css/selector_cache.rb +38 -0
  47. data/lib/nokogiri/css/tokenizer.rb +4 -4
  48. data/lib/nokogiri/css/tokenizer.rex +9 -8
  49. data/lib/nokogiri/css/xpath_visitor.rb +42 -6
  50. data/lib/nokogiri/css.rb +86 -20
  51. data/lib/nokogiri/decorators/slop.rb +3 -5
  52. data/lib/nokogiri/encoding_handler.rb +2 -2
  53. data/lib/nokogiri/html4/document.rb +44 -23
  54. data/lib/nokogiri/html4/document_fragment.rb +124 -12
  55. data/lib/nokogiri/html4/encoding_reader.rb +1 -1
  56. data/lib/nokogiri/html4/sax/parser.rb +23 -38
  57. data/lib/nokogiri/html4/sax/parser_context.rb +4 -9
  58. data/lib/nokogiri/html4.rb +9 -14
  59. data/lib/nokogiri/html5/builder.rb +40 -0
  60. data/lib/nokogiri/html5/document.rb +61 -30
  61. data/lib/nokogiri/html5/document_fragment.rb +130 -20
  62. data/lib/nokogiri/html5/node.rb +4 -4
  63. data/lib/nokogiri/html5.rb +114 -72
  64. data/lib/nokogiri/version/constant.rb +1 -1
  65. data/lib/nokogiri/xml/builder.rb +8 -1
  66. data/lib/nokogiri/xml/document.rb +70 -26
  67. data/lib/nokogiri/xml/document_fragment.rb +84 -13
  68. data/lib/nokogiri/xml/node.rb +82 -11
  69. data/lib/nokogiri/xml/node_set.rb +9 -7
  70. data/lib/nokogiri/xml/parse_options.rb +1 -1
  71. data/lib/nokogiri/xml/pp/node.rb +6 -1
  72. data/lib/nokogiri/xml/reader.rb +46 -13
  73. data/lib/nokogiri/xml/relax_ng.rb +57 -20
  74. data/lib/nokogiri/xml/sax/document.rb +174 -83
  75. data/lib/nokogiri/xml/sax/parser.rb +115 -41
  76. data/lib/nokogiri/xml/sax/parser_context.rb +116 -8
  77. data/lib/nokogiri/xml/sax/push_parser.rb +3 -0
  78. data/lib/nokogiri/xml/sax.rb +48 -0
  79. data/lib/nokogiri/xml/schema.rb +112 -45
  80. data/lib/nokogiri/xml/searchable.rb +6 -8
  81. data/lib/nokogiri/xml/syntax_error.rb +22 -0
  82. data/lib/nokogiri/xml.rb +13 -24
  83. data/lib/nokogiri/xslt.rb +3 -9
  84. data/lib/xsd/xmlparser/nokogiri.rb +3 -4
  85. data/ports/archives/libxml2-2.13.5.tar.xz +0 -0
  86. data/ports/archives/libxslt-1.1.42.tar.xz +0 -0
  87. metadata +11 -8
  88. data/ext/nokogiri/libxml2_backwards_compat.c +0 -121
  89. data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +0 -25
  90. data/ports/archives/libxml2-2.12.9.tar.xz +0 -0
  91. data/ports/archives/libxslt-1.1.39.tar.xz +0 -0
@@ -3,60 +3,45 @@
3
3
  module Nokogiri
4
4
  module HTML4
5
5
  ###
6
- # Nokogiri lets you write a SAX parser to process HTML but get HTML correction features.
6
+ # Nokogiri provides a SAX parser to process HTML4 which will provide HTML recovery
7
+ # ("autocorrection") features.
7
8
  #
8
9
  # See Nokogiri::HTML4::SAX::Parser for a basic example of using a SAX parser with HTML.
9
10
  #
10
11
  # For more information on SAX parsers, see Nokogiri::XML::SAX
12
+ #
11
13
  module SAX
12
14
  ###
13
- # This class lets you perform SAX style parsing on HTML with HTML error correction.
15
+ # This parser is a SAX style parser that reads its input as it deems necessary. The parser
16
+ # takes a Nokogiri::XML::SAX::Document, an optional encoding, then given an HTML input, sends
17
+ # messages to the Nokogiri::XML::SAX::Document.
18
+ #
19
+ # ⚠ This is an HTML4 parser and so may not support some HTML5 features and behaviors.
14
20
  #
15
21
  # Here is a basic usage example:
16
22
  #
17
- # class MyDoc < Nokogiri::XML::SAX::Document
23
+ # class MyHandler < Nokogiri::XML::SAX::Document
18
24
  # def start_element name, attributes = []
19
25
  # puts "found a #{name}"
20
26
  # end
21
27
  # end
22
28
  #
23
- # parser = Nokogiri::HTML4::SAX::Parser.new(MyDoc.new)
24
- # parser.parse(File.read(ARGV[0], mode: 'rb'))
29
+ # parser = Nokogiri::HTML4::SAX::Parser.new(MyHandler.new)
30
+ #
31
+ # # Hand an IO object to the parser, which will read the HTML from the IO.
32
+ # File.open(path_to_html) do |f|
33
+ # parser.parse(f)
34
+ # end
35
+ #
36
+ # For more information on \SAX parsers, see Nokogiri::XML::SAX or the parent class
37
+ # Nokogiri::XML::SAX::Parser.
38
+ #
39
+ # Also see Nokogiri::XML::SAX::Document for the available events.
25
40
  #
26
- # For more information on SAX parsers, see Nokogiri::XML::SAX
27
41
  class Parser < Nokogiri::XML::SAX::Parser
28
- ###
29
- # Parse html stored in +data+ using +encoding+
30
- def parse_memory(data, encoding = "UTF-8")
31
- raise TypeError unless String === data
32
- return if data.empty?
33
-
34
- ctx = ParserContext.memory(data, encoding)
35
- yield ctx if block_given?
36
- ctx.parse_with(self)
37
- end
38
-
39
- ###
40
- # Parse given +io+
41
- def parse_io(io, encoding = "UTF-8")
42
- check_encoding(encoding)
43
- @encoding = encoding
44
- ctx = ParserContext.io(io, ENCODINGS[encoding])
45
- yield ctx if block_given?
46
- ctx.parse_with(self)
47
- end
48
-
49
- ###
50
- # Parse a file with +filename+
51
- def parse_file(filename, encoding = "UTF-8")
52
- raise ArgumentError unless filename
53
- raise Errno::ENOENT unless File.exist?(filename)
54
- raise Errno::EISDIR if File.directory?(filename)
55
-
56
- ctx = ParserContext.file(filename, encoding)
57
- yield ctx if block_given?
58
- ctx.parse_with(self)
59
- end
42
+ # This class inherits its behavior from Nokogiri::XML::SAX::Parser, but note that the superclass
43
+ # uses Nokogiri::ClassResolver to use HTML4::SAX::ParserContext as the context class for
44
+ # this class, which is where the real behavioral differences are implemented.
60
45
  end
61
46
  end
62
47
  end
@@ -4,16 +4,11 @@ module Nokogiri
4
4
  module HTML4
5
5
  module SAX
6
6
  ###
7
- # Context for HTML SAX parsers. This class is usually not instantiated by the user. Instead,
8
- # you should be looking at Nokogiri::HTML4::SAX::Parser
7
+ # Context object to invoke the HTML4 SAX parser on the SAX::Document handler.
8
+ #
9
+ # 💡 This class is usually not instantiated by the user. Use Nokogiri::HTML4::SAX::Parser
10
+ # instead.
9
11
  class ParserContext < Nokogiri::XML::SAX::ParserContext
10
- def self.new(thing, encoding = "UTF-8")
11
- if [:read, :close].all? { |x| thing.respond_to?(x) }
12
- super
13
- else
14
- memory(thing, encoding)
15
- end
16
- end
17
12
  end
18
13
  end
19
14
  end
@@ -3,12 +3,9 @@
3
3
 
4
4
  module Nokogiri
5
5
  class << self
6
- # :call-seq:
7
- # HTML4(input, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block) → Nokogiri::HTML4::Document
8
- #
9
- # Parse HTML. Convenience method for Nokogiri::HTML4::Document.parse
10
- def HTML4(input, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block)
11
- Nokogiri::HTML4::Document.parse(input, url, encoding, options, &block)
6
+ # Convenience method for Nokogiri::HTML4::Document.parse
7
+ def HTML4(...)
8
+ Nokogiri::HTML4::Document.parse(...)
12
9
  end
13
10
  end
14
11
 
@@ -18,16 +15,14 @@ module Nokogiri
18
15
  # for parsing HTML.
19
16
  module HTML4
20
17
  class << self
21
- ###
22
- # Parse HTML. Convenience method for Nokogiri::HTML4::Document.parse
23
- def parse(input, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block)
24
- Document.parse(input, url, encoding, options, &block)
18
+ # Convenience method for Nokogiri::HTML4::Document.parse
19
+ def parse(...)
20
+ Document.parse(...)
25
21
  end
26
22
 
27
- ####
28
- # Parse a fragment from +string+ in to a NodeSet.
29
- def fragment(string, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block)
30
- HTML4::DocumentFragment.parse(string, encoding, options, &block)
23
+ # Convenience method for Nokogiri::HTML4::DocumentFragment.parse
24
+ def fragment(...)
25
+ HTML4::DocumentFragment.parse(...)
31
26
  end
32
27
  end
33
28
 
@@ -0,0 +1,40 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Nokogiri
4
+ module HTML5
5
+ ###
6
+ # Nokogiri HTML5 builder is used for building HTML documents. It is very similar to the
7
+ # Nokogiri::XML::Builder. In fact, you should go read the documentation for
8
+ # Nokogiri::XML::Builder before reading this documentation.
9
+ #
10
+ # The construction behavior is identical to HTML4::Builder, but HTML5 documents implement the
11
+ # [HTML5 standard's serialization
12
+ # algorithm](https://www.w3.org/TR/2008/WD-html5-20080610/serializing.html).
13
+ #
14
+ # == Synopsis:
15
+ #
16
+ # Create an HTML5 document with a body that has an onload attribute, and a
17
+ # span tag with a class of "bold" that has content of "Hello world".
18
+ #
19
+ # builder = Nokogiri::HTML5::Builder.new do |doc|
20
+ # doc.html {
21
+ # doc.body(:onload => 'some_func();') {
22
+ # doc.span.bold {
23
+ # doc.text "Hello world"
24
+ # }
25
+ # }
26
+ # }
27
+ # end
28
+ # puts builder.to_html
29
+ #
30
+ # The HTML5 builder inherits from the XML builder, so make sure to read the
31
+ # Nokogiri::XML::Builder documentation.
32
+ class Builder < Nokogiri::XML::Builder
33
+ ###
34
+ # Convert the builder to HTML
35
+ def to_html
36
+ @doc.to_html
37
+ end
38
+ end
39
+ end
40
+ end
@@ -43,41 +43,69 @@ module Nokogiri
43
43
 
44
44
  # Get the parser's quirks mode value. See HTML5::QuirksMode.
45
45
  #
46
- # This method returns `nil` if the parser was not invoked (e.g., `Nokogiri::HTML5::Document.new`).
46
+ # This method returns +nil+ if the parser was not invoked (e.g., Nokogiri::HTML5::Document.new).
47
47
  #
48
48
  # Since v1.14.0
49
49
  attr_reader :quirks_mode
50
50
 
51
51
  class << self
52
52
  # :call-seq:
53
- # parse(input)
54
- # parse(input, url=nil, encoding=nil, **options)
55
- # parse(input, url=nil, encoding=nil) { |options| ... }
53
+ # parse(input) { |options| ... } → HTML5::Document
54
+ # parse(input, url:, encoding:) { |options| ... } → HTML5::Document
55
+ # parse(input, **options) → HTML5::Document
56
56
  #
57
- # Parse HTML5 input.
57
+ # Parse \HTML input with a parser compliant with the HTML5 spec. This method uses the
58
+ # encoding of +input+ if it can be determined, or else falls back to the +encoding:+
59
+ # parameter.
58
60
  #
59
- # [Parameters]
60
- # - +input+ may be a String, or any object that responds to _read_ and _close_ such as an
61
- # IO, or StringIO.
61
+ # [Required Parameters]
62
+ # - +input+ (String | IO) the \HTML content to be parsed.
62
63
  #
63
- # - +url+ (optional) is a String indicating the canonical URI where this document is located.
64
+ # [Optional Parameters]
65
+ # - +url:+ (String) the base URI of the document.
64
66
  #
65
- # - +encoding+ (optional) is the encoding that should be used when processing
66
- # the document.
67
+ # [Optional Keyword Arguments]
68
+ # - +encoding:+ (String | Encoding) The encoding, or name of the encoding, that should be used when processing the
69
+ # document. When not provided, the encoding will be determined based on the document
70
+ # content.
67
71
  #
68
- # - +options+ (optional) is a configuration Hash (or keyword arguments) to set options
69
- # during parsing. The three currently supported options are +:max_errors+,
70
- # +:max_tree_depth+ and +:max_attributes+, described at Nokogiri::HTML5.
72
+ # - +max_errors:+ (Integer) The maximum number of parse errors to record. (default
73
+ # +Nokogiri::Gumbo::DEFAULT_MAX_ERRORS+ which is currently 0)
71
74
  #
72
- # Note that these options are different than those made available by
73
- # Nokogiri::XML::Document and Nokogiri::HTML4::Document.
75
+ # - +max_tree_depth:+ (Integer) The maximum depth of the parse tree. (default
76
+ # +Nokogiri::Gumbo::DEFAULT_MAX_TREE_DEPTH+)
74
77
  #
75
- # - +block+ (optional) is passed a configuration Hash on which parse options may be set. See
76
- # Nokogiri::HTML5 for more information and usage.
78
+ # - +max_attributes:+ (Integer) The maximum number of attributes allowed on an
79
+ # element. (default +Nokogiri::Gumbo::DEFAULT_MAX_ATTRIBUTES+)
80
+ #
81
+ # - +parse_noscript_content_as_text:+ (Boolean) Whether to parse the content of +noscript+
82
+ # elements as text. (default +false+)
83
+ #
84
+ # See rdoc-ref:HTML5@Parsing+options for a complete description of these parsing options.
85
+ #
86
+ # [Yields]
87
+ # If present, the block will be passed a Hash object to modify with parse options before the
88
+ # input is parsed. See rdoc-ref:HTML5@Parsing+options for a list of available options.
89
+ #
90
+ # ⚠ Note that +url:+ and +encoding:+ cannot be set by the configuration block.
77
91
  #
78
92
  # [Returns] Nokogiri::HTML5::Document
79
93
  #
80
- def parse(string_or_io, url = nil, encoding = nil, **options, &block)
94
+ # *Example:* Parse input from an IO with a specific encoding and a custom max errors limit.
95
+ #
96
+ # Nokogiri::HTML5::Document.parse(socket, encoding: "ISO-8859-1", max_errors: 10)
97
+ #
98
+ # *Example:* Parse a string setting the +:parse_noscript_content_as_text+ option using the
99
+ # configuration block parameter.
100
+ #
101
+ # Nokogiri::HTML5::Document.parse(input) { |c| c[:parse_noscript_content_as_text] = true }
102
+ #
103
+ def parse(
104
+ string_or_io,
105
+ url_ = nil, encoding_ = nil,
106
+ url: url_, encoding: encoding_,
107
+ **options, &block
108
+ )
81
109
  yield options if block
82
110
  string_or_io = "" unless string_or_io
83
111
 
@@ -92,35 +120,37 @@ module Nokogiri
92
120
  raise ArgumentError, "not a string or IO object"
93
121
  end
94
122
 
95
- do_parse(string_or_io, url, encoding, options)
123
+ do_parse(string_or_io, url, encoding, **options)
96
124
  end
97
125
 
98
126
  # Create a new document from an IO object.
99
127
  #
100
128
  # 💡 Most users should prefer Document.parse to this method.
101
- def read_io(io, url = nil, encoding = nil, **options)
129
+ def read_io(io, url_ = nil, encoding_ = nil, url: url_, encoding: encoding_, **options)
102
130
  raise ArgumentError, "io object doesn't respond to :read" unless io.respond_to?(:read)
103
131
 
104
- do_parse(io, url, encoding, options)
132
+ do_parse(io, url, encoding, **options)
105
133
  end
106
134
 
107
135
  # Create a new document from a String.
108
136
  #
109
137
  # 💡 Most users should prefer Document.parse to this method.
110
- def read_memory(string, url = nil, encoding = nil, **options)
138
+ def read_memory(string, url_ = nil, encoding_ = nil, url: url_, encoding: encoding_, **options)
111
139
  raise ArgumentError, "string object doesn't respond to :to_str" unless string.respond_to?(:to_str)
112
140
 
113
- do_parse(string, url, encoding, options)
141
+ do_parse(string, url, encoding, **options)
114
142
  end
115
143
 
116
144
  private
117
145
 
118
- def do_parse(string_or_io, url, encoding, options)
146
+ def do_parse(string_or_io, url, encoding, **options)
119
147
  string = HTML5.read_and_encode(string_or_io, encoding)
120
- max_attributes = options[:max_attributes] || Nokogiri::Gumbo::DEFAULT_MAX_ATTRIBUTES
121
- max_errors = options[:max_errors] || options[:max_parse_errors] || Nokogiri::Gumbo::DEFAULT_MAX_ERRORS
122
- max_depth = options[:max_tree_depth] || Nokogiri::Gumbo::DEFAULT_MAX_TREE_DEPTH
123
- doc = Nokogiri::Gumbo.parse(string, url, max_attributes, max_errors, max_depth, self)
148
+
149
+ options[:max_attributes] ||= Nokogiri::Gumbo::DEFAULT_MAX_ATTRIBUTES
150
+ options[:max_errors] ||= options.delete(:max_parse_errors) || Nokogiri::Gumbo::DEFAULT_MAX_ERRORS
151
+ options[:max_tree_depth] ||= Nokogiri::Gumbo::DEFAULT_MAX_TREE_DEPTH
152
+
153
+ doc = Nokogiri::Gumbo.parse(string, url, self, **options)
124
154
  doc.encoding = "UTF-8"
125
155
  doc
126
156
  end
@@ -142,7 +172,8 @@ module Nokogiri
142
172
  # - +markup+ (String) The HTML5 markup fragment to be parsed
143
173
  #
144
174
  # [Returns]
145
- # Nokogiri::HTML5::DocumentFragment. This object's children will be empty if `markup` is not passed, is empty, or is `nil`.
175
+ # Nokogiri::HTML5::DocumentFragment. This object's children will be empty if +markup+ is not
176
+ # passed, is empty, or is +nil+.
146
177
  #
147
178
  def fragment(markup = nil)
148
179
  DocumentFragment.new(self, markup)
@@ -25,27 +25,145 @@ module Nokogiri
25
25
  #
26
26
  # 💡 HTML5 functionality is not available when running JRuby.
27
27
  class DocumentFragment < Nokogiri::HTML4::DocumentFragment
28
+ class << self
29
+ # :call-seq:
30
+ # parse(input, **options) → HTML5::DocumentFragment
31
+ #
32
+ # Parse \HTML5 fragment input from a String or IO, and return a new HTML5::DocumentFragment. This
33
+ # method creates a new, empty HTML5::Document to contain the fragment.
34
+ #
35
+ # [Parameters]
36
+ # - +input+ (String | IO) The HTML5 document fragment to parse.
37
+ #
38
+ # [Optional Keyword Arguments]
39
+ # - +encoding:+ (String | Encoding) The encoding, or name of the encoding, that should be
40
+ # used when processing the document. When not provided, the encoding will be determined
41
+ # based on the document content. Also see Nokogiri::HTML5 for a longer explanation of how
42
+ # encoding is handled by the parser.
43
+ #
44
+ # - +context:+ (String | Nokogiri::XML::Node) The node, or the name of an HTML5 element, "in
45
+ # context" of which to parse the document fragment. See below for more
46
+ # information. (default +"body"+)
47
+ #
48
+ # - +max_errors:+ (Integer) The maximum number of parse errors to record. (default
49
+ # +Nokogiri::Gumbo::DEFAULT_MAX_ERRORS+ which is currently 0)
50
+ #
51
+ # - +max_tree_depth:+ (Integer) The maximum depth of the parse tree. (default
52
+ # +Nokogiri::Gumbo::DEFAULT_MAX_TREE_DEPTH+)
53
+ #
54
+ # - +max_attributes:+ (Integer) The maximum number of attributes allowed on an
55
+ # element. (default +Nokogiri::Gumbo::DEFAULT_MAX_ATTRIBUTES+)
56
+ #
57
+ # - +parse_noscript_content_as_text:+ (Boolean) Whether to parse the content of +noscript+
58
+ # elements as text. (default +false+)
59
+ #
60
+ # See rdoc-ref:HTML5@Parsing+options for a complete description of these parsing options.
61
+ #
62
+ # [Returns] Nokogiri::HTML5::DocumentFragment
63
+ #
64
+ # === Context \Node
65
+ #
66
+ # If a context node is specified using +context:+, then the parser will behave as if that
67
+ # Node, or a hypothetical tag named as specified, is the parent of the fragment subtree.
68
+ #
69
+ def parse(
70
+ input,
71
+ encoding_ = nil, positional_options_hash = nil,
72
+ encoding: encoding_, **options
73
+ )
74
+ unless positional_options_hash.nil? || positional_options_hash.empty?
75
+ options.merge!(positional_options_hash)
76
+ end
77
+
78
+ context = options.delete(:context)
79
+
80
+ document = HTML5::Document.new
81
+ document.encoding = "UTF-8"
82
+ input = HTML5.read_and_encode(input, encoding)
83
+
84
+ new(document, input, context, options)
85
+ end
86
+ end
87
+
28
88
  attr_accessor :document
29
89
  attr_accessor :errors
30
90
 
31
91
  # Get the parser's quirks mode value. See HTML5::QuirksMode.
32
92
  #
33
- # This method returns `nil` if the parser was not invoked (e.g., `Nokogiri::HTML5::DocumentFragment.new(doc)`).
93
+ # This method returns +nil+ if the parser was not invoked (e.g.,
94
+ # Nokogiri::HTML5::DocumentFragment.new(doc)).
34
95
  #
35
96
  # Since v1.14.0
36
97
  attr_reader :quirks_mode
37
98
 
38
- # Create a document fragment.
39
- def initialize(doc, tags = nil, ctx = nil, options = {}) # rubocop:disable Lint/MissingSuper
40
- self.document = doc
41
- self.errors = []
42
- return self unless tags
43
-
44
- max_attributes = options[:max_attributes] || Nokogiri::Gumbo::DEFAULT_MAX_ATTRIBUTES
45
- max_errors = options[:max_errors] || Nokogiri::Gumbo::DEFAULT_MAX_ERRORS
46
- max_depth = options[:max_tree_depth] || Nokogiri::Gumbo::DEFAULT_MAX_TREE_DEPTH
47
- tags = Nokogiri::HTML5.read_and_encode(tags, nil)
48
- Nokogiri::Gumbo.fragment(self, tags, ctx, max_attributes, max_errors, max_depth)
99
+ #
100
+ # :call-seq:
101
+ # new(document, input, **options) → HTML5::DocumentFragment
102
+ #
103
+ # Parse \HTML5 fragment input from a String, and return a new HTML5::DocumentFragment.
104
+ #
105
+ # 💡 It's recommended to use either HTML5::DocumentFragment.parse or HTML5::Node#fragment
106
+ # rather than call this method directly.
107
+ #
108
+ # [Required Parameters]
109
+ # - +document+ (HTML5::Document) The parent document to associate the returned fragment with.
110
+ #
111
+ # [Optional Parameters]
112
+ # - +input+ (String) The content to be parsed.
113
+ #
114
+ # [Optional Keyword Arguments]
115
+ # - +encoding:+ (String | Encoding) The encoding, or name of the encoding, that should be
116
+ # used when processing the document. When not provided, the encoding will be determined
117
+ # based on the document content. Also see Nokogiri::HTML5 for a longer explanation of how
118
+ # encoding is handled by the parser.
119
+ #
120
+ # - +context:+ (String | Nokogiri::XML::Node) The node, or the name of an HTML5 element, in
121
+ # which to parse the document fragment. (default +"body"+)
122
+ #
123
+ # - +max_errors:+ (Integer) The maximum number of parse errors to record. (default
124
+ # +Nokogiri::Gumbo::DEFAULT_MAX_ERRORS+ which is currently 0)
125
+ #
126
+ # - +max_tree_depth:+ (Integer) The maximum depth of the parse tree. (default
127
+ # +Nokogiri::Gumbo::DEFAULT_MAX_TREE_DEPTH+)
128
+ #
129
+ # - +max_attributes:+ (Integer) The maximum number of attributes allowed on an
130
+ # element. (default +Nokogiri::Gumbo::DEFAULT_MAX_ATTRIBUTES+)
131
+ #
132
+ # - +parse_noscript_content_as_text:+ (Boolean) Whether to parse the content of +noscript+
133
+ # elements as text. (default +false+)
134
+ #
135
+ # See rdoc-ref:HTML5@Parsing+options for a complete description of these parsing options.
136
+ #
137
+ # [Returns] HTML5::DocumentFragment
138
+ #
139
+ # === Context \Node
140
+ #
141
+ # If a context node is specified using +context:+, then the parser will behave as if that
142
+ # Node, or a hypothetical tag named as specified, is the parent of the fragment subtree.
143
+ #
144
+ def initialize(
145
+ doc, input = nil,
146
+ context_ = nil, positional_options_hash = nil,
147
+ context: context_,
148
+ **options
149
+ ) # rubocop:disable Lint/MissingSuper
150
+ unless positional_options_hash.nil? || positional_options_hash.empty?
151
+ options.merge!(positional_options_hash)
152
+ end
153
+
154
+ @document = doc
155
+ @errors = []
156
+ return self unless input
157
+
158
+ input = Nokogiri::HTML5.read_and_encode(input, nil)
159
+
160
+ context = options.delete(:context) if options.key?(:context)
161
+
162
+ options[:max_attributes] ||= Nokogiri::Gumbo::DEFAULT_MAX_ATTRIBUTES
163
+ options[:max_errors] ||= options.delete(:max_parse_errors) || Nokogiri::Gumbo::DEFAULT_MAX_ERRORS
164
+ options[:max_tree_depth] ||= Nokogiri::Gumbo::DEFAULT_MAX_TREE_DEPTH
165
+
166
+ Nokogiri::Gumbo.fragment(self, input, context, **options)
49
167
  end
50
168
 
51
169
  def serialize(options = {}, &block) # :nodoc:
@@ -54,14 +172,6 @@ module Nokogiri
54
172
  XML::Node.instance_method(:serialize).bind_call(self, options, &block)
55
173
  end
56
174
 
57
- # Parse a document fragment from +tags+, returning a Nodeset.
58
- def self.parse(tags, encoding = nil, options = {})
59
- doc = HTML5::Document.new
60
- tags = HTML5.read_and_encode(tags, encoding)
61
- doc.encoding = "UTF-8"
62
- new(doc, tags, nil, options)
63
- end
64
-
65
175
  def extract_params(params) # :nodoc:
66
176
  handler = params.find do |param|
67
177
  ![Hash, String, Symbol].include?(param.class)
@@ -29,7 +29,7 @@ module Nokogiri
29
29
  # 💡 HTML5 functionality is not available when running JRuby.
30
30
  module Node
31
31
  def inner_html(options = {})
32
- return super(options) unless document.is_a?(HTML5::Document)
32
+ return super unless document.is_a?(HTML5::Document)
33
33
 
34
34
  result = options[:preserve_newline] && prepend_newline? ? +"\n" : +""
35
35
  result << children.map { |child| child.to_html(options) }.join
@@ -37,7 +37,7 @@ module Nokogiri
37
37
  end
38
38
 
39
39
  def write_to(io, *options)
40
- return super(io, *options) unless document.is_a?(HTML5::Document)
40
+ return super unless document.is_a?(HTML5::Document)
41
41
 
42
42
  options = options.first.is_a?(Hash) ? options.shift : {}
43
43
  encoding = options[:encoding] || options[0]
@@ -68,7 +68,7 @@ module Nokogiri
68
68
  end
69
69
 
70
70
  def fragment(tags)
71
- return super(tags) unless document.is_a?(HTML5::Document)
71
+ return super unless document.is_a?(HTML5::Document)
72
72
 
73
73
  DocumentFragment.new(document, tags, self)
74
74
  end
@@ -81,7 +81,7 @@ module Nokogiri
81
81
  # annoying with attribute names like xml:lang since libxml2 will
82
82
  # actually create the xml namespace if it doesn't exist already.
83
83
  def add_child_node_and_reparent_attrs(node)
84
- return super(node) unless document.is_a?(HTML5::Document)
84
+ return super unless document.is_a?(HTML5::Document)
85
85
 
86
86
  # I'm not sure what this method is supposed to do. Reparenting
87
87
  # namespaces is handled by libxml2, including child namespaces which