nokogiri 1.12.5 → 1.14.3

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (156) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +41 -0
  3. data/LICENSE-DEPENDENCIES.md +830 -509
  4. data/LICENSE.md +1 -1
  5. data/README.md +23 -14
  6. data/bin/nokogiri +63 -50
  7. data/dependencies.yml +33 -66
  8. data/ext/nokogiri/extconf.rb +159 -63
  9. data/ext/nokogiri/gumbo.c +21 -11
  10. data/ext/nokogiri/html4_document.c +2 -2
  11. data/ext/nokogiri/html4_element_description.c +1 -1
  12. data/ext/nokogiri/html4_entity_lookup.c +2 -2
  13. data/ext/nokogiri/html4_sax_parser_context.c +3 -9
  14. data/ext/nokogiri/html4_sax_push_parser.c +1 -1
  15. data/ext/nokogiri/nokogiri.c +38 -51
  16. data/ext/nokogiri/nokogiri.h +26 -14
  17. data/ext/nokogiri/test_global_handlers.c +1 -1
  18. data/ext/nokogiri/xml_attr.c +3 -3
  19. data/ext/nokogiri/xml_attribute_decl.c +5 -5
  20. data/ext/nokogiri/xml_cdata.c +3 -3
  21. data/ext/nokogiri/xml_comment.c +1 -1
  22. data/ext/nokogiri/xml_document.c +53 -44
  23. data/ext/nokogiri/xml_document_fragment.c +1 -3
  24. data/ext/nokogiri/xml_dtd.c +11 -11
  25. data/ext/nokogiri/xml_element_content.c +3 -3
  26. data/ext/nokogiri/xml_element_decl.c +5 -5
  27. data/ext/nokogiri/xml_encoding_handler.c +28 -14
  28. data/ext/nokogiri/xml_entity_decl.c +6 -6
  29. data/ext/nokogiri/xml_entity_reference.c +1 -1
  30. data/ext/nokogiri/xml_namespace.c +80 -14
  31. data/ext/nokogiri/xml_node.c +982 -396
  32. data/ext/nokogiri/xml_node_set.c +4 -6
  33. data/ext/nokogiri/xml_processing_instruction.c +1 -1
  34. data/ext/nokogiri/xml_reader.c +133 -32
  35. data/ext/nokogiri/xml_relax_ng.c +1 -3
  36. data/ext/nokogiri/xml_sax_parser.c +23 -17
  37. data/ext/nokogiri/xml_sax_parser_context.c +11 -9
  38. data/ext/nokogiri/xml_sax_push_parser.c +1 -3
  39. data/ext/nokogiri/xml_schema.c +4 -6
  40. data/ext/nokogiri/xml_syntax_error.c +1 -1
  41. data/ext/nokogiri/xml_text.c +2 -2
  42. data/ext/nokogiri/xml_xpath_context.c +144 -114
  43. data/ext/nokogiri/xslt_stylesheet.c +122 -23
  44. data/gumbo-parser/Makefile +10 -0
  45. data/gumbo-parser/src/attribute.h +1 -1
  46. data/gumbo-parser/src/error.c +2 -2
  47. data/gumbo-parser/src/error.h +1 -1
  48. data/gumbo-parser/src/foreign_attrs.c +2 -2
  49. data/gumbo-parser/src/{gumbo.h → nokogiri_gumbo.h} +1 -0
  50. data/gumbo-parser/src/parser.c +8 -16
  51. data/gumbo-parser/src/replacement.h +1 -1
  52. data/gumbo-parser/src/string_buffer.h +1 -1
  53. data/gumbo-parser/src/string_piece.c +1 -1
  54. data/gumbo-parser/src/svg_attrs.c +2 -2
  55. data/gumbo-parser/src/svg_tags.c +2 -2
  56. data/gumbo-parser/src/tag.c +2 -1
  57. data/gumbo-parser/src/tag_lookup.c +7 -7
  58. data/gumbo-parser/src/tag_lookup.gperf +1 -0
  59. data/gumbo-parser/src/tag_lookup.h +1 -1
  60. data/gumbo-parser/src/token_buffer.h +1 -1
  61. data/gumbo-parser/src/tokenizer.c +1 -1
  62. data/gumbo-parser/src/tokenizer.h +1 -1
  63. data/gumbo-parser/src/utf8.c +1 -1
  64. data/gumbo-parser/src/utf8.h +1 -1
  65. data/gumbo-parser/src/util.c +1 -3
  66. data/gumbo-parser/src/util.h +4 -0
  67. data/gumbo-parser/src/vector.h +1 -1
  68. data/lib/nokogiri/class_resolver.rb +67 -0
  69. data/lib/nokogiri/css/node.rb +9 -8
  70. data/lib/nokogiri/css/parser.rb +360 -341
  71. data/lib/nokogiri/css/parser.y +249 -244
  72. data/lib/nokogiri/css/parser_extras.rb +22 -20
  73. data/lib/nokogiri/css/syntax_error.rb +1 -0
  74. data/lib/nokogiri/css/tokenizer.rb +4 -3
  75. data/lib/nokogiri/css/tokenizer.rex +3 -2
  76. data/lib/nokogiri/css/xpath_visitor.rb +184 -85
  77. data/lib/nokogiri/css.rb +44 -6
  78. data/lib/nokogiri/decorators/slop.rb +8 -7
  79. data/lib/nokogiri/encoding_handler.rb +57 -0
  80. data/lib/nokogiri/extension.rb +4 -3
  81. data/lib/nokogiri/gumbo.rb +1 -0
  82. data/lib/nokogiri/html.rb +16 -10
  83. data/lib/nokogiri/html4/builder.rb +1 -0
  84. data/lib/nokogiri/html4/document.rb +56 -164
  85. data/lib/nokogiri/html4/document_fragment.rb +11 -7
  86. data/lib/nokogiri/html4/element_description.rb +1 -0
  87. data/lib/nokogiri/html4/element_description_defaults.rb +432 -532
  88. data/lib/nokogiri/html4/encoding_reader.rb +121 -0
  89. data/lib/nokogiri/html4/entity_lookup.rb +2 -1
  90. data/lib/nokogiri/html4/sax/parser.rb +5 -2
  91. data/lib/nokogiri/html4/sax/parser_context.rb +1 -0
  92. data/lib/nokogiri/html4/sax/push_parser.rb +7 -7
  93. data/lib/nokogiri/html4.rb +12 -5
  94. data/lib/nokogiri/html5/document.rb +126 -32
  95. data/lib/nokogiri/html5/document_fragment.rb +14 -4
  96. data/lib/nokogiri/html5/node.rb +12 -7
  97. data/lib/nokogiri/html5.rb +138 -222
  98. data/lib/nokogiri/jruby/dependencies.rb +2 -19
  99. data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
  100. data/lib/nokogiri/syntax_error.rb +1 -0
  101. data/lib/nokogiri/version/constant.rb +2 -1
  102. data/lib/nokogiri/version/info.rb +32 -24
  103. data/lib/nokogiri/version.rb +1 -0
  104. data/lib/nokogiri/xml/attr.rb +54 -3
  105. data/lib/nokogiri/xml/attribute_decl.rb +2 -1
  106. data/lib/nokogiri/xml/builder.rb +35 -33
  107. data/lib/nokogiri/xml/cdata.rb +2 -1
  108. data/lib/nokogiri/xml/character_data.rb +1 -0
  109. data/lib/nokogiri/xml/document.rb +232 -143
  110. data/lib/nokogiri/xml/document_fragment.rb +88 -42
  111. data/lib/nokogiri/xml/dtd.rb +3 -2
  112. data/lib/nokogiri/xml/element_content.rb +1 -0
  113. data/lib/nokogiri/xml/element_decl.rb +2 -1
  114. data/lib/nokogiri/xml/entity_decl.rb +3 -2
  115. data/lib/nokogiri/xml/entity_reference.rb +1 -0
  116. data/lib/nokogiri/xml/namespace.rb +44 -0
  117. data/lib/nokogiri/xml/node/save_options.rb +14 -8
  118. data/lib/nokogiri/xml/node.rb +708 -383
  119. data/lib/nokogiri/xml/node_set.rb +134 -59
  120. data/lib/nokogiri/xml/notation.rb +12 -0
  121. data/lib/nokogiri/xml/parse_options.rb +140 -56
  122. data/lib/nokogiri/xml/pp/character_data.rb +8 -6
  123. data/lib/nokogiri/xml/pp/node.rb +26 -26
  124. data/lib/nokogiri/xml/pp.rb +1 -0
  125. data/lib/nokogiri/xml/processing_instruction.rb +3 -1
  126. data/lib/nokogiri/xml/reader.rb +20 -24
  127. data/lib/nokogiri/xml/relax_ng.rb +1 -0
  128. data/lib/nokogiri/xml/sax/document.rb +20 -19
  129. data/lib/nokogiri/xml/sax/parser.rb +38 -36
  130. data/lib/nokogiri/xml/sax/parser_context.rb +7 -3
  131. data/lib/nokogiri/xml/sax/push_parser.rb +5 -5
  132. data/lib/nokogiri/xml/sax.rb +1 -0
  133. data/lib/nokogiri/xml/schema.rb +7 -6
  134. data/lib/nokogiri/xml/searchable.rb +93 -62
  135. data/lib/nokogiri/xml/syntax_error.rb +5 -4
  136. data/lib/nokogiri/xml/text.rb +1 -0
  137. data/lib/nokogiri/xml/xpath/syntax_error.rb +2 -1
  138. data/lib/nokogiri/xml/xpath.rb +12 -0
  139. data/lib/nokogiri/xml/xpath_context.rb +2 -3
  140. data/lib/nokogiri/xml.rb +4 -3
  141. data/lib/nokogiri/xslt/stylesheet.rb +1 -0
  142. data/lib/nokogiri/xslt.rb +21 -13
  143. data/lib/nokogiri.rb +22 -27
  144. data/lib/xsd/xmlparser/nokogiri.rb +28 -25
  145. data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
  146. data/patches/libxslt/0001-update-automake-files-for-arm64.patch +2445 -1919
  147. data/ports/archives/libxml2-2.10.4.tar.xz +0 -0
  148. data/ports/archives/libxslt-1.1.37.tar.xz +0 -0
  149. metadata +20 -171
  150. data/patches/libxml2/0004-use-glibc-strlen.patch +0 -53
  151. data/patches/libxml2/0005-avoid-isnan-isinf.patch +0 -81
  152. data/patches/libxml2/0006-update-automake-files-for-arm64.patch +0 -2511
  153. data/patches/libxml2/0007-Fix-XPath-recursion-limit.patch +0 -31
  154. data/patches/libxslt/0002-Fix-xml2-config-check-in-configure-script.patch +0 -19
  155. data/ports/archives/libxml2-2.9.12.tar.gz +0 -0
  156. data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
@@ -1,6 +1,7 @@
1
+ # coding: utf-8
1
2
  # frozen_string_literal: true
2
3
 
3
- require 'pathname'
4
+ require "pathname"
4
5
 
5
6
  module Nokogiri
6
7
  module HTML4
@@ -9,11 +10,10 @@ module Nokogiri
9
10
  # Get the meta tag encoding for this document. If there is no meta tag,
10
11
  # then nil is returned.
11
12
  def meta_encoding
12
- case
13
- when meta = at('//meta[@charset]')
13
+ if (meta = at_xpath("//meta[@charset]"))
14
14
  meta[:charset]
15
- when meta = meta_content_type
16
- meta['content'][/charset\s*=\s*([\w-]+)/i, 1]
15
+ elsif (meta = meta_content_type)
16
+ meta["content"][/charset\s*=\s*([\w-]+)/i, 1]
17
17
  end
18
18
  end
19
19
 
@@ -33,24 +33,22 @@ module Nokogiri
33
33
  #
34
34
  # Beware in CRuby, that libxml2 automatically inserts a meta tag
35
35
  # into a head element.
36
- def meta_encoding= encoding
37
- case
38
- when meta = meta_content_type
39
- meta['content'] = 'text/html; charset=%s' % encoding
36
+ def meta_encoding=(encoding)
37
+ if (meta = meta_content_type)
38
+ meta["content"] = format("text/html; charset=%s", encoding)
40
39
  encoding
41
- when meta = at('//meta[@charset]')
42
- meta['charset'] = encoding
40
+ elsif (meta = at_xpath("//meta[@charset]"))
41
+ meta["charset"] = encoding
43
42
  else
44
- meta = XML::Node.new('meta', self)
45
- if dtd = internal_subset and dtd.html5_dtd?
46
- meta['charset'] = encoding
43
+ meta = XML::Node.new("meta", self)
44
+ if (dtd = internal_subset) && dtd.html5_dtd?
45
+ meta["charset"] = encoding
47
46
  else
48
- meta['http-equiv'] = 'Content-Type'
49
- meta['content'] = 'text/html; charset=%s' % encoding
47
+ meta["http-equiv"] = "Content-Type"
48
+ meta["content"] = format("text/html; charset=%s", encoding)
50
49
  end
51
50
 
52
- case
53
- when head = at('//head')
51
+ if (head = at_xpath("//head"))
54
52
  head.prepend_child(meta)
55
53
  else
56
54
  set_metadata_element(meta)
@@ -60,9 +58,9 @@ module Nokogiri
60
58
  end
61
59
 
62
60
  def meta_content_type
63
- xpath('//meta[@http-equiv and boolean(@content)]').find { |node|
64
- node['http-equiv'] =~ /\AContent-Type\z/i
65
- }
61
+ xpath("//meta[@http-equiv and boolean(@content)]").find do |node|
62
+ node["http-equiv"] =~ /\AContent-Type\z/i
63
+ end
66
64
  end
67
65
  private :meta_content_type
68
66
 
@@ -70,7 +68,7 @@ module Nokogiri
70
68
  # Get the title string of this document. Return nil if there is
71
69
  # no title tag.
72
70
  def title
73
- title = at('//title') and title.inner_text
71
+ (title = at_xpath("//title")) && title.inner_text
74
72
  end
75
73
 
76
74
  ###
@@ -86,52 +84,50 @@ module Nokogiri
86
84
  # content element (typically <body>) if any.
87
85
  def title=(text)
88
86
  tnode = XML::Text.new(text, self)
89
- if title = at('//title')
87
+ if (title = at_xpath("//title"))
90
88
  title.children = tnode
91
89
  return text
92
90
  end
93
91
 
94
- title = XML::Node.new('title', self) << tnode
95
- case
96
- when head = at('//head')
92
+ title = XML::Node.new("title", self) << tnode
93
+ if (head = at_xpath("//head"))
97
94
  head << title
98
- when meta = at('//meta[@charset]') || meta_content_type
95
+ elsif (meta = (at_xpath("//meta[@charset]") || meta_content_type))
99
96
  # better put after charset declaration
100
97
  meta.add_next_sibling(title)
101
98
  else
102
99
  set_metadata_element(title)
103
100
  end
104
- text
105
101
  end
106
102
 
107
- def set_metadata_element(element)
108
- case
109
- when head = at('//head')
103
+ def set_metadata_element(element) # rubocop:disable Naming/AccessorMethodName
104
+ if (head = at_xpath("//head"))
110
105
  head << element
111
- when html = at('//html')
112
- head = html.prepend_child(XML::Node.new('head', self))
106
+ elsif (html = at_xpath("//html"))
107
+ head = html.prepend_child(XML::Node.new("head", self))
113
108
  head.prepend_child(element)
114
- when first = children.find { |node|
115
- case node
116
- when XML::Element, XML::Text
117
- true
118
- end
119
- }
109
+ elsif (first = children.find do |node|
110
+ case node
111
+ when XML::Element, XML::Text
112
+ true
113
+ end
114
+ end)
120
115
  # We reach here only if the underlying document model
121
116
  # allows <html>/<head> elements to be omitted and does not
122
117
  # automatically supply them.
123
118
  first.add_previous_sibling(element)
124
119
  else
125
- html = add_child(XML::Node.new('html', self))
126
- head = html.add_child(XML::Node.new('head', self))
120
+ html = add_child(XML::Node.new("html", self))
121
+ head = html.add_child(XML::Node.new("head", self))
127
122
  head.prepend_child(element)
128
123
  end
129
124
  end
130
125
  private :set_metadata_element
131
126
 
132
127
  ####
133
- # Serialize Node using +options+. Save options can also be set using a
134
- # block. See SaveOptions.
128
+ # Serialize Node using +options+. Save options can also be set using a block.
129
+ #
130
+ # See also Nokogiri::XML::Node::SaveOptions and Node@Serialization+and+Generating+Output.
135
131
  #
136
132
  # These two statements are equivalent:
137
133
  #
@@ -143,15 +139,25 @@ module Nokogiri
143
139
  # config.format.as_xml
144
140
  # end
145
141
  #
146
- def serialize options = {}
142
+ def serialize(options = {})
147
143
  options[:save_with] ||= XML::Node::SaveOptions::DEFAULT_HTML
148
144
  super
149
145
  end
150
146
 
151
147
  ####
152
148
  # Create a Nokogiri::XML::DocumentFragment from +tags+
153
- def fragment tags = nil
154
- DocumentFragment.new(self, tags, self.root)
149
+ def fragment(tags = nil)
150
+ DocumentFragment.new(self, tags, root)
151
+ end
152
+
153
+ # :call-seq:
154
+ # xpath_doctype() → Nokogiri::CSS::XPathVisitor::DoctypeConfig
155
+ #
156
+ # [Returns] The document type which determines CSS-to-XPath translation.
157
+ #
158
+ # See XPathVisitor for more information.
159
+ def xpath_doctype
160
+ Nokogiri::CSS::XPathVisitor::DoctypeConfig::HTML4
155
161
  end
156
162
 
157
163
  class << self
@@ -163,15 +169,14 @@ module Nokogiri
163
169
  # is a number that sets options in the parser, such as
164
170
  # Nokogiri::XML::ParseOptions::RECOVER. See the constants in
165
171
  # Nokogiri::XML::ParseOptions.
166
- def parse string_or_io, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML
172
+ def parse(string_or_io, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML)
167
173
  options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
168
-
169
174
  yield options if block_given?
170
175
 
171
176
  url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
172
177
 
173
178
  if string_or_io.respond_to?(:encoding)
174
- unless string_or_io.encoding.name == "ASCII-8BIT"
179
+ unless string_or_io.encoding == Encoding::ASCII_8BIT
175
180
  encoding ||= string_or_io.encoding.name
176
181
  end
177
182
  end
@@ -184,21 +189,10 @@ module Nokogiri
184
189
  end
185
190
 
186
191
  unless encoding
187
- # Libxml2's parser has poor support for encoding
188
- # detection. First, it does not recognize the HTML5
189
- # style meta charset declaration. Secondly, even if it
190
- # successfully detects an encoding hint, it does not
191
- # re-decode or re-parse the preceding part which may be
192
- # garbled.
193
- #
194
- # EncodingReader aims to perform advanced encoding
195
- # detection beyond what Libxml2 does, and to emulate
196
- # rewinding of a stream and make Libxml2 redo parsing
197
- # from the start when an encoding hint is found.
198
192
  string_or_io = EncodingReader.new(string_or_io)
199
193
  begin
200
194
  return read_io(string_or_io, url, encoding, options.to_i)
201
- rescue EncodingFound => e
195
+ rescue EncodingReader::EncodingFound => e
202
196
  encoding = e.found_encoding
203
197
  end
204
198
  end
@@ -206,7 +200,7 @@ module Nokogiri
206
200
  end
207
201
 
208
202
  # read_memory pukes on empty docs
209
- if string_or_io.nil? or string_or_io.empty?
203
+ if string_or_io.nil? || string_or_io.empty?
210
204
  return encoding ? new.tap { |i| i.encoding = encoding } : new
211
205
  end
212
206
 
@@ -215,108 +209,6 @@ module Nokogiri
215
209
  read_memory(string_or_io, url, encoding, options.to_i)
216
210
  end
217
211
  end
218
-
219
- class EncodingFound < StandardError # :nodoc:
220
- attr_reader :found_encoding
221
-
222
- def initialize(encoding)
223
- @found_encoding = encoding
224
- super("encoding found: %s" % encoding)
225
- end
226
- end
227
-
228
- class EncodingReader # :nodoc:
229
- class SAXHandler < Nokogiri::XML::SAX::Document # :nodoc:
230
- attr_reader :encoding
231
-
232
- def initialize
233
- @encoding = nil
234
- super()
235
- end
236
-
237
- def start_element(name, attrs = [])
238
- return unless name == 'meta'
239
- attr = Hash[attrs]
240
- charset = attr['charset'] and
241
- @encoding = charset
242
- http_equiv = attr['http-equiv'] and
243
- http_equiv.match(/\AContent-Type\z/i) and
244
- content = attr['content'] and
245
- m = content.match(/;\s*charset\s*=\s*([\w-]+)/) and
246
- @encoding = m[1]
247
- end
248
- end
249
-
250
- class JumpSAXHandler < SAXHandler
251
- def initialize(jumptag)
252
- @jumptag = jumptag
253
- super()
254
- end
255
-
256
- def start_element(name, attrs = [])
257
- super
258
- throw @jumptag, @encoding if @encoding
259
- throw @jumptag, nil if name =~ /\A(?:div|h1|img|p|br)\z/
260
- end
261
- end
262
-
263
- def self.detect_encoding(chunk)
264
- m = chunk.match(/\A(<\?xml[ \t\r\n]+[^>]*>)/) and
265
- return Nokogiri.XML(m[1]).encoding
266
-
267
- if Nokogiri.jruby?
268
- m = chunk.match(/(<meta\s)(.*)(charset\s*=\s*([\w-]+))(.*)/i) and
269
- return m[4]
270
- catch(:encoding_found) {
271
- Nokogiri::HTML4::SAX::Parser.new(JumpSAXHandler.new(:encoding_found)).parse(chunk)
272
- nil
273
- }
274
- else
275
- handler = SAXHandler.new
276
- parser = Nokogiri::HTML4::SAX::PushParser.new(handler)
277
- parser << chunk rescue Nokogiri::SyntaxError
278
- handler.encoding
279
- end
280
- end
281
-
282
- def initialize(io)
283
- @io = io
284
- @firstchunk = nil
285
- @encoding_found = nil
286
- end
287
-
288
- # This method is used by the C extension so that
289
- # Nokogiri::HTML4::Document#read_io() does not leak memory when
290
- # EncodingFound is raised.
291
- attr_reader :encoding_found
292
-
293
- def read(len)
294
- # no support for a call without len
295
-
296
- if !@firstchunk
297
- @firstchunk = @io.read(len) or return nil
298
-
299
- # This implementation expects that the first call from
300
- # htmlReadIO() is made with a length long enough (~1KB) to
301
- # achieve advanced encoding detection.
302
- if encoding = EncodingReader.detect_encoding(@firstchunk)
303
- # The first chunk is stored for the next read in retry.
304
- raise @encoding_found = EncodingFound.new(encoding)
305
- end
306
- end
307
- @encoding_found = nil
308
-
309
- ret = @firstchunk.slice!(0, len)
310
- if (len -= ret.length) > 0
311
- rest = @io.read(len) and ret << rest
312
- end
313
- if ret.empty?
314
- nil
315
- else
316
- ret
317
- end
318
- end
319
- end
320
212
  end
321
213
  end
322
214
  end
@@ -1,34 +1,38 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  module Nokogiri
3
4
  module HTML4
4
5
  class DocumentFragment < Nokogiri::XML::DocumentFragment
5
6
  ####
6
7
  # Create a Nokogiri::XML::DocumentFragment from +tags+, using +encoding+
7
- def self.parse(tags, encoding = nil)
8
+ def self.parse(tags, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block)
8
9
  doc = HTML4::Document.new
9
10
 
10
11
  encoding ||= if tags.respond_to?(:encoding)
11
12
  encoding = tags.encoding
12
13
  if encoding == ::Encoding::ASCII_8BIT
13
- 'UTF-8'
14
+ "UTF-8"
14
15
  else
15
16
  encoding.name
16
17
  end
17
18
  else
18
- 'UTF-8'
19
+ "UTF-8"
19
20
  end
20
21
 
21
22
  doc.encoding = encoding
22
23
 
23
- new(doc, tags)
24
+ new(doc, tags, nil, options, &block)
24
25
  end
25
26
 
26
- def initialize(document, tags = nil, ctx = nil)
27
+ def initialize(document, tags = nil, ctx = nil, options = XML::ParseOptions::DEFAULT_HTML)
27
28
  return self unless tags
28
29
 
30
+ options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
31
+ yield options if block_given?
32
+
29
33
  if ctx
30
34
  preexisting_errors = document.errors.dup
31
- node_set = ctx.parse("<div>#{tags}</div>")
35
+ node_set = ctx.parse("<div>#{tags}</div>", options)
32
36
  node_set.first.children.each { |child| child.parent = self } unless node_set.empty?
33
37
  self.errors = document.errors - preexisting_errors
34
38
  else
@@ -39,7 +43,7 @@ module Nokogiri
39
43
  "/html/body/node()"
40
44
  end
41
45
 
42
- temp_doc = HTML4::Document.parse("<html><body>#{tags}", nil, document.encoding)
46
+ temp_doc = HTML4::Document.parse("<html><body>#{tags}", nil, document.encoding, options)
43
47
  temp_doc.xpath(path).each { |child| child.parent = self }
44
48
  self.errors = temp_doc.errors
45
49
  end
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  module Nokogiri
3
4
  module HTML4
4
5
  class ElementDescription