nokogiri 1.12.5 → 1.14.3
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +41 -0
- data/LICENSE-DEPENDENCIES.md +830 -509
- data/LICENSE.md +1 -1
- data/README.md +23 -14
- data/bin/nokogiri +63 -50
- data/dependencies.yml +33 -66
- data/ext/nokogiri/extconf.rb +159 -63
- data/ext/nokogiri/gumbo.c +21 -11
- data/ext/nokogiri/html4_document.c +2 -2
- data/ext/nokogiri/html4_element_description.c +1 -1
- data/ext/nokogiri/html4_entity_lookup.c +2 -2
- data/ext/nokogiri/html4_sax_parser_context.c +3 -9
- data/ext/nokogiri/html4_sax_push_parser.c +1 -1
- data/ext/nokogiri/nokogiri.c +38 -51
- data/ext/nokogiri/nokogiri.h +26 -14
- data/ext/nokogiri/test_global_handlers.c +1 -1
- data/ext/nokogiri/xml_attr.c +3 -3
- data/ext/nokogiri/xml_attribute_decl.c +5 -5
- data/ext/nokogiri/xml_cdata.c +3 -3
- data/ext/nokogiri/xml_comment.c +1 -1
- data/ext/nokogiri/xml_document.c +53 -44
- data/ext/nokogiri/xml_document_fragment.c +1 -3
- data/ext/nokogiri/xml_dtd.c +11 -11
- data/ext/nokogiri/xml_element_content.c +3 -3
- data/ext/nokogiri/xml_element_decl.c +5 -5
- data/ext/nokogiri/xml_encoding_handler.c +28 -14
- data/ext/nokogiri/xml_entity_decl.c +6 -6
- data/ext/nokogiri/xml_entity_reference.c +1 -1
- data/ext/nokogiri/xml_namespace.c +80 -14
- data/ext/nokogiri/xml_node.c +982 -396
- data/ext/nokogiri/xml_node_set.c +4 -6
- data/ext/nokogiri/xml_processing_instruction.c +1 -1
- data/ext/nokogiri/xml_reader.c +133 -32
- data/ext/nokogiri/xml_relax_ng.c +1 -3
- data/ext/nokogiri/xml_sax_parser.c +23 -17
- data/ext/nokogiri/xml_sax_parser_context.c +11 -9
- data/ext/nokogiri/xml_sax_push_parser.c +1 -3
- data/ext/nokogiri/xml_schema.c +4 -6
- data/ext/nokogiri/xml_syntax_error.c +1 -1
- data/ext/nokogiri/xml_text.c +2 -2
- data/ext/nokogiri/xml_xpath_context.c +144 -114
- data/ext/nokogiri/xslt_stylesheet.c +122 -23
- data/gumbo-parser/Makefile +10 -0
- data/gumbo-parser/src/attribute.h +1 -1
- data/gumbo-parser/src/error.c +2 -2
- data/gumbo-parser/src/error.h +1 -1
- data/gumbo-parser/src/foreign_attrs.c +2 -2
- data/gumbo-parser/src/{gumbo.h → nokogiri_gumbo.h} +1 -0
- data/gumbo-parser/src/parser.c +8 -16
- data/gumbo-parser/src/replacement.h +1 -1
- data/gumbo-parser/src/string_buffer.h +1 -1
- data/gumbo-parser/src/string_piece.c +1 -1
- data/gumbo-parser/src/svg_attrs.c +2 -2
- data/gumbo-parser/src/svg_tags.c +2 -2
- data/gumbo-parser/src/tag.c +2 -1
- data/gumbo-parser/src/tag_lookup.c +7 -7
- data/gumbo-parser/src/tag_lookup.gperf +1 -0
- data/gumbo-parser/src/tag_lookup.h +1 -1
- data/gumbo-parser/src/token_buffer.h +1 -1
- data/gumbo-parser/src/tokenizer.c +1 -1
- data/gumbo-parser/src/tokenizer.h +1 -1
- data/gumbo-parser/src/utf8.c +1 -1
- data/gumbo-parser/src/utf8.h +1 -1
- data/gumbo-parser/src/util.c +1 -3
- data/gumbo-parser/src/util.h +4 -0
- data/gumbo-parser/src/vector.h +1 -1
- data/lib/nokogiri/class_resolver.rb +67 -0
- data/lib/nokogiri/css/node.rb +9 -8
- data/lib/nokogiri/css/parser.rb +360 -341
- data/lib/nokogiri/css/parser.y +249 -244
- data/lib/nokogiri/css/parser_extras.rb +22 -20
- data/lib/nokogiri/css/syntax_error.rb +1 -0
- data/lib/nokogiri/css/tokenizer.rb +4 -3
- data/lib/nokogiri/css/tokenizer.rex +3 -2
- data/lib/nokogiri/css/xpath_visitor.rb +184 -85
- data/lib/nokogiri/css.rb +44 -6
- data/lib/nokogiri/decorators/slop.rb +8 -7
- data/lib/nokogiri/encoding_handler.rb +57 -0
- data/lib/nokogiri/extension.rb +4 -3
- data/lib/nokogiri/gumbo.rb +1 -0
- data/lib/nokogiri/html.rb +16 -10
- data/lib/nokogiri/html4/builder.rb +1 -0
- data/lib/nokogiri/html4/document.rb +56 -164
- data/lib/nokogiri/html4/document_fragment.rb +11 -7
- data/lib/nokogiri/html4/element_description.rb +1 -0
- data/lib/nokogiri/html4/element_description_defaults.rb +432 -532
- data/lib/nokogiri/html4/encoding_reader.rb +121 -0
- data/lib/nokogiri/html4/entity_lookup.rb +2 -1
- data/lib/nokogiri/html4/sax/parser.rb +5 -2
- data/lib/nokogiri/html4/sax/parser_context.rb +1 -0
- data/lib/nokogiri/html4/sax/push_parser.rb +7 -7
- data/lib/nokogiri/html4.rb +12 -5
- data/lib/nokogiri/html5/document.rb +126 -32
- data/lib/nokogiri/html5/document_fragment.rb +14 -4
- data/lib/nokogiri/html5/node.rb +12 -7
- data/lib/nokogiri/html5.rb +138 -222
- data/lib/nokogiri/jruby/dependencies.rb +2 -19
- data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
- data/lib/nokogiri/syntax_error.rb +1 -0
- data/lib/nokogiri/version/constant.rb +2 -1
- data/lib/nokogiri/version/info.rb +32 -24
- data/lib/nokogiri/version.rb +1 -0
- data/lib/nokogiri/xml/attr.rb +54 -3
- data/lib/nokogiri/xml/attribute_decl.rb +2 -1
- data/lib/nokogiri/xml/builder.rb +35 -33
- data/lib/nokogiri/xml/cdata.rb +2 -1
- data/lib/nokogiri/xml/character_data.rb +1 -0
- data/lib/nokogiri/xml/document.rb +232 -143
- data/lib/nokogiri/xml/document_fragment.rb +88 -42
- data/lib/nokogiri/xml/dtd.rb +3 -2
- data/lib/nokogiri/xml/element_content.rb +1 -0
- data/lib/nokogiri/xml/element_decl.rb +2 -1
- data/lib/nokogiri/xml/entity_decl.rb +3 -2
- data/lib/nokogiri/xml/entity_reference.rb +1 -0
- data/lib/nokogiri/xml/namespace.rb +44 -0
- data/lib/nokogiri/xml/node/save_options.rb +14 -8
- data/lib/nokogiri/xml/node.rb +708 -383
- data/lib/nokogiri/xml/node_set.rb +134 -59
- data/lib/nokogiri/xml/notation.rb +12 -0
- data/lib/nokogiri/xml/parse_options.rb +140 -56
- data/lib/nokogiri/xml/pp/character_data.rb +8 -6
- data/lib/nokogiri/xml/pp/node.rb +26 -26
- data/lib/nokogiri/xml/pp.rb +1 -0
- data/lib/nokogiri/xml/processing_instruction.rb +3 -1
- data/lib/nokogiri/xml/reader.rb +20 -24
- data/lib/nokogiri/xml/relax_ng.rb +1 -0
- data/lib/nokogiri/xml/sax/document.rb +20 -19
- data/lib/nokogiri/xml/sax/parser.rb +38 -36
- data/lib/nokogiri/xml/sax/parser_context.rb +7 -3
- data/lib/nokogiri/xml/sax/push_parser.rb +5 -5
- data/lib/nokogiri/xml/sax.rb +1 -0
- data/lib/nokogiri/xml/schema.rb +7 -6
- data/lib/nokogiri/xml/searchable.rb +93 -62
- data/lib/nokogiri/xml/syntax_error.rb +5 -4
- data/lib/nokogiri/xml/text.rb +1 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +2 -1
- data/lib/nokogiri/xml/xpath.rb +12 -0
- data/lib/nokogiri/xml/xpath_context.rb +2 -3
- data/lib/nokogiri/xml.rb +4 -3
- data/lib/nokogiri/xslt/stylesheet.rb +1 -0
- data/lib/nokogiri/xslt.rb +21 -13
- data/lib/nokogiri.rb +22 -27
- data/lib/xsd/xmlparser/nokogiri.rb +28 -25
- data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
- data/patches/libxslt/0001-update-automake-files-for-arm64.patch +2445 -1919
- data/ports/archives/libxml2-2.10.4.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.37.tar.xz +0 -0
- metadata +20 -171
- data/patches/libxml2/0004-use-glibc-strlen.patch +0 -53
- data/patches/libxml2/0005-avoid-isnan-isinf.patch +0 -81
- data/patches/libxml2/0006-update-automake-files-for-arm64.patch +0 -2511
- data/patches/libxml2/0007-Fix-XPath-recursion-limit.patch +0 -31
- data/patches/libxslt/0002-Fix-xml2-config-check-in-configure-script.patch +0 -19
- data/ports/archives/libxml2-2.9.12.tar.gz +0 -0
- data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
@@ -1,6 +1,7 @@
|
|
1
|
+
# coding: utf-8
|
1
2
|
# frozen_string_literal: true
|
2
3
|
|
3
|
-
require
|
4
|
+
require "pathname"
|
4
5
|
|
5
6
|
module Nokogiri
|
6
7
|
module HTML4
|
@@ -9,11 +10,10 @@ module Nokogiri
|
|
9
10
|
# Get the meta tag encoding for this document. If there is no meta tag,
|
10
11
|
# then nil is returned.
|
11
12
|
def meta_encoding
|
12
|
-
|
13
|
-
when meta = at('//meta[@charset]')
|
13
|
+
if (meta = at_xpath("//meta[@charset]"))
|
14
14
|
meta[:charset]
|
15
|
-
|
16
|
-
meta[
|
15
|
+
elsif (meta = meta_content_type)
|
16
|
+
meta["content"][/charset\s*=\s*([\w-]+)/i, 1]
|
17
17
|
end
|
18
18
|
end
|
19
19
|
|
@@ -33,24 +33,22 @@ module Nokogiri
|
|
33
33
|
#
|
34
34
|
# Beware in CRuby, that libxml2 automatically inserts a meta tag
|
35
35
|
# into a head element.
|
36
|
-
def meta_encoding=
|
37
|
-
|
38
|
-
|
39
|
-
meta['content'] = 'text/html; charset=%s' % encoding
|
36
|
+
def meta_encoding=(encoding)
|
37
|
+
if (meta = meta_content_type)
|
38
|
+
meta["content"] = format("text/html; charset=%s", encoding)
|
40
39
|
encoding
|
41
|
-
|
42
|
-
meta[
|
40
|
+
elsif (meta = at_xpath("//meta[@charset]"))
|
41
|
+
meta["charset"] = encoding
|
43
42
|
else
|
44
|
-
meta = XML::Node.new(
|
45
|
-
if dtd = internal_subset
|
46
|
-
meta[
|
43
|
+
meta = XML::Node.new("meta", self)
|
44
|
+
if (dtd = internal_subset) && dtd.html5_dtd?
|
45
|
+
meta["charset"] = encoding
|
47
46
|
else
|
48
|
-
meta[
|
49
|
-
meta[
|
47
|
+
meta["http-equiv"] = "Content-Type"
|
48
|
+
meta["content"] = format("text/html; charset=%s", encoding)
|
50
49
|
end
|
51
50
|
|
52
|
-
|
53
|
-
when head = at('//head')
|
51
|
+
if (head = at_xpath("//head"))
|
54
52
|
head.prepend_child(meta)
|
55
53
|
else
|
56
54
|
set_metadata_element(meta)
|
@@ -60,9 +58,9 @@ module Nokogiri
|
|
60
58
|
end
|
61
59
|
|
62
60
|
def meta_content_type
|
63
|
-
xpath(
|
64
|
-
node[
|
65
|
-
|
61
|
+
xpath("//meta[@http-equiv and boolean(@content)]").find do |node|
|
62
|
+
node["http-equiv"] =~ /\AContent-Type\z/i
|
63
|
+
end
|
66
64
|
end
|
67
65
|
private :meta_content_type
|
68
66
|
|
@@ -70,7 +68,7 @@ module Nokogiri
|
|
70
68
|
# Get the title string of this document. Return nil if there is
|
71
69
|
# no title tag.
|
72
70
|
def title
|
73
|
-
title =
|
71
|
+
(title = at_xpath("//title")) && title.inner_text
|
74
72
|
end
|
75
73
|
|
76
74
|
###
|
@@ -86,52 +84,50 @@ module Nokogiri
|
|
86
84
|
# content element (typically <body>) if any.
|
87
85
|
def title=(text)
|
88
86
|
tnode = XML::Text.new(text, self)
|
89
|
-
if title =
|
87
|
+
if (title = at_xpath("//title"))
|
90
88
|
title.children = tnode
|
91
89
|
return text
|
92
90
|
end
|
93
91
|
|
94
|
-
title = XML::Node.new(
|
95
|
-
|
96
|
-
when head = at('//head')
|
92
|
+
title = XML::Node.new("title", self) << tnode
|
93
|
+
if (head = at_xpath("//head"))
|
97
94
|
head << title
|
98
|
-
|
95
|
+
elsif (meta = (at_xpath("//meta[@charset]") || meta_content_type))
|
99
96
|
# better put after charset declaration
|
100
97
|
meta.add_next_sibling(title)
|
101
98
|
else
|
102
99
|
set_metadata_element(title)
|
103
100
|
end
|
104
|
-
text
|
105
101
|
end
|
106
102
|
|
107
|
-
def set_metadata_element(element)
|
108
|
-
|
109
|
-
when head = at('//head')
|
103
|
+
def set_metadata_element(element) # rubocop:disable Naming/AccessorMethodName
|
104
|
+
if (head = at_xpath("//head"))
|
110
105
|
head << element
|
111
|
-
|
112
|
-
head = html.prepend_child(XML::Node.new(
|
106
|
+
elsif (html = at_xpath("//html"))
|
107
|
+
head = html.prepend_child(XML::Node.new("head", self))
|
113
108
|
head.prepend_child(element)
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
109
|
+
elsif (first = children.find do |node|
|
110
|
+
case node
|
111
|
+
when XML::Element, XML::Text
|
112
|
+
true
|
113
|
+
end
|
114
|
+
end)
|
120
115
|
# We reach here only if the underlying document model
|
121
116
|
# allows <html>/<head> elements to be omitted and does not
|
122
117
|
# automatically supply them.
|
123
118
|
first.add_previous_sibling(element)
|
124
119
|
else
|
125
|
-
html = add_child(XML::Node.new(
|
126
|
-
head = html.add_child(XML::Node.new(
|
120
|
+
html = add_child(XML::Node.new("html", self))
|
121
|
+
head = html.add_child(XML::Node.new("head", self))
|
127
122
|
head.prepend_child(element)
|
128
123
|
end
|
129
124
|
end
|
130
125
|
private :set_metadata_element
|
131
126
|
|
132
127
|
####
|
133
|
-
# Serialize Node using +options+.
|
134
|
-
#
|
128
|
+
# Serialize Node using +options+. Save options can also be set using a block.
|
129
|
+
#
|
130
|
+
# See also Nokogiri::XML::Node::SaveOptions and Node@Serialization+and+Generating+Output.
|
135
131
|
#
|
136
132
|
# These two statements are equivalent:
|
137
133
|
#
|
@@ -143,15 +139,25 @@ module Nokogiri
|
|
143
139
|
# config.format.as_xml
|
144
140
|
# end
|
145
141
|
#
|
146
|
-
def serialize
|
142
|
+
def serialize(options = {})
|
147
143
|
options[:save_with] ||= XML::Node::SaveOptions::DEFAULT_HTML
|
148
144
|
super
|
149
145
|
end
|
150
146
|
|
151
147
|
####
|
152
148
|
# Create a Nokogiri::XML::DocumentFragment from +tags+
|
153
|
-
def fragment
|
154
|
-
DocumentFragment.new(self, tags,
|
149
|
+
def fragment(tags = nil)
|
150
|
+
DocumentFragment.new(self, tags, root)
|
151
|
+
end
|
152
|
+
|
153
|
+
# :call-seq:
|
154
|
+
# xpath_doctype() → Nokogiri::CSS::XPathVisitor::DoctypeConfig
|
155
|
+
#
|
156
|
+
# [Returns] The document type which determines CSS-to-XPath translation.
|
157
|
+
#
|
158
|
+
# See XPathVisitor for more information.
|
159
|
+
def xpath_doctype
|
160
|
+
Nokogiri::CSS::XPathVisitor::DoctypeConfig::HTML4
|
155
161
|
end
|
156
162
|
|
157
163
|
class << self
|
@@ -163,15 +169,14 @@ module Nokogiri
|
|
163
169
|
# is a number that sets options in the parser, such as
|
164
170
|
# Nokogiri::XML::ParseOptions::RECOVER. See the constants in
|
165
171
|
# Nokogiri::XML::ParseOptions.
|
166
|
-
def parse
|
172
|
+
def parse(string_or_io, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML)
|
167
173
|
options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
|
168
|
-
|
169
174
|
yield options if block_given?
|
170
175
|
|
171
176
|
url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
|
172
177
|
|
173
178
|
if string_or_io.respond_to?(:encoding)
|
174
|
-
unless string_or_io.encoding
|
179
|
+
unless string_or_io.encoding == Encoding::ASCII_8BIT
|
175
180
|
encoding ||= string_or_io.encoding.name
|
176
181
|
end
|
177
182
|
end
|
@@ -184,21 +189,10 @@ module Nokogiri
|
|
184
189
|
end
|
185
190
|
|
186
191
|
unless encoding
|
187
|
-
# Libxml2's parser has poor support for encoding
|
188
|
-
# detection. First, it does not recognize the HTML5
|
189
|
-
# style meta charset declaration. Secondly, even if it
|
190
|
-
# successfully detects an encoding hint, it does not
|
191
|
-
# re-decode or re-parse the preceding part which may be
|
192
|
-
# garbled.
|
193
|
-
#
|
194
|
-
# EncodingReader aims to perform advanced encoding
|
195
|
-
# detection beyond what Libxml2 does, and to emulate
|
196
|
-
# rewinding of a stream and make Libxml2 redo parsing
|
197
|
-
# from the start when an encoding hint is found.
|
198
192
|
string_or_io = EncodingReader.new(string_or_io)
|
199
193
|
begin
|
200
194
|
return read_io(string_or_io, url, encoding, options.to_i)
|
201
|
-
rescue EncodingFound => e
|
195
|
+
rescue EncodingReader::EncodingFound => e
|
202
196
|
encoding = e.found_encoding
|
203
197
|
end
|
204
198
|
end
|
@@ -206,7 +200,7 @@ module Nokogiri
|
|
206
200
|
end
|
207
201
|
|
208
202
|
# read_memory pukes on empty docs
|
209
|
-
if string_or_io.nil?
|
203
|
+
if string_or_io.nil? || string_or_io.empty?
|
210
204
|
return encoding ? new.tap { |i| i.encoding = encoding } : new
|
211
205
|
end
|
212
206
|
|
@@ -215,108 +209,6 @@ module Nokogiri
|
|
215
209
|
read_memory(string_or_io, url, encoding, options.to_i)
|
216
210
|
end
|
217
211
|
end
|
218
|
-
|
219
|
-
class EncodingFound < StandardError # :nodoc:
|
220
|
-
attr_reader :found_encoding
|
221
|
-
|
222
|
-
def initialize(encoding)
|
223
|
-
@found_encoding = encoding
|
224
|
-
super("encoding found: %s" % encoding)
|
225
|
-
end
|
226
|
-
end
|
227
|
-
|
228
|
-
class EncodingReader # :nodoc:
|
229
|
-
class SAXHandler < Nokogiri::XML::SAX::Document # :nodoc:
|
230
|
-
attr_reader :encoding
|
231
|
-
|
232
|
-
def initialize
|
233
|
-
@encoding = nil
|
234
|
-
super()
|
235
|
-
end
|
236
|
-
|
237
|
-
def start_element(name, attrs = [])
|
238
|
-
return unless name == 'meta'
|
239
|
-
attr = Hash[attrs]
|
240
|
-
charset = attr['charset'] and
|
241
|
-
@encoding = charset
|
242
|
-
http_equiv = attr['http-equiv'] and
|
243
|
-
http_equiv.match(/\AContent-Type\z/i) and
|
244
|
-
content = attr['content'] and
|
245
|
-
m = content.match(/;\s*charset\s*=\s*([\w-]+)/) and
|
246
|
-
@encoding = m[1]
|
247
|
-
end
|
248
|
-
end
|
249
|
-
|
250
|
-
class JumpSAXHandler < SAXHandler
|
251
|
-
def initialize(jumptag)
|
252
|
-
@jumptag = jumptag
|
253
|
-
super()
|
254
|
-
end
|
255
|
-
|
256
|
-
def start_element(name, attrs = [])
|
257
|
-
super
|
258
|
-
throw @jumptag, @encoding if @encoding
|
259
|
-
throw @jumptag, nil if name =~ /\A(?:div|h1|img|p|br)\z/
|
260
|
-
end
|
261
|
-
end
|
262
|
-
|
263
|
-
def self.detect_encoding(chunk)
|
264
|
-
m = chunk.match(/\A(<\?xml[ \t\r\n]+[^>]*>)/) and
|
265
|
-
return Nokogiri.XML(m[1]).encoding
|
266
|
-
|
267
|
-
if Nokogiri.jruby?
|
268
|
-
m = chunk.match(/(<meta\s)(.*)(charset\s*=\s*([\w-]+))(.*)/i) and
|
269
|
-
return m[4]
|
270
|
-
catch(:encoding_found) {
|
271
|
-
Nokogiri::HTML4::SAX::Parser.new(JumpSAXHandler.new(:encoding_found)).parse(chunk)
|
272
|
-
nil
|
273
|
-
}
|
274
|
-
else
|
275
|
-
handler = SAXHandler.new
|
276
|
-
parser = Nokogiri::HTML4::SAX::PushParser.new(handler)
|
277
|
-
parser << chunk rescue Nokogiri::SyntaxError
|
278
|
-
handler.encoding
|
279
|
-
end
|
280
|
-
end
|
281
|
-
|
282
|
-
def initialize(io)
|
283
|
-
@io = io
|
284
|
-
@firstchunk = nil
|
285
|
-
@encoding_found = nil
|
286
|
-
end
|
287
|
-
|
288
|
-
# This method is used by the C extension so that
|
289
|
-
# Nokogiri::HTML4::Document#read_io() does not leak memory when
|
290
|
-
# EncodingFound is raised.
|
291
|
-
attr_reader :encoding_found
|
292
|
-
|
293
|
-
def read(len)
|
294
|
-
# no support for a call without len
|
295
|
-
|
296
|
-
if !@firstchunk
|
297
|
-
@firstchunk = @io.read(len) or return nil
|
298
|
-
|
299
|
-
# This implementation expects that the first call from
|
300
|
-
# htmlReadIO() is made with a length long enough (~1KB) to
|
301
|
-
# achieve advanced encoding detection.
|
302
|
-
if encoding = EncodingReader.detect_encoding(@firstchunk)
|
303
|
-
# The first chunk is stored for the next read in retry.
|
304
|
-
raise @encoding_found = EncodingFound.new(encoding)
|
305
|
-
end
|
306
|
-
end
|
307
|
-
@encoding_found = nil
|
308
|
-
|
309
|
-
ret = @firstchunk.slice!(0, len)
|
310
|
-
if (len -= ret.length) > 0
|
311
|
-
rest = @io.read(len) and ret << rest
|
312
|
-
end
|
313
|
-
if ret.empty?
|
314
|
-
nil
|
315
|
-
else
|
316
|
-
ret
|
317
|
-
end
|
318
|
-
end
|
319
|
-
end
|
320
212
|
end
|
321
213
|
end
|
322
214
|
end
|
@@ -1,34 +1,38 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
+
|
2
3
|
module Nokogiri
|
3
4
|
module HTML4
|
4
5
|
class DocumentFragment < Nokogiri::XML::DocumentFragment
|
5
6
|
####
|
6
7
|
# Create a Nokogiri::XML::DocumentFragment from +tags+, using +encoding+
|
7
|
-
def self.parse(tags, encoding = nil)
|
8
|
+
def self.parse(tags, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block)
|
8
9
|
doc = HTML4::Document.new
|
9
10
|
|
10
11
|
encoding ||= if tags.respond_to?(:encoding)
|
11
12
|
encoding = tags.encoding
|
12
13
|
if encoding == ::Encoding::ASCII_8BIT
|
13
|
-
|
14
|
+
"UTF-8"
|
14
15
|
else
|
15
16
|
encoding.name
|
16
17
|
end
|
17
18
|
else
|
18
|
-
|
19
|
+
"UTF-8"
|
19
20
|
end
|
20
21
|
|
21
22
|
doc.encoding = encoding
|
22
23
|
|
23
|
-
new(doc, tags)
|
24
|
+
new(doc, tags, nil, options, &block)
|
24
25
|
end
|
25
26
|
|
26
|
-
def initialize(document, tags = nil, ctx = nil)
|
27
|
+
def initialize(document, tags = nil, ctx = nil, options = XML::ParseOptions::DEFAULT_HTML)
|
27
28
|
return self unless tags
|
28
29
|
|
30
|
+
options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
|
31
|
+
yield options if block_given?
|
32
|
+
|
29
33
|
if ctx
|
30
34
|
preexisting_errors = document.errors.dup
|
31
|
-
node_set = ctx.parse("<div>#{tags}</div>")
|
35
|
+
node_set = ctx.parse("<div>#{tags}</div>", options)
|
32
36
|
node_set.first.children.each { |child| child.parent = self } unless node_set.empty?
|
33
37
|
self.errors = document.errors - preexisting_errors
|
34
38
|
else
|
@@ -39,7 +43,7 @@ module Nokogiri
|
|
39
43
|
"/html/body/node()"
|
40
44
|
end
|
41
45
|
|
42
|
-
temp_doc = HTML4::Document.parse("<html><body>#{tags}", nil, document.encoding)
|
46
|
+
temp_doc = HTML4::Document.parse("<html><body>#{tags}", nil, document.encoding, options)
|
43
47
|
temp_doc.xpath(path).each { |child| child.parent = self }
|
44
48
|
self.errors = temp_doc.errors
|
45
49
|
end
|