nokogiri 1.12.2 → 1.13.0
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +2 -0
- data/README.md +9 -7
- data/bin/nokogiri +63 -50
- data/dependencies.yml +5 -6
- data/ext/nokogiri/extconf.rb +51 -35
- data/ext/nokogiri/gumbo.c +11 -11
- data/ext/nokogiri/html4_element_description.c +1 -1
- data/ext/nokogiri/html4_sax_parser_context.c +2 -1
- data/ext/nokogiri/nokogiri.c +1 -1
- data/ext/nokogiri/nokogiri.h +3 -0
- data/ext/nokogiri/xml_document.c +36 -36
- data/ext/nokogiri/xml_document_fragment.c +0 -2
- data/ext/nokogiri/xml_dtd.c +2 -2
- data/ext/nokogiri/xml_encoding_handler.c +25 -11
- data/ext/nokogiri/xml_namespace.c +2 -2
- data/ext/nokogiri/xml_node.c +647 -335
- data/ext/nokogiri/xml_reader.c +37 -11
- data/ext/nokogiri/xml_xpath_context.c +72 -49
- data/gumbo-parser/src/parser.c +0 -11
- data/lib/nokogiri/class_resolver.rb +67 -0
- data/lib/nokogiri/css/node.rb +9 -8
- data/lib/nokogiri/css/parser.rb +11 -3
- data/lib/nokogiri/css/parser.y +10 -2
- data/lib/nokogiri/css/parser_extras.rb +20 -20
- data/lib/nokogiri/css/syntax_error.rb +1 -0
- data/lib/nokogiri/css/tokenizer.rb +2 -1
- data/lib/nokogiri/css/tokenizer.rex +2 -1
- data/lib/nokogiri/css/xpath_visitor.rb +174 -75
- data/lib/nokogiri/css.rb +38 -6
- data/lib/nokogiri/decorators/slop.rb +8 -7
- data/lib/nokogiri/extension.rb +1 -1
- data/lib/nokogiri/gumbo.rb +1 -0
- data/lib/nokogiri/html.rb +16 -10
- data/lib/nokogiri/html4/builder.rb +1 -0
- data/lib/nokogiri/html4/document.rb +84 -75
- data/lib/nokogiri/html4/document_fragment.rb +11 -7
- data/lib/nokogiri/html4/element_description.rb +1 -0
- data/lib/nokogiri/html4/element_description_defaults.rb +426 -520
- data/lib/nokogiri/html4/entity_lookup.rb +2 -1
- data/lib/nokogiri/html4/sax/parser.rb +2 -1
- data/lib/nokogiri/html4/sax/parser_context.rb +1 -0
- data/lib/nokogiri/html4/sax/push_parser.rb +7 -7
- data/lib/nokogiri/html4.rb +11 -5
- data/lib/nokogiri/html5/document.rb +24 -10
- data/lib/nokogiri/html5/document_fragment.rb +5 -2
- data/lib/nokogiri/html5/node.rb +6 -3
- data/lib/nokogiri/html5.rb +68 -64
- data/lib/nokogiri/jruby/dependencies.rb +10 -9
- data/lib/nokogiri/syntax_error.rb +1 -0
- data/lib/nokogiri/version/constant.rb +2 -1
- data/lib/nokogiri/version/info.rb +19 -13
- data/lib/nokogiri/version.rb +1 -0
- data/lib/nokogiri/xml/attr.rb +5 -3
- data/lib/nokogiri/xml/attribute_decl.rb +2 -1
- data/lib/nokogiri/xml/builder.rb +69 -31
- data/lib/nokogiri/xml/cdata.rb +2 -1
- data/lib/nokogiri/xml/character_data.rb +1 -0
- data/lib/nokogiri/xml/document.rb +178 -96
- data/lib/nokogiri/xml/document_fragment.rb +41 -38
- data/lib/nokogiri/xml/dtd.rb +3 -2
- data/lib/nokogiri/xml/element_content.rb +1 -0
- data/lib/nokogiri/xml/element_decl.rb +2 -1
- data/lib/nokogiri/xml/entity_decl.rb +3 -2
- data/lib/nokogiri/xml/entity_reference.rb +1 -0
- data/lib/nokogiri/xml/namespace.rb +2 -0
- data/lib/nokogiri/xml/node/save_options.rb +7 -4
- data/lib/nokogiri/xml/node.rb +512 -348
- data/lib/nokogiri/xml/node_set.rb +46 -54
- data/lib/nokogiri/xml/notation.rb +12 -0
- data/lib/nokogiri/xml/parse_options.rb +11 -7
- data/lib/nokogiri/xml/pp/character_data.rb +8 -6
- data/lib/nokogiri/xml/pp/node.rb +24 -26
- data/lib/nokogiri/xml/pp.rb +1 -0
- data/lib/nokogiri/xml/processing_instruction.rb +2 -1
- data/lib/nokogiri/xml/reader.rb +17 -19
- data/lib/nokogiri/xml/relax_ng.rb +1 -0
- data/lib/nokogiri/xml/sax/document.rb +20 -19
- data/lib/nokogiri/xml/sax/parser.rb +36 -34
- data/lib/nokogiri/xml/sax/parser_context.rb +7 -3
- data/lib/nokogiri/xml/sax/push_parser.rb +5 -5
- data/lib/nokogiri/xml/sax.rb +1 -0
- data/lib/nokogiri/xml/schema.rb +7 -6
- data/lib/nokogiri/xml/searchable.rb +42 -22
- data/lib/nokogiri/xml/syntax_error.rb +4 -4
- data/lib/nokogiri/xml/text.rb +1 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +2 -1
- data/lib/nokogiri/xml/xpath.rb +12 -0
- data/lib/nokogiri/xml/xpath_context.rb +2 -3
- data/lib/nokogiri/xml.rb +3 -3
- data/lib/nokogiri/xslt/stylesheet.rb +1 -0
- data/lib/nokogiri/xslt.rb +3 -2
- data/lib/nokogiri.rb +19 -16
- data/lib/xsd/xmlparser/nokogiri.rb +25 -24
- data/patches/libxml2/0008-htmlParseComment-handle-abruptly-closed-comments.patch +61 -0
- data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
- metadata +101 -27
@@ -1,4 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
+
|
2
3
|
module Nokogiri
|
3
4
|
module HTML4
|
4
5
|
class EntityDescription < Struct.new(:value, :name, :description); end
|
@@ -6,7 +7,7 @@ module Nokogiri
|
|
6
7
|
class EntityLookup
|
7
8
|
###
|
8
9
|
# Look up entity with +name+
|
9
|
-
def []
|
10
|
+
def [](name)
|
10
11
|
(val = get(name)) && val.value
|
11
12
|
end
|
12
13
|
end
|
@@ -1,4 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
+
|
2
3
|
module Nokogiri
|
3
4
|
module HTML4
|
4
5
|
###
|
@@ -28,7 +29,7 @@ module Nokogiri
|
|
28
29
|
# Parse html stored in +data+ using +encoding+
|
29
30
|
def parse_memory(data, encoding = "UTF-8")
|
30
31
|
raise ArgumentError unless data
|
31
|
-
return
|
32
|
+
return if data.empty?
|
32
33
|
ctx = ParserContext.memory(data, encoding)
|
33
34
|
yield ctx if block_given?
|
34
35
|
ctx.parse_with(self)
|
@@ -1,14 +1,14 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
+
|
2
3
|
module Nokogiri
|
3
4
|
module HTML4
|
4
5
|
module SAX
|
5
6
|
class PushParser
|
6
|
-
|
7
7
|
# The Nokogiri::HTML4::SAX::Document on which the PushParser will be
|
8
8
|
# operating
|
9
9
|
attr_accessor :document
|
10
|
-
|
11
|
-
def initialize(doc = HTML4::SAX::Document.new, file_name = nil, encoding =
|
10
|
+
|
11
|
+
def initialize(doc = HTML4::SAX::Document.new, file_name = nil, encoding = "UTF-8")
|
12
12
|
@document = doc
|
13
13
|
@encoding = encoding
|
14
14
|
@sax_parser = HTML4::SAX::Parser.new(doc, @encoding)
|
@@ -16,20 +16,20 @@ module Nokogiri
|
|
16
16
|
## Create our push parser context
|
17
17
|
initialize_native(@sax_parser, file_name, encoding)
|
18
18
|
end
|
19
|
-
|
19
|
+
|
20
20
|
###
|
21
21
|
# Write a +chunk+ of HTML to the PushParser. Any callback methods
|
22
22
|
# that can be called will be called immediately.
|
23
|
-
def write
|
23
|
+
def write(chunk, last_chunk = false)
|
24
24
|
native_write(chunk, last_chunk)
|
25
25
|
end
|
26
|
-
|
26
|
+
alias_method :<<, :write
|
27
27
|
|
28
28
|
###
|
29
29
|
# Finish the parsing. This method is only necessary for
|
30
30
|
# Nokogiri::HTML4::SAX::Document#end_document to be called.
|
31
31
|
def finish
|
32
|
-
write
|
32
|
+
write("", true)
|
33
33
|
end
|
34
34
|
end
|
35
35
|
end
|
data/lib/nokogiri/html4.rb
CHANGED
@@ -1,15 +1,21 @@
|
|
1
|
+
# coding: utf-8
|
1
2
|
# frozen_string_literal: true
|
3
|
+
|
2
4
|
module Nokogiri
|
3
5
|
class << self
|
4
|
-
|
6
|
+
# :call-seq:
|
7
|
+
# HTML4(input, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block) → Nokogiri::HTML4::Document
|
8
|
+
#
|
5
9
|
# Parse HTML. Convenience method for Nokogiri::HTML4::Document.parse
|
6
10
|
def HTML4(input, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block)
|
7
11
|
Nokogiri::HTML4::Document.parse(input, url, encoding, options, &block)
|
8
12
|
end
|
9
13
|
end
|
10
14
|
|
11
|
-
#
|
12
|
-
#
|
15
|
+
# Since v1.12.0
|
16
|
+
#
|
17
|
+
# 💡 Before v1.12.0, Nokogiri::HTML4 did not exist, and Nokogiri::HTML was the module/namespace
|
18
|
+
# for parsing HTML.
|
13
19
|
module HTML4
|
14
20
|
class << self
|
15
21
|
###
|
@@ -20,8 +26,8 @@ module Nokogiri
|
|
20
26
|
|
21
27
|
####
|
22
28
|
# Parse a fragment from +string+ in to a NodeSet.
|
23
|
-
def fragment(string, encoding = nil)
|
24
|
-
HTML4::DocumentFragment.parse(string, encoding)
|
29
|
+
def fragment(string, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block)
|
30
|
+
HTML4::DocumentFragment.parse(string, encoding, options, &block)
|
25
31
|
end
|
26
32
|
end
|
27
33
|
|
@@ -1,4 +1,6 @@
|
|
1
|
+
# coding: utf-8
|
1
2
|
# frozen_string_literal: true
|
3
|
+
|
2
4
|
#
|
3
5
|
# Copyright 2013-2021 Sam Ruby, Stephen Checkoway
|
4
6
|
#
|
@@ -19,14 +21,15 @@ require_relative "../html4/document"
|
|
19
21
|
|
20
22
|
module Nokogiri
|
21
23
|
module HTML5
|
22
|
-
#
|
23
|
-
#
|
24
|
+
# Since v1.12.0
|
25
|
+
#
|
26
|
+
# 💡 HTML5 functionality is not available when running JRuby.
|
24
27
|
class Document < Nokogiri::HTML4::Document
|
25
28
|
def self.parse(string_or_io, url = nil, encoding = nil, **options, &block)
|
26
|
-
yield options if
|
27
|
-
|
29
|
+
yield options if block
|
30
|
+
string_or_io = "" unless string_or_io
|
28
31
|
|
29
|
-
if string_or_io.respond_to?(:encoding) && string_or_io.encoding.name !=
|
32
|
+
if string_or_io.respond_to?(:encoding) && string_or_io.encoding.name != "ASCII-8BIT"
|
30
33
|
encoding ||= string_or_io.encoding.name
|
31
34
|
end
|
32
35
|
|
@@ -34,23 +37,23 @@ module Nokogiri
|
|
34
37
|
url ||= string_or_io.path
|
35
38
|
end
|
36
39
|
unless string_or_io.respond_to?(:read) || string_or_io.respond_to?(:to_str)
|
37
|
-
raise ArgumentError
|
40
|
+
raise ArgumentError, "not a string or IO object"
|
38
41
|
end
|
39
42
|
do_parse(string_or_io, url, encoding, options)
|
40
43
|
end
|
41
44
|
|
42
45
|
def self.read_io(io, url = nil, encoding = nil, **options)
|
43
|
-
raise ArgumentError
|
46
|
+
raise ArgumentError, "io object doesn't respond to :read" unless io.respond_to?(:read)
|
44
47
|
do_parse(io, url, encoding, options)
|
45
48
|
end
|
46
49
|
|
47
50
|
def self.read_memory(string, url = nil, encoding = nil, **options)
|
48
|
-
raise ArgumentError
|
51
|
+
raise ArgumentError, "string object doesn't respond to :to_str" unless string.respond_to?(:to_str)
|
49
52
|
do_parse(string, url, encoding, options)
|
50
53
|
end
|
51
54
|
|
52
55
|
def fragment(tags = nil)
|
53
|
-
DocumentFragment.new(self, tags,
|
56
|
+
DocumentFragment.new(self, tags, root)
|
54
57
|
end
|
55
58
|
|
56
59
|
def to_xml(options = {}, &block)
|
@@ -59,14 +62,25 @@ module Nokogiri
|
|
59
62
|
XML::Node.instance_method(:to_xml).bind(self).call(options, &block)
|
60
63
|
end
|
61
64
|
|
65
|
+
# :call-seq:
|
66
|
+
# xpath_doctype() → Nokogiri::CSS::XPathVisitor::DoctypeConfig
|
67
|
+
#
|
68
|
+
# [Returns] The document type which determines CSS-to-XPath translation.
|
69
|
+
#
|
70
|
+
# See XPathVisitor for more information.
|
71
|
+
def xpath_doctype
|
72
|
+
Nokogiri::CSS::XPathVisitor::DoctypeConfig::HTML5
|
73
|
+
end
|
74
|
+
|
62
75
|
private
|
76
|
+
|
63
77
|
def self.do_parse(string_or_io, url, encoding, options)
|
64
78
|
string = HTML5.read_and_encode(string_or_io, encoding)
|
65
79
|
max_attributes = options[:max_attributes] || Nokogiri::Gumbo::DEFAULT_MAX_ATTRIBUTES
|
66
80
|
max_errors = options[:max_errors] || options[:max_parse_errors] || Nokogiri::Gumbo::DEFAULT_MAX_ERRORS
|
67
81
|
max_depth = options[:max_tree_depth] || Nokogiri::Gumbo::DEFAULT_MAX_TREE_DEPTH
|
68
82
|
doc = Nokogiri::Gumbo.parse(string, url, max_attributes, max_errors, max_depth)
|
69
|
-
doc.encoding =
|
83
|
+
doc.encoding = "UTF-8"
|
70
84
|
doc
|
71
85
|
end
|
72
86
|
end
|
@@ -1,4 +1,6 @@
|
|
1
|
+
# coding: utf-8
|
1
2
|
# frozen_string_literal: true
|
3
|
+
|
2
4
|
#
|
3
5
|
# Copyright 2013-2021 Sam Ruby, Stephen Checkoway
|
4
6
|
#
|
@@ -19,8 +21,9 @@ require_relative "../html4/document_fragment"
|
|
19
21
|
|
20
22
|
module Nokogiri
|
21
23
|
module HTML5
|
22
|
-
#
|
23
|
-
#
|
24
|
+
# Since v1.12.0
|
25
|
+
#
|
26
|
+
# 💡 HTML5 functionality is not available when running JRuby.
|
24
27
|
class DocumentFragment < Nokogiri::HTML4::DocumentFragment
|
25
28
|
attr_accessor :document
|
26
29
|
attr_accessor :errors
|
data/lib/nokogiri/html5/node.rb
CHANGED
@@ -1,4 +1,6 @@
|
|
1
|
+
# coding: utf-8
|
1
2
|
# frozen_string_literal: true
|
3
|
+
|
2
4
|
#
|
3
5
|
# Copyright 2013-2021 Sam Ruby, Stephen Checkoway
|
4
6
|
#
|
@@ -19,12 +21,13 @@ require_relative "../xml/node"
|
|
19
21
|
|
20
22
|
module Nokogiri
|
21
23
|
module HTML5
|
22
|
-
#
|
23
|
-
#
|
24
|
+
# Since v1.12.0
|
25
|
+
#
|
26
|
+
# 💡 HTML5 functionality is not available when running JRuby.
|
24
27
|
module Node
|
25
28
|
def inner_html(options = {})
|
26
29
|
return super(options) unless document.is_a?(HTML5::Document)
|
27
|
-
result = options[:preserve_newline] && HTML5.prepend_newline?(self) ?
|
30
|
+
result = options[:preserve_newline] && HTML5.prepend_newline?(self) ? +"\n" : +""
|
28
31
|
result << children.map { |child| child.to_html(options) }.join
|
29
32
|
result
|
30
33
|
end
|
data/lib/nokogiri/html5.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
# coding: utf-8
|
2
2
|
# frozen_string_literal: true
|
3
|
+
|
3
4
|
#
|
4
5
|
# Copyright 2013-2021 Sam Ruby, Stephen Checkoway
|
5
6
|
#
|
@@ -16,13 +17,15 @@
|
|
16
17
|
# limitations under the License.
|
17
18
|
#
|
18
19
|
|
19
|
-
require_relative
|
20
|
-
require_relative
|
21
|
-
require_relative
|
20
|
+
require_relative "html5/document"
|
21
|
+
require_relative "html5/document_fragment"
|
22
|
+
require_relative "html5/node"
|
22
23
|
|
23
24
|
module Nokogiri
|
24
|
-
#
|
25
|
-
#
|
25
|
+
# Since v1.12.0
|
26
|
+
#
|
27
|
+
# ⚠ HTML5 functionality is not available when running JRuby.
|
28
|
+
#
|
26
29
|
# Parse an HTML5 document. Convenience method for {Nokogiri::HTML5::Document.parse}
|
27
30
|
def self.HTML5(input, url = nil, encoding = nil, **options, &block)
|
28
31
|
Nokogiri::HTML5::Document.parse(input, url, encoding, **options, &block)
|
@@ -30,6 +33,8 @@ module Nokogiri
|
|
30
33
|
|
31
34
|
# == Usage
|
32
35
|
#
|
36
|
+
# ⚠ HTML5 functionality is not available when running JRuby.
|
37
|
+
#
|
33
38
|
# Parse an HTML5 document:
|
34
39
|
#
|
35
40
|
# doc = Nokogiri.HTML5(string)
|
@@ -220,16 +225,15 @@ module Nokogiri
|
|
220
225
|
# * Instead of returning +unknown+ as the element name for unknown tags, the
|
221
226
|
# original tag name is returned verbatim.
|
222
227
|
#
|
223
|
-
#
|
224
|
-
# @note HTML5 functionality is not available when running JRuby.
|
228
|
+
# Since v1.12.0
|
225
229
|
module HTML5
|
226
230
|
# HTML uses the XHTML namespace.
|
227
|
-
HTML_NAMESPACE =
|
228
|
-
MATHML_NAMESPACE =
|
229
|
-
SVG_NAMESPACE =
|
230
|
-
XLINK_NAMESPACE =
|
231
|
-
XML_NAMESPACE =
|
232
|
-
XMLNS_NAMESPACE =
|
231
|
+
HTML_NAMESPACE = "http://www.w3.org/1999/xhtml"
|
232
|
+
MATHML_NAMESPACE = "http://www.w3.org/1998/Math/MathML"
|
233
|
+
SVG_NAMESPACE = "http://www.w3.org/2000/svg"
|
234
|
+
XLINK_NAMESPACE = "http://www.w3.org/1999/xlink"
|
235
|
+
XML_NAMESPACE = "http://www.w3.org/XML/1998/namespace"
|
236
|
+
XMLNS_NAMESPACE = "http://www.w3.org/2000/xmlns/"
|
233
237
|
|
234
238
|
# Parse an HTML 5 document. Convenience method for {Nokogiri::HTML5::Document.parse}
|
235
239
|
def self.parse(string, url = nil, encoding = nil, **options, &block)
|
@@ -249,34 +253,35 @@ module Nokogiri
|
|
249
253
|
# special option is considered a header. Special options include:
|
250
254
|
# * :follow_limit => number of redirects which are followed
|
251
255
|
# * :basic_auth => [username, password]
|
252
|
-
def self.get(uri, options={})
|
256
|
+
def self.get(uri, options = {})
|
257
|
+
# TODO: deprecate
|
253
258
|
warn("Nokogiri::HTML5.get is deprecated and will be removed in a future version of Nokogiri.",
|
254
|
-
|
259
|
+
uplevel: 1, category: :deprecated)
|
255
260
|
get_impl(uri, options)
|
256
261
|
end
|
257
262
|
|
258
263
|
private
|
259
264
|
|
260
|
-
def self.get_impl(uri, options={})
|
265
|
+
def self.get_impl(uri, options = {})
|
261
266
|
headers = options.clone
|
262
|
-
headers = {:
|
263
|
-
limit=headers[:follow_limit] ? headers.delete(:follow_limit).to_i : 10
|
267
|
+
headers = { follow_limit: headers } if Numeric === headers # deprecated
|
268
|
+
limit = headers[:follow_limit] ? headers.delete(:follow_limit).to_i : 10
|
264
269
|
|
265
|
-
require
|
270
|
+
require "net/http"
|
266
271
|
uri = URI(uri) unless URI === uri
|
267
272
|
|
268
273
|
http = Net::HTTP.new(uri.host, uri.port)
|
269
274
|
|
270
275
|
# TLS / SSL support
|
271
|
-
http.use_ssl = true if uri.scheme ==
|
276
|
+
http.use_ssl = true if uri.scheme == "https"
|
272
277
|
|
273
278
|
# Pass through Net::HTTP override values, which currently include:
|
274
279
|
# :ca_file, :ca_path, :cert, :cert_store, :ciphers,
|
275
280
|
# :close_on_empty_response, :continue_timeout, :key, :open_timeout,
|
276
281
|
# :read_timeout, :ssl_timeout, :ssl_version, :use_ssl,
|
277
282
|
# :verify_callback, :verify_depth, :verify_mode
|
278
|
-
options.each do |key,
|
279
|
-
http.send
|
283
|
+
options.each do |key, _value|
|
284
|
+
http.send("#{key}=", headers.delete(key)) if http.respond_to?("#{key}=")
|
280
285
|
end
|
281
286
|
|
282
287
|
request = Net::HTTP::Get.new(uri.request_uri)
|
@@ -284,23 +289,23 @@ module Nokogiri
|
|
284
289
|
# basic authentication
|
285
290
|
auth = headers.delete(:basic_auth)
|
286
291
|
auth ||= [uri.user, uri.password] if uri.user && uri.password
|
287
|
-
request.basic_auth
|
292
|
+
request.basic_auth(auth.first, auth.last) if auth
|
288
293
|
|
289
294
|
# remaining options are treated as headers
|
290
|
-
headers.each {|key, value| request[key.to_s] = value.to_s}
|
295
|
+
headers.each { |key, value| request[key.to_s] = value.to_s }
|
291
296
|
|
292
297
|
response = http.request(request)
|
293
298
|
|
294
299
|
case response
|
295
300
|
when Net::HTTPSuccess
|
296
|
-
doc = parse(reencode(response.body, response[
|
297
|
-
doc.instance_variable_set(
|
301
|
+
doc = parse(reencode(response.body, response["content-type"]), options)
|
302
|
+
doc.instance_variable_set("@response", response)
|
298
303
|
doc.class.send(:attr_reader, :response)
|
299
304
|
doc
|
300
305
|
when Net::HTTPRedirection
|
301
306
|
response.value if limit <= 1
|
302
|
-
location = URI.join(uri, response[
|
303
|
-
get_impl(location, options.merge(:
|
307
|
+
location = URI.join(uri, response["location"])
|
308
|
+
get_impl(location, options.merge(follow_limit: limit - 1))
|
304
309
|
else
|
305
310
|
response.value
|
306
311
|
end
|
@@ -309,10 +314,10 @@ module Nokogiri
|
|
309
314
|
def self.read_and_encode(string, encoding)
|
310
315
|
# Read the string with the given encoding.
|
311
316
|
if string.respond_to?(:read)
|
312
|
-
if encoding.nil?
|
313
|
-
string
|
317
|
+
string = if encoding.nil?
|
318
|
+
string.read
|
314
319
|
else
|
315
|
-
string
|
320
|
+
string.read(encoding: encoding)
|
316
321
|
end
|
317
322
|
else
|
318
323
|
# Otherwise the string has the given encoding.
|
@@ -342,7 +347,7 @@ module Nokogiri
|
|
342
347
|
# http://bugs.ruby-lang.org/issues/2567
|
343
348
|
# http://www.w3.org/TR/html5/syntax.html#determining-the-character-encoding
|
344
349
|
#
|
345
|
-
def self.reencode(body, content_type=nil)
|
350
|
+
def self.reencode(body, content_type = nil)
|
346
351
|
if body.encoding == Encoding::ASCII_8BIT
|
347
352
|
encoding = nil
|
348
353
|
|
@@ -362,8 +367,8 @@ module Nokogiri
|
|
362
367
|
end
|
363
368
|
|
364
369
|
# look for a charset in a meta tag in the first 1024 bytes
|
365
|
-
|
366
|
-
data = body[0..1023].gsub(/<!--.*?(-->|\Z)/m,
|
370
|
+
unless encoding
|
371
|
+
data = body[0..1023].gsub(/<!--.*?(-->|\Z)/m, "")
|
367
372
|
data.scan(/<meta.*?>/m).each do |meta|
|
368
373
|
encoding ||= meta[/charset=["']?([^>]*?)($|["'\s>])/im, 1]
|
369
374
|
end
|
@@ -390,57 +395,56 @@ module Nokogiri
|
|
390
395
|
ns = current_node.namespace
|
391
396
|
ns_uri = ns.nil? ? nil : ns.href
|
392
397
|
# XXX(sfc): attach namespaces to all nodes, even html?
|
393
|
-
if ns_uri.nil? || ns_uri == HTML_NAMESPACE || ns_uri == MATHML_NAMESPACE || ns_uri == SVG_NAMESPACE
|
394
|
-
|
398
|
+
tagname = if ns_uri.nil? || ns_uri == HTML_NAMESPACE || ns_uri == MATHML_NAMESPACE || ns_uri == SVG_NAMESPACE
|
399
|
+
current_node.name
|
395
400
|
else
|
396
|
-
|
401
|
+
"#{ns.prefix}:#{current_node.name}"
|
397
402
|
end
|
398
|
-
io <<
|
403
|
+
io << "<" << tagname
|
399
404
|
current_node.attribute_nodes.each do |attr|
|
400
405
|
attr_ns = attr.namespace
|
401
406
|
if attr_ns.nil?
|
402
407
|
attr_name = attr.name
|
403
408
|
else
|
404
409
|
ns_uri = attr_ns.href
|
405
|
-
if ns_uri == XML_NAMESPACE
|
406
|
-
|
407
|
-
elsif ns_uri == XMLNS_NAMESPACE && attr.name.sub(/^[^:]*:/,
|
408
|
-
|
410
|
+
attr_name = if ns_uri == XML_NAMESPACE
|
411
|
+
"xml:" + attr.name.sub(/^[^:]*:/, "")
|
412
|
+
elsif ns_uri == XMLNS_NAMESPACE && attr.name.sub(/^[^:]*:/, "") == "xmlns"
|
413
|
+
"xmlns"
|
409
414
|
elsif ns_uri == XMLNS_NAMESPACE
|
410
|
-
|
415
|
+
"xmlns:" + attr.name.sub(/^[^:]*:/, "")
|
411
416
|
elsif ns_uri == XLINK_NAMESPACE
|
412
|
-
|
417
|
+
"xlink:" + attr.name.sub(/^[^:]*:/, "")
|
413
418
|
else
|
414
|
-
|
419
|
+
"#{attr_ns.prefix}:#{attr.name}"
|
415
420
|
end
|
416
421
|
end
|
417
|
-
io <<
|
422
|
+
io << " " << attr_name << '="' << escape_text(attr.content, encoding, true) << '"'
|
418
423
|
end
|
419
|
-
io <<
|
420
|
-
|
421
|
-
link meta param source track wbr].include?(current_node.name)
|
424
|
+
io << ">"
|
425
|
+
unless ["area", "base", "basefont", "bgsound", "br", "col", "embed", "frame", "hr", "img", "input", "keygen", "link", "meta", "param", "source", "track", "wbr"].include?(current_node.name)
|
422
426
|
io << "\n" if options[:preserve_newline] && prepend_newline?(current_node)
|
423
427
|
current_node.children.each do |child|
|
424
428
|
# XXX(sfc): Templates handled specially?
|
425
429
|
serialize_node_internal(child, io, encoding, options)
|
426
430
|
end
|
427
|
-
io <<
|
431
|
+
io << "</" << tagname << ">"
|
428
432
|
end
|
429
433
|
when XML::Node::TEXT_NODE
|
430
434
|
parent = current_node.parent
|
431
|
-
if parent.element? &&
|
432
|
-
|
435
|
+
io << if parent.element? && ["style", "script", "xmp", "iframe", "noembed", "noframes", "plaintext", "noscript"].include?(parent.name)
|
436
|
+
current_node.content
|
433
437
|
else
|
434
|
-
|
438
|
+
escape_text(current_node.content, encoding, false)
|
435
439
|
end
|
436
440
|
when XML::Node::CDATA_SECTION_NODE
|
437
|
-
io <<
|
441
|
+
io << "<![CDATA[" << current_node.content << "]]>"
|
438
442
|
when XML::Node::COMMENT_NODE
|
439
|
-
io <<
|
443
|
+
io << "<!--" << current_node.content << "-->"
|
440
444
|
when XML::Node::PI_NODE
|
441
|
-
io <<
|
445
|
+
io << "<?" << current_node.content << ">"
|
442
446
|
when XML::Node::DOCUMENT_TYPE_NODE, XML::Node::DTD_NODE
|
443
|
-
|
447
|
+
io << "<!DOCTYPE " << current_node.name << ">"
|
444
448
|
when XML::Node::HTML_DOCUMENT_NODE, XML::Node::DOCUMENT_FRAG_NODE
|
445
449
|
current_node.children.each do |child|
|
446
450
|
serialize_node_internal(child, io, encoding, options)
|
@@ -451,23 +455,23 @@ module Nokogiri
|
|
451
455
|
end
|
452
456
|
|
453
457
|
def self.escape_text(text, encoding, attribute_mode)
|
454
|
-
if attribute_mode
|
455
|
-
text
|
456
|
-
|
458
|
+
text = if attribute_mode
|
459
|
+
text.gsub(/[&\u00a0"]/,
|
460
|
+
"&" => "&amp;", "\u00a0" => "&nbsp;", '"' => "&quot;")
|
457
461
|
else
|
458
|
-
text
|
459
|
-
|
462
|
+
text.gsub(/[&\u00a0<>]/,
|
463
|
+
"&" => "&amp;", "\u00a0" => "&nbsp;", "<" => "&lt;", ">" => "&gt;")
|
460
464
|
end
|
461
465
|
# Not part of the standard
|
462
466
|
text.encode(encoding, fallback: lambda { |c| "&\#x#{c.ord.to_s(16)};" })
|
463
467
|
end
|
464
468
|
|
465
469
|
def self.prepend_newline?(node)
|
466
|
-
return false unless
|
470
|
+
return false unless ["pre", "textarea", "listing"].include?(node.name) && !node.children.empty?
|
467
471
|
first_child = node.children[0]
|
468
472
|
first_child.text? && first_child.content.start_with?("\n")
|
469
473
|
end
|
470
474
|
end
|
471
475
|
end
|
472
476
|
|
473
|
-
require_relative
|
477
|
+
require_relative "gumbo"
|
@@ -1,4 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
+
|
2
3
|
# The line below caused a problem on non-GAE rack environment.
|
3
4
|
# unless defined?(JRuby::Rack::VERSION) || defined?(AppEngine::ApiProxy)
|
4
5
|
#
|
@@ -8,13 +9,13 @@
|
|
8
9
|
# should skip loading xml jars. This is because those are in WEB-INF/lib and
|
9
10
|
# already set in the classpath.
|
10
11
|
unless $LOAD_PATH.to_s.include?("appengine-rack")
|
11
|
-
require
|
12
|
-
require
|
13
|
-
require
|
14
|
-
require
|
15
|
-
require
|
16
|
-
require
|
17
|
-
require
|
18
|
-
require
|
19
|
-
require
|
12
|
+
require "stringio"
|
13
|
+
require "isorelax.jar"
|
14
|
+
require "jing.jar"
|
15
|
+
require "nekohtml.jar"
|
16
|
+
require "nekodtd.jar"
|
17
|
+
require "xercesImpl.jar"
|
18
|
+
require "serializer.jar"
|
19
|
+
require "xalan.jar"
|
20
|
+
require "xml-apis.jar"
|
20
21
|
end
|
@@ -1,4 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
+
|
2
3
|
require "singleton"
|
3
4
|
require "shellwords"
|
4
5
|
|
@@ -102,8 +103,8 @@ module Nokogiri
|
|
102
103
|
ldflags = []
|
103
104
|
|
104
105
|
if libxml2_using_packaged?
|
105
|
-
cppflags << "-I#{File.join(header_directory,
|
106
|
-
cppflags << "-I#{File.join(header_directory,
|
106
|
+
cppflags << "-I#{File.join(header_directory, "include").shellescape}"
|
107
|
+
cppflags << "-I#{File.join(header_directory, "include/libxml2").shellescape}"
|
107
108
|
|
108
109
|
if windows?
|
109
110
|
# on windows, nokogumbo needs to link against nokogiri.so to resolve symbols. see #2167
|
@@ -176,13 +177,9 @@ module Nokogiri
|
|
176
177
|
end
|
177
178
|
|
178
179
|
def to_markdown
|
179
|
-
begin
|
180
|
-
require "psych"
|
181
|
-
rescue LoadError
|
182
|
-
end
|
183
180
|
require "yaml"
|
184
181
|
"# Nokogiri (#{Nokogiri::VERSION})\n" +
|
185
|
-
|
182
|
+
YAML.dump(to_hash).each_line.map { |line| " #{line}" }.join
|
186
183
|
end
|
187
184
|
|
188
185
|
instance.warnings.each do |warning|
|
@@ -190,26 +187,35 @@ module Nokogiri
|
|
190
187
|
end
|
191
188
|
end
|
192
189
|
|
193
|
-
|
190
|
+
# :nodoc:
|
191
|
+
def self.uses_libxml?(requirement = nil)
|
194
192
|
return false unless VersionInfo.instance.libxml2?
|
195
193
|
return true unless requirement
|
196
194
|
Gem::Requirement.new(requirement).satisfied_by?(VersionInfo.instance.loaded_libxml_version)
|
197
195
|
end
|
198
196
|
|
197
|
+
# :nodoc:
|
199
198
|
def self.uses_gumbo?
|
200
199
|
uses_libxml? # TODO: replace with Gumbo functionality
|
201
200
|
end
|
202
201
|
|
203
|
-
|
202
|
+
# :nodoc:
|
203
|
+
def self.jruby?
|
204
204
|
VersionInfo.instance.jruby?
|
205
205
|
end
|
206
206
|
|
207
|
-
#
|
208
|
-
|
209
|
-
|
207
|
+
# :nodoc:
|
208
|
+
def self.libxml2_patches
|
209
|
+
if VersionInfo.instance.libxml2_using_packaged?
|
210
|
+
Nokogiri::VERSION_INFO["libxml"]["patches"]
|
211
|
+
else
|
212
|
+
[]
|
213
|
+
end
|
210
214
|
end
|
215
|
+
|
216
|
+
require_relative "../jruby/dependencies" if Nokogiri.jruby?
|
211
217
|
require_relative "../extension"
|
212
218
|
|
213
|
-
#
|
219
|
+
# Detailed version info about Nokogiri and the installed extension dependencies.
|
214
220
|
VERSION_INFO = VersionInfo.instance.to_hash
|
215
221
|
end
|
data/lib/nokogiri/version.rb
CHANGED