nokogiri 1.16.8 → 1.17.2
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +11 -21
- data/README.md +4 -0
- data/dependencies.yml +6 -6
- data/ext/nokogiri/extconf.rb +191 -137
- data/ext/nokogiri/gumbo.c +69 -53
- data/ext/nokogiri/html4_document.c +10 -4
- data/ext/nokogiri/html4_element_description.c +18 -18
- data/ext/nokogiri/html4_sax_parser.c +40 -0
- data/ext/nokogiri/html4_sax_parser_context.c +48 -58
- data/ext/nokogiri/html4_sax_push_parser.c +25 -24
- data/ext/nokogiri/libxml2_polyfill.c +114 -0
- data/ext/nokogiri/nokogiri.c +9 -2
- data/ext/nokogiri/nokogiri.h +18 -33
- data/ext/nokogiri/xml_attr.c +1 -1
- data/ext/nokogiri/xml_cdata.c +2 -10
- data/ext/nokogiri/xml_comment.c +3 -8
- data/ext/nokogiri/xml_document.c +163 -156
- data/ext/nokogiri/xml_document_fragment.c +10 -25
- data/ext/nokogiri/xml_dtd.c +1 -1
- data/ext/nokogiri/xml_element_content.c +9 -9
- data/ext/nokogiri/xml_encoding_handler.c +4 -4
- data/ext/nokogiri/xml_namespace.c +6 -6
- data/ext/nokogiri/xml_node.c +134 -103
- data/ext/nokogiri/xml_node_set.c +46 -44
- data/ext/nokogiri/xml_reader.c +54 -58
- data/ext/nokogiri/xml_relax_ng.c +35 -56
- data/ext/nokogiri/xml_sax_parser.c +156 -88
- data/ext/nokogiri/xml_sax_parser_context.c +213 -131
- data/ext/nokogiri/xml_sax_push_parser.c +68 -49
- data/ext/nokogiri/xml_schema.c +50 -85
- data/ext/nokogiri/xml_syntax_error.c +19 -11
- data/ext/nokogiri/xml_text.c +2 -4
- data/ext/nokogiri/xml_xpath_context.c +2 -2
- data/ext/nokogiri/xslt_stylesheet.c +8 -8
- data/gumbo-parser/src/error.c +76 -48
- data/gumbo-parser/src/error.h +5 -1
- data/gumbo-parser/src/nokogiri_gumbo.h +11 -2
- data/gumbo-parser/src/parser.c +61 -23
- data/gumbo-parser/src/tokenizer.c +6 -6
- data/lib/nokogiri/class_resolver.rb +1 -1
- data/lib/nokogiri/css/node.rb +6 -2
- data/lib/nokogiri/css/parser.rb +6 -4
- data/lib/nokogiri/css/parser.y +2 -2
- data/lib/nokogiri/css/parser_extras.rb +6 -66
- data/lib/nokogiri/css/selector_cache.rb +38 -0
- data/lib/nokogiri/css/tokenizer.rb +4 -4
- data/lib/nokogiri/css/tokenizer.rex +9 -8
- data/lib/nokogiri/css/xpath_visitor.rb +42 -6
- data/lib/nokogiri/css.rb +86 -20
- data/lib/nokogiri/decorators/slop.rb +3 -5
- data/lib/nokogiri/encoding_handler.rb +2 -2
- data/lib/nokogiri/html4/document.rb +44 -23
- data/lib/nokogiri/html4/document_fragment.rb +124 -12
- data/lib/nokogiri/html4/encoding_reader.rb +1 -1
- data/lib/nokogiri/html4/sax/parser.rb +23 -38
- data/lib/nokogiri/html4/sax/parser_context.rb +4 -9
- data/lib/nokogiri/html4.rb +9 -14
- data/lib/nokogiri/html5/builder.rb +40 -0
- data/lib/nokogiri/html5/document.rb +61 -30
- data/lib/nokogiri/html5/document_fragment.rb +130 -20
- data/lib/nokogiri/html5/node.rb +4 -4
- data/lib/nokogiri/html5.rb +114 -72
- data/lib/nokogiri/version/constant.rb +1 -1
- data/lib/nokogiri/xml/builder.rb +8 -1
- data/lib/nokogiri/xml/document.rb +70 -26
- data/lib/nokogiri/xml/document_fragment.rb +84 -13
- data/lib/nokogiri/xml/node.rb +82 -11
- data/lib/nokogiri/xml/node_set.rb +9 -7
- data/lib/nokogiri/xml/parse_options.rb +1 -1
- data/lib/nokogiri/xml/pp/node.rb +6 -1
- data/lib/nokogiri/xml/reader.rb +46 -13
- data/lib/nokogiri/xml/relax_ng.rb +57 -20
- data/lib/nokogiri/xml/sax/document.rb +174 -83
- data/lib/nokogiri/xml/sax/parser.rb +115 -41
- data/lib/nokogiri/xml/sax/parser_context.rb +116 -8
- data/lib/nokogiri/xml/sax/push_parser.rb +3 -0
- data/lib/nokogiri/xml/sax.rb +48 -0
- data/lib/nokogiri/xml/schema.rb +112 -45
- data/lib/nokogiri/xml/searchable.rb +6 -8
- data/lib/nokogiri/xml/syntax_error.rb +22 -0
- data/lib/nokogiri/xml.rb +13 -24
- data/lib/nokogiri/xslt.rb +3 -9
- data/lib/xsd/xmlparser/nokogiri.rb +3 -4
- data/ports/archives/libxml2-2.13.5.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.42.tar.xz +0 -0
- metadata +11 -8
- data/ext/nokogiri/libxml2_backwards_compat.c +0 -121
- data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +0 -25
- data/ports/archives/libxml2-2.12.9.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.39.tar.xz +0 -0
@@ -3,60 +3,45 @@
|
|
3
3
|
module Nokogiri
|
4
4
|
module HTML4
|
5
5
|
###
|
6
|
-
# Nokogiri
|
6
|
+
# Nokogiri provides a SAX parser to process HTML4 which will provide HTML recovery
|
7
|
+
# ("autocorrection") features.
|
7
8
|
#
|
8
9
|
# See Nokogiri::HTML4::SAX::Parser for a basic example of using a SAX parser with HTML.
|
9
10
|
#
|
10
11
|
# For more information on SAX parsers, see Nokogiri::XML::SAX
|
12
|
+
#
|
11
13
|
module SAX
|
12
14
|
###
|
13
|
-
# This
|
15
|
+
# This parser is a SAX style parser that reads its input as it deems necessary. The parser
|
16
|
+
# takes a Nokogiri::XML::SAX::Document, an optional encoding, then given an HTML input, sends
|
17
|
+
# messages to the Nokogiri::XML::SAX::Document.
|
18
|
+
#
|
19
|
+
# ⚠ This is an HTML4 parser and so may not support some HTML5 features and behaviors.
|
14
20
|
#
|
15
21
|
# Here is a basic usage example:
|
16
22
|
#
|
17
|
-
# class
|
23
|
+
# class MyHandler < Nokogiri::XML::SAX::Document
|
18
24
|
# def start_element name, attributes = []
|
19
25
|
# puts "found a #{name}"
|
20
26
|
# end
|
21
27
|
# end
|
22
28
|
#
|
23
|
-
# parser = Nokogiri::HTML4::SAX::Parser.new(
|
24
|
-
#
|
29
|
+
# parser = Nokogiri::HTML4::SAX::Parser.new(MyHandler.new)
|
30
|
+
#
|
31
|
+
# # Hand an IO object to the parser, which will read the HTML from the IO.
|
32
|
+
# File.open(path_to_html) do |f|
|
33
|
+
# parser.parse(f)
|
34
|
+
# end
|
35
|
+
#
|
36
|
+
# For more information on \SAX parsers, see Nokogiri::XML::SAX or the parent class
|
37
|
+
# Nokogiri::XML::SAX::Parser.
|
38
|
+
#
|
39
|
+
# Also see Nokogiri::XML::SAX::Document for the available events.
|
25
40
|
#
|
26
|
-
# For more information on SAX parsers, see Nokogiri::XML::SAX
|
27
41
|
class Parser < Nokogiri::XML::SAX::Parser
|
28
|
-
|
29
|
-
#
|
30
|
-
|
31
|
-
raise TypeError unless String === data
|
32
|
-
return if data.empty?
|
33
|
-
|
34
|
-
ctx = ParserContext.memory(data, encoding)
|
35
|
-
yield ctx if block_given?
|
36
|
-
ctx.parse_with(self)
|
37
|
-
end
|
38
|
-
|
39
|
-
###
|
40
|
-
# Parse given +io+
|
41
|
-
def parse_io(io, encoding = "UTF-8")
|
42
|
-
check_encoding(encoding)
|
43
|
-
@encoding = encoding
|
44
|
-
ctx = ParserContext.io(io, ENCODINGS[encoding])
|
45
|
-
yield ctx if block_given?
|
46
|
-
ctx.parse_with(self)
|
47
|
-
end
|
48
|
-
|
49
|
-
###
|
50
|
-
# Parse a file with +filename+
|
51
|
-
def parse_file(filename, encoding = "UTF-8")
|
52
|
-
raise ArgumentError unless filename
|
53
|
-
raise Errno::ENOENT unless File.exist?(filename)
|
54
|
-
raise Errno::EISDIR if File.directory?(filename)
|
55
|
-
|
56
|
-
ctx = ParserContext.file(filename, encoding)
|
57
|
-
yield ctx if block_given?
|
58
|
-
ctx.parse_with(self)
|
59
|
-
end
|
42
|
+
# this class inherits its behavior from Nokogiri::XML::SAX::Parser, but note that superclass
|
43
|
+
# uses Nokogiri::ClassResolver to use HTML4::SAX::ParserContext as the context class for
|
44
|
+
# this class, which is where the real behavioral differences are implemented.
|
60
45
|
end
|
61
46
|
end
|
62
47
|
end
|
@@ -4,16 +4,11 @@ module Nokogiri
|
|
4
4
|
module HTML4
|
5
5
|
module SAX
|
6
6
|
###
|
7
|
-
# Context
|
8
|
-
#
|
7
|
+
# Context object to invoke the HTML4 SAX parser on the SAX::Document handler.
|
8
|
+
#
|
9
|
+
# 💡 This class is usually not instantiated by the user. Use Nokogiri::HTML4::SAX::Parser
|
10
|
+
# instead.
|
9
11
|
class ParserContext < Nokogiri::XML::SAX::ParserContext
|
10
|
-
def self.new(thing, encoding = "UTF-8")
|
11
|
-
if [:read, :close].all? { |x| thing.respond_to?(x) }
|
12
|
-
super
|
13
|
-
else
|
14
|
-
memory(thing, encoding)
|
15
|
-
end
|
16
|
-
end
|
17
12
|
end
|
18
13
|
end
|
19
14
|
end
|
data/lib/nokogiri/html4.rb
CHANGED
@@ -3,12 +3,9 @@
|
|
3
3
|
|
4
4
|
module Nokogiri
|
5
5
|
class << self
|
6
|
-
#
|
7
|
-
|
8
|
-
|
9
|
-
# Parse HTML. Convenience method for Nokogiri::HTML4::Document.parse
|
10
|
-
def HTML4(input, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block)
|
11
|
-
Nokogiri::HTML4::Document.parse(input, url, encoding, options, &block)
|
6
|
+
# Convenience method for Nokogiri::HTML4::Document.parse
|
7
|
+
def HTML4(...)
|
8
|
+
Nokogiri::HTML4::Document.parse(...)
|
12
9
|
end
|
13
10
|
end
|
14
11
|
|
@@ -18,16 +15,14 @@ module Nokogiri
|
|
18
15
|
# for parsing HTML.
|
19
16
|
module HTML4
|
20
17
|
class << self
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
Document.parse(input, url, encoding, options, &block)
|
18
|
+
# Convenience method for Nokogiri::HTML4::Document.parse
|
19
|
+
def parse(...)
|
20
|
+
Document.parse(...)
|
25
21
|
end
|
26
22
|
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
HTML4::DocumentFragment.parse(string, encoding, options, &block)
|
23
|
+
# Convenience method for Nokogiri::HTML4::DocumentFragment.parse
|
24
|
+
def fragment(...)
|
25
|
+
HTML4::DocumentFragment.parse(...)
|
31
26
|
end
|
32
27
|
end
|
33
28
|
|
@@ -0,0 +1,40 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Nokogiri
|
4
|
+
module HTML5
|
5
|
+
###
|
6
|
+
# Nokogiri HTML5 builder is used for building HTML documents. It is very similar to the
|
7
|
+
# Nokogiri::XML::Builder. In fact, you should go read the documentation for
|
8
|
+
# Nokogiri::XML::Builder before reading this documentation.
|
9
|
+
#
|
10
|
+
# The construction behavior is identical to HTML4::Builder, but HTML5 documents implement the
|
11
|
+
# [HTML5 standard's serialization
|
12
|
+
# algorithm](https://www.w3.org/TR/2008/WD-html5-20080610/serializing.html).
|
13
|
+
#
|
14
|
+
# == Synopsis:
|
15
|
+
#
|
16
|
+
# Create an HTML5 document with a body that has an onload attribute, and a
|
17
|
+
# span tag with a class of "bold" that has content of "Hello world".
|
18
|
+
#
|
19
|
+
# builder = Nokogiri::HTML5::Builder.new do |doc|
|
20
|
+
# doc.html {
|
21
|
+
# doc.body(:onload => 'some_func();') {
|
22
|
+
# doc.span.bold {
|
23
|
+
# doc.text "Hello world"
|
24
|
+
# }
|
25
|
+
# }
|
26
|
+
# }
|
27
|
+
# end
|
28
|
+
# puts builder.to_html
|
29
|
+
#
|
30
|
+
# The HTML5 builder inherits from the XML builder, so make sure to read the
|
31
|
+
# Nokogiri::XML::Builder documentation.
|
32
|
+
class Builder < Nokogiri::XML::Builder
|
33
|
+
###
|
34
|
+
# Convert the builder to HTML
|
35
|
+
def to_html
|
36
|
+
@doc.to_html
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
@@ -43,41 +43,69 @@ module Nokogiri
|
|
43
43
|
|
44
44
|
# Get the parser's quirks mode value. See HTML5::QuirksMode.
|
45
45
|
#
|
46
|
-
# This method returns
|
46
|
+
# This method returns +nil+ if the parser was not invoked (e.g., Nokogiri::HTML5::Document.new).
|
47
47
|
#
|
48
48
|
# Since v1.14.0
|
49
49
|
attr_reader :quirks_mode
|
50
50
|
|
51
51
|
class << self
|
52
52
|
# :call-seq:
|
53
|
-
# parse(input)
|
54
|
-
# parse(input, url
|
55
|
-
# parse(input,
|
53
|
+
# parse(input) { |options| ... } → HTML5::Document
|
54
|
+
# parse(input, url: encoding:) { |options| ... } → HTML5::Document
|
55
|
+
# parse(input, **options) → HTML5::Document
|
56
56
|
#
|
57
|
-
# Parse HTML5
|
57
|
+
# Parse \HTML input with a parser compliant with the HTML5 spec. This method uses the
|
58
|
+
# encoding of +input+ if it can be determined, or else falls back to the +encoding:+
|
59
|
+
# parameter.
|
58
60
|
#
|
59
|
-
# [Parameters]
|
60
|
-
# - +input+
|
61
|
-
# IO, or StringIO.
|
61
|
+
# [Required Parameters]
|
62
|
+
# - +input+ (String | IO) the \HTML content to be parsed.
|
62
63
|
#
|
63
|
-
#
|
64
|
+
# [Optional Parameters]
|
65
|
+
# - +url:+ (String) the base URI of the document.
|
64
66
|
#
|
65
|
-
#
|
66
|
-
#
|
67
|
+
# [Optional Keyword Arguments]
|
68
|
+
# - +encoding:+ (Encoding) The name of the encoding that should be used when processing the
|
69
|
+
# document. When not provided, the encoding will be determined based on the document
|
70
|
+
# content.
|
67
71
|
#
|
68
|
-
# - +
|
69
|
-
#
|
70
|
-
# +:max_tree_depth+ and +:max_attributes+, described at Nokogiri::HTML5.
|
72
|
+
# - +max_errors:+ (Integer) The maximum number of parse errors to record. (default
|
73
|
+
# +Nokogiri::Gumbo::DEFAULT_MAX_ERRORS+ which is currently 0)
|
71
74
|
#
|
72
|
-
#
|
73
|
-
# Nokogiri::
|
75
|
+
# - +max_tree_depth:+ (Integer) The maximum depth of the parse tree. (default
|
76
|
+
# +Nokogiri::Gumbo::DEFAULT_MAX_TREE_DEPTH+)
|
74
77
|
#
|
75
|
-
# - +
|
76
|
-
# Nokogiri::
|
78
|
+
# - +max_attributes:+ (Integer) The maximum number of attributes allowed on an
|
79
|
+
# element. (default +Nokogiri::Gumbo::DEFAULT_MAX_ATTRIBUTES+)
|
80
|
+
#
|
81
|
+
# - +parse_noscript_content_as_text:+ (Boolean) Whether to parse the content of +noscript+
|
82
|
+
# elements as text. (default +false+)
|
83
|
+
#
|
84
|
+
# See rdoc-ref:HTML5@Parsing+options for a complete description of these parsing options.
|
85
|
+
#
|
86
|
+
# [Yields]
|
87
|
+
# If present, the block will be passed a Hash object to modify with parse options before the
|
88
|
+
# input is parsed. See rdoc-ref:HTML5@Parsing+options for a list of available options.
|
89
|
+
#
|
90
|
+
# ⚠ Note that +url:+ and +encoding:+ cannot be set by the configuration block.
|
77
91
|
#
|
78
92
|
# [Returns] Nokogiri::HTML5::Document
|
79
93
|
#
|
80
|
-
|
94
|
+
# *Example:* Parse a string with a specific encoding and custom max errors limit.
|
95
|
+
#
|
96
|
+
# Nokogiri::HTML5::Document.parse(socket, encoding: "ISO-8859-1", max_errors: 10)
|
97
|
+
#
|
98
|
+
# *Example:* Parse a string setting the +:parse_noscript_content_as_text+ option using the
|
99
|
+
# configuration block parameter.
|
100
|
+
#
|
101
|
+
# Nokogiri::HTML5::Document.parse(input) { |c| c[:parse_noscript_content_as_text] = true }
|
102
|
+
#
|
103
|
+
def parse(
|
104
|
+
string_or_io,
|
105
|
+
url_ = nil, encoding_ = nil,
|
106
|
+
url: url_, encoding: encoding_,
|
107
|
+
**options, &block
|
108
|
+
)
|
81
109
|
yield options if block
|
82
110
|
string_or_io = "" unless string_or_io
|
83
111
|
|
@@ -92,35 +120,37 @@ module Nokogiri
|
|
92
120
|
raise ArgumentError, "not a string or IO object"
|
93
121
|
end
|
94
122
|
|
95
|
-
do_parse(string_or_io, url, encoding, options)
|
123
|
+
do_parse(string_or_io, url, encoding, **options)
|
96
124
|
end
|
97
125
|
|
98
126
|
# Create a new document from an IO object.
|
99
127
|
#
|
100
128
|
# 💡 Most users should prefer Document.parse to this method.
|
101
|
-
def read_io(io,
|
129
|
+
def read_io(io, url_ = nil, encoding_ = nil, url: url_, encoding: encoding_, **options)
|
102
130
|
raise ArgumentError, "io object doesn't respond to :read" unless io.respond_to?(:read)
|
103
131
|
|
104
|
-
do_parse(io, url, encoding, options)
|
132
|
+
do_parse(io, url, encoding, **options)
|
105
133
|
end
|
106
134
|
|
107
135
|
# Create a new document from a String.
|
108
136
|
#
|
109
137
|
# 💡 Most users should prefer Document.parse to this method.
|
110
|
-
def read_memory(string,
|
138
|
+
def read_memory(string, url_ = nil, encoding_ = nil, url: url_, encoding: encoding_, **options)
|
111
139
|
raise ArgumentError, "string object doesn't respond to :to_str" unless string.respond_to?(:to_str)
|
112
140
|
|
113
|
-
do_parse(string, url, encoding, options)
|
141
|
+
do_parse(string, url, encoding, **options)
|
114
142
|
end
|
115
143
|
|
116
144
|
private
|
117
145
|
|
118
|
-
def do_parse(string_or_io, url, encoding, options)
|
146
|
+
def do_parse(string_or_io, url, encoding, **options)
|
119
147
|
string = HTML5.read_and_encode(string_or_io, encoding)
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
148
|
+
|
149
|
+
options[:max_attributes] ||= Nokogiri::Gumbo::DEFAULT_MAX_ATTRIBUTES
|
150
|
+
options[:max_errors] ||= options.delete(:max_parse_errors) || Nokogiri::Gumbo::DEFAULT_MAX_ERRORS
|
151
|
+
options[:max_tree_depth] ||= Nokogiri::Gumbo::DEFAULT_MAX_TREE_DEPTH
|
152
|
+
|
153
|
+
doc = Nokogiri::Gumbo.parse(string, url, self, **options)
|
124
154
|
doc.encoding = "UTF-8"
|
125
155
|
doc
|
126
156
|
end
|
@@ -142,7 +172,8 @@ module Nokogiri
|
|
142
172
|
# - +markup+ (String) The HTML5 markup fragment to be parsed
|
143
173
|
#
|
144
174
|
# [Returns]
|
145
|
-
# Nokogiri::HTML5::DocumentFragment. This object's children will be empty if
|
175
|
+
# Nokogiri::HTML5::DocumentFragment. This object's children will be empty if +markup+ is not
|
176
|
+
# passed, is empty, or is +nil+.
|
146
177
|
#
|
147
178
|
def fragment(markup = nil)
|
148
179
|
DocumentFragment.new(self, markup)
|
@@ -25,27 +25,145 @@ module Nokogiri
|
|
25
25
|
#
|
26
26
|
# 💡 HTML5 functionality is not available when running JRuby.
|
27
27
|
class DocumentFragment < Nokogiri::HTML4::DocumentFragment
|
28
|
+
class << self
|
29
|
+
# :call-seq:
|
30
|
+
# parse(input, **options) → HTML5::DocumentFragment
|
31
|
+
#
|
32
|
+
# Parse \HTML5 fragment input from a String, and return a new HTML5::DocumentFragment. This
|
33
|
+
# method creates a new, empty HTML5::Document to contain the fragment.
|
34
|
+
#
|
35
|
+
# [Parameters]
|
36
|
+
# - +input+ (String | IO) The HTML5 document fragment to parse.
|
37
|
+
#
|
38
|
+
# [Optional Keyword Arguments]
|
39
|
+
# - +encoding:+ (String | Encoding) The encoding, or name of the encoding, that should be
|
40
|
+
# used when processing the document. When not provided, the encoding will be determined
|
41
|
+
# based on the document content. Also see Nokogiri::HTML5 for a longer explanation of how
|
42
|
+
# encoding is handled by the parser.
|
43
|
+
#
|
44
|
+
# - +context:+ (String | Nokogiri::XML::Node) The node, or the name of an HTML5 element, "in
|
45
|
+
# context" of which to parse the document fragment. See below for more
|
46
|
+
# information. (default +"body"+)
|
47
|
+
#
|
48
|
+
# - +max_errors:+ (Integer) The maximum number of parse errors to record. (default
|
49
|
+
# +Nokogiri::Gumbo::DEFAULT_MAX_ERRORS+ which is currently 0)
|
50
|
+
#
|
51
|
+
# - +max_tree_depth:+ (Integer) The maximum depth of the parse tree. (default
|
52
|
+
# +Nokogiri::Gumbo::DEFAULT_MAX_TREE_DEPTH+)
|
53
|
+
#
|
54
|
+
# - +max_attributes:+ (Integer) The maximum number of attributes allowed on an
|
55
|
+
# element. (default +Nokogiri::Gumbo::DEFAULT_MAX_ATTRIBUTES+)
|
56
|
+
#
|
57
|
+
# - +parse_noscript_content_as_text:+ (Boolean) Whether to parse the content of +noscript+
|
58
|
+
# elements as text. (default +false+)
|
59
|
+
#
|
60
|
+
# See rdoc-ref:HTML5@Parsing+options for a complete description of these parsing options.
|
61
|
+
#
|
62
|
+
# [Returns] Nokogiri::HTML5::DocumentFragment
|
63
|
+
#
|
64
|
+
# === Context \Node
|
65
|
+
#
|
66
|
+
# If a context node is specified using +context:+, then the parser will behave as if that
|
67
|
+
# Node, or a hypothetical tag named as specified, is the parent of the fragment subtree.
|
68
|
+
#
|
69
|
+
def parse(
|
70
|
+
input,
|
71
|
+
encoding_ = nil, positional_options_hash = nil,
|
72
|
+
encoding: encoding_, **options
|
73
|
+
)
|
74
|
+
unless positional_options_hash.nil? || positional_options_hash.empty?
|
75
|
+
options.merge!(positional_options_hash)
|
76
|
+
end
|
77
|
+
|
78
|
+
context = options.delete(:context)
|
79
|
+
|
80
|
+
document = HTML5::Document.new
|
81
|
+
document.encoding = "UTF-8"
|
82
|
+
input = HTML5.read_and_encode(input, encoding)
|
83
|
+
|
84
|
+
new(document, input, context, options)
|
85
|
+
end
|
86
|
+
end
|
87
|
+
|
28
88
|
attr_accessor :document
|
29
89
|
attr_accessor :errors
|
30
90
|
|
31
91
|
# Get the parser's quirks mode value. See HTML5::QuirksMode.
|
32
92
|
#
|
33
|
-
# This method returns `nil` if the parser was not invoked (e.g.,
|
93
|
+
# This method returns `nil` if the parser was not invoked (e.g.,
|
94
|
+
# `Nokogiri::HTML5::DocumentFragment.new(doc)`).
|
34
95
|
#
|
35
96
|
# Since v1.14.0
|
36
97
|
attr_reader :quirks_mode
|
37
98
|
|
38
|
-
#
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
99
|
+
#
|
100
|
+
# :call-seq:
|
101
|
+
# new(document, input, **options) → HTML5::DocumentFragment
|
102
|
+
#
|
103
|
+
# Parse \HTML5 fragment input from a String, and return a new HTML5::DocumentFragment.
|
104
|
+
#
|
105
|
+
# 💡 It's recommended to use either HTML5::DocumentFragment.parse or HTML5::Node#fragment
|
106
|
+
# rather than call this method directly.
|
107
|
+
#
|
108
|
+
# [Required Parameters]
|
109
|
+
# - +document+ (HTML5::Document) The parent document to associate the returned fragment with.
|
110
|
+
#
|
111
|
+
# [Optional Parameters]
|
112
|
+
# - +input+ (String) The content to be parsed.
|
113
|
+
#
|
114
|
+
# [Optional Keyword Arguments]
|
115
|
+
# - +encoding:+ (String | Encoding) The encoding, or name of the encoding, that should be
|
116
|
+
# used when processing the document. When not provided, the encoding will be determined
|
117
|
+
# based on the document content. Also see Nokogiri::HTML5 for a longer explanation of how
|
118
|
+
# encoding is handled by the parser.
|
119
|
+
#
|
120
|
+
# - +context:+ (String | Nokogiri::XML::Node) The node, or the name of an HTML5 element, in
|
121
|
+
# which to parse the document fragment. (default +"body"+)
|
122
|
+
#
|
123
|
+
# - +max_errors:+ (Integer) The maximum number of parse errors to record. (default
|
124
|
+
# +Nokogiri::Gumbo::DEFAULT_MAX_ERRORS+ which is currently 0)
|
125
|
+
#
|
126
|
+
# - +max_tree_depth:+ (Integer) The maximum depth of the parse tree. (default
|
127
|
+
# +Nokogiri::Gumbo::DEFAULT_MAX_TREE_DEPTH+)
|
128
|
+
#
|
129
|
+
# - +max_attributes:+ (Integer) The maximum number of attributes allowed on an
|
130
|
+
# element. (default +Nokogiri::Gumbo::DEFAULT_MAX_ATTRIBUTES+)
|
131
|
+
#
|
132
|
+
# - +parse_noscript_content_as_text:+ (Boolean) Whether to parse the content of +noscript+
|
133
|
+
# elements as text. (default +false+)
|
134
|
+
#
|
135
|
+
# See rdoc-ref:HTML5@Parsing+options for a complete description of these parsing options.
|
136
|
+
#
|
137
|
+
# [Returns] HTML5::DocumentFragment
|
138
|
+
#
|
139
|
+
# === Context \Node
|
140
|
+
#
|
141
|
+
# If a context node is specified using +context:+, then the parser will behave as if that
|
142
|
+
# Node, or a hypothetical tag named as specified, is the parent of the fragment subtree.
|
143
|
+
#
|
144
|
+
def initialize(
|
145
|
+
doc, input = nil,
|
146
|
+
context_ = nil, positional_options_hash = nil,
|
147
|
+
context: context_,
|
148
|
+
**options
|
149
|
+
) # rubocop:disable Lint/MissingSuper
|
150
|
+
unless positional_options_hash.nil? || positional_options_hash.empty?
|
151
|
+
options.merge!(positional_options_hash)
|
152
|
+
end
|
153
|
+
|
154
|
+
@document = doc
|
155
|
+
@errors = []
|
156
|
+
return self unless input
|
157
|
+
|
158
|
+
input = Nokogiri::HTML5.read_and_encode(input, nil)
|
159
|
+
|
160
|
+
context = options.delete(:context) if options.key?(:context)
|
161
|
+
|
162
|
+
options[:max_attributes] ||= Nokogiri::Gumbo::DEFAULT_MAX_ATTRIBUTES
|
163
|
+
options[:max_errors] ||= options.delete(:max_parse_errors) || Nokogiri::Gumbo::DEFAULT_MAX_ERRORS
|
164
|
+
options[:max_tree_depth] ||= Nokogiri::Gumbo::DEFAULT_MAX_TREE_DEPTH
|
165
|
+
|
166
|
+
Nokogiri::Gumbo.fragment(self, input, context, **options)
|
49
167
|
end
|
50
168
|
|
51
169
|
def serialize(options = {}, &block) # :nodoc:
|
@@ -54,14 +172,6 @@ module Nokogiri
|
|
54
172
|
XML::Node.instance_method(:serialize).bind_call(self, options, &block)
|
55
173
|
end
|
56
174
|
|
57
|
-
# Parse a document fragment from +tags+, returning a Nodeset.
|
58
|
-
def self.parse(tags, encoding = nil, options = {})
|
59
|
-
doc = HTML5::Document.new
|
60
|
-
tags = HTML5.read_and_encode(tags, encoding)
|
61
|
-
doc.encoding = "UTF-8"
|
62
|
-
new(doc, tags, nil, options)
|
63
|
-
end
|
64
|
-
|
65
175
|
def extract_params(params) # :nodoc:
|
66
176
|
handler = params.find do |param|
|
67
177
|
![Hash, String, Symbol].include?(param.class)
|
data/lib/nokogiri/html5/node.rb
CHANGED
@@ -29,7 +29,7 @@ module Nokogiri
|
|
29
29
|
# 💡 HTML5 functionality is not available when running JRuby.
|
30
30
|
module Node
|
31
31
|
def inner_html(options = {})
|
32
|
-
return super
|
32
|
+
return super unless document.is_a?(HTML5::Document)
|
33
33
|
|
34
34
|
result = options[:preserve_newline] && prepend_newline? ? +"\n" : +""
|
35
35
|
result << children.map { |child| child.to_html(options) }.join
|
@@ -37,7 +37,7 @@ module Nokogiri
|
|
37
37
|
end
|
38
38
|
|
39
39
|
def write_to(io, *options)
|
40
|
-
return super
|
40
|
+
return super unless document.is_a?(HTML5::Document)
|
41
41
|
|
42
42
|
options = options.first.is_a?(Hash) ? options.shift : {}
|
43
43
|
encoding = options[:encoding] || options[0]
|
@@ -68,7 +68,7 @@ module Nokogiri
|
|
68
68
|
end
|
69
69
|
|
70
70
|
def fragment(tags)
|
71
|
-
return super
|
71
|
+
return super unless document.is_a?(HTML5::Document)
|
72
72
|
|
73
73
|
DocumentFragment.new(document, tags, self)
|
74
74
|
end
|
@@ -81,7 +81,7 @@ module Nokogiri
|
|
81
81
|
# annoying with attribute names like xml:lang since libxml2 will
|
82
82
|
# actually create the xml namespace if it doesn't exist already.
|
83
83
|
def add_child_node_and_reparent_attrs(node)
|
84
|
-
return super
|
84
|
+
return super unless document.is_a?(HTML5::Document)
|
85
85
|
|
86
86
|
# I'm not sure what this method is supposed to do. Reparenting
|
87
87
|
# namespaces is handled by libxml2, including child namespaces which
|