nokogiri 1.14.0.rc1-arm-linux
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +7 -0
- data/Gemfile +38 -0
- data/LICENSE-DEPENDENCIES.md +2224 -0
- data/LICENSE.md +9 -0
- data/README.md +287 -0
- data/bin/nokogiri +131 -0
- data/dependencies.yml +41 -0
- data/ext/nokogiri/depend +38 -0
- data/ext/nokogiri/extconf.rb +1082 -0
- data/ext/nokogiri/gumbo.c +594 -0
- data/ext/nokogiri/html4_document.c +166 -0
- data/ext/nokogiri/html4_element_description.c +294 -0
- data/ext/nokogiri/html4_entity_lookup.c +37 -0
- data/ext/nokogiri/html4_sax_parser_context.c +114 -0
- data/ext/nokogiri/html4_sax_push_parser.c +95 -0
- data/ext/nokogiri/include/libexslt/exslt.h +108 -0
- data/ext/nokogiri/include/libexslt/exsltconfig.h +70 -0
- data/ext/nokogiri/include/libexslt/exsltexports.h +63 -0
- data/ext/nokogiri/include/libxml2/libxml/HTMLparser.h +306 -0
- data/ext/nokogiri/include/libxml2/libxml/HTMLtree.h +147 -0
- data/ext/nokogiri/include/libxml2/libxml/SAX.h +204 -0
- data/ext/nokogiri/include/libxml2/libxml/SAX2.h +172 -0
- data/ext/nokogiri/include/libxml2/libxml/c14n.h +128 -0
- data/ext/nokogiri/include/libxml2/libxml/catalog.h +182 -0
- data/ext/nokogiri/include/libxml2/libxml/chvalid.h +230 -0
- data/ext/nokogiri/include/libxml2/libxml/debugXML.h +217 -0
- data/ext/nokogiri/include/libxml2/libxml/dict.h +81 -0
- data/ext/nokogiri/include/libxml2/libxml/encoding.h +232 -0
- data/ext/nokogiri/include/libxml2/libxml/entities.h +153 -0
- data/ext/nokogiri/include/libxml2/libxml/globals.h +499 -0
- data/ext/nokogiri/include/libxml2/libxml/hash.h +236 -0
- data/ext/nokogiri/include/libxml2/libxml/list.h +137 -0
- data/ext/nokogiri/include/libxml2/libxml/nanoftp.h +186 -0
- data/ext/nokogiri/include/libxml2/libxml/nanohttp.h +81 -0
- data/ext/nokogiri/include/libxml2/libxml/parser.h +1244 -0
- data/ext/nokogiri/include/libxml2/libxml/parserInternals.h +656 -0
- data/ext/nokogiri/include/libxml2/libxml/pattern.h +100 -0
- data/ext/nokogiri/include/libxml2/libxml/relaxng.h +218 -0
- data/ext/nokogiri/include/libxml2/libxml/schemasInternals.h +958 -0
- data/ext/nokogiri/include/libxml2/libxml/schematron.h +142 -0
- data/ext/nokogiri/include/libxml2/libxml/threads.h +91 -0
- data/ext/nokogiri/include/libxml2/libxml/tree.h +1312 -0
- data/ext/nokogiri/include/libxml2/libxml/uri.h +94 -0
- data/ext/nokogiri/include/libxml2/libxml/valid.h +463 -0
- data/ext/nokogiri/include/libxml2/libxml/xinclude.h +129 -0
- data/ext/nokogiri/include/libxml2/libxml/xlink.h +189 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlIO.h +368 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlautomata.h +146 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlerror.h +947 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlexports.h +77 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlmemory.h +226 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlmodule.h +57 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlreader.h +428 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlregexp.h +222 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlsave.h +88 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlschemas.h +246 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlschemastypes.h +152 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlstring.h +140 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlunicode.h +202 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlversion.h +503 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlwriter.h +488 -0
- data/ext/nokogiri/include/libxml2/libxml/xpath.h +575 -0
- data/ext/nokogiri/include/libxml2/libxml/xpathInternals.h +632 -0
- data/ext/nokogiri/include/libxml2/libxml/xpointer.h +137 -0
- data/ext/nokogiri/include/libxslt/attributes.h +38 -0
- data/ext/nokogiri/include/libxslt/documents.h +93 -0
- data/ext/nokogiri/include/libxslt/extensions.h +262 -0
- data/ext/nokogiri/include/libxslt/extra.h +72 -0
- data/ext/nokogiri/include/libxslt/functions.h +78 -0
- data/ext/nokogiri/include/libxslt/imports.h +75 -0
- data/ext/nokogiri/include/libxslt/keys.h +53 -0
- data/ext/nokogiri/include/libxslt/namespaces.h +68 -0
- data/ext/nokogiri/include/libxslt/numbersInternals.h +73 -0
- data/ext/nokogiri/include/libxslt/pattern.h +84 -0
- data/ext/nokogiri/include/libxslt/preproc.h +43 -0
- data/ext/nokogiri/include/libxslt/security.h +104 -0
- data/ext/nokogiri/include/libxslt/templates.h +77 -0
- data/ext/nokogiri/include/libxslt/transform.h +207 -0
- data/ext/nokogiri/include/libxslt/variables.h +118 -0
- data/ext/nokogiri/include/libxslt/xslt.h +110 -0
- data/ext/nokogiri/include/libxslt/xsltInternals.h +1982 -0
- data/ext/nokogiri/include/libxslt/xsltconfig.h +179 -0
- data/ext/nokogiri/include/libxslt/xsltexports.h +64 -0
- data/ext/nokogiri/include/libxslt/xsltlocale.h +76 -0
- data/ext/nokogiri/include/libxslt/xsltutils.h +310 -0
- data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
- data/ext/nokogiri/nokogiri.c +259 -0
- data/ext/nokogiri/nokogiri.h +235 -0
- data/ext/nokogiri/test_global_handlers.c +40 -0
- data/ext/nokogiri/xml_attr.c +103 -0
- data/ext/nokogiri/xml_attribute_decl.c +70 -0
- data/ext/nokogiri/xml_cdata.c +57 -0
- data/ext/nokogiri/xml_comment.c +62 -0
- data/ext/nokogiri/xml_document.c +689 -0
- data/ext/nokogiri/xml_document_fragment.c +44 -0
- data/ext/nokogiri/xml_dtd.c +208 -0
- data/ext/nokogiri/xml_element_content.c +128 -0
- data/ext/nokogiri/xml_element_decl.c +69 -0
- data/ext/nokogiri/xml_encoding_handler.c +104 -0
- data/ext/nokogiri/xml_entity_decl.c +112 -0
- data/ext/nokogiri/xml_entity_reference.c +50 -0
- data/ext/nokogiri/xml_namespace.c +186 -0
- data/ext/nokogiri/xml_node.c +2425 -0
- data/ext/nokogiri/xml_node_set.c +496 -0
- data/ext/nokogiri/xml_processing_instruction.c +54 -0
- data/ext/nokogiri/xml_reader.c +794 -0
- data/ext/nokogiri/xml_relax_ng.c +183 -0
- data/ext/nokogiri/xml_sax_parser.c +316 -0
- data/ext/nokogiri/xml_sax_parser_context.c +283 -0
- data/ext/nokogiri/xml_sax_push_parser.c +166 -0
- data/ext/nokogiri/xml_schema.c +282 -0
- data/ext/nokogiri/xml_syntax_error.c +85 -0
- data/ext/nokogiri/xml_text.c +48 -0
- data/ext/nokogiri/xml_xpath_context.c +413 -0
- data/ext/nokogiri/xslt_stylesheet.c +363 -0
- data/gumbo-parser/CHANGES.md +63 -0
- data/gumbo-parser/Makefile +111 -0
- data/gumbo-parser/THANKS +27 -0
- data/lib/nokogiri/2.7/nokogiri.so +0 -0
- data/lib/nokogiri/3.0/nokogiri.so +0 -0
- data/lib/nokogiri/3.1/nokogiri.so +0 -0
- data/lib/nokogiri/3.2/nokogiri.so +0 -0
- data/lib/nokogiri/class_resolver.rb +67 -0
- data/lib/nokogiri/css/node.rb +54 -0
- data/lib/nokogiri/css/parser.rb +770 -0
- data/lib/nokogiri/css/parser.y +277 -0
- data/lib/nokogiri/css/parser_extras.rb +96 -0
- data/lib/nokogiri/css/syntax_error.rb +9 -0
- data/lib/nokogiri/css/tokenizer.rb +155 -0
- data/lib/nokogiri/css/tokenizer.rex +56 -0
- data/lib/nokogiri/css/xpath_visitor.rb +359 -0
- data/lib/nokogiri/css.rb +66 -0
- data/lib/nokogiri/decorators/slop.rb +44 -0
- data/lib/nokogiri/encoding_handler.rb +57 -0
- data/lib/nokogiri/extension.rb +32 -0
- data/lib/nokogiri/gumbo.rb +15 -0
- data/lib/nokogiri/html.rb +48 -0
- data/lib/nokogiri/html4/builder.rb +37 -0
- data/lib/nokogiri/html4/document.rb +214 -0
- data/lib/nokogiri/html4/document_fragment.rb +54 -0
- data/lib/nokogiri/html4/element_description.rb +25 -0
- data/lib/nokogiri/html4/element_description_defaults.rb +572 -0
- data/lib/nokogiri/html4/encoding_reader.rb +121 -0
- data/lib/nokogiri/html4/entity_lookup.rb +15 -0
- data/lib/nokogiri/html4/sax/parser.rb +63 -0
- data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
- data/lib/nokogiri/html4/sax/push_parser.rb +37 -0
- data/lib/nokogiri/html4.rb +47 -0
- data/lib/nokogiri/html5/document.rb +168 -0
- data/lib/nokogiri/html5/document_fragment.rb +90 -0
- data/lib/nokogiri/html5/node.rb +98 -0
- data/lib/nokogiri/html5.rb +389 -0
- data/lib/nokogiri/jruby/dependencies.rb +3 -0
- data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
- data/lib/nokogiri/syntax_error.rb +6 -0
- data/lib/nokogiri/version/constant.rb +6 -0
- data/lib/nokogiri/version/info.rb +223 -0
- data/lib/nokogiri/version.rb +4 -0
- data/lib/nokogiri/xml/attr.rb +66 -0
- data/lib/nokogiri/xml/attribute_decl.rb +20 -0
- data/lib/nokogiri/xml/builder.rb +487 -0
- data/lib/nokogiri/xml/cdata.rb +13 -0
- data/lib/nokogiri/xml/character_data.rb +9 -0
- data/lib/nokogiri/xml/document.rb +471 -0
- data/lib/nokogiri/xml/document_fragment.rb +205 -0
- data/lib/nokogiri/xml/dtd.rb +34 -0
- data/lib/nokogiri/xml/element_content.rb +38 -0
- data/lib/nokogiri/xml/element_decl.rb +15 -0
- data/lib/nokogiri/xml/entity_decl.rb +21 -0
- data/lib/nokogiri/xml/entity_reference.rb +20 -0
- data/lib/nokogiri/xml/namespace.rb +58 -0
- data/lib/nokogiri/xml/node/save_options.rb +68 -0
- data/lib/nokogiri/xml/node.rb +1563 -0
- data/lib/nokogiri/xml/node_set.rb +446 -0
- data/lib/nokogiri/xml/notation.rb +19 -0
- data/lib/nokogiri/xml/parse_options.rb +213 -0
- data/lib/nokogiri/xml/pp/character_data.rb +21 -0
- data/lib/nokogiri/xml/pp/node.rb +57 -0
- data/lib/nokogiri/xml/pp.rb +4 -0
- data/lib/nokogiri/xml/processing_instruction.rb +11 -0
- data/lib/nokogiri/xml/reader.rb +105 -0
- data/lib/nokogiri/xml/relax_ng.rb +38 -0
- data/lib/nokogiri/xml/sax/document.rb +167 -0
- data/lib/nokogiri/xml/sax/parser.rb +125 -0
- data/lib/nokogiri/xml/sax/parser_context.rb +21 -0
- data/lib/nokogiri/xml/sax/push_parser.rb +61 -0
- data/lib/nokogiri/xml/sax.rb +6 -0
- data/lib/nokogiri/xml/schema.rb +73 -0
- data/lib/nokogiri/xml/searchable.rb +270 -0
- data/lib/nokogiri/xml/syntax_error.rb +72 -0
- data/lib/nokogiri/xml/text.rb +11 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +13 -0
- data/lib/nokogiri/xml/xpath.rb +21 -0
- data/lib/nokogiri/xml/xpath_context.rb +16 -0
- data/lib/nokogiri/xml.rb +76 -0
- data/lib/nokogiri/xslt/stylesheet.rb +27 -0
- data/lib/nokogiri/xslt.rb +65 -0
- data/lib/nokogiri.rb +120 -0
- data/lib/xsd/xmlparser/nokogiri.rb +104 -0
- metadata +317 -0
@@ -0,0 +1,214 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
require "pathname"
|
5
|
+
|
6
|
+
module Nokogiri
|
7
|
+
module HTML4
|
8
|
+
class Document < Nokogiri::XML::Document
|
9
|
+
###
# Look up the encoding declared by a <meta> tag in this document.
#
# A meta element carrying a +charset+ attribute is consulted first and
# the value of that attribute is returned. Otherwise the element found
# by +meta_content_type+ is used and the +charset=...+ token is
# extracted from its +content+ attribute.
#
# Returns nil when neither kind of meta tag is present (or when the
# +content+ attribute holds no +charset=+ token).
def meta_encoding
  charset_tag = at_xpath("//meta[@charset]")
  return charset_tag[:charset] if charset_tag

  content_type_tag = meta_content_type
  content_type_tag && content_type_tag["content"][/charset\s*=\s*([\w-]+)/i, 1]
end
|
19
|
+
|
20
|
+
###
# Set the meta tag encoding for this document.
#
# If a meta encoding tag is already present, it is updated in place:
# a Content-Type meta tag gets its +content+ attribute rewritten, and
# failing that a meta tag with a +charset+ attribute gets that
# attribute replaced.
#
# Otherwise, this method tries to create one at an appropriate
# place supplying head and/or html elements as necessary, which
# is inside a head element if any, and before any text node or
# content element (typically <body>) if any. The new tag uses a
# +charset+ attribute when the internal subset reports an HTML5 DTD,
# and an +http-equiv+/+content+ pair otherwise.
#
# The result when trying to set an encoding that is different
# from the document encoding is undefined.
#
# Beware in CRuby, that libxml2 automatically inserts a meta tag
# into a head element.
#
# Returns +encoding+.
def meta_encoding=(encoding)
  if (existing = meta_content_type)
    existing["content"] = format("text/html; charset=%s", encoding)
    return encoding
  end

  if (existing = at_xpath("//meta[@charset]"))
    existing["charset"] = encoding
    return encoding
  end

  # No declaration yet: build one that suits the document type.
  tag = XML::Node.new("meta", self)
  if internal_subset&.html5_dtd?
    tag["charset"] = encoding
  else
    tag["http-equiv"] = "Content-Type"
    tag["content"] = format("text/html; charset=%s", encoding)
  end

  # The declaration goes first inside an existing <head>; otherwise
  # set_metadata_element decides where it belongs.
  head = at_xpath("//head")
  head ? head.prepend_child(tag) : set_metadata_element(tag)
  encoding
end
|
59
|
+
|
60
|
+
# Find the first meta element that has both an +http-equiv+ and a
# +content+ attribute and whose +http-equiv+ value is "Content-Type"
# (compared case-insensitively). Returns nil when there is none.
def meta_content_type
  candidates = xpath("//meta[@http-equiv and boolean(@content)]")
  candidates.find { |tag| /\AContent-Type\z/i.match?(tag["http-equiv"]) }
end
private :meta_content_type
|
66
|
+
|
67
|
+
###
# Get the title string of this document: the inner text of the first
# element matched by +//title+. Returns nil if there is no title tag.
def title
  at_xpath("//title")&.inner_text
end
|
73
|
+
|
74
|
+
###
# Set the title string of this document.
#
# If a title element is already present, its content is replaced
# with the given text.
#
# Otherwise, this method tries to create one at an appropriate
# place supplying head and/or html elements as necessary, which
# is inside a head element if any, right after a meta
# encoding/charset tag if any, and before any text node or
# content element (typically <body>) if any.
def title=(text)
  text_node = XML::Text.new(text, self)

  existing = at_xpath("//title")
  if existing
    existing.children = text_node
    return text
  end

  new_title = XML::Node.new("title", self) << text_node

  head = at_xpath("//head")
  return head << new_title if head

  charset_meta = at_xpath("//meta[@charset]") || meta_content_type
  if charset_meta
    # better put after charset declaration
    charset_meta.add_next_sibling(new_title)
  else
    set_metadata_element(new_title)
  end
end
|
102
|
+
|
103
|
+
# Insert +element+ (a metadata node such as <meta> or <title>) at a
# suitable place in the document, trying in order:
#
# 1. append it to an existing <head>;
# 2. create a <head> as the first child of an existing <html> and put
#    the element inside it;
# 3. put it right before the first top-level element or text node;
# 4. create <html> and <head> from scratch and put it inside.
def set_metadata_element(element) # rubocop:disable Naming/AccessorMethodName
  existing_head = at_xpath("//head")
  return existing_head << element if existing_head

  html_root = at_xpath("//html")
  if html_root
    new_head = html_root.prepend_child(XML::Node.new("head", self))
    return new_head.prepend_child(element)
  end

  leading_content = children.find do |node|
    node.is_a?(XML::Element) || node.is_a?(XML::Text)
  end
  if leading_content
    # We reach here only if the underlying document model
    # allows <html>/<head> elements to be omitted and does not
    # automatically supply them.
    return leading_content.add_previous_sibling(element)
  end

  html_root = add_child(XML::Node.new("html", self))
  new_head = html_root.add_child(XML::Node.new("head", self))
  new_head.prepend_child(element)
end
private :set_metadata_element
|
126
|
+
|
127
|
+
####
# Serialize Node using +options+. Save options can also be set using a block.
#
# See also Nokogiri::XML::Node::SaveOptions and Node@Serialization+and+Generating+Output.
#
# These two statements are equivalent:
#
#   node.serialize(:encoding => 'UTF-8', :save_with => FORMAT | AS_XML)
#
# or
#
#   node.serialize(:encoding => 'UTF-8') do |config|
#     config.format.as_xml
#   end
#
# When +options+ has no truthy +:save_with+ entry,
# XML::Node::SaveOptions::DEFAULT_HTML is used. The default is applied to
# a shallow copy, so the hash passed by the caller is never modified and
# may be frozen.
def serialize(options = {})
  # Work on a copy: the caller's hash must not be written to.
  options = options.dup
  options[:save_with] ||= XML::Node::SaveOptions::DEFAULT_HTML
  # Zero-argument super forwards the current value of +options+ (the copy)
  # together with any block given.
  super
end
|
146
|
+
|
147
|
+
####
# Build a DocumentFragment that belongs to this document from +tags+
# (which may be nil), passing the document root as the context node.
def fragment(tags = nil)
  context_node = root
  DocumentFragment.new(self, tags, context_node)
end
|
152
|
+
|
153
|
+
# :call-seq:
#   xpath_doctype() → Nokogiri::CSS::XPathVisitor::DoctypeConfig
#
# [Returns] The document type which determines CSS-to-XPath translation;
# for this class it is always the HTML4 configuration.
#
# See XPathVisitor for more information.
def xpath_doctype
  Nokogiri::CSS::XPathVisitor::DoctypeConfig::HTML4
end
|
162
|
+
|
163
|
+
class << self
|
164
|
+
###
# Parse HTML. +string_or_io+ may be a String, or any object that
# responds to _read_ and _close_ such as an IO, or StringIO.
# +url+ is the resource where this document is located. +encoding+ is the
# encoding that should be used when processing the document. +options+
# is a number that sets options in the parser, such as
# Nokogiri::XML::ParseOptions::RECOVER. See the constants in
# Nokogiri::XML::ParseOptions.
#
# If a block is given, it is yielded the parse options before parsing.
#
# A Pathname argument is opened as a file for the duration of the parse;
# that file is closed again before this method returns. IO objects passed
# in by the caller are left open.
def parse(string_or_io, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML)
  options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
  yield options if block_given?

  url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil

  if string_or_io.respond_to?(:encoding)
    # A binary encoding says nothing about the document, so it is not
    # used as the default.
    unless string_or_io.encoding == Encoding::ASCII_8BIT
      encoding ||= string_or_io.encoding.name
    end
  end

  if string_or_io.respond_to?(:read)
    opened_file = nil
    begin
      if string_or_io.is_a?(Pathname)
        # resolve the Pathname to the file and open it as an IO object, see #2110
        string_or_io = opened_file = string_or_io.expand_path.open
        url ||= string_or_io.path
      end

      unless encoding
        string_or_io = EncodingReader.new(string_or_io)
        begin
          return read_io(string_or_io, url, encoding, options.to_i)
        rescue EncodingReader::EncodingFound => e
          # The reader found an encoding declaration; parse again below
          # with that encoding.
          encoding = e.found_encoding
        end
      end
      return read_io(string_or_io, url, encoding, options.to_i)
    ensure
      # Only close what this method opened itself.
      opened_file&.close
    end
  end

  # read_memory pukes on empty docs
  if string_or_io.nil? || string_or_io.empty?
    return encoding ? new.tap { |i| i.encoding = encoding } : new
  end

  encoding ||= EncodingReader.detect_encoding(string_or_io)

  read_memory(string_or_io, url, encoding, options.to_i)
end
|
211
|
+
end
|
212
|
+
end
|
213
|
+
end
|
214
|
+
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Nokogiri
|
4
|
+
module HTML4
|
5
|
+
class DocumentFragment < Nokogiri::XML::DocumentFragment
|
6
|
+
####
# Create a document fragment from +tags+, using +encoding+.
#
# A fresh HTML4::Document is created to own the fragment. When
# +encoding+ is not given it is taken from +tags+ if +tags+ responds to
# +encoding+ and that encoding is not binary (ASCII-8BIT); in every
# other case "UTF-8" is used. +options+ and the block are forwarded to
# +new+.
def self.parse(tags, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block)
  owner = HTML4::Document.new

  unless encoding
    encoding = "UTF-8"
    if tags.respond_to?(:encoding)
      tags_encoding = tags.encoding
      encoding = tags_encoding.name unless tags_encoding == ::Encoding::ASCII_8BIT
    end
  end

  owner.encoding = encoding

  new(owner, tags, nil, options, &block)
end
|
26
|
+
|
27
|
+
# Populate a new fragment owned by +document+ with the nodes parsed
# from +tags+. Nothing is parsed when +tags+ is nil.
#
# An Integer +options+ is wrapped in Nokogiri::XML::ParseOptions, and the
# options are yielded to the block, if one is given, before parsing.
#
# With a context node +ctx+, the markup is parsed in that context and
# only the errors added to +document+ during this parse are recorded on
# the fragment. Without one, the markup is parsed into a temporary
# HTML4 document and the resulting body content is moved into the
# fragment, along with that document's errors.
def initialize(document, tags = nil, ctx = nil, options = XML::ParseOptions::DEFAULT_HTML)
  return self unless tags

  options = Nokogiri::XML::ParseOptions.new(options) if options.is_a?(Integer)
  yield options if block_given?

  if ctx
    errors_before = document.errors.dup
    parsed = ctx.parse("<div>#{tags}</div>", options)
    unless parsed.empty?
      parsed.first.children.each { |node| node.parent = self }
    end
    self.errors = document.errors - errors_before
  else
    # This is a horrible hack, but I don't care
    # When the markup starts a line with <body>, the body element itself
    # is adopted; otherwise the nodes inside the implied body are.
    starts_with_body = /^\s*?<body/i.match?(tags)
    selector = starts_with_body ? "/html/body" : "/html/body/node()"

    scratch = HTML4::Document.parse("<html><body>#{tags}", nil, document.encoding, options)
    scratch.xpath(selector).each { |node| node.parent = self }
    self.errors = scratch.errors
  end
  children
end
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Nokogiri
|
4
|
+
module HTML4
|
5
|
+
class ElementDescription
  ###
  # Is this element a block element? True whenever +inline?+ is not.
  # (+inline?+, +name+ and +description+ are provided elsewhere and are
  # not visible in this part of the file.)
  def block?
    !inline?
  end

  ###
  # Convert this description to a string of the form "name: description"
  def to_s
    format("%s: %s", name, description)
  end

  ###
  # Inspection information: the class name followed by the element name
  # and its description
  def inspect
    class_label = self.class.name
    "#<#{class_label}: #{name} #{description}>"
  end
end
|
24
|
+
end
|
25
|
+
end
|