nokogiri 1.14.0.rc1-arm-linux
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +7 -0
- data/Gemfile +38 -0
- data/LICENSE-DEPENDENCIES.md +2224 -0
- data/LICENSE.md +9 -0
- data/README.md +287 -0
- data/bin/nokogiri +131 -0
- data/dependencies.yml +41 -0
- data/ext/nokogiri/depend +38 -0
- data/ext/nokogiri/extconf.rb +1082 -0
- data/ext/nokogiri/gumbo.c +594 -0
- data/ext/nokogiri/html4_document.c +166 -0
- data/ext/nokogiri/html4_element_description.c +294 -0
- data/ext/nokogiri/html4_entity_lookup.c +37 -0
- data/ext/nokogiri/html4_sax_parser_context.c +114 -0
- data/ext/nokogiri/html4_sax_push_parser.c +95 -0
- data/ext/nokogiri/include/libexslt/exslt.h +108 -0
- data/ext/nokogiri/include/libexslt/exsltconfig.h +70 -0
- data/ext/nokogiri/include/libexslt/exsltexports.h +63 -0
- data/ext/nokogiri/include/libxml2/libxml/HTMLparser.h +306 -0
- data/ext/nokogiri/include/libxml2/libxml/HTMLtree.h +147 -0
- data/ext/nokogiri/include/libxml2/libxml/SAX.h +204 -0
- data/ext/nokogiri/include/libxml2/libxml/SAX2.h +172 -0
- data/ext/nokogiri/include/libxml2/libxml/c14n.h +128 -0
- data/ext/nokogiri/include/libxml2/libxml/catalog.h +182 -0
- data/ext/nokogiri/include/libxml2/libxml/chvalid.h +230 -0
- data/ext/nokogiri/include/libxml2/libxml/debugXML.h +217 -0
- data/ext/nokogiri/include/libxml2/libxml/dict.h +81 -0
- data/ext/nokogiri/include/libxml2/libxml/encoding.h +232 -0
- data/ext/nokogiri/include/libxml2/libxml/entities.h +153 -0
- data/ext/nokogiri/include/libxml2/libxml/globals.h +499 -0
- data/ext/nokogiri/include/libxml2/libxml/hash.h +236 -0
- data/ext/nokogiri/include/libxml2/libxml/list.h +137 -0
- data/ext/nokogiri/include/libxml2/libxml/nanoftp.h +186 -0
- data/ext/nokogiri/include/libxml2/libxml/nanohttp.h +81 -0
- data/ext/nokogiri/include/libxml2/libxml/parser.h +1244 -0
- data/ext/nokogiri/include/libxml2/libxml/parserInternals.h +656 -0
- data/ext/nokogiri/include/libxml2/libxml/pattern.h +100 -0
- data/ext/nokogiri/include/libxml2/libxml/relaxng.h +218 -0
- data/ext/nokogiri/include/libxml2/libxml/schemasInternals.h +958 -0
- data/ext/nokogiri/include/libxml2/libxml/schematron.h +142 -0
- data/ext/nokogiri/include/libxml2/libxml/threads.h +91 -0
- data/ext/nokogiri/include/libxml2/libxml/tree.h +1312 -0
- data/ext/nokogiri/include/libxml2/libxml/uri.h +94 -0
- data/ext/nokogiri/include/libxml2/libxml/valid.h +463 -0
- data/ext/nokogiri/include/libxml2/libxml/xinclude.h +129 -0
- data/ext/nokogiri/include/libxml2/libxml/xlink.h +189 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlIO.h +368 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlautomata.h +146 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlerror.h +947 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlexports.h +77 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlmemory.h +226 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlmodule.h +57 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlreader.h +428 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlregexp.h +222 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlsave.h +88 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlschemas.h +246 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlschemastypes.h +152 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlstring.h +140 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlunicode.h +202 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlversion.h +503 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlwriter.h +488 -0
- data/ext/nokogiri/include/libxml2/libxml/xpath.h +575 -0
- data/ext/nokogiri/include/libxml2/libxml/xpathInternals.h +632 -0
- data/ext/nokogiri/include/libxml2/libxml/xpointer.h +137 -0
- data/ext/nokogiri/include/libxslt/attributes.h +38 -0
- data/ext/nokogiri/include/libxslt/documents.h +93 -0
- data/ext/nokogiri/include/libxslt/extensions.h +262 -0
- data/ext/nokogiri/include/libxslt/extra.h +72 -0
- data/ext/nokogiri/include/libxslt/functions.h +78 -0
- data/ext/nokogiri/include/libxslt/imports.h +75 -0
- data/ext/nokogiri/include/libxslt/keys.h +53 -0
- data/ext/nokogiri/include/libxslt/namespaces.h +68 -0
- data/ext/nokogiri/include/libxslt/numbersInternals.h +73 -0
- data/ext/nokogiri/include/libxslt/pattern.h +84 -0
- data/ext/nokogiri/include/libxslt/preproc.h +43 -0
- data/ext/nokogiri/include/libxslt/security.h +104 -0
- data/ext/nokogiri/include/libxslt/templates.h +77 -0
- data/ext/nokogiri/include/libxslt/transform.h +207 -0
- data/ext/nokogiri/include/libxslt/variables.h +118 -0
- data/ext/nokogiri/include/libxslt/xslt.h +110 -0
- data/ext/nokogiri/include/libxslt/xsltInternals.h +1982 -0
- data/ext/nokogiri/include/libxslt/xsltconfig.h +179 -0
- data/ext/nokogiri/include/libxslt/xsltexports.h +64 -0
- data/ext/nokogiri/include/libxslt/xsltlocale.h +76 -0
- data/ext/nokogiri/include/libxslt/xsltutils.h +310 -0
- data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
- data/ext/nokogiri/nokogiri.c +259 -0
- data/ext/nokogiri/nokogiri.h +235 -0
- data/ext/nokogiri/test_global_handlers.c +40 -0
- data/ext/nokogiri/xml_attr.c +103 -0
- data/ext/nokogiri/xml_attribute_decl.c +70 -0
- data/ext/nokogiri/xml_cdata.c +57 -0
- data/ext/nokogiri/xml_comment.c +62 -0
- data/ext/nokogiri/xml_document.c +689 -0
- data/ext/nokogiri/xml_document_fragment.c +44 -0
- data/ext/nokogiri/xml_dtd.c +208 -0
- data/ext/nokogiri/xml_element_content.c +128 -0
- data/ext/nokogiri/xml_element_decl.c +69 -0
- data/ext/nokogiri/xml_encoding_handler.c +104 -0
- data/ext/nokogiri/xml_entity_decl.c +112 -0
- data/ext/nokogiri/xml_entity_reference.c +50 -0
- data/ext/nokogiri/xml_namespace.c +186 -0
- data/ext/nokogiri/xml_node.c +2425 -0
- data/ext/nokogiri/xml_node_set.c +496 -0
- data/ext/nokogiri/xml_processing_instruction.c +54 -0
- data/ext/nokogiri/xml_reader.c +794 -0
- data/ext/nokogiri/xml_relax_ng.c +183 -0
- data/ext/nokogiri/xml_sax_parser.c +316 -0
- data/ext/nokogiri/xml_sax_parser_context.c +283 -0
- data/ext/nokogiri/xml_sax_push_parser.c +166 -0
- data/ext/nokogiri/xml_schema.c +282 -0
- data/ext/nokogiri/xml_syntax_error.c +85 -0
- data/ext/nokogiri/xml_text.c +48 -0
- data/ext/nokogiri/xml_xpath_context.c +413 -0
- data/ext/nokogiri/xslt_stylesheet.c +363 -0
- data/gumbo-parser/CHANGES.md +63 -0
- data/gumbo-parser/Makefile +111 -0
- data/gumbo-parser/THANKS +27 -0
- data/lib/nokogiri/2.7/nokogiri.so +0 -0
- data/lib/nokogiri/3.0/nokogiri.so +0 -0
- data/lib/nokogiri/3.1/nokogiri.so +0 -0
- data/lib/nokogiri/3.2/nokogiri.so +0 -0
- data/lib/nokogiri/class_resolver.rb +67 -0
- data/lib/nokogiri/css/node.rb +54 -0
- data/lib/nokogiri/css/parser.rb +770 -0
- data/lib/nokogiri/css/parser.y +277 -0
- data/lib/nokogiri/css/parser_extras.rb +96 -0
- data/lib/nokogiri/css/syntax_error.rb +9 -0
- data/lib/nokogiri/css/tokenizer.rb +155 -0
- data/lib/nokogiri/css/tokenizer.rex +56 -0
- data/lib/nokogiri/css/xpath_visitor.rb +359 -0
- data/lib/nokogiri/css.rb +66 -0
- data/lib/nokogiri/decorators/slop.rb +44 -0
- data/lib/nokogiri/encoding_handler.rb +57 -0
- data/lib/nokogiri/extension.rb +32 -0
- data/lib/nokogiri/gumbo.rb +15 -0
- data/lib/nokogiri/html.rb +48 -0
- data/lib/nokogiri/html4/builder.rb +37 -0
- data/lib/nokogiri/html4/document.rb +214 -0
- data/lib/nokogiri/html4/document_fragment.rb +54 -0
- data/lib/nokogiri/html4/element_description.rb +25 -0
- data/lib/nokogiri/html4/element_description_defaults.rb +572 -0
- data/lib/nokogiri/html4/encoding_reader.rb +121 -0
- data/lib/nokogiri/html4/entity_lookup.rb +15 -0
- data/lib/nokogiri/html4/sax/parser.rb +63 -0
- data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
- data/lib/nokogiri/html4/sax/push_parser.rb +37 -0
- data/lib/nokogiri/html4.rb +47 -0
- data/lib/nokogiri/html5/document.rb +168 -0
- data/lib/nokogiri/html5/document_fragment.rb +90 -0
- data/lib/nokogiri/html5/node.rb +98 -0
- data/lib/nokogiri/html5.rb +389 -0
- data/lib/nokogiri/jruby/dependencies.rb +3 -0
- data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
- data/lib/nokogiri/syntax_error.rb +6 -0
- data/lib/nokogiri/version/constant.rb +6 -0
- data/lib/nokogiri/version/info.rb +223 -0
- data/lib/nokogiri/version.rb +4 -0
- data/lib/nokogiri/xml/attr.rb +66 -0
- data/lib/nokogiri/xml/attribute_decl.rb +20 -0
- data/lib/nokogiri/xml/builder.rb +487 -0
- data/lib/nokogiri/xml/cdata.rb +13 -0
- data/lib/nokogiri/xml/character_data.rb +9 -0
- data/lib/nokogiri/xml/document.rb +471 -0
- data/lib/nokogiri/xml/document_fragment.rb +205 -0
- data/lib/nokogiri/xml/dtd.rb +34 -0
- data/lib/nokogiri/xml/element_content.rb +38 -0
- data/lib/nokogiri/xml/element_decl.rb +15 -0
- data/lib/nokogiri/xml/entity_decl.rb +21 -0
- data/lib/nokogiri/xml/entity_reference.rb +20 -0
- data/lib/nokogiri/xml/namespace.rb +58 -0
- data/lib/nokogiri/xml/node/save_options.rb +68 -0
- data/lib/nokogiri/xml/node.rb +1563 -0
- data/lib/nokogiri/xml/node_set.rb +446 -0
- data/lib/nokogiri/xml/notation.rb +19 -0
- data/lib/nokogiri/xml/parse_options.rb +213 -0
- data/lib/nokogiri/xml/pp/character_data.rb +21 -0
- data/lib/nokogiri/xml/pp/node.rb +57 -0
- data/lib/nokogiri/xml/pp.rb +4 -0
- data/lib/nokogiri/xml/processing_instruction.rb +11 -0
- data/lib/nokogiri/xml/reader.rb +105 -0
- data/lib/nokogiri/xml/relax_ng.rb +38 -0
- data/lib/nokogiri/xml/sax/document.rb +167 -0
- data/lib/nokogiri/xml/sax/parser.rb +125 -0
- data/lib/nokogiri/xml/sax/parser_context.rb +21 -0
- data/lib/nokogiri/xml/sax/push_parser.rb +61 -0
- data/lib/nokogiri/xml/sax.rb +6 -0
- data/lib/nokogiri/xml/schema.rb +73 -0
- data/lib/nokogiri/xml/searchable.rb +270 -0
- data/lib/nokogiri/xml/syntax_error.rb +72 -0
- data/lib/nokogiri/xml/text.rb +11 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +13 -0
- data/lib/nokogiri/xml/xpath.rb +21 -0
- data/lib/nokogiri/xml/xpath_context.rb +16 -0
- data/lib/nokogiri/xml.rb +76 -0
- data/lib/nokogiri/xslt/stylesheet.rb +27 -0
- data/lib/nokogiri/xslt.rb +65 -0
- data/lib/nokogiri.rb +120 -0
- data/lib/xsd/xmlparser/nokogiri.rb +104 -0
- metadata +317 -0
@@ -0,0 +1,446 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
module Nokogiri
|
5
|
+
module XML
|
6
|
+
####
|
7
|
+
# A NodeSet contains a list of Nokogiri::XML::Node objects. Typically
|
8
|
+
# a NodeSet is returned as a result of searching a Document via
|
9
|
+
# Nokogiri::XML::Searchable#css or Nokogiri::XML::Searchable#xpath
|
10
|
+
class NodeSet
|
11
|
+
include Nokogiri::XML::Searchable
|
12
|
+
include Enumerable
|
13
|
+
|
14
|
+
# The Document this NodeSet is associated with
|
15
|
+
attr_accessor :document
|
16
|
+
|
17
|
+
alias_method :clone, :dup
|
18
|
+
|
19
|
+
# Create a NodeSet associated with +document+, populated with the nodes in +list+ (empty by default)
|
20
|
+
# Stores +document+, lets the document decorate this set, appends every
# member of +list+ via #<<, and finally yields the new set when a block
# is given.
def initialize(document, list = [])
  @document = document
  document.decorate(self)
  list.each do |node|
    self << node
  end
  yield(self) if block_given?
end
|
26
|
+
|
27
|
+
###
|
28
|
+
# Get the first element of the NodeSet.
|
29
|
+
# Without +n+ (or with a falsy +n+) returns the node at index 0.
# With +n+ returns an Array holding at most the first +n+ nodes.
def first(n = nil)
  if n
    count = [n, length].min
    count.times.map { |position| self[position] }
  else
    self[0]
  end
end
|
36
|
+
|
37
|
+
###
|
38
|
+
# Get the last element of the NodeSet.
|
39
|
+
# Returns whatever #[] yields for index -1 (the final member).
def last
  self[-1]
end
|
42
|
+
|
43
|
+
###
|
44
|
+
# Is this NodeSet empty?
|
45
|
+
# True when #length reports zero members.
def empty?
  length.zero?
end
|
48
|
+
|
49
|
+
###
|
50
|
+
# Returns the index of the first node in self that is == to +node+ or meets the given block. Returns nil if no match is found.
|
51
|
+
# Position of the first member that is == to +node+, or, when no +node+ is
# passed, the first member for which the block returns a truthy value.
# Returns nil when nothing matches or when neither +node+ nor a block is given.
def index(node = nil)
  if node
    # An explicit node wins over a block; tell the caller the block is ignored.
    warn("given block not used") if block_given?
    matches = ->(member) { member == node }
  elsif block_given?
    matches = ->(member) { yield(member) }
  else
    return nil
  end

  each_with_index do |member, position|
    return position if matches.call(member)
  end
  nil
end
|
60
|
+
|
61
|
+
###
|
62
|
+
# Insert +datum+ before the first Node in this NodeSet
|
63
|
+
# Delegates to #before on the first member of this set.
def before(datum)
  head = first
  head.before(datum)
end
|
66
|
+
|
67
|
+
###
|
68
|
+
# Insert +datum+ after the last Node in this NodeSet
|
69
|
+
# Delegates to #after on the last member of this set.
def after(datum)
  tail = last
  tail.after(datum)
end
|
72
|
+
|
73
|
+
alias_method :<<, :push
|
74
|
+
alias_method :remove, :unlink
|
75
|
+
|
76
|
+
###
|
77
|
+
# call-seq: css *rules, [namespace-bindings, custom-pseudo-class]
|
78
|
+
#
|
79
|
+
# Search this node set for CSS +rules+. +rules+ must be one or more CSS
|
80
|
+
# selectors, for example: node_set.css("p.summary", "a[href]")
|
81
|
+
#
|
82
|
+
# For more information see Nokogiri::XML::Searchable#css
|
83
|
+
# Translates the CSS rules in +args+ to XPath once, runs the resulting
# queries against every member, and returns the accumulated results as a
# new NodeSet bound to this set's document.
def css(*args)
  rules, handler, ns, _ = extract_params(args)
  paths = css_rules_to_xpath(rules, ns)

  result = NodeSet.new(document)
  each do |node|
    result += xpath_internal(node, paths, handler, ns, nil)
  end
  result
end
|
91
|
+
|
92
|
+
###
|
93
|
+
# call-seq: xpath *paths, [namespace-bindings, variable-bindings, custom-handler-class]
|
94
|
+
#
|
95
|
+
# Search this node set for XPath +paths+. +paths+ must be one or more XPath
|
96
|
+
# queries.
|
97
|
+
#
|
98
|
+
# For more information see Nokogiri::XML::Searchable#xpath
|
99
|
+
# Runs the XPath queries in +args+ against every member and returns the
# accumulated results as a new NodeSet bound to this set's document.
def xpath(*args)
  paths, handler, ns, binds = extract_params(args)

  result = NodeSet.new(document)
  each do |node|
    result += xpath_internal(node, paths, handler, ns, binds)
  end
  result
end
|
106
|
+
|
107
|
+
###
|
108
|
+
# call-seq: at *paths, [namespace-bindings, xpath-variable-bindings, custom-handler-class]
|
109
|
+
#
|
110
|
+
# Search this object for +paths+, and return only the first
|
111
|
+
# result. +paths+ must be one or more XPath or CSS queries.
|
112
|
+
#
|
113
|
+
# See Searchable#search for more information.
|
114
|
+
#
|
115
|
+
# Or, if passed an integer, index into the NodeSet:
|
116
|
+
#
|
117
|
+
# node_set.at(3) # same as node_set[3]
|
118
|
+
#
|
119
|
+
# A single Numeric argument is treated as an index into the set;
# anything else is forwarded unchanged to the inherited #at.
def at(*args)
  sole = args.first
  return self[sole] if args.size == 1 && sole.is_a?(Numeric)

  super(*args)
end
|
126
|
+
alias_method :%, :at
|
127
|
+
|
128
|
+
###
|
129
|
+
# Filter this list for nodes that match +expr+
|
130
|
+
def filter(expr)
|
131
|
+
find_all { |node| node.matches?(expr) }
|
132
|
+
end
|
133
|
+
|
134
|
+
###
|
135
|
+
# Add the class attribute +name+ to all Node objects in the
|
136
|
+
# NodeSet.
|
137
|
+
#
|
138
|
+
# See Nokogiri::XML::Node#add_class for more information.
|
139
|
+
# Calls #add_class(name) on every member; returns self for chaining.
def add_class(name)
  each { |element| element.add_class(name) }
  self
end
|
145
|
+
|
146
|
+
###
|
147
|
+
# Append the class attribute +name+ to all Node objects in the
|
148
|
+
# NodeSet.
|
149
|
+
#
|
150
|
+
# See Nokogiri::XML::Node#append_class for more information.
|
151
|
+
# Calls #append_class(name) on every member; returns self for chaining.
def append_class(name)
  each { |element| element.append_class(name) }
  self
end
|
157
|
+
|
158
|
+
###
|
159
|
+
# Remove the class attribute +name+ from all Node objects in the
|
160
|
+
# NodeSet.
|
161
|
+
#
|
162
|
+
# See Nokogiri::XML::Node#remove_class for more information.
|
163
|
+
# Calls #remove_class(name) on every member (+name+ defaults to nil and is
# passed through as-is); returns self for chaining.
def remove_class(name = nil)
  each { |element| element.remove_class(name) }
  self
end
|
169
|
+
|
170
|
+
###
|
171
|
+
# Set attributes on each Node in the NodeSet, or get an
|
172
|
+
# attribute from the first Node in the NodeSet.
|
173
|
+
#
|
174
|
+
# To get an attribute from the first Node in a NodeSet:
|
175
|
+
#
|
176
|
+
# node_set.attr("href") # => "https://www.nokogiri.org"
|
177
|
+
#
|
178
|
+
# Note that an empty NodeSet will return nil when +#attr+ is called as a getter.
|
179
|
+
#
|
180
|
+
# To set an attribute on each node, +key+ can either be an
|
181
|
+
# attribute name, or a Hash of attribute names and values. When
|
182
|
+
# called as a setter, +#attr+ returns the NodeSet.
|
183
|
+
#
|
184
|
+
# If +key+ is an attribute name, then either +value+ or +block+
|
185
|
+
# must be passed.
|
186
|
+
#
|
187
|
+
# If +key+ is a Hash then attributes will be set for each
|
188
|
+
# key/value pair:
|
189
|
+
#
|
190
|
+
# node_set.attr("href" => "https://www.nokogiri.org", "class" => "member")
|
191
|
+
#
|
192
|
+
# If +value+ is passed, it will be used as the attribute value
|
193
|
+
# for all nodes:
|
194
|
+
#
|
195
|
+
# node_set.attr("href", "https://www.nokogiri.org")
|
196
|
+
#
|
197
|
+
# If +block+ is passed, it will be called on each Node object in
|
198
|
+
# the NodeSet and the return value used as the attribute value
|
199
|
+
# for that node:
|
200
|
+
#
|
201
|
+
# node_set.attr("class") { |node| node.name }
|
202
|
+
#
|
203
|
+
# Getter form: when +key+ is not a Hash and neither +value+ nor a block is
# supplied, returns first&.attribute(key) (nil for an empty set).
# Setter form: assigns each key/value pair on every member; a falsy value
# is replaced by the block's result for that node. Returns self.
def attr(key, value = nil, &block)
  setting = key.is_a?(Hash) || (key && (value || block))
  return first&.attribute(key) unless setting

  assignments = key.is_a?(Hash) ? key : { key => value }
  assignments.each do |attr_name, attr_value|
    each { |node| node[attr_name] = attr_value || yield(node) }
  end

  self
end
|
218
|
+
alias_method :set, :attr
|
219
|
+
alias_method :attribute, :attr
|
220
|
+
|
221
|
+
###
|
222
|
+
# Remove the attribute named +name+ from all Node objects in the NodeSet
|
223
|
+
# Calls #delete(name) on every member; returns self for chaining.
def remove_attr(name)
  each do |element|
    element.delete(name)
  end
  self
end
|
227
|
+
alias_method :remove_attribute, :remove_attr
|
228
|
+
|
229
|
+
###
|
230
|
+
# Iterate over each node, yielding to +block+
|
231
|
+
# Yields every member in index order and returns self.
# Without a block, returns an enumerator (via to_enum) instead.
def each
  return to_enum unless block_given?

  # #length is read once up front, then members are fetched by index.
  length.times { |position| yield(self[position]) }
  self
end
|
239
|
+
|
240
|
+
###
|
241
|
+
# Get the inner text of all contained Node objects
|
242
|
+
#
|
243
|
+
# Note: This joins the text of all Node objects in the NodeSet:
|
244
|
+
#
|
245
|
+
# doc = Nokogiri::XML('<xml><a><d>foo</d><d>bar</d></a></xml>')
|
246
|
+
# doc.css('d').text # => "foobar"
|
247
|
+
#
|
248
|
+
# Instead, if you want to return the text of all nodes in the NodeSet:
|
249
|
+
#
|
250
|
+
# doc.css('d').map(&:text) # => ["foo", "bar"]
|
251
|
+
#
|
252
|
+
# See Nokogiri::XML::Node#content for more information.
|
253
|
+
# Concatenates the #inner_text of every member, in order, with no separator.
def inner_text
  pieces = collect { |node| node.inner_text }
  pieces.join("")
end
|
256
|
+
alias_method :text, :inner_text
|
257
|
+
|
258
|
+
###
|
259
|
+
# Get the inner html of all contained Node objects
|
260
|
+
# Concatenates the #inner_html of every member, forwarding +args+ to each
# call unchanged.
def inner_html(*args)
  fragments = collect do |node|
    node.inner_html(*args)
  end
  fragments.join("")
end
|
263
|
+
|
264
|
+
# :call-seq:
|
265
|
+
# wrap(markup) -> self
|
266
|
+
# wrap(node) -> self
|
267
|
+
#
|
268
|
+
# Wrap each member of this NodeSet with the node parsed from +markup+ or a dup of the +node+.
|
269
|
+
#
|
270
|
+
# [Parameters]
|
271
|
+
# - *markup* (String)
|
272
|
+
# Markup that is parsed, once per member of the NodeSet, and used as the wrapper. Each
|
273
|
+
# node's parent, if it exists, is used as the context node for parsing; otherwise the
|
274
|
+
# associated document is used. If the parsed fragment has multiple roots, the first root
|
275
|
+
# node is used as the wrapper.
|
276
|
+
# - *node* (Nokogiri::XML::Node)
|
277
|
+
# An element that is `#dup`ed and used as the wrapper.
|
278
|
+
#
|
279
|
+
# [Returns] +self+, to support chaining.
|
280
|
+
#
|
281
|
+
# ⚠ Note that if a +String+ is passed, the markup will be parsed <b>once per node</b> in the
|
282
|
+
# NodeSet. You can avoid this overhead in cases where you know exactly the wrapper you wish to
|
283
|
+
# use by passing a +Node+ instead.
|
284
|
+
#
|
285
|
+
# Also see Node#wrap
|
286
|
+
#
|
287
|
+
# *Example* with a +String+ argument:
|
288
|
+
#
|
289
|
+
# doc = Nokogiri::HTML5(<<~HTML)
|
290
|
+
# <html><body>
|
291
|
+
# <a>a</a>
|
292
|
+
# <a>b</a>
|
293
|
+
# <a>c</a>
|
294
|
+
# <a>d</a>
|
295
|
+
# </body></html>
|
296
|
+
# HTML
|
297
|
+
# doc.css("a").wrap("<div></div>")
|
298
|
+
# doc.to_html
|
299
|
+
# # => <html><head></head><body>
|
300
|
+
# # <div><a>a</a></div>
|
301
|
+
# # <div><a>b</a></div>
|
302
|
+
# # <div><a>c</a></div>
|
303
|
+
# # <div><a>d</a></div>
|
304
|
+
# # </body></html>
|
305
|
+
#
|
306
|
+
# *Example* with a +Node+ argument
|
307
|
+
#
|
308
|
+
# 💡 Note that this is faster than the equivalent call passing a +String+ because it avoids
|
309
|
+
# having to reparse the wrapper markup for each node.
|
310
|
+
#
|
311
|
+
# doc = Nokogiri::HTML5(<<~HTML)
|
312
|
+
# <html><body>
|
313
|
+
# <a>a</a>
|
314
|
+
# <a>b</a>
|
315
|
+
# <a>c</a>
|
316
|
+
# <a>d</a>
|
317
|
+
# </body></html>
|
318
|
+
# HTML
|
319
|
+
# doc.css("a").wrap(doc.create_element("div"))
|
320
|
+
# doc.to_html
|
321
|
+
# # => <html><head></head><body>
|
322
|
+
# # <div><a>a</a></div>
|
323
|
+
# # <div><a>b</a></div>
|
324
|
+
# # <div><a>c</a></div>
|
325
|
+
# # <div><a>d</a></div>
|
326
|
+
# # </body></html>
|
327
|
+
#
|
328
|
+
# Calls #wrap(node_or_tags) on every member and returns self for chaining.
#
# Iterates with #each rather than #map: the per-node results are never
# used, so there is no reason to collect them into a throwaway Array.
def wrap(node_or_tags)
  each { |node| node.wrap(node_or_tags) }
  self
end
|
332
|
+
|
333
|
+
###
|
334
|
+
# Convert this NodeSet to a string.
|
335
|
+
# Concatenates the #to_s of every member, in order.
def to_s
  rendered = map { |node| node.to_s }
  rendered.join
end
|
338
|
+
|
339
|
+
###
|
340
|
+
# Convert this NodeSet to HTML
|
341
|
+
# Concatenates the #to_html of every member, forwarding +args+.
#
# On JRuby a leading options Hash is ensured (created if absent) and its
# :save_with entry defaults to Node::SaveOptions::DEFAULT_HTML.
#
# For an empty set the result is an empty String in the requested
# :encoding option, falling back to the document's encoding. When neither
# is available a plain empty String is returned instead of attempting
# "".encode(nil), which raises.
def to_html(*args)
  if Nokogiri.jruby?
    options = args.first.is_a?(Hash) ? args.shift : {}
    options[:save_with] ||= Node::SaveOptions::DEFAULT_HTML
    args.insert(0, options)
  end

  return map { |node| node.to_html(*args) }.join unless empty?

  requested = args.first.is_a?(Hash) ? args.first[:encoding] : nil
  encoding = requested || document.encoding
  encoding ? "".encode(encoding) : +""
end
|
354
|
+
|
355
|
+
###
|
356
|
+
# Convert this NodeSet to XHTML
|
357
|
+
# Concatenates the #to_xhtml of every member, forwarding +args+ unchanged.
def to_xhtml(*args)
  rendered = map do |node|
    node.to_xhtml(*args)
  end
  rendered.join
end
|
360
|
+
|
361
|
+
###
|
362
|
+
# Convert this NodeSet to XML
|
363
|
+
# Concatenates the #to_xml of every member, forwarding +args+ unchanged.
def to_xml(*args)
  rendered = map do |node|
    node.to_xml(*args)
  end
  rendered.join
end
|
366
|
+
|
367
|
+
alias_method :size, :length
|
368
|
+
alias_method :to_ary, :to_a
|
369
|
+
|
370
|
+
###
|
371
|
+
# Removes the last element from set and returns it, or +nil+ if
|
372
|
+
# the set is empty
|
373
|
+
# Returns nil for an empty set; otherwise passes the last member to
# #delete and returns that call's result.
def pop
  length.zero? ? nil : delete(last)
end
|
378
|
+
|
379
|
+
###
|
380
|
+
# Returns the first element of the NodeSet and removes it. Returns
|
381
|
+
# +nil+ if the set is empty.
|
382
|
+
# Returns nil for an empty set; otherwise passes the first member to
# #delete and returns that call's result.
def shift
  length.zero? ? nil : delete(first)
end
|
387
|
+
|
388
|
+
###
|
389
|
+
# Equality -- Two NodeSets are equal if they contain the same number
|
390
|
+
# of elements and if each element is equal to the corresponding
|
391
|
+
# element in the other NodeSet
|
392
|
+
# False unless +other+ is a Nokogiri::XML::NodeSet of the same length;
# otherwise true only when every member is == to the member at the same
# index in +other+.
def ==(other)
  return false unless other.is_a?(Nokogiri::XML::NodeSet) && length == other.length

  each_with_index.all? { |node, position| node == other[position] }
end
|
401
|
+
|
402
|
+
###
|
403
|
+
# Returns a new NodeSet containing all the children of all the nodes in
|
404
|
+
# the NodeSet
|
405
|
+
# Builds a new NodeSet on this set's document and pushes onto it, in
# order, each child of each member.
def children
  each_with_object(NodeSet.new(document)) do |parent, collected|
    parent.children.each { |child| collected.push(child) }
  end
end
|
412
|
+
|
413
|
+
###
|
414
|
+
# Returns a new NodeSet containing all the nodes in the NodeSet
|
415
|
+
# in reverse order
|
416
|
+
# Builds a new NodeSet on this set's document containing the members
# from the last index down to index 0; self is left untouched.
def reverse
  reversed = NodeSet.new(document)
  (0...length).reverse_each do |position|
    reversed.push(self[position])
  end
  reversed
end
|
423
|
+
|
424
|
+
###
|
425
|
+
# Return a nicely formatted string representation
|
426
|
+
# Renders the set like an Array literal: each member's #inspect,
# comma-separated, wrapped in square brackets.
def inspect
  rendered = map { |node| node.inspect }.join(", ")
  "[#{rendered}]"
end
|
429
|
+
|
430
|
+
alias_method :+, :|
|
431
|
+
|
432
|
+
#
|
433
|
+
# :call-seq: deconstruct() → Array
|
434
|
+
#
|
435
|
+
# Returns the members of this NodeSet as an array, to use in pattern matching.
|
436
|
+
#
|
437
|
+
# ⚡ This is an experimental feature, available since v1.14.0
|
438
|
+
#
|
439
|
+
# Array pattern-matching hook: simply returns #to_a.
def deconstruct
  to_a
end
|
442
|
+
|
443
|
+
IMPLIED_XPATH_CONTEXTS = [".//", "self::"].freeze # :nodoc:
|
444
|
+
end
|
445
|
+
end
|
446
|
+
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Nokogiri
|
4
|
+
module XML
|
5
|
+
# Struct representing an {XML Schema Notation}[https://www.w3.org/TR/xml/#Notations]
|
6
|
+
class Notation < Struct.new(:name, :public_id, :system_id)
  # dead comment to ensure rdoc processing

  # :attr: name (String)
  # The name for the element.

  # :attr: public_id (String)
  # The URI corresponding to the public identifier

  # :attr: system_id (String,nil)
  # The URI corresponding to the system identifier
end
|
18
|
+
end
|
19
|
+
end
|
@@ -0,0 +1,213 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
module Nokogiri
|
5
|
+
module XML
|
6
|
+
# Options that control the parsing behavior for XML::Document, XML::DocumentFragment,
|
7
|
+
# HTML4::Document, HTML4::DocumentFragment, XSLT::Stylesheet, and XML::Schema.
|
8
|
+
#
|
9
|
+
# These options directly expose libxml2's parse options, which are all boolean in the sense that
|
10
|
+
# an option is "on" or "off".
|
11
|
+
#
|
12
|
+
# 💡 Note that HTML5 parsing has a separate, orthogonal set of options due to the nature of the
|
13
|
+
# HTML5 specification. See Nokogiri::HTML5.
|
14
|
+
#
|
15
|
+
# ⚠ Not all parse options are supported on JRuby. Nokogiri will attempt to invoke the equivalent
|
16
|
+
# behavior in Xerces/NekoHTML on JRuby when it's possible.
|
17
|
+
#
|
18
|
+
# == Setting and unsetting parse options
|
19
|
+
#
|
20
|
+
# You can build your own combinations of parse options by using any of the following methods:
|
21
|
+
#
|
22
|
+
# [ParseOptions method chaining]
|
23
|
+
#
|
24
|
+
# Every option has an equivalent method in lowercase. You can chain these methods together to
|
25
|
+
# set various combinations.
|
26
|
+
#
|
27
|
+
# # Set the HUGE & PEDANTIC options
|
28
|
+
# po = Nokogiri::XML::ParseOptions.new.huge.pedantic
|
29
|
+
# doc = Nokogiri::XML::Document.parse(xml, nil, nil, po)
|
30
|
+
#
|
31
|
+
# Every option has an equivalent <code>no{option}</code> method in lowercase. You can call these
|
32
|
+
# methods on an instance of ParseOptions to unset the option.
|
33
|
+
#
|
34
|
+
# # Set the HUGE & PEDANTIC options
|
35
|
+
# po = Nokogiri::XML::ParseOptions.new.huge.pedantic
|
36
|
+
#
|
37
|
+
# # later we want to modify the options
|
38
|
+
# po.nohuge # Unset the HUGE option
|
39
|
+
# po.nopedantic # Unset the PEDANTIC option
|
40
|
+
#
|
41
|
+
# 💡 Note that some options begin with "no" leading to the logical but perhaps unintuitive
|
42
|
+
# double negative:
|
43
|
+
#
|
44
|
+
# po.nocdata # Set the NOCDATA parse option
|
45
|
+
# po.nonocdata # Unset the NOCDATA parse option
|
46
|
+
#
|
47
|
+
# 💡 Note that negation is not available for STRICT, which is itself a negation of all other
|
48
|
+
# features.
|
49
|
+
#
|
50
|
+
#
|
51
|
+
# [Using Ruby Blocks]
|
52
|
+
#
|
53
|
+
# Most parsing methods will accept a block for configuration of parse options, and we
|
54
|
+
# recommend chaining the setter methods:
|
55
|
+
#
|
56
|
+
# doc = Nokogiri::XML::Document.parse(xml) { |config| config.huge.pedantic }
|
57
|
+
#
|
58
|
+
#
|
59
|
+
# [ParseOptions constants]
|
60
|
+
#
|
61
|
+
# You can also use the constants declared under Nokogiri::XML::ParseOptions to set various
|
62
|
+
# combinations. They are bits in a bitmask, and so can be combined with bitwise operators:
|
63
|
+
#
|
64
|
+
# po = Nokogiri::XML::ParseOptions.new(Nokogiri::XML::ParseOptions::HUGE | Nokogiri::XML::ParseOptions::PEDANTIC)
|
65
|
+
# doc = Nokogiri::XML::Document.parse(xml, nil, nil, po)
|
66
|
+
#
|
67
|
+
class ParseOptions
  # Strict parsing
  STRICT = 0

  # Recover from errors. On by default for XML::Document, XML::DocumentFragment,
  # HTML4::Document, HTML4::DocumentFragment, and XSLT::Stylesheet (it is not part of
  # DEFAULT_SCHEMA).
  RECOVER = 1 << 0

  # Substitute entities. Off by default, except for XSLT::Stylesheet (it is part of
  # DEFAULT_XSLT).
  #
  # ⚠ This option enables entity substitution, contrary to what the name implies.
  #
  # ⚠ <b>It is UNSAFE to set this option</b> when parsing untrusted documents.
  NOENT = 1 << 1

  # Load external subsets. On by default for XSLT::Stylesheet.
  #
  # ⚠ <b>It is UNSAFE to set this option</b> when parsing untrusted documents.
  DTDLOAD = 1 << 2

  # Default DTD attributes. On by default for XSLT::Stylesheet.
  DTDATTR = 1 << 3

  # Validate with the DTD. Off by default.
  DTDVALID = 1 << 4

  # Suppress error reports. On by default for HTML4::Document and HTML4::DocumentFragment
  NOERROR = 1 << 5

  # Suppress warning reports. On by default for HTML4::Document and HTML4::DocumentFragment
  NOWARNING = 1 << 6

  # Enable pedantic error reporting. Off by default.
  PEDANTIC = 1 << 7

  # Remove blank nodes. Off by default.
  NOBLANKS = 1 << 8

  # Use the SAX1 interface internally. Off by default.
  SAX1 = 1 << 9

  # Implement XInclude substitution. Off by default.
  XINCLUDE = 1 << 10

  # Forbid network access. On by default for XML::Document, XML::DocumentFragment,
  # HTML4::Document, HTML4::DocumentFragment, XSLT::Stylesheet, and XML::Schema.
  #
  # ⚠ <b>It is UNSAFE to unset this option</b> when parsing untrusted documents.
  NONET = 1 << 11

  # Do not reuse the context dictionary. Off by default.
  NODICT = 1 << 12

  # Remove redundant namespaces declarations. Off by default.
  NSCLEAN = 1 << 13

  # Merge CDATA as text nodes. On by default for XSLT::Stylesheet.
  NOCDATA = 1 << 14

  # Do not generate XInclude START/END nodes. Off by default.
  NOXINCNODE = 1 << 15

  # Compact small text nodes. Off by default.
  #
  # ⚠ No modification of the DOM tree is allowed after parsing. libxml2 may crash if you try to
  # modify the tree.
  COMPACT = 1 << 16

  # Parse using XML-1.0 before update 5. Off by default
  OLD10 = 1 << 17

  # Do not fixup XInclude xml:base uris. Off by default
  NOBASEFIX = 1 << 18

  # Relax any hardcoded limit from the parser. Off by default.
  #
  # ⚠ There may be a performance penalty when this option is set.
  HUGE = 1 << 19

  # Support line numbers up to <code>long int</code> (default is a <code>short int</code>). On
  # by default for XML::Document, XML::DocumentFragment, HTML4::Document,
  # HTML4::DocumentFragment, XSLT::Stylesheet, and XML::Schema.
  BIG_LINES = 1 << 22

  # The options mask used by default for parsing XML::Document and XML::DocumentFragment
  DEFAULT_XML = RECOVER | NONET | BIG_LINES

  # The options mask used by default for parsing XSLT::Stylesheet
  DEFAULT_XSLT = RECOVER | NONET | NOENT | DTDLOAD | DTDATTR | NOCDATA | BIG_LINES

  # The options mask used by default for parsing HTML4::Document and HTML4::DocumentFragment
  DEFAULT_HTML = RECOVER | NOERROR | NOWARNING | NONET | BIG_LINES

  # The options mask used by default for parsing XML::Schema
  DEFAULT_SCHEMA = NONET | BIG_LINES

  # The current options bitmask (an Integer built from the constants above).
  attr_accessor :options

  # Creates an option set from the bitmask +options+ (STRICT, i.e. 0, by default).
  def initialize(options = STRICT)
    @options = options
  end

  # For every constant defined above except STRICT (including the DEFAULT_* masks),
  # define three instance methods named after the lowercased constant:
  #
  #   name    sets the constant's bits and returns self (chainable)
  #   noname  clears the constant's bits and returns self (chainable)
  #   name?   true when all of the constant's bits are currently set
  constants.each do |constant|
    next if constant.to_sym == :STRICT

    flag = const_get(constant)
    name = constant.downcase

    define_method(name) do
      @options |= flag
      self
    end

    define_method(:"no#{name}") do
      @options &= ~flag
      self
    end

    define_method(:"#{name}?") do
      flag & @options == flag
    end
  end

  # Clears the RECOVER bit and returns self (chainable).
  def strict
    @options &= ~RECOVER
    self
  end

  # True when the RECOVER bit is not set.
  def strict?
    @options & RECOVER == STRICT
  end

  # Two option sets are equal when their integer bitmasks are equal. +other+ may be
  # anything that responds to +to_i+ (another ParseOptions, an Integer, ...).
  #
  # +nil+ and objects that cannot be converted with +to_i+ are never equal: nil.to_i is 0,
  # which would otherwise make an all-zero option set compare equal to +nil+.
  def ==(other)
    return false if other.nil? || !other.respond_to?(:to_i)

    other.to_i == to_i
  end

  alias_method :to_i, :options

  # The default #inspect output with the lowercased names of all currently satisfied
  # option predicates appended before the closing ">".
  def inspect
    enabled = self.class.constants.select { |k| send(:"#{k.downcase}?") }.map(&:downcase)
    super.sub(/>$/, " " + enabled.join(", ") + ">")
  end
end
|
212
|
+
end
|
213
|
+
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Nokogiri
|
4
|
+
module XML
|
5
|
+
# :nodoc: all
|
6
|
+
module PP
|
7
|
+
module CharacterData
  # Pretty-printer hook: emits "#(ShortClassName " + the pretty-printed
  # #text + ")" as one group with an indent of 2, where ShortClassName is
  # the last "::"-separated segment of the class name.
  def pretty_print(pp)
    short_name = self.class.name.split("::").last
    pp.group(2, "#(#{short_name} ", ")") { pp.pp(text) }
  end

  # "#<ClassName:0x<object_id in hex> <text.inspect>>"
  def inspect
    hex_id = format("0x%x", object_id)
    "#<#{self.class.name}:#{hex_id} #{text.inspect}>"
  end
end
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|