nokogiri 1.18.0.rc1-x86_64-linux-musl
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/Gemfile +38 -0
- data/LICENSE-DEPENDENCIES.md +2224 -0
- data/LICENSE.md +9 -0
- data/README.md +293 -0
- data/bin/nokogiri +131 -0
- data/dependencies.yml +42 -0
- data/ext/nokogiri/depend +38 -0
- data/ext/nokogiri/extconf.rb +1173 -0
- data/ext/nokogiri/gumbo.c +610 -0
- data/ext/nokogiri/html4_document.c +171 -0
- data/ext/nokogiri/html4_element_description.c +299 -0
- data/ext/nokogiri/html4_entity_lookup.c +37 -0
- data/ext/nokogiri/html4_sax_parser.c +40 -0
- data/ext/nokogiri/html4_sax_parser_context.c +98 -0
- data/ext/nokogiri/html4_sax_push_parser.c +96 -0
- data/ext/nokogiri/include/libexslt/exslt.h +108 -0
- data/ext/nokogiri/include/libexslt/exsltconfig.h +70 -0
- data/ext/nokogiri/include/libexslt/exsltexports.h +63 -0
- data/ext/nokogiri/include/libxml2/libxml/HTMLparser.h +336 -0
- data/ext/nokogiri/include/libxml2/libxml/HTMLtree.h +147 -0
- data/ext/nokogiri/include/libxml2/libxml/SAX.h +202 -0
- data/ext/nokogiri/include/libxml2/libxml/SAX2.h +171 -0
- data/ext/nokogiri/include/libxml2/libxml/c14n.h +115 -0
- data/ext/nokogiri/include/libxml2/libxml/catalog.h +182 -0
- data/ext/nokogiri/include/libxml2/libxml/chvalid.h +230 -0
- data/ext/nokogiri/include/libxml2/libxml/debugXML.h +217 -0
- data/ext/nokogiri/include/libxml2/libxml/dict.h +82 -0
- data/ext/nokogiri/include/libxml2/libxml/encoding.h +244 -0
- data/ext/nokogiri/include/libxml2/libxml/entities.h +166 -0
- data/ext/nokogiri/include/libxml2/libxml/globals.h +41 -0
- data/ext/nokogiri/include/libxml2/libxml/hash.h +251 -0
- data/ext/nokogiri/include/libxml2/libxml/list.h +137 -0
- data/ext/nokogiri/include/libxml2/libxml/nanoftp.h +186 -0
- data/ext/nokogiri/include/libxml2/libxml/nanohttp.h +98 -0
- data/ext/nokogiri/include/libxml2/libxml/parser.h +1390 -0
- data/ext/nokogiri/include/libxml2/libxml/parserInternals.h +671 -0
- data/ext/nokogiri/include/libxml2/libxml/pattern.h +106 -0
- data/ext/nokogiri/include/libxml2/libxml/relaxng.h +219 -0
- data/ext/nokogiri/include/libxml2/libxml/schemasInternals.h +959 -0
- data/ext/nokogiri/include/libxml2/libxml/schematron.h +143 -0
- data/ext/nokogiri/include/libxml2/libxml/threads.h +87 -0
- data/ext/nokogiri/include/libxml2/libxml/tree.h +1382 -0
- data/ext/nokogiri/include/libxml2/libxml/uri.h +106 -0
- data/ext/nokogiri/include/libxml2/libxml/valid.h +477 -0
- data/ext/nokogiri/include/libxml2/libxml/xinclude.h +136 -0
- data/ext/nokogiri/include/libxml2/libxml/xlink.h +189 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlIO.h +438 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlautomata.h +146 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlerror.h +962 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlexports.h +146 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlmemory.h +188 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlmodule.h +57 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlreader.h +436 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlregexp.h +215 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlsave.h +102 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlschemas.h +249 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlschemastypes.h +152 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlstring.h +140 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlunicode.h +366 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlversion.h +347 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlwriter.h +489 -0
- data/ext/nokogiri/include/libxml2/libxml/xpath.h +579 -0
- data/ext/nokogiri/include/libxml2/libxml/xpathInternals.h +633 -0
- data/ext/nokogiri/include/libxml2/libxml/xpointer.h +138 -0
- data/ext/nokogiri/include/libxslt/attributes.h +39 -0
- data/ext/nokogiri/include/libxslt/documents.h +93 -0
- data/ext/nokogiri/include/libxslt/extensions.h +262 -0
- data/ext/nokogiri/include/libxslt/extra.h +72 -0
- data/ext/nokogiri/include/libxslt/functions.h +78 -0
- data/ext/nokogiri/include/libxslt/imports.h +75 -0
- data/ext/nokogiri/include/libxslt/keys.h +53 -0
- data/ext/nokogiri/include/libxslt/namespaces.h +68 -0
- data/ext/nokogiri/include/libxslt/numbersInternals.h +73 -0
- data/ext/nokogiri/include/libxslt/pattern.h +84 -0
- data/ext/nokogiri/include/libxslt/preproc.h +43 -0
- data/ext/nokogiri/include/libxslt/security.h +104 -0
- data/ext/nokogiri/include/libxslt/templates.h +77 -0
- data/ext/nokogiri/include/libxslt/transform.h +207 -0
- data/ext/nokogiri/include/libxslt/variables.h +118 -0
- data/ext/nokogiri/include/libxslt/xslt.h +110 -0
- data/ext/nokogiri/include/libxslt/xsltInternals.h +1995 -0
- data/ext/nokogiri/include/libxslt/xsltconfig.h +146 -0
- data/ext/nokogiri/include/libxslt/xsltexports.h +64 -0
- data/ext/nokogiri/include/libxslt/xsltlocale.h +44 -0
- data/ext/nokogiri/include/libxslt/xsltutils.h +343 -0
- data/ext/nokogiri/libxml2_polyfill.c +114 -0
- data/ext/nokogiri/nokogiri.c +294 -0
- data/ext/nokogiri/nokogiri.h +238 -0
- data/ext/nokogiri/test_global_handlers.c +40 -0
- data/ext/nokogiri/xml_attr.c +103 -0
- data/ext/nokogiri/xml_attribute_decl.c +70 -0
- data/ext/nokogiri/xml_cdata.c +62 -0
- data/ext/nokogiri/xml_comment.c +57 -0
- data/ext/nokogiri/xml_document.c +784 -0
- data/ext/nokogiri/xml_document_fragment.c +29 -0
- data/ext/nokogiri/xml_dtd.c +208 -0
- data/ext/nokogiri/xml_element_content.c +131 -0
- data/ext/nokogiri/xml_element_decl.c +69 -0
- data/ext/nokogiri/xml_encoding_handler.c +112 -0
- data/ext/nokogiri/xml_entity_decl.c +112 -0
- data/ext/nokogiri/xml_entity_reference.c +50 -0
- data/ext/nokogiri/xml_namespace.c +181 -0
- data/ext/nokogiri/xml_node.c +2459 -0
- data/ext/nokogiri/xml_node_set.c +518 -0
- data/ext/nokogiri/xml_processing_instruction.c +54 -0
- data/ext/nokogiri/xml_reader.c +777 -0
- data/ext/nokogiri/xml_relax_ng.c +149 -0
- data/ext/nokogiri/xml_sax_parser.c +403 -0
- data/ext/nokogiri/xml_sax_parser_context.c +390 -0
- data/ext/nokogiri/xml_sax_push_parser.c +206 -0
- data/ext/nokogiri/xml_schema.c +226 -0
- data/ext/nokogiri/xml_syntax_error.c +93 -0
- data/ext/nokogiri/xml_text.c +59 -0
- data/ext/nokogiri/xml_xpath_context.c +502 -0
- data/ext/nokogiri/xslt_stylesheet.c +421 -0
- data/gumbo-parser/CHANGES.md +63 -0
- data/gumbo-parser/Makefile +129 -0
- data/gumbo-parser/THANKS +27 -0
- data/lib/nokogiri/3.1/nokogiri.so +0 -0
- data/lib/nokogiri/3.2/nokogiri.so +0 -0
- data/lib/nokogiri/3.3/nokogiri.so +0 -0
- data/lib/nokogiri/3.4/nokogiri.so +0 -0
- data/lib/nokogiri/class_resolver.rb +67 -0
- data/lib/nokogiri/css/node.rb +58 -0
- data/lib/nokogiri/css/parser.rb +772 -0
- data/lib/nokogiri/css/parser.y +277 -0
- data/lib/nokogiri/css/parser_extras.rb +36 -0
- data/lib/nokogiri/css/selector_cache.rb +38 -0
- data/lib/nokogiri/css/syntax_error.rb +9 -0
- data/lib/nokogiri/css/tokenizer.rb +155 -0
- data/lib/nokogiri/css/tokenizer.rex +57 -0
- data/lib/nokogiri/css/xpath_visitor.rb +375 -0
- data/lib/nokogiri/css.rb +132 -0
- data/lib/nokogiri/decorators/slop.rb +42 -0
- data/lib/nokogiri/encoding_handler.rb +57 -0
- data/lib/nokogiri/extension.rb +32 -0
- data/lib/nokogiri/gumbo.rb +15 -0
- data/lib/nokogiri/html.rb +48 -0
- data/lib/nokogiri/html4/builder.rb +37 -0
- data/lib/nokogiri/html4/document.rb +235 -0
- data/lib/nokogiri/html4/document_fragment.rb +166 -0
- data/lib/nokogiri/html4/element_description.rb +25 -0
- data/lib/nokogiri/html4/element_description_defaults.rb +2040 -0
- data/lib/nokogiri/html4/encoding_reader.rb +121 -0
- data/lib/nokogiri/html4/entity_lookup.rb +15 -0
- data/lib/nokogiri/html4/sax/parser.rb +48 -0
- data/lib/nokogiri/html4/sax/parser_context.rb +15 -0
- data/lib/nokogiri/html4/sax/push_parser.rb +37 -0
- data/lib/nokogiri/html4.rb +42 -0
- data/lib/nokogiri/html5/builder.rb +40 -0
- data/lib/nokogiri/html5/document.rb +199 -0
- data/lib/nokogiri/html5/document_fragment.rb +200 -0
- data/lib/nokogiri/html5/node.rb +103 -0
- data/lib/nokogiri/html5.rb +368 -0
- data/lib/nokogiri/jruby/dependencies.rb +3 -0
- data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
- data/lib/nokogiri/syntax_error.rb +6 -0
- data/lib/nokogiri/version/constant.rb +6 -0
- data/lib/nokogiri/version/info.rb +224 -0
- data/lib/nokogiri/version.rb +4 -0
- data/lib/nokogiri/xml/attr.rb +66 -0
- data/lib/nokogiri/xml/attribute_decl.rb +22 -0
- data/lib/nokogiri/xml/builder.rb +494 -0
- data/lib/nokogiri/xml/cdata.rb +13 -0
- data/lib/nokogiri/xml/character_data.rb +9 -0
- data/lib/nokogiri/xml/document.rb +514 -0
- data/lib/nokogiri/xml/document_fragment.rb +276 -0
- data/lib/nokogiri/xml/dtd.rb +34 -0
- data/lib/nokogiri/xml/element_content.rb +46 -0
- data/lib/nokogiri/xml/element_decl.rb +17 -0
- data/lib/nokogiri/xml/entity_decl.rb +23 -0
- data/lib/nokogiri/xml/entity_reference.rb +20 -0
- data/lib/nokogiri/xml/namespace.rb +57 -0
- data/lib/nokogiri/xml/node/save_options.rb +76 -0
- data/lib/nokogiri/xml/node.rb +1650 -0
- data/lib/nokogiri/xml/node_set.rb +449 -0
- data/lib/nokogiri/xml/notation.rb +19 -0
- data/lib/nokogiri/xml/parse_options.rb +213 -0
- data/lib/nokogiri/xml/pp/character_data.rb +21 -0
- data/lib/nokogiri/xml/pp/node.rb +73 -0
- data/lib/nokogiri/xml/pp.rb +4 -0
- data/lib/nokogiri/xml/processing_instruction.rb +11 -0
- data/lib/nokogiri/xml/reader.rb +139 -0
- data/lib/nokogiri/xml/relax_ng.rb +75 -0
- data/lib/nokogiri/xml/sax/document.rb +258 -0
- data/lib/nokogiri/xml/sax/parser.rb +199 -0
- data/lib/nokogiri/xml/sax/parser_context.rb +129 -0
- data/lib/nokogiri/xml/sax/push_parser.rb +64 -0
- data/lib/nokogiri/xml/sax.rb +54 -0
- data/lib/nokogiri/xml/schema.rb +140 -0
- data/lib/nokogiri/xml/searchable.rb +297 -0
- data/lib/nokogiri/xml/syntax_error.rb +94 -0
- data/lib/nokogiri/xml/text.rb +11 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +13 -0
- data/lib/nokogiri/xml/xpath.rb +21 -0
- data/lib/nokogiri/xml/xpath_context.rb +49 -0
- data/lib/nokogiri/xml.rb +65 -0
- data/lib/nokogiri/xslt/stylesheet.rb +49 -0
- data/lib/nokogiri/xslt.rb +129 -0
- data/lib/nokogiri.rb +128 -0
- data/lib/xsd/xmlparser/nokogiri.rb +105 -0
- metadata +324 -0
@@ -0,0 +1,449 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
module Nokogiri
|
5
|
+
module XML
|
6
|
+
####
|
7
|
+
# A NodeSet is an Enumerable that contains a list of Nokogiri::XML::Node objects.
|
8
|
+
#
|
9
|
+
# Typically a NodeSet is returned as a result of searching a Document via
|
10
|
+
# Nokogiri::XML::Searchable#css or Nokogiri::XML::Searchable#xpath.
|
11
|
+
#
|
12
|
+
# Note that the `#dup` and `#clone` methods perform shallow copies; these methods do not copy
|
13
|
+
# the Nodes contained in the NodeSet (similar to how Array and other Enumerable classes work).
|
14
|
+
class NodeSet
|
15
|
+
include Nokogiri::XML::Searchable
|
16
|
+
include Enumerable
|
17
|
+
|
18
|
+
# The Document this NodeSet is associated with
|
19
|
+
attr_accessor :document
|
20
|
+
|
21
|
+
# Create a NodeSet associated with +document+, initially containing the nodes in +list+ (empty by default)
|
22
|
+
def initialize(document, list = [])
  # Record the owning document, then ask it to decorate this set.
  @document = document
  document.decorate(self)

  # Members are added one at a time through #<<.
  list.each do |member|
    self << member
  end

  # Hand the populated set to the caller's block, when one was given.
  yield(self) if block_given?
end
|
28
|
+
|
29
|
+
###
|
30
|
+
# Get the first element of the NodeSet.
|
31
|
+
def first(n = nil)
  if n
    # Take at most +length+ members so we never index past the end.
    [n, length].min.times.map { |position| self[position] }
  else
    # No count given: a plain lookup of the leading member.
    self[0]
  end
end
|
38
|
+
|
39
|
+
###
|
40
|
+
# Get the last element of the NodeSet.
|
41
|
+
def last
  # Index -1 addresses the final member of the set.
  tail_index = -1
  self[tail_index]
end
|
44
|
+
|
45
|
+
###
|
46
|
+
# Is this NodeSet empty?
|
47
|
+
def empty?
  # True exactly when the set holds no members.
  length.zero?
end
|
50
|
+
|
51
|
+
###
|
52
|
+
# Returns the index of the first node in self that is == to +node+ or meets the given block. Returns nil if no match is found.
|
53
|
+
def index(node = nil)
  unless node
    # Block form: position of the first member for which the block is truthy.
    each_with_index { |member, position| return position if yield(member) } if block_given?
    return
  end

  # Value form wins; a block passed alongside a node is ignored (with a warning).
  warn("given block not used") if block_given?
  each_with_index { |member, position| return position if member == node }
  nil
end
|
62
|
+
|
63
|
+
###
|
64
|
+
# Insert +datum+ before the first Node in this NodeSet
|
65
|
+
def before(datum)
  # Delegate to the leading node's own #before.
  leading = first
  leading.before(datum)
end
|
68
|
+
|
69
|
+
###
|
70
|
+
# Insert +datum+ after the last Node in this NodeSet
|
71
|
+
def after(datum)
  # Delegate to the trailing node's own #after.
  trailing = last
  trailing.after(datum)
end
|
74
|
+
|
75
|
+
alias_method :<<, :push
|
76
|
+
alias_method :remove, :unlink
|
77
|
+
|
78
|
+
###
|
79
|
+
# call-seq: css *rules, [namespace-bindings, custom-pseudo-class]
|
80
|
+
#
|
81
|
+
# Search this node set for CSS +rules+. +rules+ must be one or more CSS
|
82
|
+
# selectors. For example:
|
83
|
+
#
|
84
|
+
# For more information see Nokogiri::XML::Searchable#css
|
85
|
+
def css(*args)
  rules, handler, ns, _binds = extract_params(args)
  paths = css_rules_to_xpath(rules, ns)

  # Query from every member in turn and union the matches into one set.
  matches = NodeSet.new(document)
  each do |node|
    matches += xpath_internal(node, paths, handler, ns, nil)
  end
  matches
end
|
93
|
+
|
94
|
+
###
|
95
|
+
# call-seq: xpath *paths, [namespace-bindings, variable-bindings, custom-handler-class]
|
96
|
+
#
|
97
|
+
# Search this node set for XPath +paths+. +paths+ must be one or more XPath
|
98
|
+
# queries.
|
99
|
+
#
|
100
|
+
# For more information see Nokogiri::XML::Searchable#xpath
|
101
|
+
def xpath(*args)
  paths, handler, ns, binds = extract_params(args)

  # Query from every member in turn and union the matches into one set.
  matches = NodeSet.new(document)
  each do |node|
    matches += xpath_internal(node, paths, handler, ns, binds)
  end
  matches
end
|
108
|
+
|
109
|
+
###
|
110
|
+
# call-seq: search *paths, [namespace-bindings, xpath-variable-bindings, custom-handler-class]
|
111
|
+
#
|
112
|
+
# Search this object for +paths+, and return only the first
|
113
|
+
# result. +paths+ must be one or more XPath or CSS queries.
|
114
|
+
#
|
115
|
+
# See Searchable#search for more information.
|
116
|
+
#
|
117
|
+
# Or, if passed an integer, index into the NodeSet:
|
118
|
+
#
|
119
|
+
# node_set.at(3) # same as node_set[3]
|
120
|
+
#
|
121
|
+
def at(*args)
  # A lone numeric argument is a position in the set, not a query.
  return self[args.first] if args.length == 1 && args.first.is_a?(Numeric)

  # Anything else is handled by the inherited search implementation.
  super
end
|
128
|
+
alias_method :%, :at
|
129
|
+
|
130
|
+
###
|
131
|
+
# Filter this list for nodes that match +expr+
|
132
|
+
def filter(expr)
  # Keep only the members whose #matches? accepts +expr+.
  select { |candidate| candidate.matches?(expr) }
end
|
135
|
+
|
136
|
+
###
|
137
|
+
# Add the class attribute +name+ to all Node objects in the
|
138
|
+
# NodeSet.
|
139
|
+
#
|
140
|
+
# See Nokogiri::XML::Node#add_class for more information.
|
141
|
+
def add_class(name)
  # Forward to every member, then return the set itself for chaining.
  each { |element| element.add_class(name) }
  self
end
|
147
|
+
|
148
|
+
###
|
149
|
+
# Append the class attribute +name+ to all Node objects in the
|
150
|
+
# NodeSet.
|
151
|
+
#
|
152
|
+
# See Nokogiri::XML::Node#append_class for more information.
|
153
|
+
def append_class(name)
  # Forward to every member, then return the set itself for chaining.
  each { |element| element.append_class(name) }
  self
end
|
159
|
+
|
160
|
+
###
|
161
|
+
# Remove the class attribute +name+ from all Node objects in the
|
162
|
+
# NodeSet.
|
163
|
+
#
|
164
|
+
# See Nokogiri::XML::Node#remove_class for more information.
|
165
|
+
def remove_class(name = nil)
  # Forward to every member (nil is passed through when no name is given),
  # then return the set itself for chaining.
  each { |element| element.remove_class(name) }
  self
end
|
171
|
+
|
172
|
+
###
|
173
|
+
# Set attributes on each Node in the NodeSet, or get an
|
174
|
+
# attribute from the first Node in the NodeSet.
|
175
|
+
#
|
176
|
+
# To get an attribute from the first Node in a NodeSet:
|
177
|
+
#
|
178
|
+
# node_set.attr("href") # => "https://www.nokogiri.org"
|
179
|
+
#
|
180
|
+
# Note that an empty NodeSet will return nil when +#attr+ is called as a getter.
|
181
|
+
#
|
182
|
+
# To set an attribute on each node, +key+ can either be an
|
183
|
+
# attribute name, or a Hash of attribute names and values. When
|
184
|
+
# called as a setter, +#attr+ returns the NodeSet.
|
185
|
+
#
|
186
|
+
# If +key+ is an attribute name, then either +value+ or +block+
|
187
|
+
# must be passed.
|
188
|
+
#
|
189
|
+
# If +key+ is a Hash then attributes will be set for each
|
190
|
+
# key/value pair:
|
191
|
+
#
|
192
|
+
# node_set.attr("href" => "https://www.nokogiri.org", "class" => "member")
|
193
|
+
#
|
194
|
+
# If +value+ is passed, it will be used as the attribute value
|
195
|
+
# for all nodes:
|
196
|
+
#
|
197
|
+
# node_set.attr("href", "https://www.nokogiri.org")
|
198
|
+
#
|
199
|
+
# If +block+ is passed, it will be called on each Node object in
|
200
|
+
# the NodeSet and the return value used as the attribute value
|
201
|
+
# for that node:
|
202
|
+
#
|
203
|
+
# node_set.attr("class") { |node| node.name }
|
204
|
+
#
|
205
|
+
def attr(key, value = nil, &block)
  setter_call = key.is_a?(Hash) || (key && (value || block))

  # Getter form: read from the leading node; an empty set has none, hence nil.
  return first&.attribute(key) unless setter_call

  # Normalise the name/value form into the same shape as the Hash form.
  assignments = key.is_a?(Hash) ? key : { key => value }

  assignments.each do |attr_name, attr_value|
    each do |node|
      # A nil/false value defers to the block, evaluated once per node.
      node[attr_name] = attr_value || yield(node)
    end
  end

  self
end
|
220
|
+
alias_method :set, :attr
|
221
|
+
alias_method :attribute, :attr
|
222
|
+
|
223
|
+
###
|
224
|
+
# Remove the attribute named +name+ from all Node objects in the NodeSet
|
225
|
+
def remove_attr(name)
  # Each member drops the attribute via its own #delete; the set is returned for chaining.
  each do |element|
    element.delete(name)
  end
  self
end
|
229
|
+
alias_method :remove_attribute, :remove_attr
|
230
|
+
|
231
|
+
###
|
232
|
+
# Iterate over each node, yielding to +block+
|
233
|
+
def each
  return to_enum unless block_given?

  # The length is read once up front; members are then fetched by position.
  length.times { |position| yield(self[position]) }
  self
end
|
241
|
+
|
242
|
+
###
|
243
|
+
# Get the inner text of all contained Node objects
|
244
|
+
#
|
245
|
+
# Note: This joins the text of all Node objects in the NodeSet:
|
246
|
+
#
|
247
|
+
# doc = Nokogiri::XML('<xml><a><d>foo</d><d>bar</d></a></xml>')
|
248
|
+
# doc.css('d').text # => "foobar"
|
249
|
+
#
|
250
|
+
# Instead, if you want to return the text of all nodes in the NodeSet:
|
251
|
+
#
|
252
|
+
# doc.css('d').map(&:text) # => ["foo", "bar"]
|
253
|
+
#
|
254
|
+
# See Nokogiri::XML::Node#content for more information.
|
255
|
+
def inner_text
  # Concatenate every member's text with no separator.
  map { |node| node.inner_text }.join("")
end
|
258
|
+
alias_method :text, :inner_text
|
259
|
+
|
260
|
+
###
|
261
|
+
# Get the inner html of all contained Node objects
|
262
|
+
def inner_html(*args)
  # Every member renders with the same arguments; the pieces are joined with no separator.
  map { |node| node.inner_html(*args) }.join("")
end
|
265
|
+
|
266
|
+
# :call-seq:
|
267
|
+
# wrap(markup) -> self
|
268
|
+
# wrap(node) -> self
|
269
|
+
#
|
270
|
+
# Wrap each member of this NodeSet with the node parsed from +markup+ or a dup of the +node+.
|
271
|
+
#
|
272
|
+
# [Parameters]
|
273
|
+
# - *markup* (String)
|
274
|
+
# Markup that is parsed, once per member of the NodeSet, and used as the wrapper. Each
|
275
|
+
# node's parent, if it exists, is used as the context node for parsing; otherwise the
|
276
|
+
# associated document is used. If the parsed fragment has multiple roots, the first root
|
277
|
+
# node is used as the wrapper.
|
278
|
+
# - *node* (Nokogiri::XML::Node)
|
279
|
+
# An element that is `#dup`ed and used as the wrapper.
|
280
|
+
#
|
281
|
+
# [Returns] +self+, to support chaining.
|
282
|
+
#
|
283
|
+
# ⚠ Note that if a +String+ is passed, the markup will be parsed <b>once per node</b> in the
|
284
|
+
# NodeSet. You can avoid this overhead in cases where you know exactly the wrapper you wish to
|
285
|
+
# use by passing a +Node+ instead.
|
286
|
+
#
|
287
|
+
# Also see Node#wrap
|
288
|
+
#
|
289
|
+
# *Example* with a +String+ argument:
|
290
|
+
#
|
291
|
+
# doc = Nokogiri::HTML5(<<~HTML)
|
292
|
+
# <html><body>
|
293
|
+
# <a>a</a>
|
294
|
+
# <a>b</a>
|
295
|
+
# <a>c</a>
|
296
|
+
# <a>d</a>
|
297
|
+
# </body></html>
|
298
|
+
# HTML
|
299
|
+
# doc.css("a").wrap("<div></div>")
|
300
|
+
# doc.to_html
|
301
|
+
# # => <html><head></head><body>
|
302
|
+
# # <div><a>a</a></div>
|
303
|
+
# # <div><a>b</a></div>
|
304
|
+
# # <div><a>c</a></div>
|
305
|
+
# # <div><a>d</a></div>
|
306
|
+
# # </body></html>
|
307
|
+
#
|
308
|
+
# *Example* with a +Node+ argument
|
309
|
+
#
|
310
|
+
# 💡 Note that this is faster than the equivalent call passing a +String+ because it avoids
|
311
|
+
# having to reparse the wrapper markup for each node.
|
312
|
+
#
|
313
|
+
# doc = Nokogiri::HTML5(<<~HTML)
|
314
|
+
# <html><body>
|
315
|
+
# <a>a</a>
|
316
|
+
# <a>b</a>
|
317
|
+
# <a>c</a>
|
318
|
+
# <a>d</a>
|
319
|
+
# </body></html>
|
320
|
+
# HTML
|
321
|
+
# doc.css("a").wrap(doc.create_element("div"))
|
322
|
+
# doc.to_html
|
323
|
+
# # => <html><head></head><body>
|
324
|
+
# # <div><a>a</a></div>
|
325
|
+
# # <div><a>b</a></div>
|
326
|
+
# # <div><a>c</a></div>
|
327
|
+
# # <div><a>d</a></div>
|
328
|
+
# # </body></html>
|
329
|
+
#
|
330
|
+
def wrap(node_or_tags)
  # Wrapping is done purely for its side effect on each member, so iterate with
  # #each rather than #map (which would build and discard an array of results).
  each { |node| node.wrap(node_or_tags) }

  # Return the set itself to support chaining.
  self
end
|
334
|
+
|
335
|
+
###
|
336
|
+
# Convert this NodeSet to a string.
|
337
|
+
def to_s
  # Stringify every member and concatenate the results.
  map { |node| node.to_s }.join
end
|
340
|
+
|
341
|
+
###
|
342
|
+
# Convert this NodeSet to HTML
|
343
|
+
def to_html(*args)
  if Nokogiri.jruby?
    # On JRuby an options Hash is always passed first, with :save_with defaulted.
    # Work on a copy so the caller's Hash is never mutated (and a frozen Hash is accepted).
    options = args.first.is_a?(Hash) ? args.shift.dup : {}
    options[:save_with] ||= Node::SaveOptions::DEFAULT_HTML
    args.insert(0, options)
  end
  if empty?
    # Nothing to serialize: return an empty string in the requested encoding,
    # falling back to the document's encoding when none was requested.
    encoding = (args.first.is_a?(Hash) ? args.first[:encoding] : nil)
    encoding ||= document.encoding
    encoding.nil? ? "" : "".encode(encoding)
  else
    map { |x| x.to_html(*args) }.join
  end
end
|
357
|
+
|
358
|
+
###
|
359
|
+
# Convert this NodeSet to XHTML
|
360
|
+
def to_xhtml(*args)
  # Every member serializes with the same arguments; the pieces are concatenated.
  fragments = map { |node| node.to_xhtml(*args) }
  fragments.join
end
|
363
|
+
|
364
|
+
###
|
365
|
+
# Convert this NodeSet to XML
|
366
|
+
def to_xml(*args)
  # Every member serializes with the same arguments; the pieces are concatenated.
  fragments = map { |node| node.to_xml(*args) }
  fragments.join
end
|
369
|
+
|
370
|
+
alias_method :size, :length
|
371
|
+
alias_method :to_ary, :to_a
|
372
|
+
|
373
|
+
###
|
374
|
+
# Removes the last element from set and returns it, or +nil+ if
|
375
|
+
# the set is empty
|
376
|
+
def pop
  # An empty set has nothing to remove; otherwise delete the trailing member
  # and return whatever #delete returns.
  length.zero? ? nil : delete(last)
end
|
381
|
+
|
382
|
+
###
|
383
|
+
# Returns the first element of the NodeSet and removes it. Returns
|
384
|
+
# +nil+ if the set is empty.
|
385
|
+
def shift
  # An empty set has nothing to remove; otherwise delete the leading member
  # and return whatever #delete returns.
  length.zero? ? nil : delete(first)
end
|
390
|
+
|
391
|
+
###
|
392
|
+
# Equality -- Two NodeSets are equal if they contain the same number
|
393
|
+
# of elements and if each element is equal to the corresponding
|
394
|
+
# element in the other NodeSet
|
395
|
+
def ==(other)
|
396
|
+
return false unless other.is_a?(Nokogiri::XML::NodeSet)
|
397
|
+
return false unless length == other.length
|
398
|
+
|
399
|
+
each_with_index do |node, i|
|
400
|
+
return false unless node == other[i]
|
401
|
+
end
|
402
|
+
true
|
403
|
+
end
|
404
|
+
|
405
|
+
###
|
406
|
+
# Returns a new NodeSet containing all the children of all the nodes in
|
407
|
+
# the NodeSet
|
408
|
+
def children
  # Collect every member's children, in member order, into a fresh set
  # tied to the same document.
  each_with_object(NodeSet.new(document)) do |node, collected|
    node.children.each { |child| collected.push(child) }
  end
end
|
415
|
+
|
416
|
+
###
|
417
|
+
# Returns a new NodeSet containing all the nodes in the NodeSet
|
418
|
+
# in reverse order
|
419
|
+
def reverse
  # Build a fresh set on the same document, filled from the last member to the first.
  flipped = NodeSet.new(document)
  reverse_each { |node| flipped.push(node) }
  flipped
end
|
426
|
+
|
427
|
+
###
|
428
|
+
# Return a nicely formatted string representation
|
429
|
+
def inspect
  # Array-like rendering: each member's own #inspect, comma-separated, in brackets.
  rendered = map { |node| node.inspect }
  "[#{rendered.join(", ")}]"
end
|
432
|
+
|
433
|
+
alias_method :+, :|
|
434
|
+
|
435
|
+
#
|
436
|
+
# :call-seq: deconstruct() → Array
|
437
|
+
#
|
438
|
+
# Returns the members of this NodeSet as an array, to use in pattern matching.
|
439
|
+
#
|
440
|
+
# Since v1.14.0
|
441
|
+
#
|
442
|
+
def deconstruct
  # Array patterns match against the members exactly as #to_a returns them.
  members = to_a
  members
end
|
445
|
+
|
446
|
+
IMPLIED_XPATH_CONTEXTS = [".//", "self::"].freeze # :nodoc:
|
447
|
+
end
|
448
|
+
end
|
449
|
+
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Nokogiri
|
4
|
+
module XML
|
5
|
+
# Struct representing an {XML Schema Notation}[https://www.w3.org/TR/xml/#Notations]
|
6
|
+
class Notation < Struct.new(:name, :public_id, :system_id)
|
7
|
+
# dead comment to ensure rdoc processing
#
# Value object with three Struct members: +name+, +public_id+ and +system_id+.
|
8
|
+
|
9
|
+
# :attr: name (String)
|
10
|
+
# The name of the notation.
|
11
|
+
|
12
|
+
# :attr: public_id (String)
|
13
|
+
# The URI corresponding to the public identifier.
|
14
|
+
|
15
|
+
# :attr: system_id (String,nil)
|
16
|
+
# The URI corresponding to the system identifier; may be +nil+.
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
@@ -0,0 +1,213 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
module Nokogiri
|
5
|
+
module XML
|
6
|
+
# Options that control the parsing behavior for XML::Document, XML::DocumentFragment,
|
7
|
+
# HTML4::Document, HTML4::DocumentFragment, XSLT::Stylesheet, and XML::Schema.
|
8
|
+
#
|
9
|
+
# These options directly expose libxml2's parse options, which are all boolean in the sense that
|
10
|
+
# an option is "on" or "off".
|
11
|
+
#
|
12
|
+
# 💡 Note that HTML5 parsing has a separate, orthogonal set of options due to the nature of the
|
13
|
+
# HTML5 specification. See Nokogiri::HTML5.
|
14
|
+
#
|
15
|
+
# ⚠ Not all parse options are supported on JRuby. Nokogiri will attempt to invoke the equivalent
|
16
|
+
# behavior in Xerces/NekoHTML on JRuby when it's possible.
|
17
|
+
#
|
18
|
+
# == Setting and unsetting parse options
|
19
|
+
#
|
20
|
+
# You can build your own combinations of parse options by using any of the following methods:
|
21
|
+
#
|
22
|
+
# [ParseOptions method chaining]
|
23
|
+
#
|
24
|
+
# Every option has an equivalent method in lowercase. You can chain these methods together to
|
25
|
+
# set various combinations.
|
26
|
+
#
|
27
|
+
# # Set the HUGE & PEDANTIC options
|
28
|
+
# po = Nokogiri::XML::ParseOptions.new.huge.pedantic
|
29
|
+
# doc = Nokogiri::XML::Document.parse(xml, nil, nil, po)
|
30
|
+
#
|
31
|
+
# Every option has an equivalent <code>no{option}</code> method in lowercase. You can call these
|
32
|
+
# methods on an instance of ParseOptions to unset the option.
|
33
|
+
#
|
34
|
+
# # Set the HUGE & PEDANTIC options
|
35
|
+
# po = Nokogiri::XML::ParseOptions.new.huge.pedantic
|
36
|
+
#
|
37
|
+
# # later we want to modify the options
|
38
|
+
# po.nohuge # Unset the HUGE option
|
39
|
+
# po.nopedantic # Unset the PEDANTIC option
|
40
|
+
#
|
41
|
+
# 💡 Note that some options begin with "no" leading to the logical but perhaps unintuitive
|
42
|
+
# double negative:
|
43
|
+
#
|
44
|
+
# po.nocdata # Set the NOCDATA parse option
|
45
|
+
# po.nonocdata # Unset the NOCDATA parse option
|
46
|
+
#
|
47
|
+
# 💡 Note that negation is not available for STRICT, which is itself a negation of all other
|
48
|
+
# features.
|
49
|
+
#
|
50
|
+
#
|
51
|
+
# [Using Ruby Blocks]
|
52
|
+
#
|
53
|
+
# Most parsing methods will accept a block for configuration of parse options, and we
|
54
|
+
# recommend chaining the setter methods:
|
55
|
+
#
|
56
|
+
# doc = Nokogiri::XML::Document.parse(xml) { |config| config.huge.pedantic }
|
57
|
+
#
|
58
|
+
#
|
59
|
+
# [ParseOptions constants]
|
60
|
+
#
|
61
|
+
# You can also use the constants declared under Nokogiri::XML::ParseOptions to set various
|
62
|
+
# combinations. They are bits in a bitmask, and so can be combined with bitwise operators:
|
63
|
+
#
|
64
|
+
# po = Nokogiri::XML::ParseOptions.new(Nokogiri::XML::ParseOptions::HUGE | Nokogiri::XML::ParseOptions::PEDANTIC)
|
65
|
+
# doc = Nokogiri::XML::Document.parse(xml, nil, nil, po)
|
66
|
+
#
|
67
|
+
class ParseOptions
  # Strict parsing: the empty mask, i.e. no option bit (in particular not RECOVER) is set.
  STRICT = 0

  # Recover from errors. On by default for XML::Document, XML::DocumentFragment,
  # HTML4::Document, HTML4::DocumentFragment, and XSLT::Stylesheet.
  RECOVER = 1 << 0

  # Substitute entities. Off by default, except for XSLT::Stylesheet.
  #
  # ⚠ This option enables entity substitution, contrary to what the name implies.
  #
  # ⚠ <b>It is UNSAFE to set this option</b> when parsing untrusted documents.
  NOENT = 1 << 1

  # Load external subsets. On by default for XSLT::Stylesheet.
  #
  # ⚠ <b>It is UNSAFE to set this option</b> when parsing untrusted documents.
  DTDLOAD = 1 << 2

  # Default DTD attributes. On by default for XSLT::Stylesheet.
  DTDATTR = 1 << 3

  # Validate with the DTD. Off by default.
  DTDVALID = 1 << 4

  # Suppress error reports. On by default for HTML4::Document and HTML4::DocumentFragment.
  NOERROR = 1 << 5

  # Suppress warning reports. On by default for HTML4::Document and HTML4::DocumentFragment.
  NOWARNING = 1 << 6

  # Enable pedantic error reporting. Off by default.
  PEDANTIC = 1 << 7

  # Remove blank nodes. Off by default.
  NOBLANKS = 1 << 8

  # Use the SAX1 interface internally. Off by default.
  SAX1 = 1 << 9

  # Implement XInclude substitution. Off by default.
  XINCLUDE = 1 << 10

  # Forbid network access. On by default for XML::Document, XML::DocumentFragment,
  # HTML4::Document, HTML4::DocumentFragment, XSLT::Stylesheet, and XML::Schema.
  #
  # ⚠ <b>It is UNSAFE to unset this option</b> when parsing untrusted documents.
  NONET = 1 << 11

  # Do not reuse the context dictionary. Off by default.
  NODICT = 1 << 12

  # Remove redundant namespaces declarations. Off by default.
  NSCLEAN = 1 << 13

  # Merge CDATA as text nodes. On by default for XSLT::Stylesheet.
  NOCDATA = 1 << 14

  # Do not generate XInclude START/END nodes. Off by default.
  NOXINCNODE = 1 << 15

  # Compact small text nodes. Off by default.
  #
  # ⚠ No modification of the DOM tree is allowed after parsing. libxml2 may crash if you try to
  # modify the tree.
  COMPACT = 1 << 16

  # Parse using XML-1.0 before update 5. Off by default.
  OLD10 = 1 << 17

  # Do not fixup XInclude xml:base uris. Off by default.
  NOBASEFIX = 1 << 18

  # Relax any hardcoded limit from the parser. Off by default.
  #
  # ⚠ <b>It is UNSAFE to set this option</b> when parsing untrusted documents.
  HUGE = 1 << 19

  # Support line numbers up to <code>long int</code> (default is a <code>short int</code>). On
  # by default for XML::Document, XML::DocumentFragment, HTML4::Document,
  # HTML4::DocumentFragment, XSLT::Stylesheet, and XML::Schema.
  BIG_LINES = 1 << 22

  # The options mask used by default for parsing XML::Document and XML::DocumentFragment
  DEFAULT_XML = RECOVER | NONET | BIG_LINES

  # The options mask used by default for parsing XSLT::Stylesheet
  DEFAULT_XSLT = RECOVER | NONET | NOENT | DTDLOAD | DTDATTR | NOCDATA | BIG_LINES

  # The options mask used by default for parsing HTML4::Document and HTML4::DocumentFragment
  DEFAULT_HTML = RECOVER | NOERROR | NOWARNING | NONET | BIG_LINES

  # The options mask used by default for parsing XML::Schema
  DEFAULT_SCHEMA = NONET | BIG_LINES

  # The current options bitmask (Integer).
  attr_accessor :options

  # Create a ParseOptions wrapping the bitmask +options+ (defaults to STRICT, the empty mask).
  def initialize(options = STRICT)
    @options = options
  end

  # For every constant defined above except STRICT (this includes the DEFAULT_* masks),
  # generate three instance methods named after the lowercased constant:
  #
  #   huge    # set the bit(s), returns self for chaining
  #   nohuge  # clear the bit(s), returns self for chaining
  #   huge?   # true when every bit of the constant is set
  constants.each do |constant|
    next if constant.to_sym == :STRICT

    class_eval <<~RUBY, __FILE__, __LINE__ + 1
      def #{constant.downcase}
        @options |= #{constant}
        self
      end

      def no#{constant.downcase}
        @options &= ~#{constant}
        self
      end

      def #{constant.downcase}?
        #{constant} & @options == #{constant}
      end
    RUBY
  end

  # Switch to strict parsing by clearing the RECOVER bit; other bits are left alone.
  # Returns self for chaining.
  def strict
    @options &= ~RECOVER
    self
  end

  # True when the RECOVER bit is not set.
  def strict?
    @options & RECOVER == STRICT
  end

  # Two option sets are equal when their integer bitmasks are equal. +other+ may be a
  # ParseOptions or anything else that responds to +to_i+ (e.g. an Integer mask); operands
  # that cannot be converted compare as unequal instead of raising NoMethodError.
  def ==(other)
    other.respond_to?(:to_i) && other.to_i == to_i
  end

  alias_method :to_i, :options

  # The default +inspect+ output with the names of all currently-satisfied option
  # predicates appended before the closing ">".
  def inspect
    enabled = self.class.constants.select { |name| send(:"#{name.downcase}?") }.map(&:downcase)
    # \z anchors on the true end of the string, not merely the end of a line.
    super.sub(/>\z/, " " + enabled.join(", ") + ">")
  end
end
|
212
|
+
end
|
213
|
+
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Nokogiri
|
4
|
+
module XML
|
5
|
+
# :nodoc: all
|
6
|
+
module PP
|
7
|
+
module CharacterData
  # Pretty-print as <code>#(ShortClassName "text")</code>, where the short name is the
  # last "::"-separated segment of the class name.
  def pretty_print(pp)
    # Anonymous classes have no name (nil); fall back to the class's string form
    # rather than raising NoMethodError on nil.
    class_label = self.class.name || self.class.to_s
    nice_name = class_label.split("::").last
    pp.group(2, "#(#{nice_name} ", ")") do
      pp.pp(text)
    end
  end

  # Render as <code>#<ClassName:0xOBJECT_ID "text"></code>, with the object id in hex.
  def inspect
    "#<#{self.class.name}:#{format("0x%x", object_id)} #{text.inspect}>"
  end
end
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|