nokogiri 1.12.5 → 1.14.3
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +41 -0
- data/LICENSE-DEPENDENCIES.md +830 -509
- data/LICENSE.md +1 -1
- data/README.md +23 -14
- data/bin/nokogiri +63 -50
- data/dependencies.yml +33 -66
- data/ext/nokogiri/extconf.rb +159 -63
- data/ext/nokogiri/gumbo.c +21 -11
- data/ext/nokogiri/html4_document.c +2 -2
- data/ext/nokogiri/html4_element_description.c +1 -1
- data/ext/nokogiri/html4_entity_lookup.c +2 -2
- data/ext/nokogiri/html4_sax_parser_context.c +3 -9
- data/ext/nokogiri/html4_sax_push_parser.c +1 -1
- data/ext/nokogiri/nokogiri.c +38 -51
- data/ext/nokogiri/nokogiri.h +26 -14
- data/ext/nokogiri/test_global_handlers.c +1 -1
- data/ext/nokogiri/xml_attr.c +3 -3
- data/ext/nokogiri/xml_attribute_decl.c +5 -5
- data/ext/nokogiri/xml_cdata.c +3 -3
- data/ext/nokogiri/xml_comment.c +1 -1
- data/ext/nokogiri/xml_document.c +53 -44
- data/ext/nokogiri/xml_document_fragment.c +1 -3
- data/ext/nokogiri/xml_dtd.c +11 -11
- data/ext/nokogiri/xml_element_content.c +3 -3
- data/ext/nokogiri/xml_element_decl.c +5 -5
- data/ext/nokogiri/xml_encoding_handler.c +28 -14
- data/ext/nokogiri/xml_entity_decl.c +6 -6
- data/ext/nokogiri/xml_entity_reference.c +1 -1
- data/ext/nokogiri/xml_namespace.c +80 -14
- data/ext/nokogiri/xml_node.c +982 -396
- data/ext/nokogiri/xml_node_set.c +4 -6
- data/ext/nokogiri/xml_processing_instruction.c +1 -1
- data/ext/nokogiri/xml_reader.c +133 -32
- data/ext/nokogiri/xml_relax_ng.c +1 -3
- data/ext/nokogiri/xml_sax_parser.c +23 -17
- data/ext/nokogiri/xml_sax_parser_context.c +11 -9
- data/ext/nokogiri/xml_sax_push_parser.c +1 -3
- data/ext/nokogiri/xml_schema.c +4 -6
- data/ext/nokogiri/xml_syntax_error.c +1 -1
- data/ext/nokogiri/xml_text.c +2 -2
- data/ext/nokogiri/xml_xpath_context.c +144 -114
- data/ext/nokogiri/xslt_stylesheet.c +122 -23
- data/gumbo-parser/Makefile +10 -0
- data/gumbo-parser/src/attribute.h +1 -1
- data/gumbo-parser/src/error.c +2 -2
- data/gumbo-parser/src/error.h +1 -1
- data/gumbo-parser/src/foreign_attrs.c +2 -2
- data/gumbo-parser/src/{gumbo.h → nokogiri_gumbo.h} +1 -0
- data/gumbo-parser/src/parser.c +8 -16
- data/gumbo-parser/src/replacement.h +1 -1
- data/gumbo-parser/src/string_buffer.h +1 -1
- data/gumbo-parser/src/string_piece.c +1 -1
- data/gumbo-parser/src/svg_attrs.c +2 -2
- data/gumbo-parser/src/svg_tags.c +2 -2
- data/gumbo-parser/src/tag.c +2 -1
- data/gumbo-parser/src/tag_lookup.c +7 -7
- data/gumbo-parser/src/tag_lookup.gperf +1 -0
- data/gumbo-parser/src/tag_lookup.h +1 -1
- data/gumbo-parser/src/token_buffer.h +1 -1
- data/gumbo-parser/src/tokenizer.c +1 -1
- data/gumbo-parser/src/tokenizer.h +1 -1
- data/gumbo-parser/src/utf8.c +1 -1
- data/gumbo-parser/src/utf8.h +1 -1
- data/gumbo-parser/src/util.c +1 -3
- data/gumbo-parser/src/util.h +4 -0
- data/gumbo-parser/src/vector.h +1 -1
- data/lib/nokogiri/class_resolver.rb +67 -0
- data/lib/nokogiri/css/node.rb +9 -8
- data/lib/nokogiri/css/parser.rb +360 -341
- data/lib/nokogiri/css/parser.y +249 -244
- data/lib/nokogiri/css/parser_extras.rb +22 -20
- data/lib/nokogiri/css/syntax_error.rb +1 -0
- data/lib/nokogiri/css/tokenizer.rb +4 -3
- data/lib/nokogiri/css/tokenizer.rex +3 -2
- data/lib/nokogiri/css/xpath_visitor.rb +184 -85
- data/lib/nokogiri/css.rb +44 -6
- data/lib/nokogiri/decorators/slop.rb +8 -7
- data/lib/nokogiri/encoding_handler.rb +57 -0
- data/lib/nokogiri/extension.rb +4 -3
- data/lib/nokogiri/gumbo.rb +1 -0
- data/lib/nokogiri/html.rb +16 -10
- data/lib/nokogiri/html4/builder.rb +1 -0
- data/lib/nokogiri/html4/document.rb +56 -164
- data/lib/nokogiri/html4/document_fragment.rb +11 -7
- data/lib/nokogiri/html4/element_description.rb +1 -0
- data/lib/nokogiri/html4/element_description_defaults.rb +432 -532
- data/lib/nokogiri/html4/encoding_reader.rb +121 -0
- data/lib/nokogiri/html4/entity_lookup.rb +2 -1
- data/lib/nokogiri/html4/sax/parser.rb +5 -2
- data/lib/nokogiri/html4/sax/parser_context.rb +1 -0
- data/lib/nokogiri/html4/sax/push_parser.rb +7 -7
- data/lib/nokogiri/html4.rb +12 -5
- data/lib/nokogiri/html5/document.rb +126 -32
- data/lib/nokogiri/html5/document_fragment.rb +14 -4
- data/lib/nokogiri/html5/node.rb +12 -7
- data/lib/nokogiri/html5.rb +138 -222
- data/lib/nokogiri/jruby/dependencies.rb +2 -19
- data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
- data/lib/nokogiri/syntax_error.rb +1 -0
- data/lib/nokogiri/version/constant.rb +2 -1
- data/lib/nokogiri/version/info.rb +32 -24
- data/lib/nokogiri/version.rb +1 -0
- data/lib/nokogiri/xml/attr.rb +54 -3
- data/lib/nokogiri/xml/attribute_decl.rb +2 -1
- data/lib/nokogiri/xml/builder.rb +35 -33
- data/lib/nokogiri/xml/cdata.rb +2 -1
- data/lib/nokogiri/xml/character_data.rb +1 -0
- data/lib/nokogiri/xml/document.rb +232 -143
- data/lib/nokogiri/xml/document_fragment.rb +88 -42
- data/lib/nokogiri/xml/dtd.rb +3 -2
- data/lib/nokogiri/xml/element_content.rb +1 -0
- data/lib/nokogiri/xml/element_decl.rb +2 -1
- data/lib/nokogiri/xml/entity_decl.rb +3 -2
- data/lib/nokogiri/xml/entity_reference.rb +1 -0
- data/lib/nokogiri/xml/namespace.rb +44 -0
- data/lib/nokogiri/xml/node/save_options.rb +14 -8
- data/lib/nokogiri/xml/node.rb +708 -383
- data/lib/nokogiri/xml/node_set.rb +134 -59
- data/lib/nokogiri/xml/notation.rb +12 -0
- data/lib/nokogiri/xml/parse_options.rb +140 -56
- data/lib/nokogiri/xml/pp/character_data.rb +8 -6
- data/lib/nokogiri/xml/pp/node.rb +26 -26
- data/lib/nokogiri/xml/pp.rb +1 -0
- data/lib/nokogiri/xml/processing_instruction.rb +3 -1
- data/lib/nokogiri/xml/reader.rb +20 -24
- data/lib/nokogiri/xml/relax_ng.rb +1 -0
- data/lib/nokogiri/xml/sax/document.rb +20 -19
- data/lib/nokogiri/xml/sax/parser.rb +38 -36
- data/lib/nokogiri/xml/sax/parser_context.rb +7 -3
- data/lib/nokogiri/xml/sax/push_parser.rb +5 -5
- data/lib/nokogiri/xml/sax.rb +1 -0
- data/lib/nokogiri/xml/schema.rb +7 -6
- data/lib/nokogiri/xml/searchable.rb +93 -62
- data/lib/nokogiri/xml/syntax_error.rb +5 -4
- data/lib/nokogiri/xml/text.rb +1 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +2 -1
- data/lib/nokogiri/xml/xpath.rb +12 -0
- data/lib/nokogiri/xml/xpath_context.rb +2 -3
- data/lib/nokogiri/xml.rb +4 -3
- data/lib/nokogiri/xslt/stylesheet.rb +1 -0
- data/lib/nokogiri/xslt.rb +21 -13
- data/lib/nokogiri.rb +22 -27
- data/lib/xsd/xmlparser/nokogiri.rb +28 -25
- data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
- data/patches/libxslt/0001-update-automake-files-for-arm64.patch +2445 -1919
- data/ports/archives/libxml2-2.10.4.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.37.tar.xz +0 -0
- metadata +20 -171
- data/patches/libxml2/0004-use-glibc-strlen.patch +0 -53
- data/patches/libxml2/0005-avoid-isnan-isinf.patch +0 -81
- data/patches/libxml2/0006-update-automake-files-for-arm64.patch +0 -2511
- data/patches/libxml2/0007-Fix-XPath-recursion-limit.patch +0 -31
- data/patches/libxslt/0002-Fix-xml2-config-check-in-configure-script.patch +0 -19
- data/ports/archives/libxml2-2.9.12.tar.gz +0 -0
- data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
@@ -1,4 +1,6 @@
|
|
1
|
+
# coding: utf-8
|
1
2
|
# frozen_string_literal: true
|
3
|
+
|
2
4
|
module Nokogiri
|
3
5
|
module XML
|
4
6
|
####
|
@@ -12,10 +14,10 @@ module Nokogiri
|
|
12
14
|
# The Document this NodeSet is associated with
|
13
15
|
attr_accessor :document
|
14
16
|
|
15
|
-
|
17
|
+
alias_method :clone, :dup
|
16
18
|
|
17
19
|
# Create a NodeSet associated with +document+, populated with the nodes in +list+ (empty by default)
|
18
|
-
def initialize
|
20
|
+
def initialize(document, list = [])
|
19
21
|
@document = document
|
20
22
|
document.decorate(self)
|
21
23
|
list.each { |x| self << x }
|
@@ -24,8 +26,9 @@ module Nokogiri
|
|
24
26
|
|
25
27
|
###
|
26
28
|
# Get the first element of the NodeSet.
|
27
|
-
def first
|
29
|
+
def first(n = nil)
|
28
30
|
return self[0] unless n
|
31
|
+
|
29
32
|
list = []
|
30
33
|
[n, length].min.times { |i| list << self[i] }
|
31
34
|
list
|
@@ -47,7 +50,7 @@ module Nokogiri
|
|
47
50
|
# Returns the index of the first node in self that is == to +node+ or meets the given block. Returns nil if no match is found.
|
48
51
|
def index(node = nil)
|
49
52
|
if node
|
50
|
-
warn
|
53
|
+
warn("given block not used") if block_given?
|
51
54
|
each_with_index { |member, j| return j if member == node }
|
52
55
|
elsif block_given?
|
53
56
|
each_with_index { |member, j| return j if yield(member) }
|
@@ -57,18 +60,18 @@ module Nokogiri
|
|
57
60
|
|
58
61
|
###
|
59
62
|
# Insert +datum+ before the first Node in this NodeSet
|
60
|
-
def before
|
61
|
-
first.before
|
63
|
+
def before(datum)
|
64
|
+
first.before(datum)
|
62
65
|
end
|
63
66
|
|
64
67
|
###
|
65
68
|
# Insert +datum+ after the last Node in this NodeSet
|
66
|
-
def after
|
67
|
-
last.after
|
69
|
+
def after(datum)
|
70
|
+
last.after(datum)
|
68
71
|
end
|
69
72
|
|
70
|
-
|
71
|
-
|
73
|
+
alias_method :<<, :push
|
74
|
+
alias_method :remove, :unlink
|
72
75
|
|
73
76
|
###
|
74
77
|
# call-seq: css *rules, [namespace-bindings, custom-pseudo-class]
|
@@ -77,7 +80,7 @@ module Nokogiri
|
|
77
80
|
# selectors. For example:
|
78
81
|
#
|
79
82
|
# For more information see Nokogiri::XML::Searchable#css
|
80
|
-
def css
|
83
|
+
def css(*args)
|
81
84
|
rules, handler, ns, _ = extract_params(args)
|
82
85
|
paths = css_rules_to_xpath(rules, ns)
|
83
86
|
|
@@ -93,7 +96,7 @@ module Nokogiri
|
|
93
96
|
# queries.
|
94
97
|
#
|
95
98
|
# For more information see Nokogiri::XML::Searchable#xpath
|
96
|
-
def xpath
|
99
|
+
def xpath(*args)
|
97
100
|
paths, handler, ns, binds = extract_params(args)
|
98
101
|
|
99
102
|
inject(NodeSet.new(document)) do |set, node|
|
@@ -101,13 +104,6 @@ module Nokogiri
|
|
101
104
|
end
|
102
105
|
end
|
103
106
|
|
104
|
-
###
|
105
|
-
# Search this NodeSet's nodes' immediate children using CSS selector +selector+
|
106
|
-
def > selector
|
107
|
-
ns = document.root.namespaces
|
108
|
-
xpath CSS.xpath_for(selector, :prefix => "./", :ns => ns).first
|
109
|
-
end
|
110
|
-
|
111
107
|
###
|
112
108
|
# call-seq: search *paths, [namespace-bindings, xpath-variable-bindings, custom-handler-class]
|
113
109
|
#
|
@@ -120,18 +116,18 @@ module Nokogiri
|
|
120
116
|
#
|
121
117
|
# node_set.at(3) # same as node_set[3]
|
122
118
|
#
|
123
|
-
def at
|
119
|
+
def at(*args)
|
124
120
|
if args.length == 1 && args.first.is_a?(Numeric)
|
125
121
|
return self[args.first]
|
126
122
|
end
|
127
123
|
|
128
124
|
super(*args)
|
129
125
|
end
|
130
|
-
|
126
|
+
alias_method :%, :at
|
131
127
|
|
132
128
|
###
|
133
129
|
# Filter this list for nodes that match +expr+
|
134
|
-
def filter
|
130
|
+
def filter(expr)
|
135
131
|
find_all { |node| node.matches?(expr) }
|
136
132
|
end
|
137
133
|
|
@@ -140,7 +136,7 @@ module Nokogiri
|
|
140
136
|
# NodeSet.
|
141
137
|
#
|
142
138
|
# See Nokogiri::XML::Node#add_class for more information.
|
143
|
-
def add_class
|
139
|
+
def add_class(name)
|
144
140
|
each do |el|
|
145
141
|
el.add_class(name)
|
146
142
|
end
|
@@ -152,7 +148,7 @@ module Nokogiri
|
|
152
148
|
# NodeSet.
|
153
149
|
#
|
154
150
|
# See Nokogiri::XML::Node#append_class for more information.
|
155
|
-
def append_class
|
151
|
+
def append_class(name)
|
156
152
|
each do |el|
|
157
153
|
el.append_class(name)
|
158
154
|
end
|
@@ -164,7 +160,7 @@ module Nokogiri
|
|
164
160
|
# NodeSet.
|
165
161
|
#
|
166
162
|
# See Nokogiri::XML::Node#remove_class for more information.
|
167
|
-
def remove_class
|
163
|
+
def remove_class(name = nil)
|
168
164
|
each do |el|
|
169
165
|
el.remove_class(name)
|
170
166
|
end
|
@@ -204,31 +200,31 @@ module Nokogiri
|
|
204
200
|
#
|
205
201
|
# node_set.attr("class") { |node| node.name }
|
206
202
|
#
|
207
|
-
def attr
|
203
|
+
def attr(key, value = nil, &block)
|
208
204
|
unless key.is_a?(Hash) || (key && (value || block))
|
209
|
-
return first
|
205
|
+
return first&.attribute(key)
|
210
206
|
end
|
211
207
|
|
212
208
|
hash = key.is_a?(Hash) ? key : { key => value }
|
213
209
|
|
214
|
-
hash.each do |k,v|
|
210
|
+
hash.each do |k, v|
|
215
211
|
each do |node|
|
216
|
-
node[k] = v ||
|
212
|
+
node[k] = v || yield(node)
|
217
213
|
end
|
218
214
|
end
|
219
215
|
|
220
216
|
self
|
221
217
|
end
|
222
|
-
|
223
|
-
|
218
|
+
alias_method :set, :attr
|
219
|
+
alias_method :attribute, :attr
|
224
220
|
|
225
221
|
###
|
226
222
|
# Remove the attribute named +name+ from all Node objects in the NodeSet
|
227
|
-
def remove_attr
|
228
|
-
each { |el| el.delete
|
223
|
+
def remove_attr(name)
|
224
|
+
each { |el| el.delete(name) }
|
229
225
|
self
|
230
226
|
end
|
231
|
-
|
227
|
+
alias_method :remove_attribute, :remove_attr
|
232
228
|
|
233
229
|
###
|
234
230
|
# Iterate over each node, yielding to +block+
|
@@ -255,20 +251,83 @@ module Nokogiri
|
|
255
251
|
#
|
256
252
|
# See Nokogiri::XML::Node#content for more information.
|
257
253
|
def inner_text
|
258
|
-
collect(&:inner_text).join(
|
254
|
+
collect(&:inner_text).join("")
|
259
255
|
end
|
260
|
-
|
256
|
+
alias_method :text, :inner_text
|
261
257
|
|
262
258
|
###
|
263
259
|
# Get the inner html of all contained Node objects
|
264
|
-
def inner_html
|
265
|
-
collect{|j| j.inner_html(*args) }.join(
|
260
|
+
def inner_html(*args)
|
261
|
+
collect { |j| j.inner_html(*args) }.join("")
|
266
262
|
end
|
267
263
|
|
268
|
-
|
269
|
-
#
|
270
|
-
|
271
|
-
|
264
|
+
# :call-seq:
|
265
|
+
# wrap(markup) -> self
|
266
|
+
# wrap(node) -> self
|
267
|
+
#
|
268
|
+
# Wrap each member of this NodeSet with the node parsed from +markup+ or a dup of the +node+.
|
269
|
+
#
|
270
|
+
# [Parameters]
|
271
|
+
# - *markup* (String)
|
272
|
+
# Markup that is parsed, once per member of the NodeSet, and used as the wrapper. Each
|
273
|
+
# node's parent, if it exists, is used as the context node for parsing; otherwise the
|
274
|
+
# associated document is used. If the parsed fragment has multiple roots, the first root
|
275
|
+
# node is used as the wrapper.
|
276
|
+
# - *node* (Nokogiri::XML::Node)
|
277
|
+
# An element that is `#dup`ed and used as the wrapper.
|
278
|
+
#
|
279
|
+
# [Returns] +self+, to support chaining.
|
280
|
+
#
|
281
|
+
# ⚠ Note that if a +String+ is passed, the markup will be parsed <b>once per node</b> in the
|
282
|
+
# NodeSet. You can avoid this overhead in cases where you know exactly the wrapper you wish to
|
283
|
+
# use by passing a +Node+ instead.
|
284
|
+
#
|
285
|
+
# Also see Node#wrap
|
286
|
+
#
|
287
|
+
# *Example* with a +String+ argument:
|
288
|
+
#
|
289
|
+
# doc = Nokogiri::HTML5(<<~HTML)
|
290
|
+
# <html><body>
|
291
|
+
# <a>a</a>
|
292
|
+
# <a>b</a>
|
293
|
+
# <a>c</a>
|
294
|
+
# <a>d</a>
|
295
|
+
# </body></html>
|
296
|
+
# HTML
|
297
|
+
# doc.css("a").wrap("<div></div>")
|
298
|
+
# doc.to_html
|
299
|
+
# # => <html><head></head><body>
|
300
|
+
# # <div><a>a</a></div>
|
301
|
+
# # <div><a>b</a></div>
|
302
|
+
# # <div><a>c</a></div>
|
303
|
+
# # <div><a>d</a></div>
|
304
|
+
# # </body></html>
|
305
|
+
#
|
306
|
+
# *Example* with a +Node+ argument
|
307
|
+
#
|
308
|
+
# 💡 Note that this is faster than the equivalent call passing a +String+ because it avoids
|
309
|
+
# having to reparse the wrapper markup for each node.
|
310
|
+
#
|
311
|
+
# doc = Nokogiri::HTML5(<<~HTML)
|
312
|
+
# <html><body>
|
313
|
+
# <a>a</a>
|
314
|
+
# <a>b</a>
|
315
|
+
# <a>c</a>
|
316
|
+
# <a>d</a>
|
317
|
+
# </body></html>
|
318
|
+
# HTML
|
319
|
+
# doc.css("a").wrap(doc.create_element("div"))
|
320
|
+
# doc.to_html
|
321
|
+
# # => <html><head></head><body>
|
322
|
+
# # <div><a>a</a></div>
|
323
|
+
# # <div><a>b</a></div>
|
324
|
+
# # <div><a>c</a></div>
|
325
|
+
# # <div><a>d</a></div>
|
326
|
+
# # </body></html>
|
327
|
+
#
|
328
|
+
def wrap(node_or_tags)
|
329
|
+
map { |node| node.wrap(node_or_tags) }
|
330
|
+
self
|
272
331
|
end
|
273
332
|
|
274
333
|
###
|
@@ -279,38 +338,43 @@ module Nokogiri
|
|
279
338
|
|
280
339
|
###
|
281
340
|
# Convert this NodeSet to HTML
|
282
|
-
def to_html
|
341
|
+
def to_html(*args)
|
283
342
|
if Nokogiri.jruby?
|
284
343
|
options = args.first.is_a?(Hash) ? args.shift : {}
|
285
|
-
|
286
|
-
options[:save_with] = Node::SaveOptions::NO_DECLARATION | Node::SaveOptions::NO_EMPTY_TAGS | Node::SaveOptions::AS_HTML
|
287
|
-
end
|
344
|
+
options[:save_with] ||= Node::SaveOptions::DEFAULT_HTML
|
288
345
|
args.insert(0, options)
|
289
346
|
end
|
290
|
-
|
347
|
+
if empty?
|
348
|
+
encoding = (args.first.is_a?(Hash) ? args.first[:encoding] : nil)
|
349
|
+
encoding ||= document.encoding
|
350
|
+
encoding.nil? ? "" : "".encode(encoding)
|
351
|
+
else
|
352
|
+
map { |x| x.to_html(*args) }.join
|
353
|
+
end
|
291
354
|
end
|
292
355
|
|
293
356
|
###
|
294
357
|
# Convert this NodeSet to XHTML
|
295
|
-
def to_xhtml
|
358
|
+
def to_xhtml(*args)
|
296
359
|
map { |x| x.to_xhtml(*args) }.join
|
297
360
|
end
|
298
361
|
|
299
362
|
###
|
300
363
|
# Convert this NodeSet to XML
|
301
|
-
def to_xml
|
364
|
+
def to_xml(*args)
|
302
365
|
map { |x| x.to_xml(*args) }.join
|
303
366
|
end
|
304
367
|
|
305
|
-
|
306
|
-
|
368
|
+
alias_method :size, :length
|
369
|
+
alias_method :to_ary, :to_a
|
307
370
|
|
308
371
|
###
|
309
372
|
# Removes the last element from set and returns it, or +nil+ if
|
310
373
|
# the set is empty
|
311
374
|
def pop
|
312
375
|
return nil if length == 0
|
313
|
-
|
376
|
+
|
377
|
+
delete(last)
|
314
378
|
end
|
315
379
|
|
316
380
|
###
|
@@ -318,16 +382,18 @@ module Nokogiri
|
|
318
382
|
# +nil+ if the set is empty.
|
319
383
|
def shift
|
320
384
|
return nil if length == 0
|
321
|
-
|
385
|
+
|
386
|
+
delete(first)
|
322
387
|
end
|
323
388
|
|
324
389
|
###
|
325
390
|
# Equality -- Two NodeSets are equal if they contain the same number
|
326
391
|
# of elements and if each element is equal to the corresponding
|
327
392
|
# element in the other NodeSet
|
328
|
-
def ==
|
393
|
+
def ==(other)
|
329
394
|
return false unless other.is_a?(Nokogiri::XML::NodeSet)
|
330
395
|
return false unless length == other.length
|
396
|
+
|
331
397
|
each_with_index do |node, i|
|
332
398
|
return false unless node == other[i]
|
333
399
|
end
|
@@ -351,7 +417,7 @@ module Nokogiri
|
|
351
417
|
def reverse
|
352
418
|
node_set = NodeSet.new(document)
|
353
419
|
(length - 1).downto(0) do |x|
|
354
|
-
node_set.push
|
420
|
+
node_set.push(self[x])
|
355
421
|
end
|
356
422
|
node_set
|
357
423
|
end
|
@@ -359,14 +425,23 @@ module Nokogiri
|
|
359
425
|
###
|
360
426
|
# Return a nicely formatted string representation
|
361
427
|
def inspect
|
362
|
-
"[#{map(&:inspect).join
|
428
|
+
"[#{map(&:inspect).join(", ")}]"
|
363
429
|
end
|
364
430
|
|
365
|
-
|
431
|
+
alias_method :+, :|
|
366
432
|
|
367
|
-
#
|
368
|
-
|
433
|
+
#
|
434
|
+
# :call-seq: deconstruct() → Array
|
435
|
+
#
|
436
|
+
# Returns the members of this NodeSet as an array, to use in pattern matching.
|
437
|
+
#
|
438
|
+
# ⚡ This is an experimental feature, available since v1.14.0
|
439
|
+
#
|
440
|
+
def deconstruct
|
441
|
+
to_a
|
442
|
+
end
|
369
443
|
|
444
|
+
IMPLIED_XPATH_CONTEXTS = [".//", "self::"].freeze # :nodoc:
|
370
445
|
end
|
371
446
|
end
|
372
447
|
end
|
@@ -1,7 +1,19 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
+
|
2
3
|
module Nokogiri
|
3
4
|
module XML
|
5
|
+
# Struct representing an {XML Schema Notation}[https://www.w3.org/TR/xml/#Notations]
|
4
6
|
class Notation < Struct.new(:name, :public_id, :system_id)
|
7
|
+
# dead comment to ensure rdoc processing
|
8
|
+
|
9
|
+
# :attr: name (String)
|
10
|
+
# The name for the element.
|
11
|
+
|
12
|
+
# :attr: public_id (String)
|
13
|
+
# The URI corresponding to the public identifier
|
14
|
+
|
15
|
+
# :attr: system_id (String,nil)
|
16
|
+
# The URI corresponding to the system identifier
|
5
17
|
end
|
6
18
|
end
|
7
19
|
end
|
@@ -1,91 +1,175 @@
|
|
1
|
+
# coding: utf-8
|
1
2
|
# frozen_string_literal: true
|
3
|
+
|
2
4
|
module Nokogiri
|
3
5
|
module XML
|
4
|
-
|
5
|
-
#
|
6
|
-
#
|
7
|
-
#
|
8
|
-
#
|
9
|
-
#
|
10
|
-
#
|
11
|
-
#
|
12
|
-
#
|
13
|
-
#
|
14
|
-
#
|
15
|
-
#
|
16
|
-
#
|
17
|
-
#
|
18
|
-
# You can
|
19
|
-
#
|
20
|
-
#
|
21
|
-
#
|
22
|
-
#
|
23
|
-
#
|
24
|
-
#
|
25
|
-
#
|
26
|
-
#
|
6
|
+
# Options that control the parsing behavior for XML::Document, XML::DocumentFragment,
|
7
|
+
# HTML4::Document, HTML4::DocumentFragment, XSLT::Stylesheet, and XML::Schema.
|
8
|
+
#
|
9
|
+
# These options directly expose libxml2's parse options, which are all boolean in the sense that
|
10
|
+
# an option is "on" or "off".
|
11
|
+
#
|
12
|
+
# 💡 Note that HTML5 parsing has a separate, orthogonal set of options due to the nature of the
|
13
|
+
# HTML5 specification. See Nokogiri::HTML5.
|
14
|
+
#
|
15
|
+
# ⚠ Not all parse options are supported on JRuby. Nokogiri will attempt to invoke the equivalent
|
16
|
+
# behavior in Xerces/NekoHTML on JRuby when it's possible.
|
17
|
+
#
|
18
|
+
# == Setting and unsetting parse options
|
19
|
+
#
|
20
|
+
# You can build your own combinations of parse options by using any of the following methods:
|
21
|
+
#
|
22
|
+
# [ParseOptions method chaining]
|
23
|
+
#
|
24
|
+
# Every option has an equivalent method in lowercase. You can chain these methods together to
|
25
|
+
# set various combinations.
|
26
|
+
#
|
27
|
+
# # Set the HUGE & PEDANTIC options
|
28
|
+
# po = Nokogiri::XML::ParseOptions.new.huge.pedantic
|
29
|
+
# doc = Nokogiri::XML::Document.parse(xml, nil, nil, po)
|
30
|
+
#
|
31
|
+
# Every option has an equivalent <code>no{option}</code> method in lowercase. You can call these
|
32
|
+
# methods on an instance of ParseOptions to unset the option.
|
33
|
+
#
|
34
|
+
# # Set the HUGE & PEDANTIC options
|
35
|
+
# po = Nokogiri::XML::ParseOptions.new.huge.pedantic
|
36
|
+
#
|
37
|
+
# # later we want to modify the options
|
38
|
+
# po.nohuge # Unset the HUGE option
|
39
|
+
# po.nopedantic # Unset the PEDANTIC option
|
40
|
+
#
|
41
|
+
# 💡 Note that some options begin with "no" leading to the logical but perhaps unintuitive
|
42
|
+
# double negative:
|
43
|
+
#
|
44
|
+
# po.nocdata # Set the NOCDATA parse option
|
45
|
+
# po.nonocdata # Unset the NOCDATA parse option
|
46
|
+
#
|
47
|
+
# 💡 Note that negation is not available for STRICT, which is itself a negation of all other
|
48
|
+
# features.
|
49
|
+
#
|
50
|
+
#
|
51
|
+
# [Using Ruby Blocks]
|
52
|
+
#
|
53
|
+
# Most parsing methods will accept a block for configuration of parse options, and we
|
54
|
+
# recommend chaining the setter methods:
|
55
|
+
#
|
56
|
+
# doc = Nokogiri::XML::Document.parse(xml) { |config| config.huge.pedantic }
|
57
|
+
#
|
58
|
+
#
|
59
|
+
# [ParseOptions constants]
|
60
|
+
#
|
61
|
+
# You can also use the constants declared under Nokogiri::XML::ParseOptions to set various
|
62
|
+
# combinations. They are bits in a bitmask, and so can be combined with bitwise operators:
|
63
|
+
#
|
64
|
+
# po = Nokogiri::XML::ParseOptions.new(Nokogiri::XML::ParseOptions::HUGE | Nokogiri::XML::ParseOptions::PEDANTIC)
|
65
|
+
# doc = Nokogiri::XML::Document.parse(xml, nil, nil, po)
|
27
66
|
#
|
28
67
|
class ParseOptions
|
29
68
|
# Strict parsing
|
30
69
|
STRICT = 0
|
31
|
-
|
70
|
+
|
71
|
+
# Recover from errors. On by default for XML::Document, XML::DocumentFragment,
|
72
|
+
# HTML4::Document, HTML4::DocumentFragment, XSLT::Stylesheet, and XML::Schema.
|
32
73
|
RECOVER = 1 << 0
|
33
|
-
|
74
|
+
|
75
|
+
# Substitute entities. Off by default.
|
76
|
+
#
|
77
|
+
# ⚠ This option enables entity substitution, contrary to what the name implies.
|
78
|
+
#
|
79
|
+
# ⚠ <b>It is UNSAFE to set this option</b> when parsing untrusted documents.
|
34
80
|
NOENT = 1 << 1
|
35
|
-
|
81
|
+
|
82
|
+
# Load external subsets. On by default for XSLT::Stylesheet.
|
83
|
+
#
|
84
|
+
# ⚠ <b>It is UNSAFE to set this option</b> when parsing untrusted documents.
|
36
85
|
DTDLOAD = 1 << 2
|
37
|
-
|
86
|
+
|
87
|
+
# Default DTD attributes. On by default for XSLT::Stylesheet.
|
38
88
|
DTDATTR = 1 << 3
|
39
|
-
|
89
|
+
|
90
|
+
# Validate with the DTD. Off by default.
|
40
91
|
DTDVALID = 1 << 4
|
41
|
-
|
92
|
+
|
93
|
+
# Suppress error reports. On by default for HTML4::Document and HTML4::DocumentFragment
|
42
94
|
NOERROR = 1 << 5
|
43
|
-
|
95
|
+
|
96
|
+
# Suppress warning reports. On by default for HTML4::Document and HTML4::DocumentFragment
|
44
97
|
NOWARNING = 1 << 6
|
45
|
-
|
98
|
+
|
99
|
+
# Enable pedantic error reporting. Off by default.
|
46
100
|
PEDANTIC = 1 << 7
|
47
|
-
|
101
|
+
|
102
|
+
# Remove blank nodes. Off by default.
|
48
103
|
NOBLANKS = 1 << 8
|
49
|
-
|
104
|
+
|
105
|
+
# Use the SAX1 interface internally. Off by default.
|
50
106
|
SAX1 = 1 << 9
|
51
|
-
|
107
|
+
|
108
|
+
# Implement XInclude substitution. Off by default.
|
52
109
|
XINCLUDE = 1 << 10
|
53
|
-
|
110
|
+
|
111
|
+
# Forbid network access. On by default for XML::Document, XML::DocumentFragment,
|
112
|
+
# HTML4::Document, HTML4::DocumentFragment, XSLT::Stylesheet, and XML::Schema.
|
113
|
+
#
|
114
|
+
# ⚠ <b>It is UNSAFE to unset this option</b> when parsing untrusted documents.
|
54
115
|
NONET = 1 << 11
|
55
|
-
|
116
|
+
|
117
|
+
# Do not reuse the context dictionary. Off by default.
|
56
118
|
NODICT = 1 << 12
|
57
|
-
|
119
|
+
|
120
|
+
# Remove redundant namespaces declarations. Off by default.
|
58
121
|
NSCLEAN = 1 << 13
|
59
|
-
|
122
|
+
|
123
|
+
# Merge CDATA as text nodes. On by default for XSLT::Stylesheet.
|
60
124
|
NOCDATA = 1 << 14
|
61
|
-
|
125
|
+
|
126
|
+
# Do not generate XInclude START/END nodes. Off by default.
|
62
127
|
NOXINCNODE = 1 << 15
|
63
|
-
|
128
|
+
|
129
|
+
# Compact small text nodes. Off by default.
|
130
|
+
#
|
131
|
+
# ⚠ No modification of the DOM tree is allowed after parsing. libxml2 may crash if you try to
|
132
|
+
# modify the tree.
|
64
133
|
COMPACT = 1 << 16
|
65
|
-
|
134
|
+
|
135
|
+
# Parse using XML-1.0 before update 5. Off by default
|
66
136
|
OLD10 = 1 << 17
|
67
|
-
|
137
|
+
|
138
|
+
# Do not fixup XInclude xml:base uris. Off by default
|
68
139
|
NOBASEFIX = 1 << 18
|
69
|
-
|
140
|
+
|
141
|
+
# Relax any hardcoded limit from the parser. Off by default.
|
142
|
+
#
|
143
|
+
# ⚠ There may be a performance penalty when this option is set.
|
70
144
|
HUGE = 1 << 19
|
71
145
|
|
72
|
-
#
|
73
|
-
|
74
|
-
#
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
146
|
+
# Support line numbers up to <code>long int</code> (default is a <code>short int</code>). On
|
147
|
+
# by default for XML::Document, XML::DocumentFragment, HTML4::Document,
|
148
|
+
# HTML4::DocumentFragment, XSLT::Stylesheet, and XML::Schema.
|
149
|
+
BIG_LINES = 1 << 22
|
150
|
+
|
151
|
+
# The options mask used by default for parsing XML::Document and XML::DocumentFragment
|
152
|
+
DEFAULT_XML = RECOVER | NONET | BIG_LINES
|
153
|
+
|
154
|
+
# The options mask used by default for parsing XSLT::Stylesheet
|
155
|
+
DEFAULT_XSLT = RECOVER | NONET | NOENT | DTDLOAD | DTDATTR | NOCDATA | BIG_LINES
|
156
|
+
|
157
|
+
# The options mask used by default for parsing HTML4::Document and HTML4::DocumentFragment
|
158
|
+
DEFAULT_HTML = RECOVER | NOERROR | NOWARNING | NONET | BIG_LINES
|
159
|
+
|
160
|
+
# The options mask used by default for parsing XML::Schema
|
161
|
+
DEFAULT_SCHEMA = NONET | BIG_LINES
|
80
162
|
|
81
163
|
attr_accessor :options
|
82
|
-
|
164
|
+
|
165
|
+
def initialize(options = STRICT)
|
83
166
|
@options = options
|
84
167
|
end
|
85
168
|
|
86
169
|
constants.each do |constant|
|
87
170
|
next if constant.to_sym == :STRICT
|
88
|
-
|
171
|
+
|
172
|
+
class_eval <<~RUBY, __FILE__, __LINE__ + 1
|
89
173
|
def #{constant.downcase}
|
90
174
|
@options |= #{constant}
|
91
175
|
self
|
@@ -99,7 +183,7 @@ module Nokogiri
|
|
99
183
|
def #{constant.downcase}?
|
100
184
|
#{constant} & @options == #{constant}
|
101
185
|
end
|
102
|
-
|
186
|
+
RUBY
|
103
187
|
end
|
104
188
|
|
105
189
|
def strict
|
@@ -115,14 +199,14 @@ module Nokogiri
|
|
115
199
|
other.to_i == to_i
|
116
200
|
end
|
117
201
|
|
118
|
-
|
202
|
+
alias_method :to_i, :options
|
119
203
|
|
120
204
|
def inspect
|
121
205
|
options = []
|
122
206
|
self.class.constants.each do |k|
|
123
207
|
options << k.downcase if send(:"#{k.downcase}?")
|
124
208
|
end
|
125
|
-
super.sub(/>$/, " " + options.join(
|
209
|
+
super.sub(/>$/, " " + options.join(", ") + ">")
|
126
210
|
end
|
127
211
|
end
|
128
212
|
end
|