nokolexbor 0.7.0-aarch64-linux
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/nokolexbor/3.1/nokolexbor.so +0 -0
- data/lib/nokolexbor/3.2/nokolexbor.so +0 -0
- data/lib/nokolexbor/3.3/nokolexbor.so +0 -0
- data/lib/nokolexbor/3.4/nokolexbor.so +0 -0
- data/lib/nokolexbor/4.0/nokolexbor.so +0 -0
- data/lib/nokolexbor/builder.rb +223 -0
- data/lib/nokolexbor/document.rb +168 -0
- data/lib/nokolexbor/document_fragment.rb +42 -0
- data/lib/nokolexbor/node.rb +775 -0
- data/lib/nokolexbor/node_set.rb +293 -0
- data/lib/nokolexbor/version.rb +5 -0
- data/lib/nokolexbor/xpath.rb +69 -0
- data/lib/nokolexbor/xpath_context.rb +14 -0
- data/lib/nokolexbor.rb +50 -0
- metadata +85 -0
|
@@ -0,0 +1,775 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Nokolexbor
|
|
4
|
+
class Node
|
|
5
|
+
include Enumerable
|
|
6
|
+
|
|
7
|
+
# Integer node-type codes. The predicate methods in this class
# (e.g. #comment?, #text?, #element?) compare +type+ against these values.
ELEMENT_NODE = 1
|
|
8
|
+
ATTRIBUTE_NODE = 2
|
|
9
|
+
TEXT_NODE = 3
|
|
10
|
+
CDATA_SECTION_NODE = 4
|
|
11
|
+
ENTITY_REF_NODE = 5
|
|
12
|
+
ENTITY_NODE = 6
|
|
13
|
+
PI_NODE = 7
|
|
14
|
+
COMMENT_NODE = 8
|
|
15
|
+
DOCUMENT_NODE = 9
|
|
16
|
+
DOCUMENT_TYPE_NODE = 10
|
|
17
|
+
DOCUMENT_FRAG_NODE = 11
|
|
18
|
+
NOTATION_NODE = 12
|
|
19
|
+
|
|
20
|
+
# @return [Document] The associated {Document} of this node
|
|
21
|
+
attr_reader :document
|
|
22
|
+
|
|
23
|
+
# Heuristic used by #search and #at to tell XPath from CSS: matches when a
# line of the selector starts with "./", "/" or "..", or is a lone ".".
LOOKS_LIKE_XPATH = %r{^(\./|/|\.\.|\.$)}
|
|
24
|
+
|
|
25
|
+
# @return true if this is a {Comment}
|
|
26
|
+
def comment?
|
|
27
|
+
# True when this node's +type+ code equals COMMENT_NODE.
type == COMMENT_NODE
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
# @return true if this is a {CDATA}
|
|
31
|
+
def cdata?
|
|
32
|
+
# True when this node's +type+ code equals CDATA_SECTION_NODE.
type == CDATA_SECTION_NODE
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
# @return true if this is a {ProcessingInstruction}
|
|
36
|
+
def processing_instruction?
|
|
37
|
+
# True when this node's +type+ code equals PI_NODE.
type == PI_NODE
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
# @return true if this is a {Text}
|
|
41
|
+
def text?
|
|
42
|
+
# True when this node's +type+ code equals TEXT_NODE.
type == TEXT_NODE
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
# @return true if this is a {DocumentFragment}
|
|
46
|
+
def fragment?
|
|
47
|
+
# True when this node's +type+ code equals DOCUMENT_FRAG_NODE.
type == DOCUMENT_FRAG_NODE
|
|
48
|
+
end
|
|
49
|
+
|
|
50
|
+
# @return true if this is an {Element}
|
|
51
|
+
def element?
|
|
52
|
+
# True when this node's +type+ code equals ELEMENT_NODE.
type == ELEMENT_NODE
|
|
53
|
+
end
|
|
54
|
+
alias_method :elem?, :element?
|
|
55
|
+
|
|
56
|
+
# @return true if this is a {Document}
|
|
57
|
+
def document?
|
|
58
|
+
# Decided by class membership (is_a?), not by comparing the +type+ code.
is_a?(Nokolexbor::Document)
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
# Get the path to this node as a CSS expression
|
|
62
|
+
def css_path
  # Split the node path on "/", discard the empty segments, and rewrite each
  # positional predicate "[n]" as the CSS pseudo-class ":nth-of-type(n)".
  segments = path.split("/").reject(&:empty?)
  css_steps = segments.map do |segment|
    segment.gsub(/\[(\d+)\]/, ':nth-of-type(\1)')
  end
  css_steps.join(" > ")
end
|
|
67
|
+
|
|
68
|
+
# Get a list of ancestor Node of this Node
|
|
69
|
+
#
|
|
70
|
+
# @param [String, nil] selector The selector to match ancestors
|
|
71
|
+
#
|
|
72
|
+
# @return [NodeSet] A set of matched ancestor nodes
|
|
73
|
+
def ancestors(selector = nil)
  # Nodes without a parent (or without a #parent method) have no ancestors.
  return NodeSet.new(@document) unless respond_to?(:parent) && parent

  # Walk upwards, nearest ancestor first, until the chain runs out.
  cursor = parent
  lineage = [cursor]
  while cursor.respond_to?(:parent) && (cursor = cursor.parent)
    lineage << cursor
  end

  return NodeSet.new(@document, lineage) unless selector

  # Run the selector from the top-most ancestor and keep only the ancestors
  # that appear in its results.
  hits = lineage.last.search(selector)
  NodeSet.new(@document, lineage.select { |ancestor| hits.include?(ancestor) })
end
|
|
94
|
+
|
|
95
|
+
# Wrap this Node with another node.
|
|
96
|
+
#
|
|
97
|
+
# @param node [String, Node] A string or a node
|
|
98
|
+
# - when {String}:
|
|
99
|
+
# The markup that is parsed and used as the wrapper. If the parsed
|
|
100
|
+
# fragment has multiple roots, the first root node is used as the wrapper.
|
|
101
|
+
# - when {Node}:
|
|
102
|
+
# An element that is cloned and used as the wrapper.
|
|
103
|
+
#
|
|
104
|
+
# @return [Node] +self+, to support chaining of calls.
|
|
105
|
+
#
|
|
106
|
+
# @see NodeSet#wrap
|
|
107
|
+
#
|
|
108
|
+
# @example with a {String} argument:
|
|
109
|
+
#
|
|
110
|
+
# doc = Nokolexbor::HTML('<body><a>123</a></body>')
|
|
111
|
+
# doc.at_css('a').wrap('<div></div>')
|
|
112
|
+
# doc.at_css('body').inner_html
|
|
113
|
+
# # => "<div><a>123</a></div>"
|
|
114
|
+
#
|
|
115
|
+
# @example with a {Node} argument:
|
|
116
|
+
#
|
|
117
|
+
# doc = Nokolexbor::HTML('<body><a>123</a></body>')
|
|
118
|
+
# doc.at_css('a').wrap(doc.create_element('div'))
|
|
119
|
+
# doc.at_css('body').inner_html
|
|
120
|
+
# # => "<div><a>123</a></div>"
|
|
121
|
+
#
|
|
122
|
+
def wrap(node)
  # Resolve the wrapper element from whichever kind of argument was given.
  new_parent =
    case node
    when String then fragment(node).child
    when DocumentFragment then node.child
    when Node then node.dup
    else
      raise ArgumentError, "Requires a String or Node argument, and cannot accept a #{node.class}"
    end

  # Attached node: place the wrapper next to self. Detached node: call
  # #remove on the wrapper instead.
  parent ? add_sibling(:next, new_parent) : new_parent.remove

  # Finally move self inside the wrapper.
  new_parent.add_child(self)
  self
end
|
|
143
|
+
|
|
144
|
+
# Insert +node_or_tags+ before this Node (as a sibling).
|
|
145
|
+
#
|
|
146
|
+
# @param node_or_tags [Node, DocumentFragment, NodeSet, String] The node to be added.
|
|
147
|
+
#
|
|
148
|
+
# @return [Node,NodeSet] The reparented {Node} (if +node_or_tags+ is a {Node}), or {NodeSet} (if +node_or_tags+ is a {DocumentFragment}, {NodeSet}, or {String}).
|
|
149
|
+
#
|
|
150
|
+
# @see #before
|
|
151
|
+
def add_previous_sibling(node_or_tags)
  # Directly under a document only comments and processing instructions may
  # be added as siblings.
  if parent&.document?
    harmless = node_or_tags.comment? || node_or_tags.processing_instruction?
    raise ArgumentError, "A document may not have multiple root nodes." unless harmless
  end

  add_sibling(:previous, node_or_tags)
end
|
|
157
|
+
|
|
158
|
+
# Insert +node_or_tags+ after this Node (as a sibling).
|
|
159
|
+
#
|
|
160
|
+
# @param node_or_tags [Node, DocumentFragment, NodeSet, String] The node to be added.
|
|
161
|
+
#
|
|
162
|
+
# @return [Node,NodeSet] The reparented {Node} (if +node_or_tags+ is a {Node}), or {NodeSet} (if +node_or_tags+ is a {DocumentFragment}, {NodeSet}, or {String}).
|
|
163
|
+
#
|
|
164
|
+
# @see #after
|
|
165
|
+
def add_next_sibling(node_or_tags)
  # Directly under a document only comments and processing instructions may
  # be added as siblings.
  if parent&.document?
    harmless = node_or_tags.comment? || node_or_tags.processing_instruction?
    raise ArgumentError, "A document may not have multiple root nodes." unless harmless
  end

  add_sibling(:next, node_or_tags)
end
|
|
171
|
+
|
|
172
|
+
# Insert +node_or_tags+ before this Node (as a sibling).
|
|
173
|
+
#
|
|
174
|
+
# @param node_or_tags [Node, DocumentFragment, NodeSet, String] The node to be added.
|
|
175
|
+
#
|
|
176
|
+
# @return [Node] +self+, to support chaining of calls.
|
|
177
|
+
#
|
|
178
|
+
# @see #add_previous_sibling
|
|
179
|
+
def before(node_or_tags)
  # Same insertion as #add_previous_sibling, but the receiver is returned.
  tap { add_previous_sibling(node_or_tags) }
end
|
|
183
|
+
|
|
184
|
+
# Insert +node_or_tags+ after this Node (as a sibling).
|
|
185
|
+
#
|
|
186
|
+
# @param node_or_tags [Node, DocumentFragment, NodeSet, String] The node to be added.
|
|
187
|
+
#
|
|
188
|
+
# @return [Node] +self+, to support chaining of calls.
|
|
189
|
+
#
|
|
190
|
+
# @see #add_next_sibling
|
|
191
|
+
def after(node_or_tags)
  # Same insertion as #add_next_sibling, but the receiver is returned.
  tap { add_next_sibling(node_or_tags) }
end
|
|
195
|
+
|
|
196
|
+
alias_method :next_sibling, :next
|
|
197
|
+
alias_method :previous_sibling, :previous
|
|
198
|
+
alias_method :next=, :add_next_sibling
|
|
199
|
+
alias_method :previous=, :add_previous_sibling
|
|
200
|
+
|
|
201
|
+
# Add +node_or_tags+ as a child of this Node.
|
|
202
|
+
#
|
|
203
|
+
# @param node_or_tags [Node, DocumentFragment, NodeSet, String] The node to be added.
|
|
204
|
+
#
|
|
205
|
+
# @return [Node] +self+, to support chaining of calls.
|
|
206
|
+
#
|
|
207
|
+
# @see #add_child
|
|
208
|
+
def <<(node_or_tags)
  # Append via #add_child, then return the receiver so calls can be chained.
  tap { add_child(node_or_tags) }
end
|
|
212
|
+
|
|
213
|
+
# Add +node+ as the first child of this Node.
|
|
214
|
+
#
|
|
215
|
+
# @param node [Node, DocumentFragment, NodeSet, String] The node to be added.
|
|
216
|
+
#
|
|
217
|
+
# @return [Node,NodeSet] The reparented {Node} (if +node+ is a {Node}), or {NodeSet} (if +node+ is a {DocumentFragment}, {NodeSet}, or {String}).
|
|
218
|
+
#
|
|
219
|
+
# @see #add_child
|
|
220
|
+
def prepend_child(node)
  first = children.first
  # No children yet: prepending is the same as appending.
  return add_child(node) unless first

  # Mimic the error add_child would raise.
  raise "Document already has a root node" if document? && !node.comment? && !node.processing_instruction?

  first.add_sibling(:previous, node)
end
|
|
230
|
+
|
|
231
|
+
# Traverse self and all children.
|
|
232
|
+
# @yield self and all children to +block+ recursively.
|
|
233
|
+
def traverse(&block)
  # Post-order walk: every descendant is yielded before the node itself.
  children.each do |child|
    child.traverse(&block)
  end
  yield self
end
|
|
237
|
+
|
|
238
|
+
# @param selector [String] The selector to match
|
|
239
|
+
#
|
|
240
|
+
# @return true if this Node matches +selector+
|
|
241
|
+
def matches?(selector)
  # Run the selector from the top-most ancestor and look for self in the hits.
  topmost = ancestors.last
  topmost.css(selector).any? { |candidate| candidate == self }
end
|
|
244
|
+
|
|
245
|
+
# Fetch this node's attributes.
|
|
246
|
+
#
|
|
247
|
+
# @return [Hash{String => Attribute}] Hash containing attributes belonging to +self+. The hash keys are String attribute names, and the hash values are {Nokolexbor::Attribute}.
|
|
248
|
+
def attributes
  # Index the attribute nodes by name; a later node with the same name
  # overwrites an earlier one.
  by_name = {}
  attribute_nodes.each { |attr_node| by_name[attr_node.name] = attr_node }
  by_name
end
|
|
253
|
+
|
|
254
|
+
# Replace this Node with +node+.
|
|
255
|
+
#
|
|
256
|
+
# @param node [Node, DocumentFragment, NodeSet, String]
|
|
257
|
+
#
|
|
258
|
+
# @return [Node,NodeSet] The reparented {Node} (if +node+ is a {Node}), or {NodeSet} (if +node+ is a {DocumentFragment}, {NodeSet}, or {String}).
|
|
259
|
+
#
|
|
260
|
+
# @see #swap
|
|
261
|
+
def replace(node)
  # Insert the replacement just before self, then call #remove on self.
  # The insertion result is what gets returned.
  add_sibling(:previous, node).tap { remove }
end
|
|
266
|
+
|
|
267
|
+
# Swap this Node for +node+.
|
|
268
|
+
#
|
|
269
|
+
# @param node [Node, DocumentFragment, NodeSet, String]
|
|
270
|
+
#
|
|
271
|
+
# @return [Node] +self+, to support chaining of calls.
|
|
272
|
+
#
|
|
273
|
+
# @see #replace
|
|
274
|
+
def swap(node)
  # Same effect as #replace, but the receiver is returned.
  tap { replace(node) }
end
|
|
278
|
+
|
|
279
|
+
# Set the content of this Node.
|
|
280
|
+
#
|
|
281
|
+
# @param node [Node, DocumentFragment, NodeSet, String] The node to be added.
|
|
282
|
+
#
|
|
283
|
+
# @see #inner_html=
|
|
284
|
+
def children=(node)
|
|
285
|
+
# Calls #remove on the current children collection first (presumably
# detaching every existing child; #remove is defined outside this file).
children.remove
|
|
286
|
+
# Then appends the new content through #add_child.
add_child(node)
|
|
287
|
+
end
|
|
288
|
+
|
|
289
|
+
# Set the parent Node of this Node.
|
|
290
|
+
#
|
|
291
|
+
# @param parent_node [Node] The parent node.
|
|
292
|
+
def parent=(parent_node)
|
|
293
|
+
# Re-parenting is delegated: +parent_node+ is asked to add self as a child.
parent_node.add_child(self)
|
|
294
|
+
end
|
|
295
|
+
|
|
296
|
+
# @return true if this Node's attributes include <value>
|
|
297
|
+
def value?(value)
|
|
298
|
+
# Membership test against +values+ (defined outside this file; presumably
# the list of this node's attribute values).
values.include?(value)
|
|
299
|
+
end
|
|
300
|
+
|
|
301
|
+
# Iterate over each attribute name and value pair of this Node.
|
|
302
|
+
#
|
|
303
|
+
# @yield [String,String] The name and value of the current attribute.
|
|
304
|
+
# Iterate over each attribute of this Node as a [name, value] pair.
#
# @yield [Array(String, String)] the attribute name and its value
# @return [Enumerator] when no block is given
# @return [Hash] the result of iterating #attributes when a block is given
def each
  # This class mixes in Enumerable, so a block-less call should hand back an
  # Enumerator instead of failing inside +yield+ with a LocalJumpError.
  return enum_for(:each) unless block_given?

  attributes.each do |name, node|
    yield [name, node.value]
  end
end
|
|
309
|
+
|
|
310
|
+
# Create a {DocumentFragment} containing +tags+ that is relative to _this_
|
|
311
|
+
# context node.
|
|
312
|
+
#
|
|
313
|
+
# @return [DocumentFragment]
|
|
314
|
+
def fragment(tags)
|
|
315
|
+
# Passes this node's document, the markup, and self (as the third argument)
# to DocumentFragment.new.
Nokolexbor::DocumentFragment.new(document, tags, self)
|
|
316
|
+
end
|
|
317
|
+
|
|
318
|
+
alias_method :inner_html=, :children=
|
|
319
|
+
|
|
320
|
+
# Search this object for CSS +rules+. +rules+ must be one or more CSS
|
|
321
|
+
# selectors.
|
|
322
|
+
#
|
|
323
|
+
# This method uses Lexbor as the selector engine. Its performance is much higher than {#xpath} or {#nokogiri_css}.
|
|
324
|
+
#
|
|
325
|
+
# @example
|
|
326
|
+
# node.css('title')
|
|
327
|
+
# node.css('body h1.bold')
|
|
328
|
+
# node.css('div + p.green', 'div#one')
|
|
329
|
+
#
|
|
330
|
+
# @return [NodeSet] The matched set of Nodes.
|
|
331
|
+
#
|
|
332
|
+
# @see #xpath
|
|
333
|
+
# @see #nokogiri_css
|
|
334
|
+
def css(*args)
|
|
335
|
+
# Several selectors are merged into one comma-separated selector list
# before being handed to css_impl.
css_impl(args.join(', '))
|
|
336
|
+
end
|
|
337
|
+
|
|
338
|
+
# Like {#css}, but returns the first match.
|
|
339
|
+
#
|
|
340
|
+
# This method uses Lexbor as the selector engine. Its performance is much higher than {#at_xpath} or {#nokogiri_at_css}.
|
|
341
|
+
#
|
|
342
|
+
# @return [Node, nil] The first matched Node.
|
|
343
|
+
#
|
|
344
|
+
# @see #css
|
|
345
|
+
# @see #nokogiri_at_css
|
|
346
|
+
def at_css(*args)
|
|
347
|
+
# Several selectors are merged into one comma-separated selector list
# before being handed to at_css_impl.
at_css_impl(args.join(', '))
|
|
348
|
+
end
|
|
349
|
+
|
|
350
|
+
# Search this object for CSS +rules+. +rules+ must be one or more CSS
|
|
351
|
+
# selectors. It supports a mixed syntax of CSS selectors and XPath.
|
|
352
|
+
#
|
|
353
|
+
# This method uses libxml2 as the selector engine. It works the same way as {Nokogiri::Node#css}.
|
|
354
|
+
#
|
|
355
|
+
# @return [NodeSet] The matched set of Nodes.
|
|
356
|
+
#
|
|
357
|
+
# @see #css
|
|
358
|
+
def nokogiri_css(*args)
  # extract_params returns [rules, handler, ns, binds]; CSS lookups take no
  # variable bindings, so only the first three are forwarded.
  nokogiri_css_internal(self, *extract_params(args).first(3))
end
|
|
363
|
+
|
|
364
|
+
# Like {#nokogiri_css}, but returns the first match.
|
|
365
|
+
#
|
|
366
|
+
# This method uses libxml2 as the selector engine. It works the same way as {Nokogiri::Node#at_css}.
|
|
367
|
+
#
|
|
368
|
+
# @return [Node, nil] The first matched Node.
|
|
369
|
+
#
|
|
370
|
+
# @see #nokogiri_css
|
|
371
|
+
# @see #at_css
|
|
372
|
+
def nokogiri_at_css(*args)
|
|
373
|
+
# Runs the full #nokogiri_css query and keeps only its first result.
nokogiri_css(*args).first
|
|
374
|
+
end
|
|
375
|
+
|
|
376
|
+
# Search this node for XPath +paths+. +paths+ must be one or more XPath
|
|
377
|
+
# queries.
|
|
378
|
+
#
|
|
379
|
+
# It works the same way as {Nokogiri::Node#xpath}.
|
|
380
|
+
#
|
|
381
|
+
# @example
|
|
382
|
+
# node.xpath('.//title')
|
|
383
|
+
#
|
|
384
|
+
# @return [NodeSet] The matched set of Nodes.
|
|
385
|
+
def xpath(*args)
  # extract_params returns [paths, handler, ns, binds], which lines up with
  # the remaining parameters of xpath_internal.
  xpath_internal(self, *extract_params(args))
end
|
|
390
|
+
|
|
391
|
+
# Like {#xpath}, but returns the first match.
|
|
392
|
+
#
|
|
393
|
+
# It works the same way as {Nokogiri::Node#at_xpath}.
|
|
394
|
+
#
|
|
395
|
+
# @return [Node, nil] The first matched Node.
|
|
396
|
+
#
|
|
397
|
+
# @see #xpath
|
|
398
|
+
def at_xpath(*args)
|
|
399
|
+
# Runs the full #xpath query and keeps only its first result.
xpath(*args).first
|
|
400
|
+
end
|
|
401
|
+
|
|
402
|
+
# Search this object for +paths+. +paths+ must be one or more XPath or CSS selectors.
|
|
403
|
+
#
|
|
404
|
+
# @return [NodeSet] The matched set of Nodes.
|
|
405
|
+
def search(*args)
  paths, handler, ns, binds = extract_params(args)

  # A single selector that does not look like XPath goes to the CSS engine.
  lone_css_selector = paths.size == 1 && !LOOKS_LIKE_XPATH.match?(paths.first)
  return css(paths.first) if lone_css_selector

  # Everything else is evaluated as XPath, with the non-nil extras re-appended.
  xpath(*paths, *[ns, handler, binds].compact)
end
|
|
414
|
+
|
|
415
|
+
alias_method :/, :search
|
|
416
|
+
|
|
417
|
+
# Like {#search}, but returns the first match.
|
|
418
|
+
#
|
|
419
|
+
# @return [Node, nil] The first matched Node.
|
|
420
|
+
#
|
|
421
|
+
# @see #search
|
|
422
|
+
def at(*args)
  paths, handler, ns, binds = extract_params(args)

  # A single selector that does not look like XPath goes to the CSS engine.
  lone_css_selector = paths.size == 1 && !LOOKS_LIKE_XPATH.match?(paths.first)
  return at_css(paths.first) if lone_css_selector

  # Everything else is evaluated as XPath, with the non-nil extras re-appended.
  at_xpath(*paths, *[ns, handler, binds].compact)
end
|
|
431
|
+
|
|
432
|
+
alias_method :%, :at
|
|
433
|
+
|
|
434
|
+
# Fetch CSS class names of a Node.
|
|
435
|
+
#
|
|
436
|
+
# This is a convenience function and is equivalent to:
|
|
437
|
+
#
|
|
438
|
+
# node.kwattr_values("class")
|
|
439
|
+
#
|
|
440
|
+
# @see #kwattr_values
|
|
441
|
+
# @see #add_class
|
|
442
|
+
# @see #append_class
|
|
443
|
+
# @see #remove_class
|
|
444
|
+
#
|
|
445
|
+
# @return [Array]
|
|
446
|
+
# The CSS classes present in the Node's "class" attribute. If the
|
|
447
|
+
# attribute is empty or non-existent, the return value is an empty array.
|
|
448
|
+
#
|
|
449
|
+
# @example
|
|
450
|
+
# node.classes # => ["section", "title", "header"]
|
|
451
|
+
def classes
|
|
452
|
+
# Thin wrapper: reads the "class" attribute through #kwattr_values.
kwattr_values("class")
|
|
453
|
+
end
|
|
454
|
+
|
|
455
|
+
# Ensure CSS classes are present on +self+. Any CSS classes in +names+ that already exist
|
|
456
|
+
# in the "class" attribute are _not_ added. Note that any existing duplicates in the
|
|
457
|
+
# "class" attribute are not removed. Compare with {#append_class}.
|
|
458
|
+
#
|
|
459
|
+
# This is a convenience function and is equivalent to:
|
|
460
|
+
#
|
|
461
|
+
# node.kwattr_add("class", names)
|
|
462
|
+
#
|
|
463
|
+
# @see #kwattr_add
|
|
464
|
+
# @see #classes
|
|
465
|
+
# @see #append_class
|
|
466
|
+
# @see #remove_class
|
|
467
|
+
#
|
|
468
|
+
# @param [String, Array<String>] names
|
|
469
|
+
# CSS class names to be added to the Node's "class" attribute. May be a string containing
|
|
470
|
+
# whitespace-delimited names, or an Array of String names. Any class names already present
|
|
471
|
+
# will not be added. Any class names not present will be added. If no "class" attribute
|
|
472
|
+
# exists, one is created.
|
|
473
|
+
#
|
|
474
|
+
# @return [Node] +self+, to support chaining of calls.
|
|
475
|
+
#
|
|
476
|
+
# @example
|
|
477
|
+
# node.add_class("section") # => <div class="section"></div>
|
|
478
|
+
# node.add_class("section") # => <div class="section"></div> # duplicate not added
|
|
479
|
+
# node.add_class("section header") # => <div class="section header"></div>
|
|
480
|
+
# node.add_class(["section", "header"]) # => <div class="section header"></div>
|
|
481
|
+
def add_class(names)
|
|
482
|
+
# Thin wrapper: applies #kwattr_add to the "class" attribute.
kwattr_add("class", names)
|
|
483
|
+
end
|
|
484
|
+
|
|
485
|
+
# Add CSS classes to +self+, regardless of duplication. Compare with {#add_class}.
|
|
486
|
+
#
|
|
487
|
+
# This is a convenience function and is equivalent to:
|
|
488
|
+
#
|
|
489
|
+
# node.kwattr_append("class", names)
|
|
490
|
+
#
|
|
491
|
+
# @see #kwattr_append
|
|
492
|
+
# @see #classes
|
|
493
|
+
# @see #add_class
|
|
494
|
+
# @see #remove_class
|
|
495
|
+
#
|
|
496
|
+
# @return [Node] +self+, to support chaining of calls.
|
|
497
|
+
def append_class(names)
|
|
498
|
+
# Thin wrapper: applies #kwattr_append to the "class" attribute.
kwattr_append("class", names)
|
|
499
|
+
end
|
|
500
|
+
|
|
501
|
+
# Remove CSS classes from this node. Any CSS class names in +css_classes+ that exist in
|
|
502
|
+
# this node's "class" attribute are removed, including any multiple entries.
|
|
503
|
+
#
|
|
504
|
+
# If no CSS classes remain after this operation, or if +css_classes+ is +nil+, the "class"
|
|
505
|
+
# attribute is deleted from the node.
|
|
506
|
+
#
|
|
507
|
+
# This is a convenience function and is equivalent to:
|
|
508
|
+
#
|
|
509
|
+
# node.kwattr_remove("class", css_classes)
|
|
510
|
+
#
|
|
511
|
+
# @see #kwattr_remove
|
|
512
|
+
# @see #classes
|
|
513
|
+
# @see #add_class
|
|
514
|
+
# @see #append_class
|
|
515
|
+
#
|
|
516
|
+
# @param names [String, Array<String>]
|
|
517
|
+
# CSS class names to be removed from the Node's
|
|
518
|
+
# "class" attribute. May be a string containing whitespace-delimited names, or an Array of
|
|
519
|
+
# String names. Any class names already present will be removed. If no CSS classes remain,
|
|
520
|
+
# the "class" attribute is deleted.
|
|
521
|
+
#
|
|
522
|
+
# @return [Node] +self+, to support chaining of calls.
|
|
523
|
+
#
|
|
524
|
+
# @example
|
|
525
|
+
# node.remove_class("section")
|
|
526
|
+
# node.remove_class(["section", "float"])
|
|
527
|
+
def remove_class(names = nil)
|
|
528
|
+
# Thin wrapper: applies #kwattr_remove to the "class" attribute. The
# default +nil+ takes the branch that removes the attribute entirely.
kwattr_remove("class", names)
|
|
529
|
+
end
|
|
530
|
+
|
|
531
|
+
# Fetch values from a keyword attribute of a Node.
|
|
532
|
+
#
|
|
533
|
+
# A "keyword attribute" is a node attribute that contains a set of space-delimited
|
|
534
|
+
# values. Perhaps the most familiar example of this is the HTML "class" attribute used to
|
|
535
|
+
# contain CSS classes. But other keyword attributes exist, for instance
|
|
536
|
+
# {the "rel" attribute}[https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes/rel].
|
|
537
|
+
#
|
|
538
|
+
# @see #kwattr_add
|
|
539
|
+
# @see #kwattr_append
|
|
540
|
+
# @see #kwattr_remove
|
|
541
|
+
#
|
|
542
|
+
# @param attribute_name [String]
|
|
543
|
+
# The name of the keyword attribute to be inspected.
|
|
544
|
+
#
|
|
545
|
+
# @return [Array<String>]
|
|
546
|
+
# The values present in the Node's +attribute_name+ attribute. If the
|
|
547
|
+
# attribute is empty or non-existent, the return value is an empty array.
|
|
548
|
+
def kwattr_values(attribute_name)
|
|
549
|
+
# A falsy result from attr (e.g. nil) is replaced by an empty list;
# otherwise keywordify normalises the attribute value.
keywordify(attr(attribute_name) || [])
|
|
550
|
+
end
|
|
551
|
+
|
|
552
|
+
# Ensure that values are present in a keyword attribute.
|
|
553
|
+
#
|
|
554
|
+
# Any values in +keywords+ that already exist in the Node's attribute values are _not_
|
|
555
|
+
# added. Note that any existing duplicates in the attribute values are not removed. Compare
|
|
556
|
+
# with {#kwattr_append}.
|
|
557
|
+
#
|
|
558
|
+
# A "keyword attribute" is a node attribute that contains a set of space-delimited
|
|
559
|
+
# values. Perhaps the most familiar example of this is the HTML "class" attribute used to
|
|
560
|
+
# contain CSS classes. But other keyword attributes exist, for instance
|
|
561
|
+
# {the "rel" attribute}[https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes/rel].
|
|
562
|
+
#
|
|
563
|
+
# @see #add_class
|
|
564
|
+
# @see #kwattr_values
|
|
565
|
+
# @see #kwattr_append
|
|
566
|
+
# @see #kwattr_remove
|
|
567
|
+
#
|
|
568
|
+
# @param attribute_name [String] The name of the keyword attribute to be modified.
|
|
569
|
+
# @param keywords [String, Array<String>]
|
|
570
|
+
# Keywords to be added to the attribute named +attribute_name+. May be a string containing
|
|
571
|
+
# whitespace-delimited values, or an Array of String values. Any values already present will
|
|
572
|
+
# not be added. Any values not present will be added. If the named attribute does not exist,
|
|
573
|
+
# it is created.
|
|
574
|
+
#
|
|
575
|
+
# @return [Node] +self+, to support chaining of calls.
|
|
576
|
+
def kwattr_add(attribute_name, keywords)
  requested = keywordify(keywords)
  existing = kwattr_values(attribute_name)

  # Keep the current values untouched and append only the missing ones.
  merged = existing + (requested - existing)
  set_attr(attribute_name, merged.join(" "))
  self
end
|
|
583
|
+
|
|
584
|
+
# Add keywords to a Node's keyword attribute, regardless of duplication. Compare with
|
|
585
|
+
# {#kwattr_add}.
|
|
586
|
+
#
|
|
587
|
+
# A "keyword attribute" is a node attribute that contains a set of space-delimited
|
|
588
|
+
# values. Perhaps the most familiar example of this is the HTML "class" attribute used to
|
|
589
|
+
# contain CSS classes. But other keyword attributes exist, for instance
|
|
590
|
+
# {the "rel" attribute}[https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes/rel].
|
|
591
|
+
#
|
|
592
|
+
# @see #add_class
|
|
593
|
+
# @see #kwattr_values
|
|
594
|
+
# @see #kwattr_add
|
|
595
|
+
# @see #kwattr_remove
|
|
596
|
+
#
|
|
597
|
+
# @param attribute_name [String] The name of the keyword attribute to be modified.
|
|
598
|
+
# @param keywords [String, Array<String>]
|
|
599
|
+
# Keywords to be appended to the attribute named +attribute_name+. May be a string containing
|
|
600
|
+
# whitespace-delimited values, or an Array of String values. All values are appended, even
|
|
601
|
+
# if they are already present in the attribute. If the named attribute does not exist,
|
|
602
|
+
# it is created.
|
|
603
|
+
#
|
|
604
|
+
# @return [Node] +self+, to support chaining of calls.
|
|
605
|
+
def kwattr_append(attribute_name, keywords)
  requested = keywordify(keywords)
  existing = kwattr_values(attribute_name)

  # No de-duplication: everything requested goes onto the end.
  combined = existing + requested
  set_attr(attribute_name, combined.join(" "))
  self
end
|
|
612
|
+
|
|
613
|
+
# Remove keywords from a keyword attribute. Any matching keywords that exist in the named
|
|
614
|
+
# attribute are removed, including any multiple entries.
|
|
615
|
+
#
|
|
616
|
+
# If no keywords remain after this operation, or if +keywords+ is +nil+, the attribute is
|
|
617
|
+
# deleted from the node.
|
|
618
|
+
#
|
|
619
|
+
# A "keyword attribute" is a node attribute that contains a set of space-delimited
|
|
620
|
+
# values. Perhaps the most familiar example of this is the HTML "class" attribute used to
|
|
621
|
+
# contain CSS classes. But other keyword attributes exist, for instance
|
|
622
|
+
# {the "rel" attribute}[https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes/rel].
|
|
623
|
+
#
|
|
624
|
+
# @see #remove_class
|
|
625
|
+
# @see #kwattr_values
|
|
626
|
+
# @see #kwattr_add
|
|
627
|
+
# @see #kwattr_append
|
|
628
|
+
#
|
|
629
|
+
# @param attribute_name [String] The name of the keyword attribute to be modified.
|
|
630
|
+
# @param keywords [String, Array<String>]
|
|
631
|
+
# Keywords to be removed from the attribute named +attribute_name+. May be a string containing
|
|
632
|
+
# whitespace-delimited values, or an Array of String values. Any values already present will
|
|
633
|
+
# be removed. If no keywords remain, or if +keywords+ is +nil+, the attribute
|
|
634
|
+
# is deleted.
|
|
635
|
+
#
|
|
636
|
+
# @return [Node] +self+, to support chaining of calls.
|
|
637
|
+
def kwattr_remove(attribute_name, keywords)
  unless keywords.nil?
    unwanted = keywordify(keywords)
    survivors = kwattr_values(attribute_name) - unwanted

    # Something is left over: write it back and stop here.
    unless survivors.empty?
      set_attr(attribute_name, survivors.join(" "))
      return self
    end
  end

  # Either +keywords+ was nil or nothing survived: drop the attribute.
  remove_attr(attribute_name)
  self
end
|
|
653
|
+
|
|
654
|
+
# Serialize Node and write to +io+.
|
|
655
|
+
def write_to(io, *options)
|
|
656
|
+
# Serializes with #to_html (forwarding +options+) and hands the result to
# io.write; the return value is whatever io.write returns.
io.write(to_html(*options))
|
|
657
|
+
end
|
|
658
|
+
|
|
659
|
+
alias_method :write_html_to, :write_to
|
|
660
|
+
|
|
661
|
+
private
|
|
662
|
+
|
|
663
|
+
# Normalise a keyword argument into an Array of keywords.
#
# @param keywords [String, Enumerable] whitespace-delimited String, or a
#   collection of keywords
# @return [Array] the individual keywords
# @raise [ArgumentError] if +keywords+ is neither a String nor an Enumerable
def keywordify(keywords)
  case keywords
  when String
    keywords.scan(/\S+/)
  when Enumerable
    # Callers combine the result with Array#+ and Array#-, which reject
    # non-Array collections such as Set, so always hand back a real Array.
    keywords.to_a
  else
    raise ArgumentError,
      "Keyword attributes must be passed as either a String or an Enumerable, but received #{keywords.class}"
  end
end
|
|
674
|
+
|
|
675
|
+
def nokogiri_css_internal(node, rules, handler, ns)
|
|
676
|
+
# Converts the CSS rules to XPath queries (css_rules_to_xpath) and evaluates
# them through xpath_internal; no variable bindings are passed (binds is nil).
xpath_internal(node, css_rules_to_xpath(rules, ns), handler, ns, nil)
|
|
677
|
+
end
|
|
678
|
+
|
|
679
|
+
def xpath_internal(node, paths, handler, ns, binds)
  # A lone path needs no merging; its result is returned as-is.
  return xpath_impl(node, paths.first, handler, ns, binds) if paths.length == 1

  # Otherwise evaluate every path in order and collect all matches into one set.
  NodeSet.new(@document) do |merged|
    paths.each do |expression|
      results = xpath_impl(node, expression, handler, ns, binds)
      results.each { |match| merged << match }
    end
  end
end
|
|
693
|
+
|
|
694
|
+
def xpath_impl(node, path, handler, ns, binds)
  context = XPathContext.new(node)
  context.register_namespaces(ns)

  # Variable bindings are optional; names are registered as Strings.
  unless binds.nil?
    binds.each { |name, value| context.register_variable(name.to_s, value) }
  end

  context.evaluate(path, handler)
end
|
|
705
|
+
|
|
706
|
+
def css_rules_to_xpath(rules, ns)
|
|
707
|
+
# One XPath query string per CSS rule, in the same order as +rules+.
rules.map { |rule| xpath_query_from_css_rule(rule, ns) }
|
|
708
|
+
end
|
|
709
|
+
|
|
710
|
+
def ensure_nokogiri
  # Nothing to do when Nokogiri's CSS module is already loaded.
  return if defined?(Nokogiri) && defined?(Nokogiri::CSS)

  # Lazy-load on first use so Nokogiri stays an optional dependency.
  require 'nokogiri'
rescue LoadError
  raise 'nokogiri_css and nokogiri_at_css require Nokogiri to be installed'
end
|
|
717
|
+
|
|
718
|
+
def xpath_query_from_css_rule(rule, ns)
  ensure_nokogiri
  require 'rubygems' unless defined?(Gem)

  # Nokogiri 1.17.0 changed how prefix/namespaces reach the CSS-to-XPath
  # translator: before it they are xpath_for options, from it on they are
  # given to the visitor.
  pre_1_17 = Gem::Version.new(Nokogiri::VERSION) < Gem::Version.new("1.17.0")

  queries =
    if pre_1_17
      # One visitor is shared by every implied context on the old API.
      shared_visitor =
        if defined?(Nokogiri::CSS::XPathVisitor::BuiltinsConfig)
          Nokogiri::CSS::XPathVisitor.new(
            builtins: Nokogiri::CSS::XPathVisitor::BuiltinsConfig::OPTIMAL,
            doctype: :html4,
          )
        else
          Nokogiri::CSS::XPathVisitorOptimallyUseBuiltins.new
        end

      self.class::IMPLIED_XPATH_CONTEXTS.map do |context_prefix|
        Nokogiri::CSS.xpath_for(rule.to_s, { prefix: context_prefix, ns: ns,
                                             visitor: shared_visitor, })
      end
    else
      # The new API needs a visitor per implied context.
      self.class::IMPLIED_XPATH_CONTEXTS.map do |context_prefix|
        context_visitor = Nokogiri::CSS::XPathVisitor.new(
          builtins: Nokogiri::CSS::XPathVisitor::BuiltinsConfig::OPTIMAL,
          doctype: :html4,
          prefix: context_prefix,
          namespaces: ns,
        )
        Nokogiri::CSS.xpath_for(rule.to_s, visitor: context_visitor)
      end
    end

  queries.join(" | ")
end
|
|
753
|
+
|
|
754
|
+
def extract_params(params)
  # The handler is the first argument whose class is not exactly Hash,
  # String or Symbol.
  plain_classes = [Hash, String, Symbol]
  handler = params.find { |candidate| !plain_classes.include?(candidate.class) }
  params = params - [handler] if handler

  # Peel trailing Hash/nil arguments off the end (namespaces, then bindings).
  trailing = []
  loop do
    tail = params.last
    break unless tail.nil? || tail.is_a?(Hash)

    trailing << params.pop
    break if params.empty?
  end
  ns, binds = trailing.reverse

  # ns ||= (document.root&.namespaces || {})
  ns ||= {}

  [params, handler, ns, binds]
end
|
|
772
|
+
|
|
773
|
+
# XPath prefixes applied to each CSS rule by xpath_query_from_css_rule; one
# query is generated per prefix and the results are joined with " | ".
IMPLIED_XPATH_CONTEXTS = [".//"].freeze
|
|
774
|
+
end
|
|
775
|
+
end
|