nokolexbor 0.3.4-arm64-darwin → 0.3.6-arm64-darwin
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/nokolexbor/2.6/nokolexbor.bundle +0 -0
- data/lib/nokolexbor/2.7/nokolexbor.bundle +0 -0
- data/lib/nokolexbor/3.0/nokolexbor.bundle +0 -0
- data/lib/nokolexbor/3.1/nokolexbor.bundle +0 -0
- data/lib/nokolexbor/3.2/nokolexbor.bundle +0 -0
- data/lib/nokolexbor/document.rb +52 -5
- data/lib/nokolexbor/document_fragment.rb +11 -0
- data/lib/nokolexbor/node.rb +367 -18
- data/lib/nokolexbor/node_set.rb +56 -0
- data/lib/nokolexbor/version.rb +1 -1
- metadata +8 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 06e747c8e64eddc8246e3bdf6aedcb64b230f6880092baaffe706149a5351c10
|
4
|
+
data.tar.gz: ca2f344f83e4708c513a716711683834084908fc8da2c515c6dc0d74956f5123
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0fd46ba8e840166d8bf583c3a41ae5f11b2acf3380d6eddd40c0d8ebd6e753510fc528437f7f0e8281ef07d9430014871e15bae7c65ea4d627fdc3cc48a94350
|
7
|
+
data.tar.gz: 5433db940e5958a17c1c142ef172fca4b4d1745e392aaf125446f70a30bd4c8f80bef70f7182e66dcca756a5afa7ffd672554ca99f169bf15e409d1d92b2a300
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
data/lib/nokolexbor/document.rb
CHANGED
@@ -2,6 +2,33 @@
|
|
2
2
|
|
3
3
|
module Nokolexbor
|
4
4
|
class Document < Nokolexbor::Node
|
5
|
+
# Create an {Element} with +name+ belonging to this document, optionally setting contents or
|
6
|
+
# attributes.
|
7
|
+
#
|
8
|
+
# @param name [String]
|
9
|
+
# @param contents_or_attrs [#to_s, Hash]
|
10
|
+
#
|
11
|
+
# @return [Element]
|
12
|
+
#
|
13
|
+
# @example An empty element without attributes
|
14
|
+
# doc.create_element("div")
|
15
|
+
# # => <div></div>
|
16
|
+
#
|
17
|
+
# @example An element with contents
|
18
|
+
# doc.create_element("div", "contents")
|
19
|
+
# # => <div>contents</div>
|
20
|
+
#
|
21
|
+
# @example An element with attributes
|
22
|
+
# doc.create_element("div", {"class" => "container"})
|
23
|
+
# # => <div class='container'></div>
|
24
|
+
#
|
25
|
+
# @example An element with contents and attributes
|
26
|
+
# doc.create_element("div", "contents", {"class" => "container"})
|
27
|
+
# # => <div class='container'>contents</div>
|
28
|
+
#
|
29
|
+
# @example Passing a block to mutate the element
|
30
|
+
# doc.create_element("div") { |node| node["class"] = "blue" }
|
31
|
+
# # => <div class='blue'></div>
|
5
32
|
def create_element(name, *contents_or_attrs, &block)
|
6
33
|
elm = Nokolexbor::Element.new(name, self, &block)
|
7
34
|
contents_or_attrs.each do |arg|
|
@@ -11,32 +38,43 @@ module Nokolexbor
|
|
11
38
|
elm[k.to_s] = v.to_s
|
12
39
|
end
|
13
40
|
else
|
14
|
-
elm.content = arg
|
41
|
+
elm.content = arg.to_s
|
15
42
|
end
|
16
43
|
end
|
17
44
|
elm
|
18
45
|
end
|
19
46
|
|
20
|
-
# Create a Text
|
47
|
+
# Create a {Text} with +string+.
|
48
|
+
#
|
49
|
+
# @return [Text]
|
21
50
|
def create_text_node(string, &block)
|
22
51
|
Nokolexbor::Text.new(string.to_s, self, &block)
|
23
52
|
end
|
24
53
|
|
25
|
-
# Create a CDATA
|
54
|
+
# Create a {CDATA} containing +string+.
|
55
|
+
#
|
56
|
+
# @return [CDATA]
|
26
57
|
def create_cdata(string, &block)
|
27
58
|
Nokolexbor::CDATA.new(string.to_s, self, &block)
|
28
59
|
end
|
29
60
|
|
30
|
-
# Create a Comment
|
61
|
+
# Create a {Comment} containing +string+.
|
62
|
+
#
|
63
|
+
# @return [Comment]
|
31
64
|
def create_comment(string, &block)
|
32
65
|
Nokolexbor::Comment.new(string.to_s, self, &block)
|
33
66
|
end
|
34
67
|
|
35
|
-
# A reference to +self
|
68
|
+
# A reference to +self+.
|
69
|
+
#
|
70
|
+
# @return [Document]
|
36
71
|
def document
|
37
72
|
self
|
38
73
|
end
|
39
74
|
|
75
|
+
# Get the meta tag encoding for this document. If there is no meta tag, nil is returned.
|
76
|
+
#
|
77
|
+
# @return [String, nil]
|
40
78
|
def meta_encoding
|
41
79
|
if (meta = at_css("meta[charset]"))
|
42
80
|
meta[:charset]
|
@@ -45,6 +83,15 @@ module Nokolexbor
|
|
45
83
|
end
|
46
84
|
end
|
47
85
|
|
86
|
+
# Set the meta tag encoding for this document.
|
87
|
+
#
|
88
|
+
# If a meta encoding tag is already present, its content is
|
89
|
+
# replaced with the given text.
|
90
|
+
#
|
91
|
+
# Otherwise, this method tries to create one at an appropriate
|
92
|
+
# place supplying head and/or html elements as necessary, which
|
93
|
+
# is inside a head element if any, and before any text node or
|
94
|
+
# content element (typically <body>) if any.
|
48
95
|
def meta_encoding=(encoding)
|
49
96
|
if (meta = meta_content_type)
|
50
97
|
meta["content"] = format("text/html; charset=%s", encoding)
|
@@ -2,10 +2,17 @@
|
|
2
2
|
|
3
3
|
module Nokolexbor
|
4
4
|
class DocumentFragment < Nokolexbor::Node
|
5
|
+
# Create a {DocumentFragment} from +tags+.
|
6
|
+
#
|
7
|
+
# @return [DocumentFragment]
|
5
8
|
def self.parse(tags)
|
6
9
|
new(Nokolexbor::Document.new, tags, nil)
|
7
10
|
end
|
8
11
|
|
12
|
+
# Create a new {DocumentFragment} from +tags+.
|
13
|
+
#
|
14
|
+
# If +ctx+ is present, it is used as a context node for the
|
15
|
+
# subtree created.
|
9
16
|
def initialize(document, tags = nil, ctx = nil)
|
10
17
|
return self unless tags
|
11
18
|
|
@@ -15,6 +22,7 @@ module Nokolexbor
|
|
15
22
|
nil
|
16
23
|
end
|
17
24
|
|
25
|
+
# @return [String] The name of {DocumentFragment}
|
18
26
|
def name
|
19
27
|
"#document-fragment"
|
20
28
|
end
|
@@ -24,6 +32,9 @@ module Nokolexbor
|
|
24
32
|
alias_method :to_s, :outer_html
|
25
33
|
alias_method :serialize, :outer_html
|
26
34
|
|
35
|
+
# Create a {DocumentFragment} from +data+.
|
36
|
+
#
|
37
|
+
# @return [DocumentFragment]
|
27
38
|
def fragment(data)
|
28
39
|
document.fragment(data)
|
29
40
|
end
|
data/lib/nokolexbor/node.rb
CHANGED
@@ -17,38 +17,51 @@ module Nokolexbor
|
|
17
17
|
DOCUMENT_FRAG_NODE = 11
|
18
18
|
NOTATION_NODE = 12
|
19
19
|
|
20
|
+
# @return [Document] The associated {Document} of this node
|
20
21
|
attr_reader :document
|
21
22
|
|
22
23
|
LOOKS_LIKE_XPATH = %r{^(\./|/|\.\.|\.$)}
|
23
24
|
|
25
|
+
# @return true if this is a {Comment}
|
24
26
|
def comment?
|
25
27
|
type == COMMENT_NODE
|
26
28
|
end
|
27
29
|
|
30
|
+
# @return true if this is a {CDATA}
|
28
31
|
def cdata?
|
29
32
|
type == CDATA_SECTION_NODE
|
30
33
|
end
|
31
34
|
|
35
|
+
# @return true if this is a {ProcessingInstruction}
|
32
36
|
def processing_instruction?
|
33
37
|
type == PI_NODE
|
34
38
|
end
|
35
39
|
|
40
|
+
# @return true if this is a {Text}
|
36
41
|
def text?
|
37
42
|
type == TEXT_NODE
|
38
43
|
end
|
39
44
|
|
45
|
+
# @return true if this is a {DocumentFragment}
|
40
46
|
def fragment?
|
41
47
|
type == DOCUMENT_FRAG_NODE
|
42
48
|
end
|
43
49
|
|
50
|
+
# @return true if this is an {Element}
|
44
51
|
def element?
|
45
52
|
type == ELEMENT_NODE
|
46
53
|
end
|
47
54
|
|
55
|
+
# @return true if this is a {Document}
|
48
56
|
def document?
|
49
57
|
is_a?(Nokolexbor::Document)
|
50
58
|
end
|
51
59
|
|
60
|
+
# Get a list of ancestor Nodes of this Node.
|
61
|
+
#
|
62
|
+
# @param [String, nil] selector The selector to match ancestors
|
63
|
+
#
|
64
|
+
# @return [NodeSet] A set of matched ancestor nodes
|
52
65
|
def ancestors(selector = nil)
|
53
66
|
return NodeSet.new(@document) unless respond_to?(:parent)
|
54
67
|
return NodeSet.new(@document) unless parent
|
@@ -71,10 +84,39 @@ module Nokolexbor
|
|
71
84
|
end)
|
72
85
|
end
|
73
86
|
|
87
|
+
# Wrap this Node with another node.
|
88
|
+
#
|
89
|
+
# @param node [String, Node] A string or a node
|
90
|
+
# - when {String}:
|
91
|
+
# The markup that is parsed and used as the wrapper. If the parsed
|
92
|
+
# fragment has multiple roots, the first root node is used as the wrapper.
|
93
|
+
# - when {Node}:
|
94
|
+
# An element that is cloned and used as the wrapper.
|
95
|
+
#
|
96
|
+
# @return [Node] +self+, to support chaining of calls.
|
97
|
+
#
|
98
|
+
# @see NodeSet#wrap
|
99
|
+
#
|
100
|
+
# @example with a {String} argument:
|
101
|
+
#
|
102
|
+
# doc = Nokolexbor::HTML('<body><a>123</a></body>')
|
103
|
+
# doc.at_css('a').wrap('<div></div>')
|
104
|
+
# doc.at_css('body').inner_html
|
105
|
+
# # => "<div><a>123</a></div>"
|
106
|
+
#
|
107
|
+
# @example with a {Node} argument:
|
108
|
+
#
|
109
|
+
# doc = Nokolexbor::HTML('<body><a>123</a></body>')
|
110
|
+
# doc.at_css('a').wrap(doc.create_element('div'))
|
111
|
+
# doc.at_css('body').inner_html
|
112
|
+
# # => "<div><a>123</a></div>"
|
113
|
+
#
|
74
114
|
def wrap(node)
|
75
115
|
case node
|
76
116
|
when String
|
77
117
|
new_parent = fragment(node).child
|
118
|
+
when DocumentFragment
|
119
|
+
new_parent = node.child
|
78
120
|
when Node
|
79
121
|
new_parent = node.dup
|
80
122
|
else
|
@@ -91,6 +133,13 @@ module Nokolexbor
|
|
91
133
|
self
|
92
134
|
end
|
93
135
|
|
136
|
+
# Insert +node_or_tags+ before this Node (as a sibling).
|
137
|
+
#
|
138
|
+
# @param node_or_tags [Node, DocumentFragment, NodeSet, String] The node to be added.
|
139
|
+
#
|
140
|
+
# @return [Node,NodeSet] The reparented {Node} (if +node_or_tags+ is a {Node}), or {NodeSet} (if +node_or_tags+ is a {DocumentFragment}, {NodeSet}, or {String}).
|
141
|
+
#
|
142
|
+
# @see #before
|
94
143
|
def add_previous_sibling(node_or_tags)
|
95
144
|
raise ArgumentError,
|
96
145
|
"A document may not have multiple root nodes." if parent&.document? && !(node_or_tags.comment? || node_or_tags.processing_instruction?)
|
@@ -98,6 +147,13 @@ module Nokolexbor
|
|
98
147
|
add_sibling(:previous, node_or_tags)
|
99
148
|
end
|
100
149
|
|
150
|
+
# Insert +node_or_tags+ after this Node (as a sibling).
|
151
|
+
#
|
152
|
+
# @param node_or_tags [Node, DocumentFragment, NodeSet, String] The node to be added.
|
153
|
+
#
|
154
|
+
# @return [Node,NodeSet] The reparented {Node} (if +node_or_tags+ is a {Node}), or {NodeSet} (if +node_or_tags+ is a {DocumentFragment}, {NodeSet}, or {String}).
|
155
|
+
#
|
156
|
+
# @see #after
|
101
157
|
def add_next_sibling(node_or_tags)
|
102
158
|
raise ArgumentError,
|
103
159
|
"A document may not have multiple root nodes." if parent&.document? && !(node_or_tags.comment? || node_or_tags.processing_instruction?)
|
@@ -105,11 +161,25 @@ module Nokolexbor
|
|
105
161
|
add_sibling(:next, node_or_tags)
|
106
162
|
end
|
107
163
|
|
164
|
+
# Insert +node_or_tags+ before this Node (as a sibling).
|
165
|
+
#
|
166
|
+
# @param node_or_tags [Node, DocumentFragment, NodeSet, String] The node to be added.
|
167
|
+
#
|
168
|
+
# @return [Node] +self+, to support chaining of calls.
|
169
|
+
#
|
170
|
+
# @see #add_previous_sibling
|
108
171
|
def before(node_or_tags)
|
109
172
|
add_previous_sibling(node_or_tags)
|
110
173
|
self
|
111
174
|
end
|
112
175
|
|
176
|
+
# Insert +node_or_tags+ after this Node (as a sibling).
|
177
|
+
#
|
178
|
+
# @param node_or_tags [Node, DocumentFragment, NodeSet, String] The node to be added.
|
179
|
+
#
|
180
|
+
# @return [Node] +self+, to support chaining of calls.
|
181
|
+
#
|
182
|
+
# @see #add_next_sibling
|
113
183
|
def after(node_or_tags)
|
114
184
|
add_next_sibling(node_or_tags)
|
115
185
|
self
|
@@ -120,11 +190,25 @@ module Nokolexbor
|
|
120
190
|
alias_method :next=, :add_next_sibling
|
121
191
|
alias_method :previous=, :add_previous_sibling
|
122
192
|
|
193
|
+
# Add +node_or_tags+ as a child of this Node.
|
194
|
+
#
|
195
|
+
# @param node_or_tags [Node, DocumentFragment, NodeSet, String] The node to be added.
|
196
|
+
#
|
197
|
+
# @return [Node] +self+, to support chaining of calls.
|
198
|
+
#
|
199
|
+
# @see #add_child
|
123
200
|
def <<(node_or_tags)
|
124
201
|
add_child(node_or_tags)
|
125
202
|
self
|
126
203
|
end
|
127
204
|
|
205
|
+
# Add +node+ as the first child of this Node.
|
206
|
+
#
|
207
|
+
# @param node [Node, DocumentFragment, NodeSet, String] The node to be added.
|
208
|
+
#
|
209
|
+
# @return [Node,NodeSet] The reparented {Node} (if +node+ is a {Node}), or {NodeSet} (if +node+ is a {DocumentFragment}, {NodeSet}, or {String}).
|
210
|
+
#
|
211
|
+
# @see #add_child
|
128
212
|
def prepend_child(node)
|
129
213
|
if (first = children.first)
|
130
214
|
# Mimic the error add_child would raise.
|
@@ -136,83 +220,175 @@ module Nokolexbor
|
|
136
220
|
end
|
137
221
|
end
|
138
222
|
|
223
|
+
# Traverse self and all children.
|
224
|
+
# @yield self and all children to +block+ recursively.
|
139
225
|
def traverse(&block)
|
140
226
|
children.each { |j| j.traverse(&block) }
|
141
227
|
yield(self)
|
142
228
|
end
|
143
229
|
|
230
|
+
# @param selector [String] The selector to match
|
231
|
+
#
|
232
|
+
# @return true if this Node matches +selector+
|
144
233
|
def matches?(selector)
|
145
234
|
ancestors.last.css(selector).any? { |node| node == self }
|
146
235
|
end
|
147
236
|
|
237
|
+
# Fetch this node's attributes.
|
238
|
+
#
|
239
|
+
# @return [Hash{String => Attribute}] Hash containing attributes belonging to +self+. The hash keys are String attribute names, and the hash values are {Nokolexbor::Attribute}.
|
148
240
|
def attributes
|
149
241
|
attribute_nodes.each_with_object({}) do |node, hash|
|
150
242
|
hash[node.name] = node
|
151
243
|
end
|
152
244
|
end
|
153
245
|
|
246
|
+
# Replace this Node with +node+.
|
247
|
+
#
|
248
|
+
# @param node [Node, DocumentFragment, NodeSet, String]
|
249
|
+
#
|
250
|
+
# @return [Node,NodeSet] The reparented {Node} (if +node+ is a {Node}), or {NodeSet} (if +node+ is a {DocumentFragment}, {NodeSet}, or {String}).
|
251
|
+
#
|
252
|
+
# @see #swap
|
154
253
|
def replace(node)
|
155
|
-
|
156
|
-
node.each { |n| add_sibling(:previous, n) }
|
157
|
-
else
|
158
|
-
add_sibling(:previous, node)
|
159
|
-
end
|
254
|
+
ret = add_sibling(:previous, node)
|
160
255
|
remove
|
256
|
+
ret
|
257
|
+
end
|
258
|
+
|
259
|
+
# Swap this Node for +node+.
|
260
|
+
#
|
261
|
+
# @param node [Node, DocumentFragment, NodeSet, String]
|
262
|
+
#
|
263
|
+
# @return [Node] +self+, to support chaining of calls.
|
264
|
+
#
|
265
|
+
# @see #replace
|
266
|
+
def swap(node)
|
267
|
+
replace(node)
|
268
|
+
self
|
161
269
|
end
|
162
270
|
|
271
|
+
# Set the content of this Node.
|
272
|
+
#
|
273
|
+
# @param node [Node, DocumentFragment, NodeSet, String] The node to be added.
|
274
|
+
#
|
275
|
+
# @see #inner_html=
|
163
276
|
def children=(node)
|
164
277
|
children.remove
|
165
|
-
|
166
|
-
node.each { |n| add_child(n) }
|
167
|
-
else
|
168
|
-
add_child(node)
|
169
|
-
end
|
278
|
+
add_child(node)
|
170
279
|
end
|
171
280
|
|
281
|
+
# Set the parent Node of this Node.
|
282
|
+
#
|
283
|
+
# @param parent_node [Node] The parent node.
|
172
284
|
def parent=(parent_node)
|
173
285
|
parent_node.add_child(self)
|
174
286
|
end
|
175
287
|
|
288
|
+
# Iterate over each attribute name and value pair of this Node.
|
289
|
+
#
|
290
|
+
# @yield [String,String] The name and value of the current attribute.
|
176
291
|
def each
|
177
292
|
attributes.each do |name, node|
|
178
293
|
yield [name, node.value]
|
179
294
|
end
|
180
295
|
end
|
181
296
|
|
297
|
+
# Create a {DocumentFragment} containing +tags+ that is relative to _this_
|
298
|
+
# context node.
|
299
|
+
#
|
300
|
+
# @return [DocumentFragment]
|
182
301
|
def fragment(tags)
|
183
302
|
Nokolexbor::DocumentFragment.new(document, tags, self)
|
184
303
|
end
|
185
304
|
|
186
305
|
alias_method :inner_html=, :children=
|
187
306
|
|
307
|
+
# Search this object for CSS +rules+. +rules+ must be one or more CSS
|
308
|
+
# selectors.
|
309
|
+
#
|
310
|
+
# This method uses Lexbor as the selector engine. Its performance is much higher than {#xpath} or {#nokogiri_css}.
|
311
|
+
#
|
312
|
+
# @example
|
313
|
+
# node.css('title')
|
314
|
+
# node.css('body h1.bold')
|
315
|
+
# node.css('div + p.green', 'div#one')
|
316
|
+
#
|
317
|
+
# @return [NodeSet] The matched set of Nodes.
|
318
|
+
#
|
319
|
+
# @see #xpath
|
320
|
+
# @see #nokogiri_css
|
188
321
|
def css(*args)
|
189
322
|
css_impl(args.join(', '))
|
190
323
|
end
|
191
324
|
|
325
|
+
# Like {#css}, but returns the first match.
|
326
|
+
#
|
327
|
+
# This method uses Lexbor as the selector engine. Its performance is much higher than {#at_xpath} or {#nokogiri_at_css}.
|
328
|
+
#
|
329
|
+
# @return [Node, nil] The first matched Node.
|
330
|
+
#
|
331
|
+
# @see #css
|
332
|
+
# @see #nokogiri_at_css
|
192
333
|
def at_css(*args)
|
193
334
|
at_css_impl(args.join(', '))
|
194
335
|
end
|
195
336
|
|
337
|
+
# Search this object for CSS +rules+. +rules+ must be one or more CSS
|
338
|
+
# selectors. It supports a mixed syntax of CSS selectors and XPath.
|
339
|
+
#
|
340
|
+
# This method uses libxml2 as the selector engine. It works the same way as {Nokogiri::Node#css}.
|
341
|
+
#
|
342
|
+
# @return [NodeSet] The matched set of Nodes.
|
343
|
+
#
|
344
|
+
# @see #css
|
196
345
|
def nokogiri_css(*args)
|
197
346
|
rules, handler, ns, _ = extract_params(args)
|
198
347
|
|
199
348
|
nokogiri_css_internal(self, rules, handler, ns)
|
200
349
|
end
|
201
350
|
|
351
|
+
# Like {#nokogiri_css}, but returns the first match.
|
352
|
+
#
|
353
|
+
# This method uses libxml2 as the selector engine. It works the same way as {Nokogiri::Node#at_css}.
|
354
|
+
#
|
355
|
+
# @return [Node, nil] The first matched Node.
|
356
|
+
#
|
357
|
+
# @see #nokogiri_css
|
358
|
+
# @see #at_css
|
202
359
|
def nokogiri_at_css(*args)
|
203
360
|
nokogiri_css(*args).first
|
204
361
|
end
|
205
362
|
|
363
|
+
# Search this node for XPath +paths+. +paths+ must be one or more XPath
|
364
|
+
# queries.
|
365
|
+
#
|
366
|
+
# It works the same way as {Nokogiri::Node#xpath}.
|
367
|
+
#
|
368
|
+
# @example
|
369
|
+
# node.xpath('.//title')
|
370
|
+
#
|
371
|
+
# @return [NodeSet] The matched set of Nodes.
|
206
372
|
def xpath(*args)
|
207
373
|
paths, handler, ns, binds = extract_params(args)
|
208
374
|
|
209
375
|
xpath_internal(self, paths, handler, ns, binds)
|
210
376
|
end
|
211
377
|
|
378
|
+
# Like {#xpath}, but returns the first match.
|
379
|
+
#
|
380
|
+
# It works the same way as {Nokogiri::Node#at_xpath}.
|
381
|
+
#
|
382
|
+
# @return [Node, nil] The first matched Node.
|
383
|
+
#
|
384
|
+
# @see #xpath
|
212
385
|
def at_xpath(*args)
|
213
386
|
xpath(*args).first
|
214
387
|
end
|
215
388
|
|
389
|
+
# Search this object for +paths+. +paths+ must be one or more XPath or CSS selectors.
|
390
|
+
#
|
391
|
+
# @return [NodeSet] The matched set of Nodes.
|
216
392
|
def search(*args)
|
217
393
|
paths, handler, ns, binds = extract_params(args)
|
218
394
|
|
@@ -225,6 +401,11 @@ module Nokolexbor
|
|
225
401
|
|
226
402
|
alias_method :/, :search
|
227
403
|
|
404
|
+
# Like {#search}, but returns the first match.
|
405
|
+
#
|
406
|
+
# @return [Node, nil] The first matched Node.
|
407
|
+
#
|
408
|
+
# @see #search
|
228
409
|
def at(*args)
|
229
410
|
paths, handler, ns, binds = extract_params(args)
|
230
411
|
|
@@ -237,26 +418,148 @@ module Nokolexbor
|
|
237
418
|
|
238
419
|
alias_method :%, :at
|
239
420
|
|
421
|
+
# Fetch CSS class names of a Node.
|
422
|
+
#
|
423
|
+
# This is a convenience function and is equivalent to:
|
424
|
+
#
|
425
|
+
# node.kwattr_values("class")
|
426
|
+
#
|
427
|
+
# @see #kwattr_values
|
428
|
+
# @see #add_class
|
429
|
+
# @see #append_class
|
430
|
+
# @see #remove_class
|
431
|
+
#
|
432
|
+
# @return [Array]
|
433
|
+
# The CSS classes present in the Node's "class" attribute. If the
|
434
|
+
# attribute is empty or non-existent, the return value is an empty array.
|
435
|
+
#
|
436
|
+
# @example
|
437
|
+
# node.classes # => ["section", "title", "header"]
|
240
438
|
def classes
|
241
439
|
kwattr_values("class")
|
242
440
|
end
|
243
441
|
|
442
|
+
# Ensure CSS classes are present on +self+. Any CSS classes in +names+ that already exist
|
443
|
+
# in the "class" attribute are _not_ added. Note that any existing duplicates in the
|
444
|
+
# "class" attribute are not removed. Compare with {#append_class}.
|
445
|
+
#
|
446
|
+
# This is a convenience function and is equivalent to:
|
447
|
+
#
|
448
|
+
# node.kwattr_add("class", names)
|
449
|
+
#
|
450
|
+
# @see #kwattr_add
|
451
|
+
# @see #classes
|
452
|
+
# @see #append_class
|
453
|
+
# @see #remove_class
|
454
|
+
#
|
455
|
+
# @param [String, Array<String>] names
|
456
|
+
# CSS class names to be added to the Node's "class" attribute. May be a string containing
|
457
|
+
# whitespace-delimited names, or an Array of String names. Any class names already present
|
458
|
+
# will not be added. Any class names not present will be added. If no "class" attribute
|
459
|
+
# exists, one is created.
|
460
|
+
#
|
461
|
+
# @return [Node] +self+, to support chaining of calls.
|
462
|
+
#
|
463
|
+
# @example
|
464
|
+
# node.add_class("section") # => <div class="section"></div>
|
465
|
+
# node.add_class("section") # => <div class="section"></div> # duplicate not added
|
466
|
+
# node.add_class("section header") # => <div class="section header"></div>
|
467
|
+
# node.add_class(["section", "header"]) # => <div class="section header"></div>
|
244
468
|
def add_class(names)
|
245
469
|
kwattr_add("class", names)
|
246
470
|
end
|
247
471
|
|
472
|
+
# Add CSS classes to +self+, regardless of duplication. Compare with {#add_class}.
|
473
|
+
#
|
474
|
+
# This is a convenience function and is equivalent to:
|
475
|
+
#
|
476
|
+
# node.kwattr_append("class", names)
|
477
|
+
#
|
478
|
+
# @see #kwattr_append
|
479
|
+
# @see #classes
|
480
|
+
# @see #add_class
|
481
|
+
# @see #remove_class
|
482
|
+
#
|
483
|
+
# @return [Node] +self+, to support chaining of calls.
|
248
484
|
def append_class(names)
|
249
485
|
kwattr_append("class", names)
|
250
486
|
end
|
251
487
|
|
488
|
+
# Remove CSS classes from this node. Any CSS class names in +css_classes+ that exist in
|
489
|
+
# this node's "class" attribute are removed, including any multiple entries.
|
490
|
+
#
|
491
|
+
# If no CSS classes remain after this operation, or if +css_classes+ is +nil+, the "class"
|
492
|
+
# attribute is deleted from the node.
|
493
|
+
#
|
494
|
+
# This is a convenience function and is equivalent to:
|
495
|
+
#
|
496
|
+
# node.kwattr_remove("class", css_classes)
|
497
|
+
#
|
498
|
+
# @see #kwattr_remove
|
499
|
+
# @see #classes
|
500
|
+
# @see #add_class
|
501
|
+
# @see #append_class
|
502
|
+
#
|
503
|
+
# @param names [String, Array<String>]
|
504
|
+
# CSS class names to be removed from the Node's
|
505
|
+
# "class" attribute. May be a string containing whitespace-delimited names, or an Array of
|
506
|
+
# String names. Any class names already present will be removed. If no CSS classes remain,
|
507
|
+
# the "class" attribute is deleted.
|
508
|
+
#
|
509
|
+
# @return [Node] +self+, to support chaining of calls.
|
510
|
+
#
|
511
|
+
# @example
|
512
|
+
# node.remove_class("section")
|
513
|
+
# node.remove_class(["section", "float"])
|
252
514
|
def remove_class(names = nil)
|
253
515
|
kwattr_remove("class", names)
|
254
516
|
end
|
255
517
|
|
518
|
+
# Fetch values from a keyword attribute of a Node.
|
519
|
+
#
|
520
|
+
# A "keyword attribute" is a node attribute that contains a set of space-delimited
|
521
|
+
# values. Perhaps the most familiar example of this is the HTML "class" attribute used to
|
522
|
+
# contain CSS classes. But other keyword attributes exist, for instance
|
523
|
+
# {the "rel" attribute}[https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes/rel].
|
524
|
+
#
|
525
|
+
# @see #kwattr_add
|
526
|
+
# @see #kwattr_append
|
527
|
+
# @see #kwattr_remove
|
528
|
+
#
|
529
|
+
# @param attribute_name [String]
|
530
|
+
# The name of the keyword attribute to be inspected.
|
531
|
+
#
|
532
|
+
# @return [Array<String>]
|
533
|
+
# The values present in the Node's +attribute_name+ attribute. If the
|
534
|
+
# attribute is empty or non-existent, the return value is an empty array.
|
256
535
|
def kwattr_values(attribute_name)
|
257
536
|
keywordify(attr(attribute_name) || [])
|
258
537
|
end
|
259
538
|
|
539
|
+
# Ensure that values are present in a keyword attribute.
|
540
|
+
#
|
541
|
+
# Any values in +keywords+ that already exist in the Node's attribute values are _not_
|
542
|
+
# added. Note that any existing duplicates in the attribute values are not removed. Compare
|
543
|
+
# with {#kwattr_append}.
|
544
|
+
#
|
545
|
+
# A "keyword attribute" is a node attribute that contains a set of space-delimited
|
546
|
+
# values. Perhaps the most familiar example of this is the HTML "class" attribute used to
|
547
|
+
# contain CSS classes. But other keyword attributes exist, for instance
|
548
|
+
# {the "rel" attribute}[https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes/rel].
|
549
|
+
#
|
550
|
+
# @see #add_class
|
551
|
+
# @see #kwattr_values
|
552
|
+
# @see #kwattr_append
|
553
|
+
# @see #kwattr_remove
|
554
|
+
#
|
555
|
+
# @param attribute_name [String] The name of the keyword attribute to be modified.
|
556
|
+
# @param keywords [String, Array<String>]
|
557
|
+
# Keywords to be added to the attribute named +attribute_name+. May be a string containing
|
558
|
+
# whitespace-delimited values, or an Array of String values. Any values already present will
|
559
|
+
# not be added. Any values not present will be added. If the named attribute does not exist,
|
560
|
+
# it is created.
|
561
|
+
#
|
562
|
+
# @return [Node] +self+, to support chaining of calls.
|
260
563
|
def kwattr_add(attribute_name, keywords)
|
261
564
|
keywords = keywordify(keywords)
|
262
565
|
current_kws = kwattr_values(attribute_name)
|
@@ -265,6 +568,27 @@ module Nokolexbor
|
|
265
568
|
self
|
266
569
|
end
|
267
570
|
|
571
|
+
# Add keywords to a Node's keyword attribute, regardless of duplication. Compare with
|
572
|
+
# {#kwattr_add}.
|
573
|
+
#
|
574
|
+
# A "keyword attribute" is a node attribute that contains a set of space-delimited
|
575
|
+
# values. Perhaps the most familiar example of this is the HTML "class" attribute used to
|
576
|
+
# contain CSS classes. But other keyword attributes exist, for instance
|
577
|
+
# {the "rel" attribute}[https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes/rel].
|
578
|
+
#
|
579
|
+
# @see #add_class
|
580
|
+
# @see #kwattr_values
|
581
|
+
# @see #kwattr_add
|
582
|
+
# @see #kwattr_remove
|
583
|
+
#
|
584
|
+
# @param attribute_name [String] The name of the keyword attribute to be modified.
|
585
|
+
# @param keywords [String, Array<String>]
|
586
|
+
# Keywords to be appended to the attribute named +attribute_name+. May be a string containing
|
587
|
+
# whitespace-delimited values, or an Array of String values. All values are appended, even
|
588
|
+
# those that are already present in the attribute. If the named attribute does not exist,
|
589
|
+
# it is created.
|
590
|
+
#
|
591
|
+
# @return [Node] +self+, to support chaining of calls.
|
268
592
|
def kwattr_append(attribute_name, keywords)
|
269
593
|
keywords = keywordify(keywords)
|
270
594
|
current_kws = kwattr_values(attribute_name)
|
@@ -273,6 +597,30 @@ module Nokolexbor
|
|
273
597
|
self
|
274
598
|
end
|
275
599
|
|
600
|
+
# Remove keywords from a keyword attribute. Any matching keywords that exist in the named
|
601
|
+
# attribute are removed, including any multiple entries.
|
602
|
+
#
|
603
|
+
# If no keywords remain after this operation, or if +keywords+ is +nil+, the attribute is
|
604
|
+
# deleted from the node.
|
605
|
+
#
|
606
|
+
# A "keyword attribute" is a node attribute that contains a set of space-delimited
|
607
|
+
# values. Perhaps the most familiar example of this is the HTML "class" attribute used to
|
608
|
+
# contain CSS classes. But other keyword attributes exist, for instance
|
609
|
+
# {the "rel" attribute}[https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes/rel].
|
610
|
+
#
|
611
|
+
# @see #remove_class
|
612
|
+
# @see #kwattr_values
|
613
|
+
# @see #kwattr_add
|
614
|
+
# @see #kwattr_append
|
615
|
+
#
|
616
|
+
# @param attribute_name [String] The name of the keyword attribute to be modified.
|
617
|
+
# @param keywords [String, Array<String>]
|
618
|
+
# Keywords to be removed from the attribute named +attribute_name+. May be a string containing
|
619
|
+
# whitespace-delimited values, or an Array of String values. Any matching values present in the
|
620
|
+
# attribute are removed. If no values remain, or if +keywords+ is +nil+, the attribute is
|
621
|
+
# deleted.
|
622
|
+
#
|
623
|
+
# @return [Node] +self+, to support chaining of calls.
|
276
624
|
def kwattr_remove(attribute_name, keywords)
|
277
625
|
if keywords.nil?
|
278
626
|
remove_attr(attribute_name)
|
@@ -290,6 +638,15 @@ module Nokolexbor
|
|
290
638
|
self
|
291
639
|
end
|
292
640
|
|
641
|
+
# Serialize Node and write to +io+.
|
642
|
+
def write_to(io, *options)
|
643
|
+
io.write(to_html(*options))
|
644
|
+
end
|
645
|
+
|
646
|
+
alias_method :write_html_to, :write_to
|
647
|
+
|
648
|
+
private
|
649
|
+
|
293
650
|
def keywordify(keywords)
|
294
651
|
case keywords
|
295
652
|
when Enumerable
|
@@ -302,14 +659,6 @@ module Nokolexbor
|
|
302
659
|
end
|
303
660
|
end
|
304
661
|
|
305
|
-
def write_to(io, *options)
|
306
|
-
io.write(to_html(*options))
|
307
|
-
end
|
308
|
-
|
309
|
-
alias_method :write_html_to, :write_to
|
310
|
-
|
311
|
-
private
|
312
|
-
|
313
662
|
def nokogiri_css_internal(node, rules, handler, ns)
|
314
663
|
xpath_internal(node, css_rules_to_xpath(rules, ns), handler, ns, nil)
|
315
664
|
end
|
data/lib/nokolexbor/node_set.rb
CHANGED
@@ -4,6 +4,11 @@ module Nokolexbor
|
|
4
4
|
class NodeSet < Nokolexbor::Node
|
5
5
|
include Enumerable
|
6
6
|
|
7
|
+
# Create a NodeSet with +document+ defaulting to +list+.
|
8
|
+
#
|
9
|
+
# @yield [Document]
|
10
|
+
#
|
11
|
+
# @return [NodeSet]
|
7
12
|
def self.new(document, list = [])
|
8
13
|
obj = allocate
|
9
14
|
obj.instance_variable_set(:@document, document)
|
@@ -12,6 +17,9 @@ module Nokolexbor
|
|
12
17
|
obj
|
13
18
|
end
|
14
19
|
|
20
|
+
# Iterate over each node.
|
21
|
+
#
|
22
|
+
# @yield [Node]
|
15
23
|
def each
|
16
24
|
return to_enum unless block_given?
|
17
25
|
|
@@ -21,6 +29,11 @@ module Nokolexbor
|
|
21
29
|
self
|
22
30
|
end
|
23
31
|
|
32
|
+
# Get the first +n+ elements of the NodeSet.
|
33
|
+
#
|
34
|
+
# @param n [Numeric,nil]
|
35
|
+
#
|
36
|
+
# @return [Node,Array<Node>] {Node} if +n+ is nil, otherwise {Array<Node>}
|
24
37
|
def first(n = nil)
|
25
38
|
return self[0] unless n
|
26
39
|
|
@@ -29,14 +42,19 @@ module Nokolexbor
|
|
29
42
|
list
|
30
43
|
end
|
31
44
|
|
45
|
+
# Get the last element of the NodeSet.
|
46
|
+
#
|
47
|
+
# @return [Node,nil]
|
32
48
|
def last
|
33
49
|
self[-1]
|
34
50
|
end
|
35
51
|
|
52
|
+
# @return [Boolean] true if this NodeSet is empty.
|
36
53
|
def empty?
|
37
54
|
length == 0
|
38
55
|
end
|
39
56
|
|
57
|
+
# @return [Integer, nil] The index of the first node in this NodeSet that is equal to +node+ or meets the given block. Returns nil if no match is found.
|
40
58
|
def index(node = nil)
|
41
59
|
if node
|
42
60
|
each_with_index { |member, j| return j if member == node }
|
@@ -46,6 +64,9 @@ module Nokolexbor
|
|
46
64
|
nil
|
47
65
|
end
|
48
66
|
|
67
|
+
# Get the content of all contained Nodes.
|
68
|
+
#
|
69
|
+
# @return [String]
|
49
70
|
def content
|
50
71
|
self.map(&:content).join
|
51
72
|
end
|
@@ -54,10 +75,16 @@ module Nokolexbor
|
|
54
75
|
alias_method :inner_text, :content
|
55
76
|
alias_method :to_str, :content
|
56
77
|
|
78
|
+
# Get the inner html of all contained Nodes.
|
79
|
+
#
|
80
|
+
# @return [String]
|
57
81
|
def inner_html(*args)
|
58
82
|
self.map { |n| n.inner_html(*args) }.join
|
59
83
|
end
|
60
84
|
|
85
|
+
# Convert this NodeSet to HTML.
|
86
|
+
#
|
87
|
+
# @return [String]
|
61
88
|
def outer_html(*args)
|
62
89
|
self.map { |n| n.outer_html(*args) }.join
|
63
90
|
end
|
@@ -66,6 +93,9 @@ module Nokolexbor
|
|
66
93
|
alias_method :to_html, :outer_html
|
67
94
|
alias_method :serialize, :outer_html
|
68
95
|
|
96
|
+
# Remove all nodes in this NodeSet.
|
97
|
+
#
|
98
|
+
# @see Node#remove
|
69
99
|
def remove
|
70
100
|
self.each(&:remove)
|
71
101
|
end
|
@@ -73,22 +103,32 @@ module Nokolexbor
|
|
73
103
|
alias_method :unlink, :remove
|
74
104
|
alias_method :to_ary, :to_a
|
75
105
|
|
106
|
+
# Destroy all nodes in the NodeSet.
|
107
|
+
#
|
108
|
+
# @see Node#destroy
|
76
109
|
def destroy
|
77
110
|
self.each(&:destroy)
|
78
111
|
end
|
79
112
|
|
113
|
+
# @return [Node,nil] The last element of this NodeSet and removes it. Returns
|
114
|
+
# +nil+ if the set is empty.
|
80
115
|
def pop
|
81
116
|
return nil if length == 0
|
82
117
|
|
83
118
|
delete(last)
|
84
119
|
end
|
85
120
|
|
121
|
+
# @return [Node,nil] The first element of this NodeSet and removes it. Returns
|
122
|
+
# +nil+ if the set is empty.
|
86
123
|
def shift
|
87
124
|
return nil if length == 0
|
88
125
|
|
89
126
|
delete(first)
|
90
127
|
end
|
91
128
|
|
129
|
+
# @return [Boolean] true if two NodeSets contain the same number
|
130
|
+
# of elements and each element is equal to the corresponding
|
131
|
+
# element in the other NodeSet.
|
92
132
|
def ==(other)
|
93
133
|
return false unless other.is_a?(NodeSet)
|
94
134
|
return false unless length == other.length
|
@@ -99,6 +139,8 @@ module Nokolexbor
|
|
99
139
|
true
|
100
140
|
end
|
101
141
|
|
142
|
+
# @return [NodeSet] A new NodeSet containing all the children of all the nodes in
|
143
|
+
# the NodeSet.
|
102
144
|
def children
|
103
145
|
node_set = NodeSet.new(@document)
|
104
146
|
each do |node|
|
@@ -107,6 +149,8 @@ module Nokolexbor
|
|
107
149
|
node_set
|
108
150
|
end
|
109
151
|
|
152
|
+
# @return [NodeSet] A new NodeSet containing all the nodes in the NodeSet
|
153
|
+
# in reverse order.
|
110
154
|
def reverse
|
111
155
|
node_set = NodeSet.new(@document)
|
112
156
|
(length - 1).downto(0) do |x|
|
@@ -115,6 +159,17 @@ module Nokolexbor
|
|
115
159
|
node_set
|
116
160
|
end
|
117
161
|
|
162
|
+
# Wrap all nodes of this NodeSet with +node_or_tags+.
|
163
|
+
#
|
164
|
+
# @see Node#wrap
|
165
|
+
#
|
166
|
+
# @return [NodeSet] +self+, to support chaining.
|
167
|
+
def wrap(node_or_tags)
|
168
|
+
map { |node| node.wrap(node_or_tags) }
|
169
|
+
self
|
170
|
+
end
|
171
|
+
|
172
|
+
# (see Node#xpath)
|
118
173
|
def xpath(*args)
|
119
174
|
paths, handler, ns, binds = extract_params(args)
|
120
175
|
|
@@ -127,6 +182,7 @@ module Nokolexbor
|
|
127
182
|
end
|
128
183
|
end
|
129
184
|
|
185
|
+
# (see Node#nokogiri_css)
|
130
186
|
def nokogiri_css(*args)
|
131
187
|
rules, handler, ns, _ = extract_params(args)
|
132
188
|
paths = css_rules_to_xpath(rules, ns)
|
data/lib/nokolexbor/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: nokolexbor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.4
|
4
|
+
version: 0.3.6
|
5
5
|
platform: arm64-darwin
|
6
6
|
authors:
|
7
7
|
- Yicheng Zhou
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-02-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake-compiler
|
@@ -50,6 +50,7 @@ files:
|
|
50
50
|
- lib/nokolexbor/2.7/nokolexbor.bundle
|
51
51
|
- lib/nokolexbor/3.0/nokolexbor.bundle
|
52
52
|
- lib/nokolexbor/3.1/nokolexbor.bundle
|
53
|
+
- lib/nokolexbor/3.2/nokolexbor.bundle
|
53
54
|
- lib/nokolexbor/document.rb
|
54
55
|
- lib/nokolexbor/document_fragment.rb
|
55
56
|
- lib/nokolexbor/node.rb
|
@@ -61,7 +62,7 @@ homepage: https://github.com/serpapi/nokolexbor
|
|
61
62
|
licenses:
|
62
63
|
- MIT
|
63
64
|
metadata: {}
|
64
|
-
post_install_message:
|
65
|
+
post_install_message:
|
65
66
|
rdoc_options: []
|
66
67
|
require_paths:
|
67
68
|
- lib
|
@@ -72,15 +73,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
72
73
|
version: '2.6'
|
73
74
|
- - "<"
|
74
75
|
- !ruby/object:Gem::Version
|
75
|
-
version: 3.
|
76
|
+
version: 3.3.dev
|
76
77
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
77
78
|
requirements:
|
78
79
|
- - ">="
|
79
80
|
- !ruby/object:Gem::Version
|
80
81
|
version: '0'
|
81
82
|
requirements: []
|
82
|
-
rubygems_version: 3.3.
|
83
|
-
signing_key:
|
83
|
+
rubygems_version: 3.3.26
|
84
|
+
signing_key:
|
84
85
|
specification_version: 4
|
85
86
|
summary: High-performance HTML5 parser, with support for both CSS selectors and XPath.
|
86
87
|
test_files: []
|