nokolexbor 0.3.4-x86_64-linux → 0.3.6-x86_64-linux
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/nokolexbor/2.6/nokolexbor.so +0 -0
- data/lib/nokolexbor/2.7/nokolexbor.so +0 -0
- data/lib/nokolexbor/3.0/nokolexbor.so +0 -0
- data/lib/nokolexbor/3.1/nokolexbor.so +0 -0
- data/lib/nokolexbor/3.2/nokolexbor.so +0 -0
- data/lib/nokolexbor/document.rb +52 -5
- data/lib/nokolexbor/document_fragment.rb +11 -0
- data/lib/nokolexbor/node.rb +367 -18
- data/lib/nokolexbor/node_set.rb +56 -0
- data/lib/nokolexbor/version.rb +1 -1
- metadata +8 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 17d2a58e0246fbce820bce1c61dee0410298417141d29cac0c2bf1a27c6f7489
|
4
|
+
data.tar.gz: 32483288bcf7f1387de0019d971b20328f10061c4366461b625e1155a7c8bfed
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: bd247187201389bb0949c3b8e239e73f8df4c028f9eb18d0c346e0d12319582e50ae4410e205c47d1ff176517ca0eadaf983124c01fe8836c5144561952865be
|
7
|
+
data.tar.gz: acbd8d822bd2ce505bfe6e5d06458a72d8600af5e02bf789379ec995042a4b215f0747ca3658bb18cc821f2e02863021043f5f18f7a589ae1e9a564a8387aa13
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
data/lib/nokolexbor/document.rb
CHANGED
@@ -2,6 +2,33 @@
|
|
2
2
|
|
3
3
|
module Nokolexbor
|
4
4
|
class Document < Nokolexbor::Node
|
5
|
+
# Create an {Element} with +name+ belonging to this document, optionally setting contents or
|
6
|
+
# attributes.
|
7
|
+
#
|
8
|
+
# @param name [String]
|
9
|
+
# @param contents_or_attrs [#to_s, Hash]
|
10
|
+
#
|
11
|
+
# @return [Element]
|
12
|
+
#
|
13
|
+
# @example An empty element without attributes
|
14
|
+
# doc.create_element("div")
|
15
|
+
# # => <div></div>
|
16
|
+
#
|
17
|
+
# @example An element with contents
|
18
|
+
# doc.create_element("div", "contents")
|
19
|
+
# # => <div>contents</div>
|
20
|
+
#
|
21
|
+
# @example An element with attributes
|
22
|
+
# doc.create_element("div", {"class" => "container"})
|
23
|
+
# # => <div class='container'></div>
|
24
|
+
#
|
25
|
+
# @example An element with contents and attributes
|
26
|
+
# doc.create_element("div", "contents", {"class" => "container"})
|
27
|
+
# # => <div class='container'>contents</div>
|
28
|
+
#
|
29
|
+
# @example Passing a block to mutate the element
|
30
|
+
# doc.create_element("div") { |node| node["class"] = "blue" }
|
31
|
+
# # => <div class='blue'></div>
|
5
32
|
def create_element(name, *contents_or_attrs, &block)
|
6
33
|
elm = Nokolexbor::Element.new(name, self, &block)
|
7
34
|
contents_or_attrs.each do |arg|
|
@@ -11,32 +38,43 @@ module Nokolexbor
|
|
11
38
|
elm[k.to_s] = v.to_s
|
12
39
|
end
|
13
40
|
else
|
14
|
-
elm.content = arg
|
41
|
+
elm.content = arg.to_s
|
15
42
|
end
|
16
43
|
end
|
17
44
|
elm
|
18
45
|
end
|
19
46
|
|
20
|
-
# Create a Text
|
47
|
+
# Create a {Text} with +string+.
|
48
|
+
#
|
49
|
+
# @return [Text]
|
21
50
|
def create_text_node(string, &block)
|
22
51
|
Nokolexbor::Text.new(string.to_s, self, &block)
|
23
52
|
end
|
24
53
|
|
25
|
-
# Create a CDATA
|
54
|
+
# Create a {CDATA} containing +string+.
|
55
|
+
#
|
56
|
+
# @return [CDATA]
|
26
57
|
def create_cdata(string, &block)
|
27
58
|
Nokolexbor::CDATA.new(string.to_s, self, &block)
|
28
59
|
end
|
29
60
|
|
30
|
-
# Create a Comment
|
61
|
+
# Create a {Comment} containing +string+.
|
62
|
+
#
|
63
|
+
# @return [Comment]
|
31
64
|
def create_comment(string, &block)
|
32
65
|
Nokolexbor::Comment.new(string.to_s, self, &block)
|
33
66
|
end
|
34
67
|
|
35
|
-
# A reference to +self
|
68
|
+
# A reference to +self+.
|
69
|
+
#
|
70
|
+
# @return [Document]
|
36
71
|
def document
|
37
72
|
self
|
38
73
|
end
|
39
74
|
|
75
|
+
# Get the meta tag encoding for this document. If there is no meta tag, nil is returned.
|
76
|
+
#
|
77
|
+
# @return [String, nil]
|
40
78
|
def meta_encoding
|
41
79
|
if (meta = at_css("meta[charset]"))
|
42
80
|
meta[:charset]
|
@@ -45,6 +83,15 @@ module Nokolexbor
|
|
45
83
|
end
|
46
84
|
end
|
47
85
|
|
86
|
+
# Set the meta tag encoding for this document.
|
87
|
+
#
|
88
|
+
# If a meta encoding tag is already present, its content is
|
89
|
+
# replaced with the given text.
|
90
|
+
#
|
91
|
+
# Otherwise, this method tries to create one at an appropriate
|
92
|
+
# place supplying head and/or html elements as necessary, which
|
93
|
+
# is inside a head element if any, and before any text node or
|
94
|
+
# content element (typically <body>) if any.
|
48
95
|
def meta_encoding=(encoding)
|
49
96
|
if (meta = meta_content_type)
|
50
97
|
meta["content"] = format("text/html; charset=%s", encoding)
|
@@ -2,10 +2,17 @@
|
|
2
2
|
|
3
3
|
module Nokolexbor
|
4
4
|
class DocumentFragment < Nokolexbor::Node
|
5
|
+
# Create a {DocumentFragment} from +tags+.
|
6
|
+
#
|
7
|
+
# @return [DocumentFragment]
|
5
8
|
def self.parse(tags)
|
6
9
|
new(Nokolexbor::Document.new, tags, nil)
|
7
10
|
end
|
8
11
|
|
12
|
+
# Create a new {DocumentFragment} from +tags+.
|
13
|
+
#
|
14
|
+
# If +ctx+ is present, it is used as a context node for the
|
15
|
+
# subtree created.
|
9
16
|
def initialize(document, tags = nil, ctx = nil)
|
10
17
|
return self unless tags
|
11
18
|
|
@@ -15,6 +22,7 @@ module Nokolexbor
|
|
15
22
|
nil
|
16
23
|
end
|
17
24
|
|
25
|
+
# @return [String] The name of {DocumentFragment}
|
18
26
|
def name
|
19
27
|
"#document-fragment"
|
20
28
|
end
|
@@ -24,6 +32,9 @@ module Nokolexbor
|
|
24
32
|
alias_method :to_s, :outer_html
|
25
33
|
alias_method :serialize, :outer_html
|
26
34
|
|
35
|
+
# Create a {DocumentFragment} from +data+.
|
36
|
+
#
|
37
|
+
# @return [DocumentFragment]
|
27
38
|
def fragment(data)
|
28
39
|
document.fragment(data)
|
29
40
|
end
|
data/lib/nokolexbor/node.rb
CHANGED
@@ -17,38 +17,51 @@ module Nokolexbor
|
|
17
17
|
DOCUMENT_FRAG_NODE = 11
|
18
18
|
NOTATION_NODE = 12
|
19
19
|
|
20
|
+
# @return [Document] The associated {Document} of this node
|
20
21
|
attr_reader :document
|
21
22
|
|
22
23
|
LOOKS_LIKE_XPATH = %r{^(\./|/|\.\.|\.$)}
|
23
24
|
|
25
|
+
# @return true if this is a {Comment}
|
24
26
|
def comment?
|
25
27
|
type == COMMENT_NODE
|
26
28
|
end
|
27
29
|
|
30
|
+
# @return true if this is a {CDATA}
|
28
31
|
def cdata?
|
29
32
|
type == CDATA_SECTION_NODE
|
30
33
|
end
|
31
34
|
|
35
|
+
# @return true if this is a {ProcessingInstruction}
|
32
36
|
def processing_instruction?
|
33
37
|
type == PI_NODE
|
34
38
|
end
|
35
39
|
|
40
|
+
# @return true if this is a {Text}
|
36
41
|
def text?
|
37
42
|
type == TEXT_NODE
|
38
43
|
end
|
39
44
|
|
45
|
+
# @return true if this is a {DocumentFragment}
|
40
46
|
def fragment?
|
41
47
|
type == DOCUMENT_FRAG_NODE
|
42
48
|
end
|
43
49
|
|
50
|
+
# @return true if this is an {Element}
|
44
51
|
def element?
|
45
52
|
type == ELEMENT_NODE
|
46
53
|
end
|
47
54
|
|
55
|
+
# @return true if this is a {Document}
|
48
56
|
def document?
|
49
57
|
is_a?(Nokolexbor::Document)
|
50
58
|
end
|
51
59
|
|
60
|
+
# Get a list of ancestor nodes of this Node.
|
61
|
+
#
|
62
|
+
# @param [String, nil] selector The selector to match ancestors
|
63
|
+
#
|
64
|
+
# @return [NodeSet] A set of matched ancestor nodes
|
52
65
|
def ancestors(selector = nil)
|
53
66
|
return NodeSet.new(@document) unless respond_to?(:parent)
|
54
67
|
return NodeSet.new(@document) unless parent
|
@@ -71,10 +84,39 @@ module Nokolexbor
|
|
71
84
|
end)
|
72
85
|
end
|
73
86
|
|
87
|
+
# Wrap this Node with another node.
|
88
|
+
#
|
89
|
+
# @param node [String, Node] A string or a node
|
90
|
+
# - when {String}:
|
91
|
+
# The markup that is parsed and used as the wrapper. If the parsed
|
92
|
+
# fragment has multiple roots, the first root node is used as the wrapper.
|
93
|
+
# - when {Node}:
|
94
|
+
# An element that is cloned and used as the wrapper.
|
95
|
+
#
|
96
|
+
# @return [Node] +self+, to support chaining of calls.
|
97
|
+
#
|
98
|
+
# @see NodeSet#wrap
|
99
|
+
#
|
100
|
+
# @example with a {String} argument:
|
101
|
+
#
|
102
|
+
# doc = Nokolexbor::HTML('<body><a>123</a></body>')
|
103
|
+
# doc.at_css('a').wrap('<div></div>')
|
104
|
+
# doc.at_css('body').inner_html
|
105
|
+
# # => "<div><a>123</a></div>"
|
106
|
+
#
|
107
|
+
# @example with a {Node} argument:
|
108
|
+
#
|
109
|
+
# doc = Nokolexbor::HTML('<body><a>123</a></body>')
|
110
|
+
# doc.at_css('a').wrap(doc.create_element('div'))
|
111
|
+
# doc.at_css('body').inner_html
|
112
|
+
# # => "<div><a>123</a></div>"
|
113
|
+
#
|
74
114
|
def wrap(node)
|
75
115
|
case node
|
76
116
|
when String
|
77
117
|
new_parent = fragment(node).child
|
118
|
+
when DocumentFragment
|
119
|
+
new_parent = node.child
|
78
120
|
when Node
|
79
121
|
new_parent = node.dup
|
80
122
|
else
|
@@ -91,6 +133,13 @@ module Nokolexbor
|
|
91
133
|
self
|
92
134
|
end
|
93
135
|
|
136
|
+
# Insert +node_or_tags+ before this Node (as a sibling).
|
137
|
+
#
|
138
|
+
# @param node_or_tags [Node, DocumentFragment, NodeSet, String] The node to be added.
|
139
|
+
#
|
140
|
+
# @return [Node,NodeSet] The reparented {Node} (if +node_or_tags+ is a {Node}), or {NodeSet} (if +node_or_tags+ is a {DocumentFragment}, {NodeSet}, or {String}).
|
141
|
+
#
|
142
|
+
# @see #before
|
94
143
|
def add_previous_sibling(node_or_tags)
|
95
144
|
raise ArgumentError,
|
96
145
|
"A document may not have multiple root nodes." if parent&.document? && !(node_or_tags.comment? || node_or_tags.processing_instruction?)
|
@@ -98,6 +147,13 @@ module Nokolexbor
|
|
98
147
|
add_sibling(:previous, node_or_tags)
|
99
148
|
end
|
100
149
|
|
150
|
+
# Insert +node_or_tags+ after this Node (as a sibling).
|
151
|
+
#
|
152
|
+
# @param node_or_tags [Node, DocumentFragment, NodeSet, String] The node to be added.
|
153
|
+
#
|
154
|
+
# @return [Node,NodeSet] The reparented {Node} (if +node_or_tags+ is a {Node}), or {NodeSet} (if +node_or_tags+ is a {DocumentFragment}, {NodeSet}, or {String}).
|
155
|
+
#
|
156
|
+
# @see #after
|
101
157
|
def add_next_sibling(node_or_tags)
|
102
158
|
raise ArgumentError,
|
103
159
|
"A document may not have multiple root nodes." if parent&.document? && !(node_or_tags.comment? || node_or_tags.processing_instruction?)
|
@@ -105,11 +161,25 @@ module Nokolexbor
|
|
105
161
|
add_sibling(:next, node_or_tags)
|
106
162
|
end
|
107
163
|
|
164
|
+
# Insert +node_or_tags+ before this Node (as a sibling).
|
165
|
+
#
|
166
|
+
# @param node_or_tags [Node, DocumentFragment, NodeSet, String] The node to be added.
|
167
|
+
#
|
168
|
+
# @return [Node] +self+, to support chaining of calls.
|
169
|
+
#
|
170
|
+
# @see #add_previous_sibling
|
108
171
|
def before(node_or_tags)
|
109
172
|
add_previous_sibling(node_or_tags)
|
110
173
|
self
|
111
174
|
end
|
112
175
|
|
176
|
+
# Insert +node_or_tags+ after this Node (as a sibling).
|
177
|
+
#
|
178
|
+
# @param node_or_tags [Node, DocumentFragment, NodeSet, String] The node to be added.
|
179
|
+
#
|
180
|
+
# @return [Node] +self+, to support chaining of calls.
|
181
|
+
#
|
182
|
+
# @see #add_next_sibling
|
113
183
|
def after(node_or_tags)
|
114
184
|
add_next_sibling(node_or_tags)
|
115
185
|
self
|
@@ -120,11 +190,25 @@ module Nokolexbor
|
|
120
190
|
alias_method :next=, :add_next_sibling
|
121
191
|
alias_method :previous=, :add_previous_sibling
|
122
192
|
|
193
|
+
# Add +node_or_tags+ as a child of this Node.
|
194
|
+
#
|
195
|
+
# @param node_or_tags [Node, DocumentFragment, NodeSet, String] The node to be added.
|
196
|
+
#
|
197
|
+
# @return [Node] +self+, to support chaining of calls.
|
198
|
+
#
|
199
|
+
# @see #add_child
|
123
200
|
def <<(node_or_tags)
|
124
201
|
add_child(node_or_tags)
|
125
202
|
self
|
126
203
|
end
|
127
204
|
|
205
|
+
# Add +node+ as the first child of this Node.
|
206
|
+
#
|
207
|
+
# @param node [Node, DocumentFragment, NodeSet, String] The node to be added.
|
208
|
+
#
|
209
|
+
# @return [Node,NodeSet] The reparented {Node} (if +node+ is a {Node}), or {NodeSet} (if +node+ is a {DocumentFragment}, {NodeSet}, or {String}).
|
210
|
+
#
|
211
|
+
# @see #add_child
|
128
212
|
def prepend_child(node)
|
129
213
|
if (first = children.first)
|
130
214
|
# Mimic the error add_child would raise.
|
@@ -136,83 +220,175 @@ module Nokolexbor
|
|
136
220
|
end
|
137
221
|
end
|
138
222
|
|
223
|
+
# Traverse self and all children.
|
224
|
+
# @yield self and all children to +block+ recursively.
|
139
225
|
def traverse(&block)
|
140
226
|
children.each { |j| j.traverse(&block) }
|
141
227
|
yield(self)
|
142
228
|
end
|
143
229
|
|
230
|
+
# @param selector [String] The selector to match
|
231
|
+
#
|
232
|
+
# @return true if this Node matches +selector+
|
144
233
|
def matches?(selector)
|
145
234
|
ancestors.last.css(selector).any? { |node| node == self }
|
146
235
|
end
|
147
236
|
|
237
|
+
# Fetch this node's attributes.
|
238
|
+
#
|
239
|
+
# @return [Hash{String => Attribute}] Hash containing attributes belonging to +self+. The hash keys are String attribute names, and the hash values are {Nokolexbor::Attribute}.
|
148
240
|
def attributes
|
149
241
|
attribute_nodes.each_with_object({}) do |node, hash|
|
150
242
|
hash[node.name] = node
|
151
243
|
end
|
152
244
|
end
|
153
245
|
|
246
|
+
# Replace this Node with +node+.
|
247
|
+
#
|
248
|
+
# @param node [Node, DocumentFragment, NodeSet, String]
|
249
|
+
#
|
250
|
+
# @return [Node,NodeSet] The reparented {Node} (if +node+ is a {Node}), or {NodeSet} (if +node+ is a {DocumentFragment}, {NodeSet}, or {String}).
|
251
|
+
#
|
252
|
+
# @see #swap
|
154
253
|
def replace(node)
|
155
|
-
|
156
|
-
node.each { |n| add_sibling(:previous, n) }
|
157
|
-
else
|
158
|
-
add_sibling(:previous, node)
|
159
|
-
end
|
254
|
+
ret = add_sibling(:previous, node)
|
160
255
|
remove
|
256
|
+
ret
|
257
|
+
end
|
258
|
+
|
259
|
+
# Swap this Node for +node+.
|
260
|
+
#
|
261
|
+
# @param node [Node, DocumentFragment, NodeSet, String]
|
262
|
+
#
|
263
|
+
# @return [Node] +self+, to support chaining of calls.
|
264
|
+
#
|
265
|
+
# @see #replace
|
266
|
+
def swap(node)
|
267
|
+
replace(node)
|
268
|
+
self
|
161
269
|
end
|
162
270
|
|
271
|
+
# Set the content of this Node.
|
272
|
+
#
|
273
|
+
# @param node [Node, DocumentFragment, NodeSet, String] The node to be added.
|
274
|
+
#
|
275
|
+
# @see #inner_html=
|
163
276
|
def children=(node)
|
164
277
|
children.remove
|
165
|
-
|
166
|
-
node.each { |n| add_child(n) }
|
167
|
-
else
|
168
|
-
add_child(node)
|
169
|
-
end
|
278
|
+
add_child(node)
|
170
279
|
end
|
171
280
|
|
281
|
+
# Set the parent Node of this Node.
|
282
|
+
#
|
283
|
+
# @param parent_node [Node] The parent node.
|
172
284
|
def parent=(parent_node)
|
173
285
|
parent_node.add_child(self)
|
174
286
|
end
|
175
287
|
|
288
|
+
# Iterate over each attribute name and value pair of this Node.
|
289
|
+
#
|
290
|
+
# @yield [String,String] The name and value of the current attribute.
|
176
291
|
def each
|
177
292
|
attributes.each do |name, node|
|
178
293
|
yield [name, node.value]
|
179
294
|
end
|
180
295
|
end
|
181
296
|
|
297
|
+
# Create a {DocumentFragment} containing +tags+ that is relative to _this_
|
298
|
+
# context node.
|
299
|
+
#
|
300
|
+
# @return [DocumentFragment]
|
182
301
|
def fragment(tags)
|
183
302
|
Nokolexbor::DocumentFragment.new(document, tags, self)
|
184
303
|
end
|
185
304
|
|
186
305
|
alias_method :inner_html=, :children=
|
187
306
|
|
307
|
+
# Search this object for CSS +rules+. +rules+ must be one or more CSS
|
308
|
+
# selectors.
|
309
|
+
#
|
310
|
+
# This method uses Lexbor as the selector engine. Its performance is much higher than {#xpath} or {#nokogiri_css}.
|
311
|
+
#
|
312
|
+
# @example
|
313
|
+
# node.css('title')
|
314
|
+
# node.css('body h1.bold')
|
315
|
+
# node.css('div + p.green', 'div#one')
|
316
|
+
#
|
317
|
+
# @return [NodeSet] The matched set of Nodes.
|
318
|
+
#
|
319
|
+
# @see #xpath
|
320
|
+
# @see #nokogiri_css
|
188
321
|
def css(*args)
|
189
322
|
css_impl(args.join(', '))
|
190
323
|
end
|
191
324
|
|
325
|
+
# Like {#css}, but returns the first match.
|
326
|
+
#
|
327
|
+
# This method uses Lexbor as the selector engine. Its performance is much higher than {#at_xpath} or {#nokogiri_at_css}.
|
328
|
+
#
|
329
|
+
# @return [Node, nil] The first matched Node.
|
330
|
+
#
|
331
|
+
# @see #css
|
332
|
+
# @see #nokogiri_at_css
|
192
333
|
def at_css(*args)
|
193
334
|
at_css_impl(args.join(', '))
|
194
335
|
end
|
195
336
|
|
337
|
+
# Search this object for CSS +rules+. +rules+ must be one or more CSS
|
338
|
+
# selectors. It supports a mixed syntax of CSS selectors and XPath.
|
339
|
+
#
|
340
|
+
# This method uses libxml2 as the selector engine. It works the same way as {Nokogiri::Node#css}.
|
341
|
+
#
|
342
|
+
# @return [NodeSet] The matched set of Nodes.
|
343
|
+
#
|
344
|
+
# @see #css
|
196
345
|
def nokogiri_css(*args)
|
197
346
|
rules, handler, ns, _ = extract_params(args)
|
198
347
|
|
199
348
|
nokogiri_css_internal(self, rules, handler, ns)
|
200
349
|
end
|
201
350
|
|
351
|
+
# Like {#nokogiri_css}, but returns the first match.
|
352
|
+
#
|
353
|
+
# This method uses libxml2 as the selector engine. It works the same way as {Nokogiri::Node#at_css}.
|
354
|
+
#
|
355
|
+
# @return [Node, nil] The first matched Node.
|
356
|
+
#
|
357
|
+
# @see #nokogiri_css
|
358
|
+
# @see #at_css
|
202
359
|
def nokogiri_at_css(*args)
|
203
360
|
nokogiri_css(*args).first
|
204
361
|
end
|
205
362
|
|
363
|
+
# Search this node for XPath +paths+. +paths+ must be one or more XPath
|
364
|
+
# queries.
|
365
|
+
#
|
366
|
+
# It works the same way as {Nokogiri::Node#xpath}.
|
367
|
+
#
|
368
|
+
# @example
|
369
|
+
# node.xpath('.//title')
|
370
|
+
#
|
371
|
+
# @return [NodeSet] The matched set of Nodes.
|
206
372
|
def xpath(*args)
|
207
373
|
paths, handler, ns, binds = extract_params(args)
|
208
374
|
|
209
375
|
xpath_internal(self, paths, handler, ns, binds)
|
210
376
|
end
|
211
377
|
|
378
|
+
# Like {#xpath}, but returns the first match.
|
379
|
+
#
|
380
|
+
# It works the same way as {Nokogiri::Node#at_xpath}.
|
381
|
+
#
|
382
|
+
# @return [Node, nil] The first matched Node.
|
383
|
+
#
|
384
|
+
# @see #xpath
|
212
385
|
def at_xpath(*args)
|
213
386
|
xpath(*args).first
|
214
387
|
end
|
215
388
|
|
389
|
+
# Search this object for +paths+. +paths+ must be one or more XPath or CSS selectors.
|
390
|
+
#
|
391
|
+
# @return [NodeSet] The matched set of Nodes.
|
216
392
|
def search(*args)
|
217
393
|
paths, handler, ns, binds = extract_params(args)
|
218
394
|
|
@@ -225,6 +401,11 @@ module Nokolexbor
|
|
225
401
|
|
226
402
|
alias_method :/, :search
|
227
403
|
|
404
|
+
# Like {#search}, but returns the first match.
|
405
|
+
#
|
406
|
+
# @return [Node, nil] The first matched Node.
|
407
|
+
#
|
408
|
+
# @see #search
|
228
409
|
def at(*args)
|
229
410
|
paths, handler, ns, binds = extract_params(args)
|
230
411
|
|
@@ -237,26 +418,148 @@ module Nokolexbor
|
|
237
418
|
|
238
419
|
alias_method :%, :at
|
239
420
|
|
421
|
+
# Fetch CSS class names of a Node.
|
422
|
+
#
|
423
|
+
# This is a convenience function and is equivalent to:
|
424
|
+
#
|
425
|
+
# node.kwattr_values("class")
|
426
|
+
#
|
427
|
+
# @see #kwattr_values
|
428
|
+
# @see #add_class
|
429
|
+
# @see #append_class
|
430
|
+
# @see #remove_class
|
431
|
+
#
|
432
|
+
# @return [Array]
|
433
|
+
# The CSS classes present in the Node's "class" attribute. If the
|
434
|
+
# attribute is empty or non-existent, the return value is an empty array.
|
435
|
+
#
|
436
|
+
# @example
|
437
|
+
# node.classes # => ["section", "title", "header"]
|
240
438
|
def classes
|
241
439
|
kwattr_values("class")
|
242
440
|
end
|
243
441
|
|
442
|
+
# Ensure CSS classes are present on +self+. Any CSS classes in +names+ that already exist
|
443
|
+
# in the "class" attribute are _not_ added. Note that any existing duplicates in the
|
444
|
+
# "class" attribute are not removed. Compare with {#append_class}.
|
445
|
+
#
|
446
|
+
# This is a convenience function and is equivalent to:
|
447
|
+
#
|
448
|
+
# node.kwattr_add("class", names)
|
449
|
+
#
|
450
|
+
# @see #kwattr_add
|
451
|
+
# @see #classes
|
452
|
+
# @see #append_class
|
453
|
+
# @see #remove_class
|
454
|
+
#
|
455
|
+
# @param [String, Array<String>] names
|
456
|
+
# CSS class names to be added to the Node's "class" attribute. May be a string containing
|
457
|
+
# whitespace-delimited names, or an Array of String names. Any class names already present
|
458
|
+
# will not be added. Any class names not present will be added. If no "class" attribute
|
459
|
+
# exists, one is created.
|
460
|
+
#
|
461
|
+
# @return [Node] +self+, to support chaining of calls.
|
462
|
+
#
|
463
|
+
# @example
|
464
|
+
# node.add_class("section") # => <div class="section"></div>
|
465
|
+
# node.add_class("section") # => <div class="section"></div> # duplicate not added
|
466
|
+
# node.add_class("section header") # => <div class="section header"></div>
|
467
|
+
# node.add_class(["section", "header"]) # => <div class="section header"></div>
|
244
468
|
def add_class(names)
|
245
469
|
kwattr_add("class", names)
|
246
470
|
end
|
247
471
|
|
472
|
+
# Add CSS classes to +self+, regardless of duplication. Compare with {#add_class}.
|
473
|
+
#
|
474
|
+
# This is a convenience function and is equivalent to:
|
475
|
+
#
|
476
|
+
# node.kwattr_append("class", names)
|
477
|
+
#
|
478
|
+
# @see #kwattr_append
|
479
|
+
# @see #classes
|
480
|
+
# @see #add_class
|
481
|
+
# @see #remove_class
|
482
|
+
#
|
483
|
+
# @return [Node] +self+, to support chaining of calls.
|
248
484
|
def append_class(names)
|
249
485
|
kwattr_append("class", names)
|
250
486
|
end
|
251
487
|
|
488
|
+
# Remove CSS classes from this node. Any CSS class names in +css_classes+ that exist in
|
489
|
+
# this node's "class" attribute are removed, including any multiple entries.
|
490
|
+
#
|
491
|
+
# If no CSS classes remain after this operation, or if +css_classes+ is +nil+, the "class"
|
492
|
+
# attribute is deleted from the node.
|
493
|
+
#
|
494
|
+
# This is a convenience function and is equivalent to:
|
495
|
+
#
|
496
|
+
# node.kwattr_remove("class", css_classes)
|
497
|
+
#
|
498
|
+
# @see #kwattr_remove
|
499
|
+
# @see #classes
|
500
|
+
# @see #add_class
|
501
|
+
# @see #append_class
|
502
|
+
#
|
503
|
+
# @param names [String, Array<String>]
|
504
|
+
# CSS class names to be removed from the Node's
|
505
|
+
# "class" attribute. May be a string containing whitespace-delimited names, or an Array of
|
506
|
+
# String names. Any class names already present will be removed. If no CSS classes remain,
|
507
|
+
# the "class" attribute is deleted.
|
508
|
+
#
|
509
|
+
# @return [Node] +self+, to support chaining of calls.
|
510
|
+
#
|
511
|
+
# @example
|
512
|
+
# node.remove_class("section")
|
513
|
+
# node.remove_class(["section", "float"])
|
252
514
|
def remove_class(names = nil)
|
253
515
|
kwattr_remove("class", names)
|
254
516
|
end
|
255
517
|
|
518
|
+
# Fetch values from a keyword attribute of a Node.
|
519
|
+
#
|
520
|
+
# A "keyword attribute" is a node attribute that contains a set of space-delimited
|
521
|
+
# values. Perhaps the most familiar example of this is the HTML "class" attribute used to
|
522
|
+
# contain CSS classes. But other keyword attributes exist, for instance
|
523
|
+
# {the "rel" attribute}[https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes/rel].
|
524
|
+
#
|
525
|
+
# @see #kwattr_add
|
526
|
+
# @see #kwattr_append
|
527
|
+
# @see #kwattr_remove
|
528
|
+
#
|
529
|
+
# @param attribute_name [String]
|
530
|
+
# The name of the keyword attribute to be inspected.
|
531
|
+
#
|
532
|
+
# @return [Array<String>]
|
533
|
+
# The values present in the Node's +attribute_name+ attribute. If the
|
534
|
+
# attribute is empty or non-existent, the return value is an empty array.
|
256
535
|
def kwattr_values(attribute_name)
|
257
536
|
keywordify(attr(attribute_name) || [])
|
258
537
|
end
|
259
538
|
|
539
|
+
# Ensure that values are present in a keyword attribute.
|
540
|
+
#
|
541
|
+
# Any values in +keywords+ that already exist in the Node's attribute values are _not_
|
542
|
+
# added. Note that any existing duplicates in the attribute values are not removed. Compare
|
543
|
+
# with {#kwattr_append}.
|
544
|
+
#
|
545
|
+
# A "keyword attribute" is a node attribute that contains a set of space-delimited
|
546
|
+
# values. Perhaps the most familiar example of this is the HTML "class" attribute used to
|
547
|
+
# contain CSS classes. But other keyword attributes exist, for instance
|
548
|
+
# {the "rel" attribute}[https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes/rel].
|
549
|
+
#
|
550
|
+
# @see #add_class
|
551
|
+
# @see #kwattr_values
|
552
|
+
# @see #kwattr_append
|
553
|
+
# @see #kwattr_remove
|
554
|
+
#
|
555
|
+
# @param attribute_name [String] The name of the keyword attribute to be modified.
|
556
|
+
# @param keywords [String, Array<String>]
|
557
|
+
# Keywords to be added to the attribute named +attribute_name+. May be a string containing
|
558
|
+
# whitespace-delimited values, or an Array of String values. Any values already present will
|
559
|
+
# not be added. Any values not present will be added. If the named attribute does not exist,
|
560
|
+
# it is created.
|
561
|
+
#
|
562
|
+
# @return [Node] +self+, to support chaining of calls.
|
260
563
|
def kwattr_add(attribute_name, keywords)
|
261
564
|
keywords = keywordify(keywords)
|
262
565
|
current_kws = kwattr_values(attribute_name)
|
@@ -265,6 +568,27 @@ module Nokolexbor
|
|
265
568
|
self
|
266
569
|
end
|
267
570
|
|
571
|
+
# Add keywords to a Node's keyword attribute, regardless of duplication. Compare with
|
572
|
+
# {#kwattr_add}.
|
573
|
+
#
|
574
|
+
# A "keyword attribute" is a node attribute that contains a set of space-delimited
|
575
|
+
# values. Perhaps the most familiar example of this is the HTML "class" attribute used to
|
576
|
+
# contain CSS classes. But other keyword attributes exist, for instance
|
577
|
+
# {the "rel" attribute}[https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes/rel].
|
578
|
+
#
|
579
|
+
# @see #add_class
|
580
|
+
# @see #kwattr_values
|
581
|
+
# @see #kwattr_add
|
582
|
+
# @see #kwattr_remove
|
583
|
+
#
|
584
|
+
# @param attribute_name [String] The name of the keyword attribute to be modified.
|
585
|
+
# @param keywords [String, Array<String>]
|
586
|
+
# Keywords to be added to the attribute named +attribute_name+. May be a string containing
|
587
|
+
# whitespace-delimited values, or an Array of String values. All values passed in will
|
588
|
+
# be appended, even if they are already present. If the named attribute does not exist,
|
589
|
+
# it is created.
|
590
|
+
#
|
591
|
+
# @return [Node] +self+, to support chaining of calls.
|
268
592
|
def kwattr_append(attribute_name, keywords)
|
269
593
|
keywords = keywordify(keywords)
|
270
594
|
current_kws = kwattr_values(attribute_name)
|
@@ -273,6 +597,30 @@ module Nokolexbor
|
|
273
597
|
self
|
274
598
|
end
|
275
599
|
|
600
|
+
# Remove keywords from a keyword attribute. Any matching keywords that exist in the named
|
601
|
+
# attribute are removed, including any multiple entries.
|
602
|
+
#
|
603
|
+
# If no keywords remain after this operation, or if +keywords+ is +nil+, the attribute is
|
604
|
+
# deleted from the node.
|
605
|
+
#
|
606
|
+
# A "keyword attribute" is a node attribute that contains a set of space-delimited
|
607
|
+
# values. Perhaps the most familiar example of this is the HTML "class" attribute used to
|
608
|
+
# contain CSS classes. But other keyword attributes exist, for instance
|
609
|
+
# {the "rel" attribute}[https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes/rel].
|
610
|
+
#
|
611
|
+
# @see #remove_class
|
612
|
+
# @see #kwattr_values
|
613
|
+
# @see #kwattr_add
|
614
|
+
# @see #kwattr_append
|
615
|
+
#
|
616
|
+
# @param attribute_name [String] The name of the keyword attribute to be modified.
|
617
|
+
# @param keywords [String, Array<String>]
|
618
|
+
# Keywords to be removed from the attribute named +attribute_name+. May be a string containing
|
619
|
+
# whitespace-delimited values, or an Array of String values. Any keywords present will be
|
620
|
+
# removed. If no keywords remain, or if +keywords+ is +nil+, the named attribute is
|
621
|
+
# deleted.
|
622
|
+
#
|
623
|
+
# @return [Node] +self+, to support chaining of calls.
|
276
624
|
def kwattr_remove(attribute_name, keywords)
|
277
625
|
if keywords.nil?
|
278
626
|
remove_attr(attribute_name)
|
@@ -290,6 +638,15 @@ module Nokolexbor
|
|
290
638
|
self
|
291
639
|
end
|
292
640
|
|
641
|
+
# Serialize Node and write to +io+.
|
642
|
+
def write_to(io, *options)
|
643
|
+
io.write(to_html(*options))
|
644
|
+
end
|
645
|
+
|
646
|
+
alias_method :write_html_to, :write_to
|
647
|
+
|
648
|
+
private
|
649
|
+
|
293
650
|
def keywordify(keywords)
|
294
651
|
case keywords
|
295
652
|
when Enumerable
|
@@ -302,14 +659,6 @@ module Nokolexbor
|
|
302
659
|
end
|
303
660
|
end
|
304
661
|
|
305
|
-
def write_to(io, *options)
|
306
|
-
io.write(to_html(*options))
|
307
|
-
end
|
308
|
-
|
309
|
-
alias_method :write_html_to, :write_to
|
310
|
-
|
311
|
-
private
|
312
|
-
|
313
662
|
def nokogiri_css_internal(node, rules, handler, ns)
|
314
663
|
xpath_internal(node, css_rules_to_xpath(rules, ns), handler, ns, nil)
|
315
664
|
end
|
data/lib/nokolexbor/node_set.rb
CHANGED
@@ -4,6 +4,11 @@ module Nokolexbor
|
|
4
4
|
class NodeSet < Nokolexbor::Node
|
5
5
|
include Enumerable
|
6
6
|
|
7
|
+
# Create a NodeSet associated with +document+; +list+ defaults to an empty Array.
|
8
|
+
#
|
9
|
+
# @yield [Document]
|
10
|
+
#
|
11
|
+
# @return [NodeSet]
|
7
12
|
def self.new(document, list = [])
|
8
13
|
obj = allocate
|
9
14
|
obj.instance_variable_set(:@document, document)
|
@@ -12,6 +17,9 @@ module Nokolexbor
|
|
12
17
|
obj
|
13
18
|
end
|
14
19
|
|
20
|
+
# Iterate over each node.
|
21
|
+
#
|
22
|
+
# @yield [Node]
|
15
23
|
def each
|
16
24
|
return to_enum unless block_given?
|
17
25
|
|
@@ -21,6 +29,11 @@ module Nokolexbor
|
|
21
29
|
self
|
22
30
|
end
|
23
31
|
|
32
|
+
# Get the first +n+ elements of the NodeSet.
|
33
|
+
#
|
34
|
+
# @param n [Numeric,nil]
|
35
|
+
#
|
36
|
+
# @return [Node,Array<Node>] {Node} if +n+ is nil, otherwise {Array<Node>}
|
24
37
|
def first(n = nil)
|
25
38
|
return self[0] unless n
|
26
39
|
|
@@ -29,14 +42,19 @@ module Nokolexbor
|
|
29
42
|
list
|
30
43
|
end
|
31
44
|
|
45
|
+
# Get the last element of the NodeSet.
|
46
|
+
#
|
47
|
+
# @return [Node,nil]
|
32
48
|
def last
|
33
49
|
self[-1]
|
34
50
|
end
|
35
51
|
|
52
|
+
# @return [Boolean] true if this NodeSet is empty.
|
36
53
|
def empty?
|
37
54
|
length == 0
|
38
55
|
end
|
39
56
|
|
57
|
+
# @return [Integer, nil] The index of the first node in this NodeSet that is equal to +node+ or meets the given block. Returns nil if no match is found.
|
40
58
|
def index(node = nil)
|
41
59
|
if node
|
42
60
|
each_with_index { |member, j| return j if member == node }
|
@@ -46,6 +64,9 @@ module Nokolexbor
|
|
46
64
|
nil
|
47
65
|
end
|
48
66
|
|
67
|
+
# Get the content of all contained Nodes.
|
68
|
+
#
|
69
|
+
# @return [String]
|
49
70
|
def content
|
50
71
|
self.map(&:content).join
|
51
72
|
end
|
@@ -54,10 +75,16 @@ module Nokolexbor
|
|
54
75
|
alias_method :inner_text, :content
|
55
76
|
alias_method :to_str, :content
|
56
77
|
|
78
|
+
# Get the inner html of all contained Nodes.
|
79
|
+
#
|
80
|
+
# @return [String]
|
57
81
|
def inner_html(*args)
|
58
82
|
self.map { |n| n.inner_html(*args) }.join
|
59
83
|
end
|
60
84
|
|
85
|
+
# Convert this NodeSet to HTML.
|
86
|
+
#
|
87
|
+
# @return [String]
|
61
88
|
def outer_html(*args)
|
62
89
|
self.map { |n| n.outer_html(*args) }.join
|
63
90
|
end
|
@@ -66,6 +93,9 @@ module Nokolexbor
|
|
66
93
|
alias_method :to_html, :outer_html
|
67
94
|
alias_method :serialize, :outer_html
|
68
95
|
|
96
|
+
# Remove all nodes in this NodeSet.
|
97
|
+
#
|
98
|
+
# @see Node#remove
|
69
99
|
def remove
|
70
100
|
self.each(&:remove)
|
71
101
|
end
|
@@ -73,22 +103,32 @@ module Nokolexbor
|
|
73
103
|
alias_method :unlink, :remove
|
74
104
|
alias_method :to_ary, :to_a
|
75
105
|
|
106
|
+
# Destroy all nodes in the NodeSet.
|
107
|
+
#
|
108
|
+
# @see Node#destroy
|
76
109
|
def destroy
|
77
110
|
self.each(&:destroy)
|
78
111
|
end
|
79
112
|
|
113
|
+
# @return [Node,nil] The last element of this NodeSet, which is also removed from the set. Returns
|
114
|
+
# +nil+ if the set is empty.
|
80
115
|
def pop
|
81
116
|
return nil if length == 0
|
82
117
|
|
83
118
|
delete(last)
|
84
119
|
end
|
85
120
|
|
121
|
+
# @return [Node,nil] The first element of this NodeSet, which is also removed from the set. Returns
|
122
|
+
# +nil+ if the set is empty.
|
86
123
|
def shift
|
87
124
|
return nil if length == 0
|
88
125
|
|
89
126
|
delete(first)
|
90
127
|
end
|
91
128
|
|
129
|
+
# @return [Boolean] true if two NodeSets contain the same number
|
130
|
+
# of elements and each element is equal to the corresponding
|
131
|
+
# element in the other NodeSet.
|
92
132
|
def ==(other)
|
93
133
|
return false unless other.is_a?(NodeSet)
|
94
134
|
return false unless length == other.length
|
@@ -99,6 +139,8 @@ module Nokolexbor
|
|
99
139
|
true
|
100
140
|
end
|
101
141
|
|
142
|
+
# @return [NodeSet] A new NodeSet containing all the children of all the nodes in
|
143
|
+
# the NodeSet.
|
102
144
|
def children
|
103
145
|
node_set = NodeSet.new(@document)
|
104
146
|
each do |node|
|
@@ -107,6 +149,8 @@ module Nokolexbor
|
|
107
149
|
node_set
|
108
150
|
end
|
109
151
|
|
152
|
+
# @return [NodeSet] A new NodeSet containing all the nodes in the NodeSet
|
153
|
+
# in reverse order.
|
110
154
|
def reverse
|
111
155
|
node_set = NodeSet.new(@document)
|
112
156
|
(length - 1).downto(0) do |x|
|
@@ -115,6 +159,17 @@ module Nokolexbor
|
|
115
159
|
node_set
|
116
160
|
end
|
117
161
|
|
162
|
+
# Wrap all nodes of this NodeSet with +node_or_tags+.
|
163
|
+
#
|
164
|
+
# @see Node#wrap
|
165
|
+
#
|
166
|
+
# @return [NodeSet] +self+, to support chaining.
|
167
|
+
def wrap(node_or_tags)
|
168
|
+
map { |node| node.wrap(node_or_tags) }
|
169
|
+
self
|
170
|
+
end
|
171
|
+
|
172
|
+
# (see Node#xpath)
|
118
173
|
def xpath(*args)
|
119
174
|
paths, handler, ns, binds = extract_params(args)
|
120
175
|
|
@@ -127,6 +182,7 @@ module Nokolexbor
|
|
127
182
|
end
|
128
183
|
end
|
129
184
|
|
185
|
+
# (see Node#nokogiri_css)
|
130
186
|
def nokogiri_css(*args)
|
131
187
|
rules, handler, ns, _ = extract_params(args)
|
132
188
|
paths = css_rules_to_xpath(rules, ns)
|
data/lib/nokolexbor/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: nokolexbor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.4
|
4
|
+
version: 0.3.6
|
5
5
|
platform: x86_64-linux
|
6
6
|
authors:
|
7
7
|
- Yicheng Zhou
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-02-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake-compiler
|
@@ -50,6 +50,7 @@ files:
|
|
50
50
|
- lib/nokolexbor/2.7/nokolexbor.so
|
51
51
|
- lib/nokolexbor/3.0/nokolexbor.so
|
52
52
|
- lib/nokolexbor/3.1/nokolexbor.so
|
53
|
+
- lib/nokolexbor/3.2/nokolexbor.so
|
53
54
|
- lib/nokolexbor/document.rb
|
54
55
|
- lib/nokolexbor/document_fragment.rb
|
55
56
|
- lib/nokolexbor/node.rb
|
@@ -61,7 +62,7 @@ homepage: https://github.com/serpapi/nokolexbor
|
|
61
62
|
licenses:
|
62
63
|
- MIT
|
63
64
|
metadata: {}
|
64
|
-
post_install_message:
|
65
|
+
post_install_message:
|
65
66
|
rdoc_options: []
|
66
67
|
require_paths:
|
67
68
|
- lib
|
@@ -72,15 +73,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
72
73
|
version: '2.6'
|
73
74
|
- - "<"
|
74
75
|
- !ruby/object:Gem::Version
|
75
|
-
version: 3.
|
76
|
+
version: 3.3.dev
|
76
77
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
77
78
|
requirements:
|
78
79
|
- - ">="
|
79
80
|
- !ruby/object:Gem::Version
|
80
81
|
version: '0'
|
81
82
|
requirements: []
|
82
|
-
rubygems_version: 3.3.
|
83
|
-
signing_key:
|
83
|
+
rubygems_version: 3.3.26
|
84
|
+
signing_key:
|
84
85
|
specification_version: 4
|
85
86
|
summary: High-performance HTML5 parser, with support for both CSS selectors and XPath.
|
86
87
|
test_files: []
|