nokolexbor 0.4.2 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/ext/nokolexbor/libxml/tree.h +6 -1
- data/ext/nokolexbor/nl_node.c +22 -0
- data/ext/nokolexbor/xml_tree.c +224 -0
- data/ext/nokolexbor/xml_xpath.c +0 -6
- data/lib/nokolexbor/node.rb +13 -0
- data/lib/nokolexbor/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 48a8709af6c858df3d9fc1e3de0254f1d44ab308722eabe05a376ecd047eb7ec
|
4
|
+
data.tar.gz: 5bc0dea72106ead6c1382ef37a730f4b5e7d8ddf273380ef31e37d4d22394106
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a025216fbd78c4b399b0938b4750d1eba72b3ed7e78d511222497a94ae904e8ed04a607641ab7d3dc4002d15763b47f4b8a4631575f82be022e6a719e08d0a7d
|
7
|
+
data.tar.gz: 61a219e89f430b2d726b8a676674fc9c5dfa924c8e75e878ba4637a135b04c25dc3cb3cc8e9a437cc6c8fc8099ede8fb14d506766c40df1bba5261e5fbabeb56
|
@@ -22,6 +22,11 @@
|
|
22
22
|
extern "C" {
|
23
23
|
#endif
|
24
24
|
|
25
|
+
static size_t tmp_len;
|
26
|
+
#define NODE_NAME(node) lxb_dom_node_name_qualified((node), &tmp_len)
|
27
|
+
#define NODE_NS_HREF(node) ((node)->prefix ? lxb_ns_by_id((node)->owner_document->ns, (node)->ns, &tmp_len) : NULL)
|
28
|
+
#define NODE_NS_PREFIX(node) lxb_ns_by_id((node)->owner_document->prefix, (node)->prefix, &tmp_len)
|
29
|
+
|
25
30
|
/*
|
26
31
|
* Some of the basic types pointer to structures:
|
27
32
|
*/
|
@@ -918,7 +923,7 @@ XMLPUBFUN long XMLCALL
|
|
918
923
|
xmlGetLineNo (const xmlNode *node);
|
919
924
|
#if defined(LIBXML_TREE_ENABLED) || defined(LIBXML_DEBUG_ENABLED)
|
920
925
|
XMLPUBFUN xmlChar * XMLCALL
|
921
|
-
|
926
|
+
nl_xmlGetNodePath (const lxb_dom_node_t *node);
|
922
927
|
#endif /* defined(LIBXML_TREE_ENABLED) || defined(LIBXML_DEBUG_ENABLED) */
|
923
928
|
XMLPUBFUN lxb_dom_node_t_ptr XMLCALL
|
924
929
|
nl_xmlDocGetRootElement (const lxb_dom_document_t *doc);
|
data/ext/nokolexbor/nl_node.c
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
#include "nokolexbor.h"
|
2
|
+
#include "libxml/tree.h"
|
2
3
|
|
3
4
|
#define SORT_NAME nl_css_result
|
4
5
|
#define SORT_TYPE lxb_dom_node_t *
|
@@ -871,6 +872,9 @@ nl_node_destroy(VALUE self)
|
|
871
872
|
static VALUE
|
872
873
|
nl_node_equals(VALUE self, VALUE other)
|
873
874
|
{
|
875
|
+
if (!rb_obj_is_kind_of(other, cNokolexborNode)) {
|
876
|
+
return false;
|
877
|
+
}
|
874
878
|
lxb_dom_node_t *node1 = nl_rb_node_unwrap(self);
|
875
879
|
lxb_dom_node_t *node2 = nl_rb_node_unwrap(other);
|
876
880
|
return node1 == node2 ? Qtrue : Qfalse;
|
@@ -1141,6 +1145,22 @@ nl_node_source_location(VALUE self)
|
|
1141
1145
|
return ULONG2NUM(node->source_location);
|
1142
1146
|
}
|
1143
1147
|
|
1148
|
+
/**
|
1149
|
+
* @return [String] The path associated with this Node.
|
1150
|
+
*/
|
1151
|
+
static VALUE
|
1152
|
+
nl_node_path(VALUE self)
|
1153
|
+
{
|
1154
|
+
lxb_dom_node_t *node = nl_rb_node_unwrap(self);
|
1155
|
+
char* path = nl_xmlGetNodePath(node);
|
1156
|
+
if (path == NULL) {
|
1157
|
+
return Qnil;
|
1158
|
+
}
|
1159
|
+
VALUE ret = rb_utf8_str_new_cstr(path);
|
1160
|
+
nl_xmlFree(path);
|
1161
|
+
return ret;
|
1162
|
+
}
|
1163
|
+
|
1144
1164
|
void Init_nl_node(void)
|
1145
1165
|
{
|
1146
1166
|
cNokolexborNode = rb_define_class_under(mNokolexbor, "Node", rb_cObject);
|
@@ -1186,6 +1206,7 @@ void Init_nl_node(void)
|
|
1186
1206
|
rb_define_method(cNokolexborNode, "clone", nl_node_clone, 0);
|
1187
1207
|
rb_define_method(cNokolexborNode, "inspect", nl_node_inspect, -1);
|
1188
1208
|
rb_define_method(cNokolexborNode, "source_location", nl_node_source_location, 0);
|
1209
|
+
rb_define_method(cNokolexborNode, "path", nl_node_path, 0);
|
1189
1210
|
|
1190
1211
|
rb_define_alias(cNokolexborNode, "attr", "[]");
|
1191
1212
|
rb_define_alias(cNokolexborNode, "get_attribute", "[]");
|
@@ -1195,6 +1216,7 @@ void Init_nl_node(void)
|
|
1195
1216
|
rb_define_alias(cNokolexborNode, "delete", "remove_attr");
|
1196
1217
|
rb_define_alias(cNokolexborNode, "elements", "element_children");
|
1197
1218
|
rb_define_alias(cNokolexborNode, "remove_attribute", "remove_attr");
|
1219
|
+
rb_define_alias(cNokolexborNode, "node_name", "name");
|
1198
1220
|
rb_define_alias(cNokolexborNode, "text", "content");
|
1199
1221
|
rb_define_alias(cNokolexborNode, "inner_text", "content");
|
1200
1222
|
rb_define_alias(cNokolexborNode, "to_str", "content");
|
data/ext/nokolexbor/xml_tree.c
CHANGED
@@ -157,4 +157,228 @@ nl_xmlDocGetRootElement(const lxb_dom_document_t *doc) {
|
|
157
157
|
void
|
158
158
|
nl_xmlFreeNodeList(lxb_dom_node_t_ptr cur) {
|
159
159
|
// Should never be called
|
160
|
+
}
|
161
|
+
|
162
|
+
/**
|
163
|
+
* xmlGetNodePath:
|
164
|
+
* @node: a node
|
165
|
+
*
|
166
|
+
* Build a structure based Path for the given node
|
167
|
+
*
|
168
|
+
* Returns the new path or NULL in case of error. The caller must free
|
169
|
+
* the returned string
|
170
|
+
*/
|
171
|
+
xmlChar *
|
172
|
+
nl_xmlGetNodePath(const lxb_dom_node_t *node)
|
173
|
+
{
|
174
|
+
const lxb_dom_node_t *cur, *tmp, *next;
|
175
|
+
xmlChar *buffer = NULL, *temp;
|
176
|
+
size_t buf_len;
|
177
|
+
xmlChar *buf;
|
178
|
+
const char *sep;
|
179
|
+
const char *name;
|
180
|
+
char nametemp[100];
|
181
|
+
int occur = 0, generic;
|
182
|
+
|
183
|
+
if ((node == NULL) || (node->type == XML_NAMESPACE_DECL))
|
184
|
+
return (NULL);
|
185
|
+
|
186
|
+
buf_len = 500;
|
187
|
+
buffer = (xmlChar *) nl_xmlMallocAtomic(buf_len);
|
188
|
+
if (buffer == NULL) {
|
189
|
+
xmlTreeErrMemory("getting node path");
|
190
|
+
return (NULL);
|
191
|
+
}
|
192
|
+
buf = (xmlChar *) nl_xmlMallocAtomic(buf_len);
|
193
|
+
if (buf == NULL) {
|
194
|
+
xmlTreeErrMemory("getting node path");
|
195
|
+
nl_xmlFree(buffer);
|
196
|
+
return (NULL);
|
197
|
+
}
|
198
|
+
|
199
|
+
buffer[0] = 0;
|
200
|
+
cur = node;
|
201
|
+
do {
|
202
|
+
name = "";
|
203
|
+
sep = "?";
|
204
|
+
occur = 0;
|
205
|
+
const lxb_char_t* cur_name = NODE_NAME(cur);
|
206
|
+
const lxb_char_t* cur_ns_prefix = NODE_NS_PREFIX(cur);
|
207
|
+
if ((cur->type == LXB_DOM_NODE_TYPE_DOCUMENT) ||
|
208
|
+
(cur->type == XML_HTML_DOCUMENT_NODE)) {
|
209
|
+
if (buffer[0] == '/')
|
210
|
+
break;
|
211
|
+
sep = "/";
|
212
|
+
next = NULL;
|
213
|
+
} else if (cur->type == LXB_DOM_NODE_TYPE_ELEMENT) {
|
214
|
+
generic = 0;
|
215
|
+
sep = "/";
|
216
|
+
name = (const char *) cur_name;
|
217
|
+
next = cur->parent;
|
218
|
+
|
219
|
+
/*
|
220
|
+
* Thumbler index computation
|
221
|
+
* TODO: the occurrence test seems bogus for namespaced names
|
222
|
+
*/
|
223
|
+
tmp = cur->prev;
|
224
|
+
while (tmp != NULL) {
|
225
|
+
if ((tmp->type == LXB_DOM_NODE_TYPE_ELEMENT) &&
|
226
|
+
(generic ||
|
227
|
+
(nl_xmlStrEqual(cur_name, NODE_NAME(tmp)) &&
|
228
|
+
((tmp->ns == cur->ns) ||
|
229
|
+
((tmp->ns != NULL) && (cur->ns != NULL) &&
|
230
|
+
(nl_xmlStrEqual(cur_ns_prefix, NODE_NS_PREFIX(tmp))))))))
|
231
|
+
occur++;
|
232
|
+
tmp = tmp->prev;
|
233
|
+
}
|
234
|
+
if (occur == 0) {
|
235
|
+
tmp = cur->next;
|
236
|
+
while (tmp != NULL && occur == 0) {
|
237
|
+
if ((tmp->type == LXB_DOM_NODE_TYPE_ELEMENT) &&
|
238
|
+
(generic ||
|
239
|
+
(nl_xmlStrEqual(cur_name, NODE_NAME(tmp)) &&
|
240
|
+
((tmp->ns == cur->ns) ||
|
241
|
+
((tmp->ns != NULL) && (cur->ns != NULL) &&
|
242
|
+
(nl_xmlStrEqual(cur_ns_prefix, NODE_NS_PREFIX(tmp))))))))
|
243
|
+
occur++;
|
244
|
+
tmp = tmp->next;
|
245
|
+
}
|
246
|
+
if (occur != 0)
|
247
|
+
occur = 1;
|
248
|
+
} else
|
249
|
+
occur++;
|
250
|
+
} else if (cur->type == LXB_DOM_NODE_TYPE_COMMENT) {
|
251
|
+
sep = "/";
|
252
|
+
name = "comment()";
|
253
|
+
next = cur->parent;
|
254
|
+
|
255
|
+
/*
|
256
|
+
* Thumbler index computation
|
257
|
+
*/
|
258
|
+
tmp = cur->prev;
|
259
|
+
while (tmp != NULL) {
|
260
|
+
if (tmp->type == LXB_DOM_NODE_TYPE_COMMENT)
|
261
|
+
occur++;
|
262
|
+
tmp = tmp->prev;
|
263
|
+
}
|
264
|
+
if (occur == 0) {
|
265
|
+
tmp = cur->next;
|
266
|
+
while (tmp != NULL && occur == 0) {
|
267
|
+
if (tmp->type == LXB_DOM_NODE_TYPE_COMMENT)
|
268
|
+
occur++;
|
269
|
+
tmp = tmp->next;
|
270
|
+
}
|
271
|
+
if (occur != 0)
|
272
|
+
occur = 1;
|
273
|
+
} else
|
274
|
+
occur++;
|
275
|
+
} else if ((cur->type == LXB_DOM_NODE_TYPE_TEXT) ||
|
276
|
+
(cur->type == LXB_DOM_NODE_TYPE_CDATA_SECTION)) {
|
277
|
+
sep = "/";
|
278
|
+
name = "text()";
|
279
|
+
next = cur->parent;
|
280
|
+
|
281
|
+
/*
|
282
|
+
* Thumbler index computation
|
283
|
+
*/
|
284
|
+
tmp = cur->prev;
|
285
|
+
while (tmp != NULL) {
|
286
|
+
if ((tmp->type == LXB_DOM_NODE_TYPE_TEXT) ||
|
287
|
+
(tmp->type == LXB_DOM_NODE_TYPE_CDATA_SECTION))
|
288
|
+
occur++;
|
289
|
+
tmp = tmp->prev;
|
290
|
+
}
|
291
|
+
/*
|
292
|
+
* Evaluate if this is the only text- or CDATA-section-node;
|
293
|
+
* if yes, then we'll get "text()", otherwise "text()[1]".
|
294
|
+
*/
|
295
|
+
if (occur == 0) {
|
296
|
+
tmp = cur->next;
|
297
|
+
while (tmp != NULL) {
|
298
|
+
if ((tmp->type == LXB_DOM_NODE_TYPE_TEXT) ||
|
299
|
+
(tmp->type == LXB_DOM_NODE_TYPE_CDATA_SECTION))
|
300
|
+
{
|
301
|
+
occur = 1;
|
302
|
+
break;
|
303
|
+
}
|
304
|
+
tmp = tmp->next;
|
305
|
+
}
|
306
|
+
} else
|
307
|
+
occur++;
|
308
|
+
} else if (cur->type == LXB_DOM_NODE_TYPE_PROCESSING_INSTRUCTION) {
|
309
|
+
sep = "/";
|
310
|
+
snprintf(nametemp, sizeof(nametemp) - 1,
|
311
|
+
"processing-instruction('%s')", (char *)cur_name);
|
312
|
+
nametemp[sizeof(nametemp) - 1] = 0;
|
313
|
+
name = nametemp;
|
314
|
+
|
315
|
+
next = cur->parent;
|
316
|
+
|
317
|
+
/*
|
318
|
+
* Thumbler index computation
|
319
|
+
*/
|
320
|
+
tmp = cur->prev;
|
321
|
+
while (tmp != NULL) {
|
322
|
+
if ((tmp->type == LXB_DOM_NODE_TYPE_PROCESSING_INSTRUCTION) &&
|
323
|
+
(nl_xmlStrEqual(cur_name, NODE_NAME(tmp))))
|
324
|
+
occur++;
|
325
|
+
tmp = tmp->prev;
|
326
|
+
}
|
327
|
+
if (occur == 0) {
|
328
|
+
tmp = cur->next;
|
329
|
+
while (tmp != NULL && occur == 0) {
|
330
|
+
if ((tmp->type == LXB_DOM_NODE_TYPE_PROCESSING_INSTRUCTION) &&
|
331
|
+
(nl_xmlStrEqual(cur_name, NODE_NAME(tmp))))
|
332
|
+
occur++;
|
333
|
+
tmp = tmp->next;
|
334
|
+
}
|
335
|
+
if (occur != 0)
|
336
|
+
occur = 1;
|
337
|
+
} else
|
338
|
+
occur++;
|
339
|
+
|
340
|
+
} else if (cur->type == LXB_DOM_NODE_TYPE_ATTRIBUTE) {
|
341
|
+
sep = "/@";
|
342
|
+
name = (const char *) lxb_dom_attr_qualified_name(cur, &tmp_len);
|
343
|
+
next = ((lxb_dom_attr_t_ptr)cur)->owner;
|
344
|
+
} else {
|
345
|
+
nl_xmlFree(buf);
|
346
|
+
nl_xmlFree(buffer);
|
347
|
+
return (NULL);
|
348
|
+
}
|
349
|
+
|
350
|
+
/*
|
351
|
+
* Make sure there is enough room
|
352
|
+
*/
|
353
|
+
if (nl_xmlStrlen(buffer) + sizeof(nametemp) + 20 > buf_len) {
|
354
|
+
buf_len =
|
355
|
+
2 * buf_len + nl_xmlStrlen(buffer) + sizeof(nametemp) + 20;
|
356
|
+
temp = (xmlChar *) nl_xmlRealloc(buffer, buf_len);
|
357
|
+
if (temp == NULL) {
|
358
|
+
xmlTreeErrMemory("getting node path");
|
359
|
+
nl_xmlFree(buf);
|
360
|
+
nl_xmlFree(buffer);
|
361
|
+
return (NULL);
|
362
|
+
}
|
363
|
+
buffer = temp;
|
364
|
+
temp = (xmlChar *) nl_xmlRealloc(buf, buf_len);
|
365
|
+
if (temp == NULL) {
|
366
|
+
xmlTreeErrMemory("getting node path");
|
367
|
+
nl_xmlFree(buf);
|
368
|
+
nl_xmlFree(buffer);
|
369
|
+
return (NULL);
|
370
|
+
}
|
371
|
+
buf = temp;
|
372
|
+
}
|
373
|
+
if (occur == 0)
|
374
|
+
snprintf((char *) buf, buf_len, "%s%s%s",
|
375
|
+
sep, name, (char *) buffer);
|
376
|
+
else
|
377
|
+
snprintf((char *) buf, buf_len, "%s%s[%d]%s",
|
378
|
+
sep, name, occur, (char *) buffer);
|
379
|
+
snprintf((char *) buffer, buf_len, "%s", (char *)buf);
|
380
|
+
cur = next;
|
381
|
+
} while (cur != NULL);
|
382
|
+
nl_xmlFree(buf);
|
383
|
+
return (buffer);
|
160
384
|
}
|
data/ext/nokolexbor/xml_xpath.c
CHANGED
@@ -139,12 +139,6 @@
|
|
139
139
|
#define XPATH_MAX_RECURSION_DEPTH 5000
|
140
140
|
#endif
|
141
141
|
|
142
|
-
static size_t tmp_len;
|
143
|
-
|
144
|
-
#define NODE_NAME(node) lxb_dom_node_name_qualified((node), &tmp_len)
|
145
|
-
#define NODE_NS_HREF(node) ((node)->prefix ? lxb_ns_by_id((node)->owner_document->ns, (node)->ns, &tmp_len) : NULL)
|
146
|
-
#define NODE_NS_PREFIX(node) lxb_ns_by_id((node)->owner_document->prefix, (node)->prefix, &tmp_len)
|
147
|
-
|
148
142
|
/*
|
149
143
|
* TODO:
|
150
144
|
* There are a few spots where some tests are done which depend upon ascii
|
data/lib/nokolexbor/node.rb
CHANGED
@@ -51,12 +51,20 @@ module Nokolexbor
|
|
51
51
|
def element?
|
52
52
|
type == ELEMENT_NODE
|
53
53
|
end
|
54
|
+
alias_method :elem?, :element?
|
54
55
|
|
55
56
|
# @return true if this is a {Document}
|
56
57
|
def document?
|
57
58
|
is_a?(Nokolexbor::Document)
|
58
59
|
end
|
59
60
|
|
61
|
+
# Get the path to this node as a CSS expression.
#
# Each non-empty "/"-separated step of {#path} becomes one selector,
# every "[n]" index is rewritten as ":nth-of-type(n)", and the steps are
# joined with the child combinator " > ".
def css_path
  steps = path.split("/").reject(&:empty?)
  selectors = steps.map do |step|
    step.gsub(/\[(\d+)\]/, ':nth-of-type(\1)')
  end
  selectors.join(" > ")
end
|
67
|
+
|
60
68
|
# Get a list of ancestor Node of this Node
|
61
69
|
#
|
62
70
|
# @param [String, nil] selector The selector to match ancestors
|
@@ -285,6 +293,11 @@ module Nokolexbor
|
|
285
293
|
parent_node.add_child(self)
|
286
294
|
end
|
287
295
|
|
296
|
+
# Check whether one of this Node's attribute values equals +value+.
#
# @param value The attribute value to look for.
# @return true if this Node's attributes include <value>
def value?(value)
  attribute_values = values
  attribute_values.include?(value)
end
|
300
|
+
|
288
301
|
# Iterate over each attribute name and value pair of this Node.
|
289
302
|
#
|
290
303
|
# @yield [String,String] The name and value of the current attribute.
|
data/lib/nokolexbor/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: nokolexbor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.2
|
4
|
+
version: 0.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Yicheng Zhou
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-05-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake-compiler
|