nokolexbor 0.4.1 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/nokolexbor/libxml/tree.h +6 -1
- data/ext/nokolexbor/nl_node.c +22 -0
- data/ext/nokolexbor/xml_tree.c +224 -0
- data/ext/nokolexbor/xml_xpath.c +0 -6
- data/lib/nokolexbor/node.rb +13 -0
- data/lib/nokolexbor/version.rb +1 -1
- data/patches/0005-lexbor-add-source-location-to-node.patch +12 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 48a8709af6c858df3d9fc1e3de0254f1d44ab308722eabe05a376ecd047eb7ec
|
4
|
+
data.tar.gz: 5bc0dea72106ead6c1382ef37a730f4b5e7d8ddf273380ef31e37d4d22394106
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a025216fbd78c4b399b0938b4750d1eba72b3ed7e78d511222497a94ae904e8ed04a607641ab7d3dc4002d15763b47f4b8a4631575f82be022e6a719e08d0a7d
|
7
|
+
data.tar.gz: 61a219e89f430b2d726b8a676674fc9c5dfa924c8e75e878ba4637a135b04c25dc3cb3cc8e9a437cc6c8fc8099ede8fb14d506766c40df1bba5261e5fbabeb56
|
data/ext/nokolexbor/libxml/tree.h
CHANGED
@@ -22,6 +22,11 @@
|
|
22
22
|
extern "C" {
|
23
23
|
#endif
|
24
24
|
|
25
|
+
/*
 * Scratch variable whose address is handed to the lookups below as their
 * length out-parameter; the macros themselves never read it back.
 * NOTE(review): this hunk appears to belong to a header (libxml/tree.h);
 * if so, `static` gives every including translation unit its own copy --
 * confirm that is intended.
 */
static size_t tmp_len;
|
26
|
+
/* Expands to lxb_dom_node_name_qualified(node, &tmp_len). */
#define NODE_NAME(node) lxb_dom_node_name_qualified((node), &tmp_len)
|
27
|
+
/*
 * Expands to the lxb_ns_by_id() lookup of node->ns in owner_document->ns,
 * but only when node->prefix is non-zero; otherwise it evaluates to NULL.
 */
#define NODE_NS_HREF(node) ((node)->prefix ? lxb_ns_by_id((node)->owner_document->ns, (node)->ns, &tmp_len) : NULL)
|
28
|
+
/* Expands to the lxb_ns_by_id() lookup of node->prefix in owner_document->prefix. */
#define NODE_NS_PREFIX(node) lxb_ns_by_id((node)->owner_document->prefix, (node)->prefix, &tmp_len)
|
29
|
+
|
25
30
|
/*
|
26
31
|
* Some of the basic types pointer to structures:
|
27
32
|
*/
|
@@ -918,7 +923,7 @@ XMLPUBFUN long XMLCALL
|
|
918
923
|
xmlGetLineNo (const xmlNode *node);
|
919
924
|
#if defined(LIBXML_TREE_ENABLED) || defined(LIBXML_DEBUG_ENABLED)
|
920
925
|
XMLPUBFUN xmlChar * XMLCALL
|
921
|
-
|
926
|
+
nl_xmlGetNodePath (const lxb_dom_node_t *node);
|
922
927
|
#endif /* defined(LIBXML_TREE_ENABLED) || defined(LIBXML_DEBUG_ENABLED) */
|
923
928
|
XMLPUBFUN lxb_dom_node_t_ptr XMLCALL
|
924
929
|
nl_xmlDocGetRootElement (const lxb_dom_document_t *doc);
|
data/ext/nokolexbor/nl_node.c
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
#include "nokolexbor.h"
|
2
|
+
#include "libxml/tree.h"
|
2
3
|
|
3
4
|
#define SORT_NAME nl_css_result
|
4
5
|
#define SORT_TYPE lxb_dom_node_t *
|
@@ -871,6 +872,9 @@ nl_node_destroy(VALUE self)
|
|
871
872
|
static VALUE
|
872
873
|
nl_node_equals(VALUE self, VALUE other)
|
873
874
|
{
|
875
|
+
if (!rb_obj_is_kind_of(other, cNokolexborNode)) {
|
876
|
+
return false;
|
877
|
+
}
|
874
878
|
lxb_dom_node_t *node1 = nl_rb_node_unwrap(self);
|
875
879
|
lxb_dom_node_t *node2 = nl_rb_node_unwrap(other);
|
876
880
|
return node1 == node2 ? Qtrue : Qfalse;
|
@@ -1141,6 +1145,22 @@ nl_node_source_location(VALUE self)
|
|
1141
1145
|
return ULONG2NUM(node->source_location);
|
1142
1146
|
}
|
1143
1147
|
|
1148
|
+
/**
|
1149
|
+
* @return [String] The path associated with this Node.
|
1150
|
+
*/
|
1151
|
+
static VALUE
|
1152
|
+
nl_node_path(VALUE self)
|
1153
|
+
{
|
1154
|
+
lxb_dom_node_t *node = nl_rb_node_unwrap(self);
|
1155
|
+
char* path = nl_xmlGetNodePath(node);
|
1156
|
+
if (path == NULL) {
|
1157
|
+
return Qnil;
|
1158
|
+
}
|
1159
|
+
VALUE ret = rb_utf8_str_new_cstr(path);
|
1160
|
+
nl_xmlFree(path);
|
1161
|
+
return ret;
|
1162
|
+
}
|
1163
|
+
|
1144
1164
|
void Init_nl_node(void)
|
1145
1165
|
{
|
1146
1166
|
cNokolexborNode = rb_define_class_under(mNokolexbor, "Node", rb_cObject);
|
@@ -1186,6 +1206,7 @@ void Init_nl_node(void)
|
|
1186
1206
|
rb_define_method(cNokolexborNode, "clone", nl_node_clone, 0);
|
1187
1207
|
rb_define_method(cNokolexborNode, "inspect", nl_node_inspect, -1);
|
1188
1208
|
rb_define_method(cNokolexborNode, "source_location", nl_node_source_location, 0);
|
1209
|
+
rb_define_method(cNokolexborNode, "path", nl_node_path, 0);
|
1189
1210
|
|
1190
1211
|
rb_define_alias(cNokolexborNode, "attr", "[]");
|
1191
1212
|
rb_define_alias(cNokolexborNode, "get_attribute", "[]");
|
@@ -1195,6 +1216,7 @@ void Init_nl_node(void)
|
|
1195
1216
|
rb_define_alias(cNokolexborNode, "delete", "remove_attr");
|
1196
1217
|
rb_define_alias(cNokolexborNode, "elements", "element_children");
|
1197
1218
|
rb_define_alias(cNokolexborNode, "remove_attribute", "remove_attr");
|
1219
|
+
rb_define_alias(cNokolexborNode, "node_name", "name");
|
1198
1220
|
rb_define_alias(cNokolexborNode, "text", "content");
|
1199
1221
|
rb_define_alias(cNokolexborNode, "inner_text", "content");
|
1200
1222
|
rb_define_alias(cNokolexborNode, "to_str", "content");
|
data/ext/nokolexbor/xml_tree.c
CHANGED
@@ -157,4 +157,228 @@ nl_xmlDocGetRootElement(const lxb_dom_document_t *doc) {
|
|
157
157
|
void
|
158
158
|
nl_xmlFreeNodeList(lxb_dom_node_t_ptr cur) {
|
159
159
|
// Should never be called
|
160
|
+
}
|
161
|
+
|
162
|
+
/**
|
163
|
+
* xmlGetNodePath:
|
164
|
+
* @node: a node
|
165
|
+
*
|
166
|
+
* Build a structure based Path for the given node
|
167
|
+
*
|
168
|
+
* Returns the new path or NULL in case of error. The caller must free
|
169
|
+
* the returned string
|
170
|
+
*/
|
171
|
+
xmlChar *
|
172
|
+
nl_xmlGetNodePath(const lxb_dom_node_t *node)
|
173
|
+
{
|
174
|
+
const lxb_dom_node_t *cur, *tmp, *next;
|
175
|
+
xmlChar *buffer = NULL, *temp;
|
176
|
+
size_t buf_len;
|
177
|
+
xmlChar *buf;
|
178
|
+
const char *sep;
|
179
|
+
const char *name;
|
180
|
+
char nametemp[100];
|
181
|
+
int occur = 0, generic;
|
182
|
+
|
183
|
+
if ((node == NULL) || (node->type == XML_NAMESPACE_DECL))
|
184
|
+
return (NULL);
|
185
|
+
|
186
|
+
buf_len = 500;
|
187
|
+
buffer = (xmlChar *) nl_xmlMallocAtomic(buf_len);
|
188
|
+
if (buffer == NULL) {
|
189
|
+
xmlTreeErrMemory("getting node path");
|
190
|
+
return (NULL);
|
191
|
+
}
|
192
|
+
buf = (xmlChar *) nl_xmlMallocAtomic(buf_len);
|
193
|
+
if (buf == NULL) {
|
194
|
+
xmlTreeErrMemory("getting node path");
|
195
|
+
nl_xmlFree(buffer);
|
196
|
+
return (NULL);
|
197
|
+
}
|
198
|
+
|
199
|
+
buffer[0] = 0;
|
200
|
+
cur = node;
|
201
|
+
do {
|
202
|
+
name = "";
|
203
|
+
sep = "?";
|
204
|
+
occur = 0;
|
205
|
+
const lxb_char_t* cur_name = NODE_NAME(cur);
|
206
|
+
const lxb_char_t* cur_ns_prefix = NODE_NS_PREFIX(cur);
|
207
|
+
if ((cur->type == LXB_DOM_NODE_TYPE_DOCUMENT) ||
|
208
|
+
(cur->type == XML_HTML_DOCUMENT_NODE)) {
|
209
|
+
if (buffer[0] == '/')
|
210
|
+
break;
|
211
|
+
sep = "/";
|
212
|
+
next = NULL;
|
213
|
+
} else if (cur->type == LXB_DOM_NODE_TYPE_ELEMENT) {
|
214
|
+
generic = 0;
|
215
|
+
sep = "/";
|
216
|
+
name = (const char *) cur_name;
|
217
|
+
next = cur->parent;
|
218
|
+
|
219
|
+
/*
|
220
|
+
* Thumbler index computation
|
221
|
+
* TODO: the occurrence test seems bogus for namespaced names
|
222
|
+
*/
|
223
|
+
tmp = cur->prev;
|
224
|
+
while (tmp != NULL) {
|
225
|
+
if ((tmp->type == LXB_DOM_NODE_TYPE_ELEMENT) &&
|
226
|
+
(generic ||
|
227
|
+
(nl_xmlStrEqual(cur_name, NODE_NAME(tmp)) &&
|
228
|
+
((tmp->ns == cur->ns) ||
|
229
|
+
((tmp->ns != NULL) && (cur->ns != NULL) &&
|
230
|
+
(nl_xmlStrEqual(cur_ns_prefix, NODE_NS_PREFIX(tmp))))))))
|
231
|
+
occur++;
|
232
|
+
tmp = tmp->prev;
|
233
|
+
}
|
234
|
+
if (occur == 0) {
|
235
|
+
tmp = cur->next;
|
236
|
+
while (tmp != NULL && occur == 0) {
|
237
|
+
if ((tmp->type == LXB_DOM_NODE_TYPE_ELEMENT) &&
|
238
|
+
(generic ||
|
239
|
+
(nl_xmlStrEqual(cur_name, NODE_NAME(tmp)) &&
|
240
|
+
((tmp->ns == cur->ns) ||
|
241
|
+
((tmp->ns != NULL) && (cur->ns != NULL) &&
|
242
|
+
(nl_xmlStrEqual(cur_ns_prefix, NODE_NS_PREFIX(tmp))))))))
|
243
|
+
occur++;
|
244
|
+
tmp = tmp->next;
|
245
|
+
}
|
246
|
+
if (occur != 0)
|
247
|
+
occur = 1;
|
248
|
+
} else
|
249
|
+
occur++;
|
250
|
+
} else if (cur->type == LXB_DOM_NODE_TYPE_COMMENT) {
|
251
|
+
sep = "/";
|
252
|
+
name = "comment()";
|
253
|
+
next = cur->parent;
|
254
|
+
|
255
|
+
/*
|
256
|
+
* Thumbler index computation
|
257
|
+
*/
|
258
|
+
tmp = cur->prev;
|
259
|
+
while (tmp != NULL) {
|
260
|
+
if (tmp->type == LXB_DOM_NODE_TYPE_COMMENT)
|
261
|
+
occur++;
|
262
|
+
tmp = tmp->prev;
|
263
|
+
}
|
264
|
+
if (occur == 0) {
|
265
|
+
tmp = cur->next;
|
266
|
+
while (tmp != NULL && occur == 0) {
|
267
|
+
if (tmp->type == LXB_DOM_NODE_TYPE_COMMENT)
|
268
|
+
occur++;
|
269
|
+
tmp = tmp->next;
|
270
|
+
}
|
271
|
+
if (occur != 0)
|
272
|
+
occur = 1;
|
273
|
+
} else
|
274
|
+
occur++;
|
275
|
+
} else if ((cur->type == LXB_DOM_NODE_TYPE_TEXT) ||
|
276
|
+
(cur->type == LXB_DOM_NODE_TYPE_CDATA_SECTION)) {
|
277
|
+
sep = "/";
|
278
|
+
name = "text()";
|
279
|
+
next = cur->parent;
|
280
|
+
|
281
|
+
/*
|
282
|
+
* Thumbler index computation
|
283
|
+
*/
|
284
|
+
tmp = cur->prev;
|
285
|
+
while (tmp != NULL) {
|
286
|
+
if ((tmp->type == LXB_DOM_NODE_TYPE_TEXT) ||
|
287
|
+
(tmp->type == LXB_DOM_NODE_TYPE_CDATA_SECTION))
|
288
|
+
occur++;
|
289
|
+
tmp = tmp->prev;
|
290
|
+
}
|
291
|
+
/*
|
292
|
+
* Evaluate if this is the only text- or CDATA-section-node;
|
293
|
+
* if yes, then we'll get "text()", otherwise "text()[1]".
|
294
|
+
*/
|
295
|
+
if (occur == 0) {
|
296
|
+
tmp = cur->next;
|
297
|
+
while (tmp != NULL) {
|
298
|
+
if ((tmp->type == LXB_DOM_NODE_TYPE_TEXT) ||
|
299
|
+
(tmp->type == LXB_DOM_NODE_TYPE_CDATA_SECTION))
|
300
|
+
{
|
301
|
+
occur = 1;
|
302
|
+
break;
|
303
|
+
}
|
304
|
+
tmp = tmp->next;
|
305
|
+
}
|
306
|
+
} else
|
307
|
+
occur++;
|
308
|
+
} else if (cur->type == LXB_DOM_NODE_TYPE_PROCESSING_INSTRUCTION) {
|
309
|
+
sep = "/";
|
310
|
+
snprintf(nametemp, sizeof(nametemp) - 1,
|
311
|
+
"processing-instruction('%s')", (char *)cur_name);
|
312
|
+
nametemp[sizeof(nametemp) - 1] = 0;
|
313
|
+
name = nametemp;
|
314
|
+
|
315
|
+
next = cur->parent;
|
316
|
+
|
317
|
+
/*
|
318
|
+
* Thumbler index computation
|
319
|
+
*/
|
320
|
+
tmp = cur->prev;
|
321
|
+
while (tmp != NULL) {
|
322
|
+
if ((tmp->type == LXB_DOM_NODE_TYPE_PROCESSING_INSTRUCTION) &&
|
323
|
+
(nl_xmlStrEqual(cur_name, NODE_NAME(tmp))))
|
324
|
+
occur++;
|
325
|
+
tmp = tmp->prev;
|
326
|
+
}
|
327
|
+
if (occur == 0) {
|
328
|
+
tmp = cur->next;
|
329
|
+
while (tmp != NULL && occur == 0) {
|
330
|
+
if ((tmp->type == LXB_DOM_NODE_TYPE_PROCESSING_INSTRUCTION) &&
|
331
|
+
(nl_xmlStrEqual(cur_name, NODE_NAME(tmp))))
|
332
|
+
occur++;
|
333
|
+
tmp = tmp->next;
|
334
|
+
}
|
335
|
+
if (occur != 0)
|
336
|
+
occur = 1;
|
337
|
+
} else
|
338
|
+
occur++;
|
339
|
+
|
340
|
+
} else if (cur->type == LXB_DOM_NODE_TYPE_ATTRIBUTE) {
|
341
|
+
sep = "/@";
|
342
|
+
name = (const char *) lxb_dom_attr_qualified_name(cur, &tmp_len);
|
343
|
+
next = ((lxb_dom_attr_t_ptr)cur)->owner;
|
344
|
+
} else {
|
345
|
+
nl_xmlFree(buf);
|
346
|
+
nl_xmlFree(buffer);
|
347
|
+
return (NULL);
|
348
|
+
}
|
349
|
+
|
350
|
+
/*
|
351
|
+
* Make sure there is enough room
|
352
|
+
*/
|
353
|
+
if (nl_xmlStrlen(buffer) + sizeof(nametemp) + 20 > buf_len) {
|
354
|
+
buf_len =
|
355
|
+
2 * buf_len + nl_xmlStrlen(buffer) + sizeof(nametemp) + 20;
|
356
|
+
temp = (xmlChar *) nl_xmlRealloc(buffer, buf_len);
|
357
|
+
if (temp == NULL) {
|
358
|
+
xmlTreeErrMemory("getting node path");
|
359
|
+
nl_xmlFree(buf);
|
360
|
+
nl_xmlFree(buffer);
|
361
|
+
return (NULL);
|
362
|
+
}
|
363
|
+
buffer = temp;
|
364
|
+
temp = (xmlChar *) nl_xmlRealloc(buf, buf_len);
|
365
|
+
if (temp == NULL) {
|
366
|
+
xmlTreeErrMemory("getting node path");
|
367
|
+
nl_xmlFree(buf);
|
368
|
+
nl_xmlFree(buffer);
|
369
|
+
return (NULL);
|
370
|
+
}
|
371
|
+
buf = temp;
|
372
|
+
}
|
373
|
+
if (occur == 0)
|
374
|
+
snprintf((char *) buf, buf_len, "%s%s%s",
|
375
|
+
sep, name, (char *) buffer);
|
376
|
+
else
|
377
|
+
snprintf((char *) buf, buf_len, "%s%s[%d]%s",
|
378
|
+
sep, name, occur, (char *) buffer);
|
379
|
+
snprintf((char *) buffer, buf_len, "%s", (char *)buf);
|
380
|
+
cur = next;
|
381
|
+
} while (cur != NULL);
|
382
|
+
nl_xmlFree(buf);
|
383
|
+
return (buffer);
|
160
384
|
}
|
data/ext/nokolexbor/xml_xpath.c
CHANGED
@@ -139,12 +139,6 @@
|
|
139
139
|
#define XPATH_MAX_RECURSION_DEPTH 5000
|
140
140
|
#endif
|
141
141
|
|
142
|
-
static size_t tmp_len;
|
143
|
-
|
144
|
-
#define NODE_NAME(node) lxb_dom_node_name_qualified((node), &tmp_len)
|
145
|
-
#define NODE_NS_HREF(node) ((node)->prefix ? lxb_ns_by_id((node)->owner_document->ns, (node)->ns, &tmp_len) : NULL)
|
146
|
-
#define NODE_NS_PREFIX(node) lxb_ns_by_id((node)->owner_document->prefix, (node)->prefix, &tmp_len)
|
147
|
-
|
148
142
|
/*
|
149
143
|
* TODO:
|
150
144
|
* There are a few spots where some tests are done which depend upon ascii
|
data/lib/nokolexbor/node.rb
CHANGED
@@ -51,12 +51,20 @@ module Nokolexbor
|
|
51
51
|
def element?
  type == ELEMENT_NODE
end
# Short alias for #element?
alias elem? element?
|
54
55
|
|
55
56
|
# @return true if this node is a {Document} (or an instance of a subclass)
def document?
  kind_of?(Nokolexbor::Document)
end
|
59
60
|
|
61
|
+
# Get the path to this node as a CSS expression.
#
# The result is derived from #path: empty segments are dropped, every
# "[N]" index becomes ":nth-of-type(N)", and the remaining segments are
# joined with " > ".
def css_path
  segments = path.split(%r{/}).reject(&:empty?)
  selectors = segments.map do |segment|
    segment.gsub(/\[(\d+)\]/, ':nth-of-type(\1)')
  end
  selectors.join(" > ")
end
|
67
|
+
|
60
68
|
# Get a list of ancestor Node of this Node
|
61
69
|
#
|
62
70
|
# @param [String, nil] selector The selector to match ancestors
|
@@ -285,6 +293,11 @@ module Nokolexbor
|
|
285
293
|
parent_node.add_child(self)
|
286
294
|
end
|
287
295
|
|
296
|
+
# @param value the attribute value to look for
# @return true if the collection returned by #values contains +value+
def value?(value)
  values.any? { |candidate| candidate == value }
end
|
300
|
+
|
288
301
|
# Iterate over each attribute name and value pair of this Node.
|
289
302
|
#
|
290
303
|
# @yield [String,String] The name and value of the current attribute.
|
data/lib/nokolexbor/version.rb
CHANGED
data/patches/0005-lexbor-add-source-location-to-node.patch
CHANGED
@@ -1,3 +1,15 @@
|
|
1
|
+
diff --git a/source/lexbor/dom/interfaces/node.c b/source/lexbor/dom/interfaces/node.c
|
2
|
+
index cf6475d..9f1cf75 100755
|
3
|
+
--- a/source/lexbor/dom/interfaces/node.c
|
4
|
+
+++ b/source/lexbor/dom/interfaces/node.c
|
5
|
+
@@ -85,6 +85,7 @@ lxb_dom_node_interface_copy(lxb_dom_node_t *dst,
|
6
|
+
|
7
|
+
dst->type = src->type;
|
8
|
+
dst->user = src->user;
|
9
|
+
+ dst->source_location = src->source_location;
|
10
|
+
|
11
|
+
if (dst->owner_document == src->owner_document) {
|
12
|
+
dst->local_name = src->local_name;
|
1
13
|
diff --git a/source/lexbor/dom/interfaces/node.h b/source/lexbor/dom/interfaces/node.h
|
2
14
|
index acd0c1c..f436257 100755
|
3
15
|
--- a/source/lexbor/dom/interfaces/node.h
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: nokolexbor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.1
|
4
|
+
version: 0.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Yicheng Zhou
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-05-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake-compiler
|