nokolexbor 0.4.1 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 0055757bc7b9a92e9a2d37235722ca6a6945fb438855880ce10055dfa10d0fcb
4
- data.tar.gz: b42cb00bc57ac3db09e928ff0b4ae3b3f7fb950312ab64f2606436184785dcc0
3
+ metadata.gz: 48a8709af6c858df3d9fc1e3de0254f1d44ab308722eabe05a376ecd047eb7ec
4
+ data.tar.gz: 5bc0dea72106ead6c1382ef37a730f4b5e7d8ddf273380ef31e37d4d22394106
5
5
  SHA512:
6
- metadata.gz: 5527c86fea7d9efbbea6bcc1a2fcaaad506057ccf2d22d689f0de143b6c142f9bd1e8b33686ed2be1d01c37c1b02879c35e392a0ca81489f7f9cb125e569bf33
7
- data.tar.gz: 4af797ac972b1ddfaafceec1ded5de2541daa92a366568f768f63f978bf3215e9a03295f598f4e05eea253aa2af8098ff8de33431119e7aeecad8a223de0c6f4
6
+ metadata.gz: a025216fbd78c4b399b0938b4750d1eba72b3ed7e78d511222497a94ae904e8ed04a607641ab7d3dc4002d15763b47f4b8a4631575f82be022e6a719e08d0a7d
7
+ data.tar.gz: 61a219e89f430b2d726b8a676674fc9c5dfa924c8e75e878ba4637a135b04c25dc3cb3cc8e9a437cc6c8fc8099ede8fb14d506766c40df1bba5261e5fbabeb56
@@ -22,6 +22,11 @@
22
22
  extern "C" {
23
23
  #endif
24
24
 
25
+ static size_t tmp_len; /* Scratch out-parameter shared by the macros below; they pass its address but never read the length back. Being static in a header, each including translation unit gets its own copy. */
26
+ #define NODE_NAME(node) lxb_dom_node_name_qualified((node), &tmp_len) /* result of lxb_dom_node_name_qualified() for node; length is discarded into tmp_len */
27
+ #define NODE_NS_HREF(node) ((node)->prefix ? lxb_ns_by_id((node)->owner_document->ns, (node)->ns, &tmp_len) : NULL) /* NULL when node->prefix is zero; otherwise looks node->ns up in the owner document's ns table */
28
+ #define NODE_NS_PREFIX(node) lxb_ns_by_id((node)->owner_document->prefix, (node)->prefix, &tmp_len) /* looks node->prefix up in the owner document's prefix table */
29
+
25
30
  /*
26
31
  * Some of the basic types pointer to structures:
27
32
  */
@@ -918,7 +923,7 @@ XMLPUBFUN long XMLCALL
918
923
  xmlGetLineNo (const xmlNode *node);
919
924
  #if defined(LIBXML_TREE_ENABLED) || defined(LIBXML_DEBUG_ENABLED)
920
925
  XMLPUBFUN xmlChar * XMLCALL
921
- xmlGetNodePath (const xmlNode *node);
926
+ nl_xmlGetNodePath (const lxb_dom_node_t *node);
922
927
  #endif /* defined(LIBXML_TREE_ENABLED) || defined(LIBXML_DEBUG_ENABLED) */
923
928
  XMLPUBFUN lxb_dom_node_t_ptr XMLCALL
924
929
  nl_xmlDocGetRootElement (const lxb_dom_document_t *doc);
@@ -1,4 +1,5 @@
1
1
  #include "nokolexbor.h"
2
+ #include "libxml/tree.h"
2
3
 
3
4
  #define SORT_NAME nl_css_result
4
5
  #define SORT_TYPE lxb_dom_node_t *
@@ -871,6 +872,9 @@ nl_node_destroy(VALUE self)
871
872
  static VALUE
872
873
  nl_node_equals(VALUE self, VALUE other)
873
874
  {
875
+ if (!rb_obj_is_kind_of(other, cNokolexborNode)) {
876
+ return false;
877
+ }
874
878
  lxb_dom_node_t *node1 = nl_rb_node_unwrap(self);
875
879
  lxb_dom_node_t *node2 = nl_rb_node_unwrap(other);
876
880
  return node1 == node2 ? Qtrue : Qfalse;
@@ -1141,6 +1145,22 @@ nl_node_source_location(VALUE self)
1141
1145
  return ULONG2NUM(node->source_location);
1142
1146
  }
1143
1147
 
1148
+ /**
1149
+ * @return [String, nil] The path associated with this Node, or nil when nl_xmlGetNodePath() returns NULL.
1150
+ */
1151
+ static VALUE
1152
+ nl_node_path(VALUE self)
1153
+ {
1154
+ lxb_dom_node_t *node = nl_rb_node_unwrap(self);
1155
+ char *path = (char *) nl_xmlGetNodePath(node); /* explicit xmlChar * -> char * conversion for the Ruby string API */
1156
+ if (path == NULL) {
1157
+ return Qnil;
1158
+ }
1159
+ VALUE ret = rb_utf8_str_new_cstr(path); /* copy into a Ruby String before releasing the C buffer */
1160
+ nl_xmlFree(path); /* the returned path buffer must be freed by the caller */
1161
+ return ret;
1162
+ }
1163
+
1144
1164
  void Init_nl_node(void)
1145
1165
  {
1146
1166
  cNokolexborNode = rb_define_class_under(mNokolexbor, "Node", rb_cObject);
@@ -1186,6 +1206,7 @@ void Init_nl_node(void)
1186
1206
  rb_define_method(cNokolexborNode, "clone", nl_node_clone, 0);
1187
1207
  rb_define_method(cNokolexborNode, "inspect", nl_node_inspect, -1);
1188
1208
  rb_define_method(cNokolexborNode, "source_location", nl_node_source_location, 0);
1209
+ rb_define_method(cNokolexborNode, "path", nl_node_path, 0);
1189
1210
 
1190
1211
  rb_define_alias(cNokolexborNode, "attr", "[]");
1191
1212
  rb_define_alias(cNokolexborNode, "get_attribute", "[]");
@@ -1195,6 +1216,7 @@ void Init_nl_node(void)
1195
1216
  rb_define_alias(cNokolexborNode, "delete", "remove_attr");
1196
1217
  rb_define_alias(cNokolexborNode, "elements", "element_children");
1197
1218
  rb_define_alias(cNokolexborNode, "remove_attribute", "remove_attr");
1219
+ rb_define_alias(cNokolexborNode, "node_name", "name");
1198
1220
  rb_define_alias(cNokolexborNode, "text", "content");
1199
1221
  rb_define_alias(cNokolexborNode, "inner_text", "content");
1200
1222
  rb_define_alias(cNokolexborNode, "to_str", "content");
@@ -157,4 +157,228 @@ nl_xmlDocGetRootElement(const lxb_dom_document_t *doc) {
157
157
  void
158
158
  nl_xmlFreeNodeList(lxb_dom_node_t_ptr cur) {
159
159
  // Intentionally empty: this function should never be called, and cur is ignored.
160
+ }
161
+
162
+ /**
163
+ * nl_xmlGetNodePath:
164
+ * @node: a node
165
+ *
166
+ * Build a structure-based path for the given node, one step per node while walking up via parent/owner links
167
+ *
168
+ * Returns the new path or NULL in case of error. The caller must free
169
+ * the returned string with nl_xmlFree()
170
+ */
171
+ xmlChar *
172
+ nl_xmlGetNodePath(const lxb_dom_node_t *node)
173
+ {
174
+ const lxb_dom_node_t *cur, *tmp, *next;
175
+ xmlChar *buffer = NULL, *temp; /* buffer: path built so far; this is what gets returned */
176
+ size_t buf_len;
177
+ xmlChar *buf; /* scratch: each new step is formatted here, then copied back into buffer */
178
+ const char *sep;
179
+ const char *name;
180
+ char nametemp[100]; /* holds the "processing-instruction('...')" step text */
181
+ int occur = 0, generic;
182
+
183
+ if ((node == NULL) || (node->type == XML_NAMESPACE_DECL))
184
+ return (NULL);
185
+
186
+ buf_len = 500;
187
+ buffer = (xmlChar *) nl_xmlMallocAtomic(buf_len);
188
+ if (buffer == NULL) {
189
+ xmlTreeErrMemory("getting node path");
190
+ return (NULL);
191
+ }
192
+ buf = (xmlChar *) nl_xmlMallocAtomic(buf_len);
193
+ if (buf == NULL) {
194
+ xmlTreeErrMemory("getting node path");
195
+ nl_xmlFree(buffer);
196
+ return (NULL);
197
+ }
198
+
199
+ buffer[0] = 0;
200
+ cur = node;
201
+ do {
202
+ name = "";
203
+ sep = "?";
204
+ occur = 0; /* 0 means this step is written without an [n] index */
205
+ const lxb_char_t* cur_name = NODE_NAME(cur);
206
+ const lxb_char_t* cur_ns_prefix = NODE_NS_PREFIX(cur);
207
+ if ((cur->type == LXB_DOM_NODE_TYPE_DOCUMENT) ||
208
+ (cur->type == XML_HTML_DOCUMENT_NODE)) {
209
+ if (buffer[0] == '/') /* path already starts at the root: nothing to prepend */
210
+ break;
211
+ sep = "/";
212
+ next = NULL;
213
+ } else if (cur->type == LXB_DOM_NODE_TYPE_ELEMENT) {
214
+ generic = 0;
215
+ sep = "/";
216
+ name = (const char *) cur_name;
217
+ next = cur->parent;
218
+
219
+ /*
220
+ * Position index: count preceding element siblings with the same name and namespace
221
+ * TODO: the occurrence test seems bogus for namespaced names
222
+ */
223
+ tmp = cur->prev;
224
+ while (tmp != NULL) {
225
+ if ((tmp->type == LXB_DOM_NODE_TYPE_ELEMENT) &&
226
+ (generic ||
227
+ (nl_xmlStrEqual(cur_name, NODE_NAME(tmp)) &&
228
+ ((tmp->ns == cur->ns) ||
229
+ ((tmp->ns != NULL) && (cur->ns != NULL) &&
230
+ (nl_xmlStrEqual(cur_ns_prefix, NODE_NS_PREFIX(tmp))))))))
231
+ occur++;
232
+ tmp = tmp->prev;
233
+ }
234
+ if (occur == 0) { /* no earlier match: only index as [1] if a later sibling matches */
235
+ tmp = cur->next;
236
+ while (tmp != NULL && occur == 0) {
237
+ if ((tmp->type == LXB_DOM_NODE_TYPE_ELEMENT) &&
238
+ (generic ||
239
+ (nl_xmlStrEqual(cur_name, NODE_NAME(tmp)) &&
240
+ ((tmp->ns == cur->ns) ||
241
+ ((tmp->ns != NULL) && (cur->ns != NULL) &&
242
+ (nl_xmlStrEqual(cur_ns_prefix, NODE_NS_PREFIX(tmp))))))))
243
+ occur++;
244
+ tmp = tmp->next;
245
+ }
246
+ if (occur != 0)
247
+ occur = 1;
248
+ } else
249
+ occur++; /* convert the count of earlier matches into a 1-based position */
250
+ } else if (cur->type == LXB_DOM_NODE_TYPE_COMMENT) {
251
+ sep = "/";
252
+ name = "comment()";
253
+ next = cur->parent;
254
+
255
+ /*
256
+ * Position index: count preceding comment siblings
257
+ */
258
+ tmp = cur->prev;
259
+ while (tmp != NULL) {
260
+ if (tmp->type == LXB_DOM_NODE_TYPE_COMMENT)
261
+ occur++;
262
+ tmp = tmp->prev;
263
+ }
264
+ if (occur == 0) {
265
+ tmp = cur->next;
266
+ while (tmp != NULL && occur == 0) {
267
+ if (tmp->type == LXB_DOM_NODE_TYPE_COMMENT)
268
+ occur++;
269
+ tmp = tmp->next;
270
+ }
271
+ if (occur != 0)
272
+ occur = 1;
273
+ } else
274
+ occur++;
275
+ } else if ((cur->type == LXB_DOM_NODE_TYPE_TEXT) ||
276
+ (cur->type == LXB_DOM_NODE_TYPE_CDATA_SECTION)) {
277
+ sep = "/";
278
+ name = "text()";
279
+ next = cur->parent;
280
+
281
+ /*
282
+ * Position index: count preceding text / CDATA-section siblings
283
+ */
284
+ tmp = cur->prev;
285
+ while (tmp != NULL) {
286
+ if ((tmp->type == LXB_DOM_NODE_TYPE_TEXT) ||
287
+ (tmp->type == LXB_DOM_NODE_TYPE_CDATA_SECTION))
288
+ occur++;
289
+ tmp = tmp->prev;
290
+ }
291
+ /*
292
+ * Evaluate if this is the only text- or CDATA-section-node;
293
+ * if yes, then we'll get "text()", otherwise "text()[1]".
294
+ */
295
+ if (occur == 0) {
296
+ tmp = cur->next;
297
+ while (tmp != NULL) {
298
+ if ((tmp->type == LXB_DOM_NODE_TYPE_TEXT) ||
299
+ (tmp->type == LXB_DOM_NODE_TYPE_CDATA_SECTION))
300
+ {
301
+ occur = 1;
302
+ break;
303
+ }
304
+ tmp = tmp->next;
305
+ }
306
+ } else
307
+ occur++;
308
+ } else if (cur->type == LXB_DOM_NODE_TYPE_PROCESSING_INSTRUCTION) {
309
+ sep = "/";
310
+ snprintf(nametemp, sizeof(nametemp) - 1,
311
+ "processing-instruction('%s')", (char *)cur_name);
312
+ nametemp[sizeof(nametemp) - 1] = 0;
313
+ name = nametemp;
314
+
315
+ next = cur->parent;
316
+
317
+ /*
318
+ * Position index: count preceding processing instructions with the same name
319
+ */
320
+ tmp = cur->prev;
321
+ while (tmp != NULL) {
322
+ if ((tmp->type == LXB_DOM_NODE_TYPE_PROCESSING_INSTRUCTION) &&
323
+ (nl_xmlStrEqual(cur_name, NODE_NAME(tmp))))
324
+ occur++;
325
+ tmp = tmp->prev;
326
+ }
327
+ if (occur == 0) {
328
+ tmp = cur->next;
329
+ while (tmp != NULL && occur == 0) {
330
+ if ((tmp->type == LXB_DOM_NODE_TYPE_PROCESSING_INSTRUCTION) &&
331
+ (nl_xmlStrEqual(cur_name, NODE_NAME(tmp))))
332
+ occur++;
333
+ tmp = tmp->next;
334
+ }
335
+ if (occur != 0)
336
+ occur = 1;
337
+ } else
338
+ occur++;
339
+
340
+ } else if (cur->type == LXB_DOM_NODE_TYPE_ATTRIBUTE) {
341
+ sep = "/@";
342
+ name = (const char *) lxb_dom_attr_qualified_name((lxb_dom_attr_t_ptr)cur, &tmp_len);
343
+ next = (const lxb_dom_node_t *) ((lxb_dom_attr_t_ptr)cur)->owner;
344
+ } else {
345
+ nl_xmlFree(buf);
346
+ nl_xmlFree(buffer);
347
+ return (NULL);
348
+ }
349
+
350
+ /*
351
+ * Make sure there is enough room for sep + name + "[n]" + the existing path
352
+ */
353
+ if (nl_xmlStrlen(buffer) + nl_xmlStrlen((const xmlChar *) name) + sizeof(nametemp) + 20 > buf_len) {
354
+ buf_len =
355
+ 2 * buf_len + nl_xmlStrlen(buffer) + nl_xmlStrlen((const xmlChar *) name) + sizeof(nametemp) + 20;
356
+ temp = (xmlChar *) nl_xmlRealloc(buffer, buf_len);
357
+ if (temp == NULL) {
358
+ xmlTreeErrMemory("getting node path");
359
+ nl_xmlFree(buf);
360
+ nl_xmlFree(buffer);
361
+ return (NULL);
362
+ }
363
+ buffer = temp;
364
+ temp = (xmlChar *) nl_xmlRealloc(buf, buf_len);
365
+ if (temp == NULL) {
366
+ xmlTreeErrMemory("getting node path");
367
+ nl_xmlFree(buf);
368
+ nl_xmlFree(buffer);
369
+ return (NULL);
370
+ }
371
+ buf = temp;
372
+ }
373
+ if (occur == 0)
374
+ snprintf((char *) buf, buf_len, "%s%s%s",
375
+ sep, name, (char *) buffer);
376
+ else
377
+ snprintf((char *) buf, buf_len, "%s%s[%d]%s",
378
+ sep, name, occur, (char *) buffer);
379
+ snprintf((char *) buffer, buf_len, "%s", (char *)buf); /* the new step is prepended: buf becomes the path so far */
380
+ cur = next;
381
+ } while (cur != NULL);
382
+ nl_xmlFree(buf);
383
+ return (buffer);
160
384
  }
@@ -139,12 +139,6 @@
139
139
  #define XPATH_MAX_RECURSION_DEPTH 5000
140
140
  #endif
141
141
 
142
- static size_t tmp_len;
143
-
144
- #define NODE_NAME(node) lxb_dom_node_name_qualified((node), &tmp_len)
145
- #define NODE_NS_HREF(node) ((node)->prefix ? lxb_ns_by_id((node)->owner_document->ns, (node)->ns, &tmp_len) : NULL)
146
- #define NODE_NS_PREFIX(node) lxb_ns_by_id((node)->owner_document->prefix, (node)->prefix, &tmp_len)
147
-
148
142
  /*
149
143
  * TODO:
150
144
  * There are a few spots where some tests are done which depend upon ascii
@@ -51,12 +51,20 @@ module Nokolexbor
51
51
  def element? # true when this node's type equals ELEMENT_NODE
52
52
  type == ELEMENT_NODE
53
53
  end
54
+ alias_method :elem?, :element? # elem? is a shorter alias for element?
54
55
 
55
56
  # @return true if this is a {Document}
56
57
  def document?
57
58
  is_a?(Nokolexbor::Document)
58
59
  end
59
60
 
61
+ # Get the path to this node as a CSS expression
62
+ def css_path
63
+ path.split(%r{/}).filter_map do |part|
64
+ part.empty? ? nil : part.gsub(/\[(\d+)\]/, ':nth-of-type(\1)')
65
+ end.join(" > ")
66
+ end
67
+
60
68
  # Get a list of ancestor Node of this Node
61
69
  #
62
70
  # @param [String, nil] selector The selector to match ancestors
@@ -285,6 +293,11 @@ module Nokolexbor
285
293
  parent_node.add_child(self)
286
294
  end
287
295
 
296
+ # @return true if this Node's attributes include <value>
297
+ def value?(value)
298
+ values.include?(value)
299
+ end
300
+
288
301
  # Iterate over each attribute name and value pair of this Node.
289
302
  #
290
303
  # @yield [String,String] The name and value of the current attribute.
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Nokolexbor
4
- VERSION = '0.4.1'
4
+ VERSION = '0.5.0'
5
5
  end
@@ -1,3 +1,15 @@
1
+ diff --git a/source/lexbor/dom/interfaces/node.c b/source/lexbor/dom/interfaces/node.c
2
+ index cf6475d..9f1cf75 100755
3
+ --- a/source/lexbor/dom/interfaces/node.c
4
+ +++ b/source/lexbor/dom/interfaces/node.c
5
+ @@ -85,6 +85,7 @@ lxb_dom_node_interface_copy(lxb_dom_node_t *dst,
6
+
7
+ dst->type = src->type;
8
+ dst->user = src->user;
9
+ + dst->source_location = src->source_location;
10
+
11
+ if (dst->owner_document == src->owner_document) {
12
+ dst->local_name = src->local_name;
1
13
  diff --git a/source/lexbor/dom/interfaces/node.h b/source/lexbor/dom/interfaces/node.h
2
14
  index acd0c1c..f436257 100755
3
15
  --- a/source/lexbor/dom/interfaces/node.h
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: nokolexbor
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.1
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Yicheng Zhou
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-04-04 00:00:00.000000000 Z
11
+ date: 2023-05-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake-compiler