nokolexbor 0.3.7 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 25a206e02eaf74545bf1e7abe7e43a53e38fd8626b337e8de7d6c207e352a686
4
- data.tar.gz: 2b9f7c8a339b2618dbb2de5824b249f0a8fa690713028a9e22b8627a8bb0163a
3
+ metadata.gz: 0055757bc7b9a92e9a2d37235722ca6a6945fb438855880ce10055dfa10d0fcb
4
+ data.tar.gz: b42cb00bc57ac3db09e928ff0b4ae3b3f7fb950312ab64f2606436184785dcc0
5
5
  SHA512:
6
- metadata.gz: 70e579aa1e64ae9cfcf71dd2d0bf3b77b634b69764d43c04aaa02b6db0c5b26cb992d0094ad4b2b9177ca2be46c6b32df38381224bd579bfb722c538075b436e
7
- data.tar.gz: c89997e12178393f171bb5e3735346d99ee67047fb985ec02b08e9433aadc6f18d68278a5256c99ab7174ddd436f9bb49fb97a97808923205803097b083f7ed3
6
+ metadata.gz: 5527c86fea7d9efbbea6bcc1a2fcaaad506057ccf2d22d689f0de143b6c142f9bd1e8b33686ed2be1d01c37c1b02879c35e392a0ca81489f7f9cb125e569bf33
7
+ data.tar.gz: 4af797ac972b1ddfaafceec1ded5de2541daa92a366568f768f63f978bf3215e9a03295f598f4e05eea253aa2af8098ff8de33431119e7aeecad8a223de0c6f4
@@ -802,6 +802,28 @@ nl_node_children(VALUE self)
802
802
  return nl_rb_node_set_create_with_data(array, nl_rb_document_get(self));
803
803
  }
804
804
 
805
+ /**
806
+ * Get the element children of this node.
807
+ *
808
+ * @return [NodeSet] The set of this node's element children.
809
+ */
810
+ static VALUE
811
+ nl_node_element_children(VALUE self)
812
+ {
813
+ lxb_dom_node_t *node = nl_rb_node_unwrap(self);
814
+ lxb_dom_node_t *child = node->first_child;
815
+ lexbor_array_t *array = lexbor_array_create();
816
+
817
+ while (child != NULL) {
818
+ if (child->type == LXB_DOM_NODE_TYPE_ELEMENT) {
819
+ lexbor_array_push(array, child);
820
+ }
821
+ child = child->next;
822
+ }
823
+
824
+ return nl_rb_node_set_create_with_data(array, nl_rb_document_get(self));
825
+ }
826
+
805
827
  /**
806
828
  * Get the first child of this node.
807
829
  *
@@ -1031,20 +1053,8 @@ static VALUE
1031
1053
  nl_node_first_element_child(VALUE self)
1032
1054
  {
1033
1055
  lxb_dom_node_t *parent = nl_rb_node_unwrap(self);
1034
- lxb_dom_node_t *cur;
1056
+ lxb_dom_node_t *cur = parent->first_child;
1035
1057
 
1036
- if (parent == NULL) {
1037
- return Qnil;
1038
- }
1039
- switch (parent->type) {
1040
- case LXB_DOM_NODE_TYPE_ELEMENT:
1041
- case LXB_DOM_NODE_TYPE_ENTITY:
1042
- case LXB_DOM_NODE_TYPE_DOCUMENT:
1043
- cur = parent->first_child;
1044
- break;
1045
- default:
1046
- return Qnil;
1047
- }
1048
1058
  while (cur != NULL) {
1049
1059
  if (cur->type == LXB_DOM_NODE_TYPE_ELEMENT) {
1050
1060
  return nl_rb_node_create(cur, nl_rb_document_get(self));
@@ -1061,20 +1071,8 @@ static VALUE
1061
1071
  nl_node_last_element_child(VALUE self)
1062
1072
  {
1063
1073
  lxb_dom_node_t *parent = nl_rb_node_unwrap(self);
1064
- lxb_dom_node_t *cur;
1074
+ lxb_dom_node_t *cur = parent->last_child;
1065
1075
 
1066
- if (parent == NULL) {
1067
- return Qnil;
1068
- }
1069
- switch (parent->type) {
1070
- case LXB_DOM_NODE_TYPE_ELEMENT:
1071
- case LXB_DOM_NODE_TYPE_ENTITY:
1072
- case LXB_DOM_NODE_TYPE_DOCUMENT:
1073
- cur = parent->last_child;
1074
- break;
1075
- default:
1076
- return Qnil;
1077
- }
1078
1076
  while (cur != NULL) {
1079
1077
  if (cur->type == LXB_DOM_NODE_TYPE_ELEMENT) {
1080
1078
  return nl_rb_node_create(cur, nl_rb_document_get(self));
@@ -1133,6 +1131,16 @@ nl_node_inspect(int argc, VALUE *argv, VALUE self)
1133
1131
  return rb_call_super(argc, argv);
1134
1132
  }
1135
1133
 
1134
+ /**
1135
+ * @return [Integer] The node's location in the source HTML. Returns 0 if the node was not parsed from an HTML string.
1136
+ */
1137
+ static VALUE
1138
+ nl_node_source_location(VALUE self)
1139
+ {
1140
+ lxb_dom_node_t *node = nl_rb_node_unwrap(self);
1141
+ return ULONG2NUM(node->source_location);
1142
+ }
1143
+
1136
1144
  void Init_nl_node(void)
1137
1145
  {
1138
1146
  cNokolexborNode = rb_define_class_under(mNokolexbor, "Node", rb_cObject);
@@ -1164,6 +1172,7 @@ void Init_nl_node(void)
1164
1172
  rb_define_method(cNokolexborNode, "next_element", nl_node_next_element, 0);
1165
1173
  rb_define_method(cNokolexborNode, "children", nl_node_children, 0);
1166
1174
  rb_define_method(cNokolexborNode, "child", nl_node_child, 0);
1175
+ rb_define_method(cNokolexborNode, "element_children", nl_node_element_children, 0);
1167
1176
  rb_define_method(cNokolexborNode, "remove", nl_node_remove, 0);
1168
1177
  rb_define_method(cNokolexborNode, "destroy", nl_node_destroy, 0);
1169
1178
  rb_define_method(cNokolexborNode, "attrs", nl_node_attrs, 0);
@@ -1176,6 +1185,7 @@ void Init_nl_node(void)
1176
1185
  rb_define_method(cNokolexborNode, "last_element_child", nl_node_last_element_child, 0);
1177
1186
  rb_define_method(cNokolexborNode, "clone", nl_node_clone, 0);
1178
1187
  rb_define_method(cNokolexborNode, "inspect", nl_node_inspect, -1);
1188
+ rb_define_method(cNokolexborNode, "source_location", nl_node_source_location, 0);
1179
1189
 
1180
1190
  rb_define_alias(cNokolexborNode, "attr", "[]");
1181
1191
  rb_define_alias(cNokolexborNode, "get_attribute", "[]");
@@ -1183,6 +1193,7 @@ void Init_nl_node(void)
1183
1193
  rb_define_alias(cNokolexborNode, "set_attribute", "[]=");
1184
1194
  rb_define_alias(cNokolexborNode, "has_attribute?", "key?");
1185
1195
  rb_define_alias(cNokolexborNode, "delete", "remove_attr");
1196
+ rb_define_alias(cNokolexborNode, "elements", "element_children");
1186
1197
  rb_define_alias(cNokolexborNode, "remove_attribute", "remove_attr");
1187
1198
  rb_define_alias(cNokolexborNode, "text", "content");
1188
1199
  rb_define_alias(cNokolexborNode, "inner_text", "content");
@@ -300,6 +300,64 @@ nl_node_set_union(VALUE self, VALUE other)
300
300
  return nl_rb_node_set_create_with_data(new_array, nl_rb_document_get(self));
301
301
  }
302
302
 
303
+ /**
304
+ * @return [NodeSet] A new NodeSet with the common nodes only.
305
+ */
306
+ static VALUE
307
+ nl_node_set_intersection(VALUE self, VALUE other)
308
+ {
309
+ if (!rb_obj_is_kind_of(other, cNokolexborNodeSet)) {
310
+ rb_raise(rb_eArgError, "Parameter must be a Nokolexbor::NodeSet");
311
+ }
312
+
313
+ lexbor_array_t *self_array = nl_rb_node_set_unwrap(self);
314
+ lexbor_array_t *other_array = nl_rb_node_set_unwrap(other);
315
+
316
+ lexbor_array_t *new_array = lexbor_array_create();
317
+
318
+ for (size_t i = 0; i < self_array->length; i++) {
319
+ for (size_t j = 0; j < other_array->length; j++) {
320
+ if (self_array->list[i] == other_array->list[j]) {
321
+ lexbor_array_push(new_array, self_array->list[i]);
322
+ break;
323
+ }
324
+ }
325
+ }
326
+
327
+ return nl_rb_node_set_create_with_data(new_array, nl_rb_document_get(self));
328
+ }
329
+
330
+ /**
331
+ * @return [NodeSet] A new NodeSet with the nodes in this NodeSet that aren't in +other+
332
+ */
333
+ static VALUE
334
+ nl_node_set_difference(VALUE self, VALUE other)
335
+ {
336
+ if (!rb_obj_is_kind_of(other, cNokolexborNodeSet)) {
337
+ rb_raise(rb_eArgError, "Parameter must be a Nokolexbor::NodeSet");
338
+ }
339
+
340
+ lexbor_array_t *self_array = nl_rb_node_set_unwrap(self);
341
+ lexbor_array_t *other_array = nl_rb_node_set_unwrap(other);
342
+
343
+ lexbor_array_t *new_array = lexbor_array_create();
344
+
345
+ for (size_t i = 0; i < self_array->length; i++) {
346
+ bool found = false;
347
+ for (size_t j = 0; j < other_array->length; j++) {
348
+ if (self_array->list[i] == other_array->list[j]) {
349
+ found = true;
350
+ break;
351
+ }
352
+ }
353
+ if (!found) {
354
+ lexbor_array_push(new_array, self_array->list[i]);
355
+ }
356
+ }
357
+
358
+ return nl_rb_node_set_create_with_data(new_array, nl_rb_document_get(self));
359
+ }
360
+
303
361
  static lxb_status_t
304
362
  nl_node_set_find(VALUE self, VALUE selector, lxb_selectors_cb_f cb, void *ctx)
305
363
  {
@@ -410,6 +468,8 @@ void Init_nl_node_set(void)
410
468
  rb_define_method(cNokolexborNodeSet, "[]", nl_node_set_slice, -1);
411
469
  rb_define_method(cNokolexborNodeSet, "push", nl_node_set_push, 1);
412
470
  rb_define_method(cNokolexborNodeSet, "|", nl_node_set_union, 1);
471
+ rb_define_method(cNokolexborNodeSet, "&", nl_node_set_intersection, 1);
472
+ rb_define_method(cNokolexborNodeSet, "-", nl_node_set_difference, 1);
413
473
  rb_define_method(cNokolexborNodeSet, "to_a", nl_node_set_to_array, 0);
414
474
  rb_define_method(cNokolexborNodeSet, "delete", nl_node_set_delete, 1);
415
475
  rb_define_method(cNokolexborNodeSet, "include?", nl_node_set_is_include, 1);
@@ -54,6 +54,16 @@ module Nokolexbor
54
54
  length == 0
55
55
  end
56
56
 
57
+ # Insert +node+ before the first Node in this NodeSet
58
+ def before(node)
59
+ first.before(node)
60
+ end
61
+
62
+ # Insert +node+ after the last Node in this NodeSet
63
+ def after(node)
64
+ last.after(node)
65
+ end
66
+
57
67
  # @return [Integer] The index of the first node in this NodeSet that is equal to +node+ or meets the given block. Returns nil if no match is found.
58
68
  def index(node = nil)
59
69
  if node
@@ -169,6 +179,75 @@ module Nokolexbor
169
179
  self
170
180
  end
171
181
 
182
+ # Add the class attribute +name+ to all containing nodes.
183
+ #
184
+ # @see Node#add_class
185
+ def add_class(name)
186
+ each do |el|
187
+ el.add_class(name)
188
+ end
189
+ self
190
+ end
191
+
192
+ # Append the class attribute +name+ to all containing nodes.
193
+ #
194
+ # @see Node#append_class
195
+ def append_class(name)
196
+ each do |el|
197
+ el.append_class(name)
198
+ end
199
+ self
200
+ end
201
+
202
+ # Remove the class attribute +name+ from all containing nodes.
203
+ #
204
+ # @see Node#remove_class
205
+ def remove_class(name = nil)
206
+ each do |el|
207
+ el.remove_class(name)
208
+ end
209
+ self
210
+ end
211
+
212
+ # Remove the attribute named +name+ from all containing nodes.
213
+ #
214
+ # @see Node#remove_attr
215
+ def remove_attr(name)
216
+ each { |el| el.delete(name) }
217
+ self
218
+ end
219
+ alias_method :remove_attribute, :remove_attr
220
+
221
+ # Set attributes on each Node in the NodeSet, or get an
222
+ # attribute from the first Node in the NodeSet.
223
+ #
224
+ # @example Get an attribute from the first Node in a NodeSet.
225
+ # node_set.attr("href")
226
+ #
227
+ # @example Set attributes on each node.
228
+ # node_set.attr("href" => "http://example.com", "class" => "a")
229
+ # node_set.attr("href", "http://example.com")
230
+ # node_set.attr("href") { |node| "http://example.com" }
231
+ #
232
+ # @return [NodeSet] +self+, to support chaining of calls.
233
+ def attr(key, value = nil, &block)
234
+ unless key.is_a?(Hash) || (key && (value || block))
235
+ return first&.attribute(key)
236
+ end
237
+
238
+ hash = key.is_a?(Hash) ? key : { key => value }
239
+
240
+ hash.each do |k, v|
241
+ each do |node|
242
+ node[k] = v || yield(node)
243
+ end
244
+ end
245
+
246
+ self
247
+ end
248
+ alias_method :set, :attr
249
+ alias_method :attribute, :attr
250
+
172
251
  # (see Node#xpath)
173
252
  def xpath(*args)
174
253
  paths, handler, ns, binds = extract_params(args)
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Nokolexbor
4
- VERSION = '0.3.7'
4
+ VERSION = '0.4.1'
5
5
  end
@@ -0,0 +1,24 @@
1
+ diff --git a/source/lexbor/dom/interfaces/document.c b/source/lexbor/dom/interfaces/document.c
2
+ index a2153f4..8a9c69f 100755
3
+ --- a/source/lexbor/dom/interfaces/document.c
4
+ +++ b/source/lexbor/dom/interfaces/document.c
5
+ @@ -12,6 +12,7 @@
6
+ #include "lexbor/dom/interfaces/cdata_section.h"
7
+ #include "lexbor/dom/interfaces/cdata_section.h"
8
+ #include "lexbor/dom/interfaces/processing_instruction.h"
9
+ +#include "lexbor/html/interfaces/template_element.h"
10
+
11
+
12
+ lxb_dom_document_t *
13
+ @@ -449,6 +450,11 @@ lxb_dom_document_import_node(lxb_dom_document_t *doc, lxb_dom_node_t *node,
14
+ return NULL;
15
+ }
16
+
17
+ + if (curr->local_name == LXB_TAG_TEMPLATE && curr->first_child != NULL && cnode->type == LXB_DOM_NODE_TYPE_DOCUMENT_FRAGMENT) {
18
+ + lxb_dom_node_remove(curr->first_child);
19
+ + lxb_html_interface_template(curr)->content = cnode;
20
+ + }
21
+ +
22
+ lxb_dom_node_insert_child(curr, cnode);
23
+
24
+ if (node->first_child != NULL) {
@@ -0,0 +1,65 @@
1
+ diff --git a/source/lexbor/dom/interfaces/node.h b/source/lexbor/dom/interfaces/node.h
2
+ index acd0c1c..f436257 100755
3
+ --- a/source/lexbor/dom/interfaces/node.h
4
+ +++ b/source/lexbor/dom/interfaces/node.h
5
+ @@ -46,6 +46,8 @@ struct lxb_dom_node {
6
+ uintptr_t prefix; /* lowercase: lalala */
7
+ uintptr_t ns; /* namespace */
8
+
9
+ + size_t source_location; /* char offset to the source HTML */
10
+ +
11
+ lxb_dom_document_t *owner_document;
12
+
13
+ lxb_dom_node_t *next;
14
+ diff --git a/source/lexbor/html/tokenizer.c b/source/lexbor/html/tokenizer.c
15
+ index 741bced..6343c65 100755
16
+ --- a/source/lexbor/html/tokenizer.c
17
+ +++ b/source/lexbor/html/tokenizer.c
18
+ @@ -309,6 +309,7 @@ lxb_html_tokenizer_chunk(lxb_html_tokenizer_t *tkz, const lxb_char_t *data,
19
+
20
+ tkz->is_eof = false;
21
+ tkz->status = LXB_STATUS_OK;
22
+ + tkz->first = data;
23
+ tkz->last = end;
24
+
25
+ while (data < end) {
26
+ diff --git a/source/lexbor/html/tokenizer.h b/source/lexbor/html/tokenizer.h
27
+ index ba9602f..08d0d9a 100755
28
+ --- a/source/lexbor/html/tokenizer.h
29
+ +++ b/source/lexbor/html/tokenizer.h
30
+ @@ -72,6 +72,7 @@ struct lxb_html_tokenizer {
31
+ lxb_char_t *pos;
32
+ const lxb_char_t *end;
33
+ const lxb_char_t *begin;
34
+ + const lxb_char_t *first;
35
+ const lxb_char_t *last;
36
+
37
+ /* Entities */
38
+ diff --git a/source/lexbor/html/tree.c b/source/lexbor/html/tree.c
39
+ index 8c42990..28c97cc 100755
40
+ --- a/source/lexbor/html/tree.c
41
+ +++ b/source/lexbor/html/tree.c
42
+ @@ -484,6 +484,7 @@ lxb_html_tree_append_attributes(lxb_html_tree_t *tree,
43
+
44
+ attr->node.local_name = token_attr->name->attr_id;
45
+ attr->node.ns = ns;
46
+ + attr->node.source_location = token_attr->name_begin - tree->tkz_ref->first;
47
+
48
+ /* Fix for adjust MathML/SVG attributes */
49
+ if (tree->before_append_attr != NULL) {
50
+ diff --git a/source/lexbor/html/tree.h b/source/lexbor/html/tree.h
51
+ index 231239d..bc0249e 100755
52
+ --- a/source/lexbor/html/tree.h
53
+ +++ b/source/lexbor/html/tree.h
54
+ @@ -266,8 +266,10 @@ lxb_inline lxb_dom_node_t *
55
+ lxb_html_tree_create_node(lxb_html_tree_t *tree,
56
+ lxb_tag_id_t tag_id, lxb_ns_id_t ns)
57
+ {
58
+ - return (lxb_dom_node_t *) lxb_html_interface_create(tree->document,
59
+ + lxb_dom_node_t *node = (lxb_dom_node_t *) lxb_html_interface_create(tree->document,
60
+ tag_id, ns);
61
+ + node->source_location = (tree->tkz_ref->token->begin > tree->tkz_ref->first ? tree->tkz_ref->token->begin : tree->tkz_ref->begin) - tree->tkz_ref->first;
62
+ + return node;
63
+ }
64
+
65
+ lxb_inline bool
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: nokolexbor
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.7
4
+ version: 0.4.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Yicheng Zhou
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-02-12 00:00:00.000000000 Z
11
+ date: 2023-04-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake-compiler
@@ -130,6 +130,8 @@ files:
130
130
  - patches/0001-lexbor-support-text-pseudo-element.patch
131
131
  - patches/0002-lexbor-match-id-class-case-sensitive.patch
132
132
  - patches/0003-lexbor-attach-template-content-to-self.patch
133
+ - patches/0004-lexbor-fix-template-clone.patch
134
+ - patches/0005-lexbor-add-source-location-to-node.patch
133
135
  - vendor/lexbor/CMakeLists.txt
134
136
  - vendor/lexbor/config.cmake
135
137
  - vendor/lexbor/feature.cmake