nokolexbor 0.3.7 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/nokolexbor/nl_node.c +37 -26
- data/ext/nokolexbor/nl_node_set.c +60 -0
- data/lib/nokolexbor/node_set.rb +79 -0
- data/lib/nokolexbor/version.rb +1 -1
- data/patches/0004-lexbor-fix-template-clone.patch +24 -0
- data/patches/0005-lexbor-add-source-location-to-node.patch +65 -0
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0055757bc7b9a92e9a2d37235722ca6a6945fb438855880ce10055dfa10d0fcb
|
4
|
+
data.tar.gz: b42cb00bc57ac3db09e928ff0b4ae3b3f7fb950312ab64f2606436184785dcc0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5527c86fea7d9efbbea6bcc1a2fcaaad506057ccf2d22d689f0de143b6c142f9bd1e8b33686ed2be1d01c37c1b02879c35e392a0ca81489f7f9cb125e569bf33
|
7
|
+
data.tar.gz: 4af797ac972b1ddfaafceec1ded5de2541daa92a366568f768f63f978bf3215e9a03295f598f4e05eea253aa2af8098ff8de33431119e7aeecad8a223de0c6f4
|
data/ext/nokolexbor/nl_node.c
CHANGED
@@ -802,6 +802,28 @@ nl_node_children(VALUE self)
|
|
802
802
|
return nl_rb_node_set_create_with_data(array, nl_rb_document_get(self));
|
803
803
|
}
|
804
804
|
|
805
|
+
/**
|
806
|
+
* Get the element children of this node.
|
807
|
+
*
|
808
|
+
* @return [NodeSet] The set of this node's element children.
|
809
|
+
*/
|
810
|
+
static VALUE
|
811
|
+
nl_node_element_children(VALUE self)
|
812
|
+
{
|
813
|
+
lxb_dom_node_t *node = nl_rb_node_unwrap(self);
|
814
|
+
lxb_dom_node_t *child = node->first_child;
|
815
|
+
lexbor_array_t *array = lexbor_array_create();
|
816
|
+
|
817
|
+
while (child != NULL) {
|
818
|
+
if (child->type == LXB_DOM_NODE_TYPE_ELEMENT) {
|
819
|
+
lexbor_array_push(array, child);
|
820
|
+
}
|
821
|
+
child = child->next;
|
822
|
+
}
|
823
|
+
|
824
|
+
return nl_rb_node_set_create_with_data(array, nl_rb_document_get(self));
|
825
|
+
}
|
826
|
+
|
805
827
|
/**
|
806
828
|
* Get the first child of this node.
|
807
829
|
*
|
@@ -1031,20 +1053,8 @@ static VALUE
|
|
1031
1053
|
nl_node_first_element_child(VALUE self)
|
1032
1054
|
{
|
1033
1055
|
lxb_dom_node_t *parent = nl_rb_node_unwrap(self);
|
1034
|
-
lxb_dom_node_t *cur;
|
1056
|
+
lxb_dom_node_t *cur = parent->first_child;
|
1035
1057
|
|
1036
|
-
if (parent == NULL) {
|
1037
|
-
return Qnil;
|
1038
|
-
}
|
1039
|
-
switch (parent->type) {
|
1040
|
-
case LXB_DOM_NODE_TYPE_ELEMENT:
|
1041
|
-
case LXB_DOM_NODE_TYPE_ENTITY:
|
1042
|
-
case LXB_DOM_NODE_TYPE_DOCUMENT:
|
1043
|
-
cur = parent->first_child;
|
1044
|
-
break;
|
1045
|
-
default:
|
1046
|
-
return Qnil;
|
1047
|
-
}
|
1048
1058
|
while (cur != NULL) {
|
1049
1059
|
if (cur->type == LXB_DOM_NODE_TYPE_ELEMENT) {
|
1050
1060
|
return nl_rb_node_create(cur, nl_rb_document_get(self));
|
@@ -1061,20 +1071,8 @@ static VALUE
|
|
1061
1071
|
nl_node_last_element_child(VALUE self)
|
1062
1072
|
{
|
1063
1073
|
lxb_dom_node_t *parent = nl_rb_node_unwrap(self);
|
1064
|
-
lxb_dom_node_t *cur;
|
1074
|
+
lxb_dom_node_t *cur = parent->last_child;
|
1065
1075
|
|
1066
|
-
if (parent == NULL) {
|
1067
|
-
return Qnil;
|
1068
|
-
}
|
1069
|
-
switch (parent->type) {
|
1070
|
-
case LXB_DOM_NODE_TYPE_ELEMENT:
|
1071
|
-
case LXB_DOM_NODE_TYPE_ENTITY:
|
1072
|
-
case LXB_DOM_NODE_TYPE_DOCUMENT:
|
1073
|
-
cur = parent->last_child;
|
1074
|
-
break;
|
1075
|
-
default:
|
1076
|
-
return Qnil;
|
1077
|
-
}
|
1078
1076
|
while (cur != NULL) {
|
1079
1077
|
if (cur->type == LXB_DOM_NODE_TYPE_ELEMENT) {
|
1080
1078
|
return nl_rb_node_create(cur, nl_rb_document_get(self));
|
@@ -1133,6 +1131,16 @@ nl_node_inspect(int argc, VALUE *argv, VALUE self)
|
|
1133
1131
|
return rb_call_super(argc, argv);
|
1134
1132
|
}
|
1135
1133
|
|
1134
|
+
/**
|
1135
|
+
* @return [Integer] The node's location in the source HTML. Returns 0 if the node is not parsed from an HTML string.
|
1136
|
+
*/
|
1137
|
+
static VALUE
|
1138
|
+
nl_node_source_location(VALUE self)
|
1139
|
+
{
|
1140
|
+
lxb_dom_node_t *node = nl_rb_node_unwrap(self);
|
1141
|
+
return ULONG2NUM(node->source_location);
|
1142
|
+
}
|
1143
|
+
|
1136
1144
|
void Init_nl_node(void)
|
1137
1145
|
{
|
1138
1146
|
cNokolexborNode = rb_define_class_under(mNokolexbor, "Node", rb_cObject);
|
@@ -1164,6 +1172,7 @@ void Init_nl_node(void)
|
|
1164
1172
|
rb_define_method(cNokolexborNode, "next_element", nl_node_next_element, 0);
|
1165
1173
|
rb_define_method(cNokolexborNode, "children", nl_node_children, 0);
|
1166
1174
|
rb_define_method(cNokolexborNode, "child", nl_node_child, 0);
|
1175
|
+
rb_define_method(cNokolexborNode, "element_children", nl_node_element_children, 0);
|
1167
1176
|
rb_define_method(cNokolexborNode, "remove", nl_node_remove, 0);
|
1168
1177
|
rb_define_method(cNokolexborNode, "destroy", nl_node_destroy, 0);
|
1169
1178
|
rb_define_method(cNokolexborNode, "attrs", nl_node_attrs, 0);
|
@@ -1176,6 +1185,7 @@ void Init_nl_node(void)
|
|
1176
1185
|
rb_define_method(cNokolexborNode, "last_element_child", nl_node_last_element_child, 0);
|
1177
1186
|
rb_define_method(cNokolexborNode, "clone", nl_node_clone, 0);
|
1178
1187
|
rb_define_method(cNokolexborNode, "inspect", nl_node_inspect, -1);
|
1188
|
+
rb_define_method(cNokolexborNode, "source_location", nl_node_source_location, 0);
|
1179
1189
|
|
1180
1190
|
rb_define_alias(cNokolexborNode, "attr", "[]");
|
1181
1191
|
rb_define_alias(cNokolexborNode, "get_attribute", "[]");
|
@@ -1183,6 +1193,7 @@ void Init_nl_node(void)
|
|
1183
1193
|
rb_define_alias(cNokolexborNode, "set_attribute", "[]=");
|
1184
1194
|
rb_define_alias(cNokolexborNode, "has_attribute?", "key?");
|
1185
1195
|
rb_define_alias(cNokolexborNode, "delete", "remove_attr");
|
1196
|
+
rb_define_alias(cNokolexborNode, "elements", "element_children");
|
1186
1197
|
rb_define_alias(cNokolexborNode, "remove_attribute", "remove_attr");
|
1187
1198
|
rb_define_alias(cNokolexborNode, "text", "content");
|
1188
1199
|
rb_define_alias(cNokolexborNode, "inner_text", "content");
|
@@ -300,6 +300,64 @@ nl_node_set_union(VALUE self, VALUE other)
|
|
300
300
|
return nl_rb_node_set_create_with_data(new_array, nl_rb_document_get(self));
|
301
301
|
}
|
302
302
|
|
303
|
+
/**
|
304
|
+
* @return [NodeSet] A new NodeSet with the common nodes only.
|
305
|
+
*/
|
306
|
+
static VALUE
|
307
|
+
nl_node_set_intersection(VALUE self, VALUE other)
|
308
|
+
{
|
309
|
+
if (!rb_obj_is_kind_of(other, cNokolexborNodeSet)) {
|
310
|
+
rb_raise(rb_eArgError, "Parameter must be a Nokolexbor::NodeSet");
|
311
|
+
}
|
312
|
+
|
313
|
+
lexbor_array_t *self_array = nl_rb_node_set_unwrap(self);
|
314
|
+
lexbor_array_t *other_array = nl_rb_node_set_unwrap(other);
|
315
|
+
|
316
|
+
lexbor_array_t *new_array = lexbor_array_create();
|
317
|
+
|
318
|
+
for (size_t i = 0; i < self_array->length; i++) {
|
319
|
+
for (size_t j = 0; j < other_array->length; j++) {
|
320
|
+
if (self_array->list[i] == other_array->list[j]) {
|
321
|
+
lexbor_array_push(new_array, self_array->list[i]);
|
322
|
+
break;
|
323
|
+
}
|
324
|
+
}
|
325
|
+
}
|
326
|
+
|
327
|
+
return nl_rb_node_set_create_with_data(new_array, nl_rb_document_get(self));
|
328
|
+
}
|
329
|
+
|
330
|
+
/**
|
331
|
+
* @return [NodeSet] A new NodeSet with the nodes in this NodeSet that aren't in +other+
|
332
|
+
*/
|
333
|
+
static VALUE
|
334
|
+
nl_node_set_difference(VALUE self, VALUE other)
|
335
|
+
{
|
336
|
+
if (!rb_obj_is_kind_of(other, cNokolexborNodeSet)) {
|
337
|
+
rb_raise(rb_eArgError, "Parameter must be a Nokolexbor::NodeSet");
|
338
|
+
}
|
339
|
+
|
340
|
+
lexbor_array_t *self_array = nl_rb_node_set_unwrap(self);
|
341
|
+
lexbor_array_t *other_array = nl_rb_node_set_unwrap(other);
|
342
|
+
|
343
|
+
lexbor_array_t *new_array = lexbor_array_create();
|
344
|
+
|
345
|
+
for (size_t i = 0; i < self_array->length; i++) {
|
346
|
+
bool found = false;
|
347
|
+
for (size_t j = 0; j < other_array->length; j++) {
|
348
|
+
if (self_array->list[i] == other_array->list[j]) {
|
349
|
+
found = true;
|
350
|
+
break;
|
351
|
+
}
|
352
|
+
}
|
353
|
+
if (!found) {
|
354
|
+
lexbor_array_push(new_array, self_array->list[i]);
|
355
|
+
}
|
356
|
+
}
|
357
|
+
|
358
|
+
return nl_rb_node_set_create_with_data(new_array, nl_rb_document_get(self));
|
359
|
+
}
|
360
|
+
|
303
361
|
static lxb_status_t
|
304
362
|
nl_node_set_find(VALUE self, VALUE selector, lxb_selectors_cb_f cb, void *ctx)
|
305
363
|
{
|
@@ -410,6 +468,8 @@ void Init_nl_node_set(void)
|
|
410
468
|
rb_define_method(cNokolexborNodeSet, "[]", nl_node_set_slice, -1);
|
411
469
|
rb_define_method(cNokolexborNodeSet, "push", nl_node_set_push, 1);
|
412
470
|
rb_define_method(cNokolexborNodeSet, "|", nl_node_set_union, 1);
|
471
|
+
rb_define_method(cNokolexborNodeSet, "&", nl_node_set_intersection, 1);
|
472
|
+
rb_define_method(cNokolexborNodeSet, "-", nl_node_set_difference, 1);
|
413
473
|
rb_define_method(cNokolexborNodeSet, "to_a", nl_node_set_to_array, 0);
|
414
474
|
rb_define_method(cNokolexborNodeSet, "delete", nl_node_set_delete, 1);
|
415
475
|
rb_define_method(cNokolexborNodeSet, "include?", nl_node_set_is_include, 1);
|
data/lib/nokolexbor/node_set.rb
CHANGED
@@ -54,6 +54,16 @@ module Nokolexbor
|
|
54
54
|
length == 0
|
55
55
|
end
|
56
56
|
|
57
|
+
# Insert +node+ before the first Node in this NodeSet
|
58
|
+
def before(node)
|
59
|
+
first.before(node)
|
60
|
+
end
|
61
|
+
|
62
|
+
# Insert +node+ after the last Node in this NodeSet
|
63
|
+
def after(node)
|
64
|
+
last.after(node)
|
65
|
+
end
|
66
|
+
|
57
67
|
# @return [Integer] The index of the first node in this NodeSet that is equal to +node+ or meets the given block. Returns nil if no match is found.
|
58
68
|
def index(node = nil)
|
59
69
|
if node
|
@@ -169,6 +179,75 @@ module Nokolexbor
|
|
169
179
|
self
|
170
180
|
end
|
171
181
|
|
182
|
+
# Add the class attribute +name+ to all containing nodes.
|
183
|
+
#
|
184
|
+
# @see Node#add_class
|
185
|
+
def add_class(name)
|
186
|
+
each do |el|
|
187
|
+
el.add_class(name)
|
188
|
+
end
|
189
|
+
self
|
190
|
+
end
|
191
|
+
|
192
|
+
# Append the class attribute +name+ to all containing nodes.
|
193
|
+
#
|
194
|
+
# @see Node#append_class
|
195
|
+
def append_class(name)
|
196
|
+
each do |el|
|
197
|
+
el.append_class(name)
|
198
|
+
end
|
199
|
+
self
|
200
|
+
end
|
201
|
+
|
202
|
+
# Remove the class attribute +name+ from all containing nodes.
|
203
|
+
#
|
204
|
+
# @see Node#remove_class
|
205
|
+
def remove_class(name = nil)
|
206
|
+
each do |el|
|
207
|
+
el.remove_class(name)
|
208
|
+
end
|
209
|
+
self
|
210
|
+
end
|
211
|
+
|
212
|
+
# Remove the attribute named +name+ from all containing nodes.
|
213
|
+
#
|
214
|
+
# @see Node#remove_attr
|
215
|
+
def remove_attr(name)
|
216
|
+
each { |el| el.delete(name) }
|
217
|
+
self
|
218
|
+
end
|
219
|
+
alias_method :remove_attribute, :remove_attr
|
220
|
+
|
221
|
+
# Set attributes on each Node in the NodeSet, or get an
|
222
|
+
# attribute from the first Node in the NodeSet.
|
223
|
+
#
|
224
|
+
# @example Get an attribute from the first Node in a NodeSet.
|
225
|
+
# node_set.attr("href")
|
226
|
+
#
|
227
|
+
# @example Set attributes on each node.
|
228
|
+
# node_set.attr("href" => "http://example.com", "class" => "a")
|
229
|
+
# node_set.attr("href", "http://example.com")
|
230
|
+
# node_set.attr("href") { |node| "http://example.com" }
|
231
|
+
#
|
232
|
+
# @return [NodeSet] +self+, to support chaining of calls.
|
233
|
+
def attr(key, value = nil, &block)
|
234
|
+
unless key.is_a?(Hash) || (key && (value || block))
|
235
|
+
return first&.attribute(key)
|
236
|
+
end
|
237
|
+
|
238
|
+
hash = key.is_a?(Hash) ? key : { key => value }
|
239
|
+
|
240
|
+
hash.each do |k, v|
|
241
|
+
each do |node|
|
242
|
+
node[k] = v || yield(node)
|
243
|
+
end
|
244
|
+
end
|
245
|
+
|
246
|
+
self
|
247
|
+
end
|
248
|
+
alias_method :set, :attr
|
249
|
+
alias_method :attribute, :attr
|
250
|
+
|
172
251
|
# (see Node#xpath)
|
173
252
|
def xpath(*args)
|
174
253
|
paths, handler, ns, binds = extract_params(args)
|
data/lib/nokolexbor/version.rb
CHANGED
@@ -0,0 +1,24 @@
|
|
1
|
+
diff --git a/source/lexbor/dom/interfaces/document.c b/source/lexbor/dom/interfaces/document.c
|
2
|
+
index a2153f4..8a9c69f 100755
|
3
|
+
--- a/source/lexbor/dom/interfaces/document.c
|
4
|
+
+++ b/source/lexbor/dom/interfaces/document.c
|
5
|
+
@@ -12,6 +12,7 @@
|
6
|
+
#include "lexbor/dom/interfaces/cdata_section.h"
|
7
|
+
#include "lexbor/dom/interfaces/cdata_section.h"
|
8
|
+
#include "lexbor/dom/interfaces/processing_instruction.h"
|
9
|
+
+#include "lexbor/html/interfaces/template_element.h"
|
10
|
+
|
11
|
+
|
12
|
+
lxb_dom_document_t *
|
13
|
+
@@ -449,6 +450,11 @@ lxb_dom_document_import_node(lxb_dom_document_t *doc, lxb_dom_node_t *node,
|
14
|
+
return NULL;
|
15
|
+
}
|
16
|
+
|
17
|
+
+ if (curr->local_name == LXB_TAG_TEMPLATE && curr->first_child != NULL && cnode->type == LXB_DOM_NODE_TYPE_DOCUMENT_FRAGMENT) {
|
18
|
+
+ lxb_dom_node_remove(curr->first_child);
|
19
|
+
+ lxb_html_interface_template(curr)->content = cnode;
|
20
|
+
+ }
|
21
|
+
+
|
22
|
+
lxb_dom_node_insert_child(curr, cnode);
|
23
|
+
|
24
|
+
if (node->first_child != NULL) {
|
@@ -0,0 +1,65 @@
|
|
1
|
+
diff --git a/source/lexbor/dom/interfaces/node.h b/source/lexbor/dom/interfaces/node.h
|
2
|
+
index acd0c1c..f436257 100755
|
3
|
+
--- a/source/lexbor/dom/interfaces/node.h
|
4
|
+
+++ b/source/lexbor/dom/interfaces/node.h
|
5
|
+
@@ -46,6 +46,8 @@ struct lxb_dom_node {
|
6
|
+
uintptr_t prefix; /* lowercase: lalala */
|
7
|
+
uintptr_t ns; /* namespace */
|
8
|
+
|
9
|
+
+ size_t source_location; /* char offset to the source HTML */
|
10
|
+
+
|
11
|
+
lxb_dom_document_t *owner_document;
|
12
|
+
|
13
|
+
lxb_dom_node_t *next;
|
14
|
+
diff --git a/source/lexbor/html/tokenizer.c b/source/lexbor/html/tokenizer.c
|
15
|
+
index 741bced..6343c65 100755
|
16
|
+
--- a/source/lexbor/html/tokenizer.c
|
17
|
+
+++ b/source/lexbor/html/tokenizer.c
|
18
|
+
@@ -309,6 +309,7 @@ lxb_html_tokenizer_chunk(lxb_html_tokenizer_t *tkz, const lxb_char_t *data,
|
19
|
+
|
20
|
+
tkz->is_eof = false;
|
21
|
+
tkz->status = LXB_STATUS_OK;
|
22
|
+
+ tkz->first = data;
|
23
|
+
tkz->last = end;
|
24
|
+
|
25
|
+
while (data < end) {
|
26
|
+
diff --git a/source/lexbor/html/tokenizer.h b/source/lexbor/html/tokenizer.h
|
27
|
+
index ba9602f..08d0d9a 100755
|
28
|
+
--- a/source/lexbor/html/tokenizer.h
|
29
|
+
+++ b/source/lexbor/html/tokenizer.h
|
30
|
+
@@ -72,6 +72,7 @@ struct lxb_html_tokenizer {
|
31
|
+
lxb_char_t *pos;
|
32
|
+
const lxb_char_t *end;
|
33
|
+
const lxb_char_t *begin;
|
34
|
+
+ const lxb_char_t *first;
|
35
|
+
const lxb_char_t *last;
|
36
|
+
|
37
|
+
/* Entities */
|
38
|
+
diff --git a/source/lexbor/html/tree.c b/source/lexbor/html/tree.c
|
39
|
+
index 8c42990..28c97cc 100755
|
40
|
+
--- a/source/lexbor/html/tree.c
|
41
|
+
+++ b/source/lexbor/html/tree.c
|
42
|
+
@@ -484,6 +484,7 @@ lxb_html_tree_append_attributes(lxb_html_tree_t *tree,
|
43
|
+
|
44
|
+
attr->node.local_name = token_attr->name->attr_id;
|
45
|
+
attr->node.ns = ns;
|
46
|
+
+ attr->node.source_location = token_attr->name_begin - tree->tkz_ref->first;
|
47
|
+
|
48
|
+
/* Fix for adjust MathML/SVG attributes */
|
49
|
+
if (tree->before_append_attr != NULL) {
|
50
|
+
diff --git a/source/lexbor/html/tree.h b/source/lexbor/html/tree.h
|
51
|
+
index 231239d..bc0249e 100755
|
52
|
+
--- a/source/lexbor/html/tree.h
|
53
|
+
+++ b/source/lexbor/html/tree.h
|
54
|
+
@@ -266,8 +266,10 @@ lxb_inline lxb_dom_node_t *
|
55
|
+
lxb_html_tree_create_node(lxb_html_tree_t *tree,
|
56
|
+
lxb_tag_id_t tag_id, lxb_ns_id_t ns)
|
57
|
+
{
|
58
|
+
- return (lxb_dom_node_t *) lxb_html_interface_create(tree->document,
|
59
|
+
+ lxb_dom_node_t *node = (lxb_dom_node_t *) lxb_html_interface_create(tree->document,
|
60
|
+
tag_id, ns);
|
61
|
+
+ node->source_location = (tree->tkz_ref->token->begin > tree->tkz_ref->first ? tree->tkz_ref->token->begin : tree->tkz_ref->begin) - tree->tkz_ref->first;
|
62
|
+
+ return node;
|
63
|
+
}
|
64
|
+
|
65
|
+
lxb_inline bool
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: nokolexbor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.7
|
4
|
+
version: 0.4.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Yicheng Zhou
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-04-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake-compiler
|
@@ -130,6 +130,8 @@ files:
|
|
130
130
|
- patches/0001-lexbor-support-text-pseudo-element.patch
|
131
131
|
- patches/0002-lexbor-match-id-class-case-sensitive.patch
|
132
132
|
- patches/0003-lexbor-attach-template-content-to-self.patch
|
133
|
+
- patches/0004-lexbor-fix-template-clone.patch
|
134
|
+
- patches/0005-lexbor-add-source-location-to-node.patch
|
133
135
|
- vendor/lexbor/CMakeLists.txt
|
134
136
|
- vendor/lexbor/config.cmake
|
135
137
|
- vendor/lexbor/feature.cmake
|