nokolexbor 0.3.7 → 0.4.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/ext/nokolexbor/nl_node.c +37 -26
- data/ext/nokolexbor/nl_node_set.c +60 -0
- data/lib/nokolexbor/node_set.rb +79 -0
- data/lib/nokolexbor/version.rb +1 -1
- data/patches/0004-lexbor-fix-template-clone.patch +24 -0
- data/patches/0005-lexbor-add-source-location-to-node.patch +65 -0
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0055757bc7b9a92e9a2d37235722ca6a6945fb438855880ce10055dfa10d0fcb
|
4
|
+
data.tar.gz: b42cb00bc57ac3db09e928ff0b4ae3b3f7fb950312ab64f2606436184785dcc0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5527c86fea7d9efbbea6bcc1a2fcaaad506057ccf2d22d689f0de143b6c142f9bd1e8b33686ed2be1d01c37c1b02879c35e392a0ca81489f7f9cb125e569bf33
|
7
|
+
data.tar.gz: 4af797ac972b1ddfaafceec1ded5de2541daa92a366568f768f63f978bf3215e9a03295f598f4e05eea253aa2af8098ff8de33431119e7aeecad8a223de0c6f4
|
data/ext/nokolexbor/nl_node.c
CHANGED
@@ -802,6 +802,28 @@ nl_node_children(VALUE self)
|
|
802
802
|
return nl_rb_node_set_create_with_data(array, nl_rb_document_get(self));
|
803
803
|
}
|
804
804
|
|
805
|
+
/**
|
806
|
+
* Get the element children of this node.
|
807
|
+
*
|
808
|
+
* @return [NodeSet] The set of this node's element children.
|
809
|
+
*/
|
810
|
+
static VALUE
|
811
|
+
nl_node_element_children(VALUE self)
|
812
|
+
{
|
813
|
+
lxb_dom_node_t *node = nl_rb_node_unwrap(self);
|
814
|
+
lxb_dom_node_t *child = node->first_child;
|
815
|
+
lexbor_array_t *array = lexbor_array_create();
|
816
|
+
|
817
|
+
while (child != NULL) {
|
818
|
+
if (child->type == LXB_DOM_NODE_TYPE_ELEMENT) {
|
819
|
+
lexbor_array_push(array, child);
|
820
|
+
}
|
821
|
+
child = child->next;
|
822
|
+
}
|
823
|
+
|
824
|
+
return nl_rb_node_set_create_with_data(array, nl_rb_document_get(self));
|
825
|
+
}
|
826
|
+
|
805
827
|
/**
|
806
828
|
* Get the first child of this node.
|
807
829
|
*
|
@@ -1031,20 +1053,8 @@ static VALUE
|
|
1031
1053
|
nl_node_first_element_child(VALUE self)
|
1032
1054
|
{
|
1033
1055
|
lxb_dom_node_t *parent = nl_rb_node_unwrap(self);
|
1034
|
-
lxb_dom_node_t *cur;
|
1056
|
+
lxb_dom_node_t *cur = parent->first_child;
|
1035
1057
|
|
1036
|
-
if (parent == NULL) {
|
1037
|
-
return Qnil;
|
1038
|
-
}
|
1039
|
-
switch (parent->type) {
|
1040
|
-
case LXB_DOM_NODE_TYPE_ELEMENT:
|
1041
|
-
case LXB_DOM_NODE_TYPE_ENTITY:
|
1042
|
-
case LXB_DOM_NODE_TYPE_DOCUMENT:
|
1043
|
-
cur = parent->first_child;
|
1044
|
-
break;
|
1045
|
-
default:
|
1046
|
-
return Qnil;
|
1047
|
-
}
|
1048
1058
|
while (cur != NULL) {
|
1049
1059
|
if (cur->type == LXB_DOM_NODE_TYPE_ELEMENT) {
|
1050
1060
|
return nl_rb_node_create(cur, nl_rb_document_get(self));
|
@@ -1061,20 +1071,8 @@ static VALUE
|
|
1061
1071
|
nl_node_last_element_child(VALUE self)
|
1062
1072
|
{
|
1063
1073
|
lxb_dom_node_t *parent = nl_rb_node_unwrap(self);
|
1064
|
-
lxb_dom_node_t *cur;
|
1074
|
+
lxb_dom_node_t *cur = parent->last_child;
|
1065
1075
|
|
1066
|
-
if (parent == NULL) {
|
1067
|
-
return Qnil;
|
1068
|
-
}
|
1069
|
-
switch (parent->type) {
|
1070
|
-
case LXB_DOM_NODE_TYPE_ELEMENT:
|
1071
|
-
case LXB_DOM_NODE_TYPE_ENTITY:
|
1072
|
-
case LXB_DOM_NODE_TYPE_DOCUMENT:
|
1073
|
-
cur = parent->last_child;
|
1074
|
-
break;
|
1075
|
-
default:
|
1076
|
-
return Qnil;
|
1077
|
-
}
|
1078
1076
|
while (cur != NULL) {
|
1079
1077
|
if (cur->type == LXB_DOM_NODE_TYPE_ELEMENT) {
|
1080
1078
|
return nl_rb_node_create(cur, nl_rb_document_get(self));
|
@@ -1133,6 +1131,16 @@ nl_node_inspect(int argc, VALUE *argv, VALUE self)
|
|
1133
1131
|
return rb_call_super(argc, argv);
|
1134
1132
|
}
|
1135
1133
|
|
1134
|
+
/**
|
1135
|
+
* @return [Integer] The node's location in the source HTML. Returns 0 if the node was not parsed from an HTML string.
|
1136
|
+
*/
|
1137
|
+
static VALUE
|
1138
|
+
nl_node_source_location(VALUE self)
|
1139
|
+
{
|
1140
|
+
lxb_dom_node_t *node = nl_rb_node_unwrap(self);
|
1141
|
+
return ULONG2NUM(node->source_location);
|
1142
|
+
}
|
1143
|
+
|
1136
1144
|
void Init_nl_node(void)
|
1137
1145
|
{
|
1138
1146
|
cNokolexborNode = rb_define_class_under(mNokolexbor, "Node", rb_cObject);
|
@@ -1164,6 +1172,7 @@ void Init_nl_node(void)
|
|
1164
1172
|
rb_define_method(cNokolexborNode, "next_element", nl_node_next_element, 0);
|
1165
1173
|
rb_define_method(cNokolexborNode, "children", nl_node_children, 0);
|
1166
1174
|
rb_define_method(cNokolexborNode, "child", nl_node_child, 0);
|
1175
|
+
rb_define_method(cNokolexborNode, "element_children", nl_node_element_children, 0);
|
1167
1176
|
rb_define_method(cNokolexborNode, "remove", nl_node_remove, 0);
|
1168
1177
|
rb_define_method(cNokolexborNode, "destroy", nl_node_destroy, 0);
|
1169
1178
|
rb_define_method(cNokolexborNode, "attrs", nl_node_attrs, 0);
|
@@ -1176,6 +1185,7 @@ void Init_nl_node(void)
|
|
1176
1185
|
rb_define_method(cNokolexborNode, "last_element_child", nl_node_last_element_child, 0);
|
1177
1186
|
rb_define_method(cNokolexborNode, "clone", nl_node_clone, 0);
|
1178
1187
|
rb_define_method(cNokolexborNode, "inspect", nl_node_inspect, -1);
|
1188
|
+
rb_define_method(cNokolexborNode, "source_location", nl_node_source_location, 0);
|
1179
1189
|
|
1180
1190
|
rb_define_alias(cNokolexborNode, "attr", "[]");
|
1181
1191
|
rb_define_alias(cNokolexborNode, "get_attribute", "[]");
|
@@ -1183,6 +1193,7 @@ void Init_nl_node(void)
|
|
1183
1193
|
rb_define_alias(cNokolexborNode, "set_attribute", "[]=");
|
1184
1194
|
rb_define_alias(cNokolexborNode, "has_attribute?", "key?");
|
1185
1195
|
rb_define_alias(cNokolexborNode, "delete", "remove_attr");
|
1196
|
+
rb_define_alias(cNokolexborNode, "elements", "element_children");
|
1186
1197
|
rb_define_alias(cNokolexborNode, "remove_attribute", "remove_attr");
|
1187
1198
|
rb_define_alias(cNokolexborNode, "text", "content");
|
1188
1199
|
rb_define_alias(cNokolexborNode, "inner_text", "content");
|
data/ext/nokolexbor/nl_node_set.c
CHANGED
@@ -300,6 +300,64 @@ nl_node_set_union(VALUE self, VALUE other)
|
|
300
300
|
return nl_rb_node_set_create_with_data(new_array, nl_rb_document_get(self));
|
301
301
|
}
|
302
302
|
|
303
|
+
/**
|
304
|
+
* @return [NodeSet] A new NodeSet with the common nodes only.
|
305
|
+
*/
|
306
|
+
static VALUE
|
307
|
+
nl_node_set_intersection(VALUE self, VALUE other)
|
308
|
+
{
|
309
|
+
if (!rb_obj_is_kind_of(other, cNokolexborNodeSet)) {
|
310
|
+
rb_raise(rb_eArgError, "Parameter must be a Nokolexbor::NodeSet");
|
311
|
+
}
|
312
|
+
|
313
|
+
lexbor_array_t *self_array = nl_rb_node_set_unwrap(self);
|
314
|
+
lexbor_array_t *other_array = nl_rb_node_set_unwrap(other);
|
315
|
+
|
316
|
+
lexbor_array_t *new_array = lexbor_array_create();
|
317
|
+
|
318
|
+
for (size_t i = 0; i < self_array->length; i++) {
|
319
|
+
for (size_t j = 0; j < other_array->length; j++) {
|
320
|
+
if (self_array->list[i] == other_array->list[j]) {
|
321
|
+
lexbor_array_push(new_array, self_array->list[i]);
|
322
|
+
break;
|
323
|
+
}
|
324
|
+
}
|
325
|
+
}
|
326
|
+
|
327
|
+
return nl_rb_node_set_create_with_data(new_array, nl_rb_document_get(self));
|
328
|
+
}
|
329
|
+
|
330
|
+
/**
|
331
|
+
* @return [NodeSet] A new NodeSet with the nodes in this NodeSet that aren't in +other+
|
332
|
+
*/
|
333
|
+
static VALUE
|
334
|
+
nl_node_set_difference(VALUE self, VALUE other)
|
335
|
+
{
|
336
|
+
if (!rb_obj_is_kind_of(other, cNokolexborNodeSet)) {
|
337
|
+
rb_raise(rb_eArgError, "Parameter must be a Nokolexbor::NodeSet");
|
338
|
+
}
|
339
|
+
|
340
|
+
lexbor_array_t *self_array = nl_rb_node_set_unwrap(self);
|
341
|
+
lexbor_array_t *other_array = nl_rb_node_set_unwrap(other);
|
342
|
+
|
343
|
+
lexbor_array_t *new_array = lexbor_array_create();
|
344
|
+
|
345
|
+
for (size_t i = 0; i < self_array->length; i++) {
|
346
|
+
bool found = false;
|
347
|
+
for (size_t j = 0; j < other_array->length; j++) {
|
348
|
+
if (self_array->list[i] == other_array->list[j]) {
|
349
|
+
found = true;
|
350
|
+
break;
|
351
|
+
}
|
352
|
+
}
|
353
|
+
if (!found) {
|
354
|
+
lexbor_array_push(new_array, self_array->list[i]);
|
355
|
+
}
|
356
|
+
}
|
357
|
+
|
358
|
+
return nl_rb_node_set_create_with_data(new_array, nl_rb_document_get(self));
|
359
|
+
}
|
360
|
+
|
303
361
|
static lxb_status_t
|
304
362
|
nl_node_set_find(VALUE self, VALUE selector, lxb_selectors_cb_f cb, void *ctx)
|
305
363
|
{
|
@@ -410,6 +468,8 @@ void Init_nl_node_set(void)
|
|
410
468
|
rb_define_method(cNokolexborNodeSet, "[]", nl_node_set_slice, -1);
|
411
469
|
rb_define_method(cNokolexborNodeSet, "push", nl_node_set_push, 1);
|
412
470
|
rb_define_method(cNokolexborNodeSet, "|", nl_node_set_union, 1);
|
471
|
+
rb_define_method(cNokolexborNodeSet, "&", nl_node_set_intersection, 1);
|
472
|
+
rb_define_method(cNokolexborNodeSet, "-", nl_node_set_difference, 1);
|
413
473
|
rb_define_method(cNokolexborNodeSet, "to_a", nl_node_set_to_array, 0);
|
414
474
|
rb_define_method(cNokolexborNodeSet, "delete", nl_node_set_delete, 1);
|
415
475
|
rb_define_method(cNokolexborNodeSet, "include?", nl_node_set_is_include, 1);
|
data/lib/nokolexbor/node_set.rb
CHANGED
@@ -54,6 +54,16 @@ module Nokolexbor
|
|
54
54
|
length == 0
|
55
55
|
end
|
56
56
|
|
57
|
+
# Insert +node+ before the first Node in this NodeSet
|
58
|
+
def before(node)
|
59
|
+
first.before(node)
|
60
|
+
end
|
61
|
+
|
62
|
+
# Insert +node+ after the last Node in this NodeSet
|
63
|
+
def after(node)
|
64
|
+
last.after(node)
|
65
|
+
end
|
66
|
+
|
57
67
|
# @return [Integer] The index of the first node in this NodeSet that is equal to +node+ or meets the given block. Returns nil if no match is found.
|
58
68
|
def index(node = nil)
|
59
69
|
if node
|
@@ -169,6 +179,75 @@ module Nokolexbor
|
|
169
179
|
self
|
170
180
|
end
|
171
181
|
|
182
|
+
# Add the class attribute +name+ to all containing nodes.
|
183
|
+
#
|
184
|
+
# @see Node#add_class
|
185
|
+
def add_class(name)
|
186
|
+
each do |el|
|
187
|
+
el.add_class(name)
|
188
|
+
end
|
189
|
+
self
|
190
|
+
end
|
191
|
+
|
192
|
+
# Append the class attribute +name+ to all containing nodes.
|
193
|
+
#
|
194
|
+
# @see Node#append_class
|
195
|
+
def append_class(name)
|
196
|
+
each do |el|
|
197
|
+
el.append_class(name)
|
198
|
+
end
|
199
|
+
self
|
200
|
+
end
|
201
|
+
|
202
|
+
# Remove the class attribute +name+ from all containing nodes.
|
203
|
+
#
|
204
|
+
# @see Node#remove_class
|
205
|
+
def remove_class(name = nil)
|
206
|
+
each do |el|
|
207
|
+
el.remove_class(name)
|
208
|
+
end
|
209
|
+
self
|
210
|
+
end
|
211
|
+
|
212
|
+
# Remove the attribute named +name+ from all containing nodes.
|
213
|
+
#
|
214
|
+
# @see Node#remove_attr
|
215
|
+
def remove_attr(name)
|
216
|
+
each { |el| el.delete(name) }
|
217
|
+
self
|
218
|
+
end
|
219
|
+
alias_method :remove_attribute, :remove_attr
|
220
|
+
|
221
|
+
# Set attributes on each Node in the NodeSet, or get an
|
222
|
+
# attribute from the first Node in the NodeSet.
|
223
|
+
#
|
224
|
+
# @example Get an attribute from the first Node in a NodeSet.
|
225
|
+
# node_set.attr("href")
|
226
|
+
#
|
227
|
+
# @example Set attributes on each node.
|
228
|
+
# node_set.attr("href" => "http://example.com", "class" => "a")
|
229
|
+
# node_set.attr("href", "http://example.com")
|
230
|
+
# node_set.attr("href") { |node| "http://example.com" }
|
231
|
+
#
|
232
|
+
# @return [NodeSet] +self+, to support chaining of calls.
|
233
|
+
def attr(key, value = nil, &block)
|
234
|
+
unless key.is_a?(Hash) || (key && (value || block))
|
235
|
+
return first&.attribute(key)
|
236
|
+
end
|
237
|
+
|
238
|
+
hash = key.is_a?(Hash) ? key : { key => value }
|
239
|
+
|
240
|
+
hash.each do |k, v|
|
241
|
+
each do |node|
|
242
|
+
node[k] = v || yield(node)
|
243
|
+
end
|
244
|
+
end
|
245
|
+
|
246
|
+
self
|
247
|
+
end
|
248
|
+
alias_method :set, :attr
|
249
|
+
alias_method :attribute, :attr
|
250
|
+
|
172
251
|
# (see Node#xpath)
|
173
252
|
def xpath(*args)
|
174
253
|
paths, handler, ns, binds = extract_params(args)
|
data/lib/nokolexbor/version.rb
CHANGED
data/patches/0004-lexbor-fix-template-clone.patch
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
diff --git a/source/lexbor/dom/interfaces/document.c b/source/lexbor/dom/interfaces/document.c
|
2
|
+
index a2153f4..8a9c69f 100755
|
3
|
+
--- a/source/lexbor/dom/interfaces/document.c
|
4
|
+
+++ b/source/lexbor/dom/interfaces/document.c
|
5
|
+
@@ -12,6 +12,7 @@
|
6
|
+
#include "lexbor/dom/interfaces/cdata_section.h"
|
7
|
+
#include "lexbor/dom/interfaces/cdata_section.h"
|
8
|
+
#include "lexbor/dom/interfaces/processing_instruction.h"
|
9
|
+
+#include "lexbor/html/interfaces/template_element.h"
|
10
|
+
|
11
|
+
|
12
|
+
lxb_dom_document_t *
|
13
|
+
@@ -449,6 +450,11 @@ lxb_dom_document_import_node(lxb_dom_document_t *doc, lxb_dom_node_t *node,
|
14
|
+
return NULL;
|
15
|
+
}
|
16
|
+
|
17
|
+
+ if (curr->local_name == LXB_TAG_TEMPLATE && curr->first_child != NULL && cnode->type == LXB_DOM_NODE_TYPE_DOCUMENT_FRAGMENT) {
|
18
|
+
+ lxb_dom_node_remove(curr->first_child);
|
19
|
+
+ lxb_html_interface_template(curr)->content = cnode;
|
20
|
+
+ }
|
21
|
+
+
|
22
|
+
lxb_dom_node_insert_child(curr, cnode);
|
23
|
+
|
24
|
+
if (node->first_child != NULL) {
|
data/patches/0005-lexbor-add-source-location-to-node.patch
ADDED
@@ -0,0 +1,65 @@
|
|
1
|
+
diff --git a/source/lexbor/dom/interfaces/node.h b/source/lexbor/dom/interfaces/node.h
|
2
|
+
index acd0c1c..f436257 100755
|
3
|
+
--- a/source/lexbor/dom/interfaces/node.h
|
4
|
+
+++ b/source/lexbor/dom/interfaces/node.h
|
5
|
+
@@ -46,6 +46,8 @@ struct lxb_dom_node {
|
6
|
+
uintptr_t prefix; /* lowercase: lalala */
|
7
|
+
uintptr_t ns; /* namespace */
|
8
|
+
|
9
|
+
+ size_t source_location; /* char offset to the source HTML */
|
10
|
+
+
|
11
|
+
lxb_dom_document_t *owner_document;
|
12
|
+
|
13
|
+
lxb_dom_node_t *next;
|
14
|
+
diff --git a/source/lexbor/html/tokenizer.c b/source/lexbor/html/tokenizer.c
|
15
|
+
index 741bced..6343c65 100755
|
16
|
+
--- a/source/lexbor/html/tokenizer.c
|
17
|
+
+++ b/source/lexbor/html/tokenizer.c
|
18
|
+
@@ -309,6 +309,7 @@ lxb_html_tokenizer_chunk(lxb_html_tokenizer_t *tkz, const lxb_char_t *data,
|
19
|
+
|
20
|
+
tkz->is_eof = false;
|
21
|
+
tkz->status = LXB_STATUS_OK;
|
22
|
+
+ tkz->first = data;
|
23
|
+
tkz->last = end;
|
24
|
+
|
25
|
+
while (data < end) {
|
26
|
+
diff --git a/source/lexbor/html/tokenizer.h b/source/lexbor/html/tokenizer.h
|
27
|
+
index ba9602f..08d0d9a 100755
|
28
|
+
--- a/source/lexbor/html/tokenizer.h
|
29
|
+
+++ b/source/lexbor/html/tokenizer.h
|
30
|
+
@@ -72,6 +72,7 @@ struct lxb_html_tokenizer {
|
31
|
+
lxb_char_t *pos;
|
32
|
+
const lxb_char_t *end;
|
33
|
+
const lxb_char_t *begin;
|
34
|
+
+ const lxb_char_t *first;
|
35
|
+
const lxb_char_t *last;
|
36
|
+
|
37
|
+
/* Entities */
|
38
|
+
diff --git a/source/lexbor/html/tree.c b/source/lexbor/html/tree.c
|
39
|
+
index 8c42990..28c97cc 100755
|
40
|
+
--- a/source/lexbor/html/tree.c
|
41
|
+
+++ b/source/lexbor/html/tree.c
|
42
|
+
@@ -484,6 +484,7 @@ lxb_html_tree_append_attributes(lxb_html_tree_t *tree,
|
43
|
+
|
44
|
+
attr->node.local_name = token_attr->name->attr_id;
|
45
|
+
attr->node.ns = ns;
|
46
|
+
+ attr->node.source_location = token_attr->name_begin - tree->tkz_ref->first;
|
47
|
+
|
48
|
+
/* Fix for adjust MathML/SVG attributes */
|
49
|
+
if (tree->before_append_attr != NULL) {
|
50
|
+
diff --git a/source/lexbor/html/tree.h b/source/lexbor/html/tree.h
|
51
|
+
index 231239d..bc0249e 100755
|
52
|
+
--- a/source/lexbor/html/tree.h
|
53
|
+
+++ b/source/lexbor/html/tree.h
|
54
|
+
@@ -266,8 +266,10 @@ lxb_inline lxb_dom_node_t *
|
55
|
+
lxb_html_tree_create_node(lxb_html_tree_t *tree,
|
56
|
+
lxb_tag_id_t tag_id, lxb_ns_id_t ns)
|
57
|
+
{
|
58
|
+
- return (lxb_dom_node_t *) lxb_html_interface_create(tree->document,
|
59
|
+
+ lxb_dom_node_t *node = (lxb_dom_node_t *) lxb_html_interface_create(tree->document,
|
60
|
+
tag_id, ns);
|
61
|
+
+ node->source_location = (tree->tkz_ref->token->begin > tree->tkz_ref->first ? tree->tkz_ref->token->begin : tree->tkz_ref->begin) - tree->tkz_ref->first;
|
62
|
+
+ return node;
|
63
|
+
}
|
64
|
+
|
65
|
+
lxb_inline bool
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: nokolexbor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.7
|
4
|
+
version: 0.4.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Yicheng Zhou
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-04-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake-compiler
|
@@ -130,6 +130,8 @@ files:
|
|
130
130
|
- patches/0001-lexbor-support-text-pseudo-element.patch
|
131
131
|
- patches/0002-lexbor-match-id-class-case-sensitive.patch
|
132
132
|
- patches/0003-lexbor-attach-template-content-to-self.patch
|
133
|
+
- patches/0004-lexbor-fix-template-clone.patch
|
134
|
+
- patches/0005-lexbor-add-source-location-to-node.patch
|
133
135
|
- vendor/lexbor/CMakeLists.txt
|
134
136
|
- vendor/lexbor/config.cmake
|
135
137
|
- vendor/lexbor/feature.cmake
|