nokolexbor 0.2.0 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/nokolexbor/nl_document.c +8 -1
- data/ext/nokolexbor/nl_node.c +85 -33
- data/ext/nokolexbor/xml_tree.c +13 -1
- data/lib/nokolexbor/node.rb +20 -0
- data/lib/nokolexbor/node_set.rb +6 -4
- data/lib/nokolexbor/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 17230669785aa3a0bd2703cee73e72ccced0600d12fc52228125d4d6579b8290
|
4
|
+
data.tar.gz: 68fb22f67bcd9cfcad78eb3ecb2f860e71d72e78951987c3fae6ca8b03203c63
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 28b1fafc6889a6d020f7f7275ee9e668808334c6106fd79c68f514d233983df38169521d407726d6cc680d8e9f589b3914398bfdfc6a57a24fdda9a910741505
|
7
|
+
data.tar.gz: fd12b7be8b5f159680b652d2092144baab2fdd23e8b13c6d22cea69b6a9920019336cf07a5790148ff6fd1c7c0392d55989c2218abe876599a468397551dec9c
|
@@ -41,11 +41,18 @@ nl_document_parse(VALUE self, VALUE rb_html)
|
|
41
41
|
nl_raise_lexbor_error(status);
|
42
42
|
}
|
43
43
|
|
44
|
-
return TypedData_Wrap_Struct(cNokolexborDocument, &nl_document_type, document);
|
44
|
+
return TypedData_Wrap_Struct(cNokolexborDocument, &nl_document_type, &document->dom_document);
|
45
|
+
}
|
46
|
+
|
47
|
+
static VALUE
|
48
|
+
nl_document_new(VALUE self)
|
49
|
+
{
|
50
|
+
return nl_document_parse(self, rb_str_new("", 0));
|
45
51
|
}
|
46
52
|
|
47
53
|
void Init_nl_document(void)
|
48
54
|
{
|
49
55
|
cNokolexborDocument = rb_define_class_under(mNokolexbor, "Document", cNokolexborNode);
|
56
|
+
rb_define_singleton_method(cNokolexborDocument, "new", nl_document_new, 0);
|
50
57
|
rb_define_singleton_method(cNokolexborDocument, "parse", nl_document_parse, 1);
|
51
58
|
}
|
data/ext/nokolexbor/nl_node.c
CHANGED
@@ -52,6 +52,40 @@ nl_rb_node_unwrap(VALUE rb_node)
|
|
52
52
|
return node;
|
53
53
|
}
|
54
54
|
|
55
|
+
static VALUE
|
56
|
+
nl_node_new(int argc, VALUE *argv, VALUE klass)
|
57
|
+
{
|
58
|
+
lxb_dom_document_t *document;
|
59
|
+
lxb_dom_node_t *node;
|
60
|
+
VALUE rb_name;
|
61
|
+
VALUE rb_document;
|
62
|
+
VALUE rest;
|
63
|
+
|
64
|
+
rb_scan_args(argc, argv, "2*", &rb_name, &rb_document, &rest);
|
65
|
+
|
66
|
+
if (!rb_obj_is_kind_of(rb_document, cNokolexborDocument))
|
67
|
+
{
|
68
|
+
rb_raise(rb_eArgError, "Document must be a Nokolexbor::Document");
|
69
|
+
}
|
70
|
+
|
71
|
+
TypedData_Get_Struct(rb_document, lxb_dom_document_t, &nl_document_type, document);
|
72
|
+
|
73
|
+
lxb_dom_element_t *element = lxb_dom_document_create_element(document, StringValueCStr(rb_name), RSTRING_LEN(rb_name), NULL);
|
74
|
+
if (element == NULL)
|
75
|
+
{
|
76
|
+
rb_raise(rb_eRuntimeError, "Error creating element");
|
77
|
+
}
|
78
|
+
|
79
|
+
VALUE rb_node = nl_rb_node_create(&element->node, rb_document);
|
80
|
+
|
81
|
+
if (rb_block_given_p())
|
82
|
+
{
|
83
|
+
rb_yield(rb_node);
|
84
|
+
}
|
85
|
+
|
86
|
+
return rb_node;
|
87
|
+
}
|
88
|
+
|
55
89
|
static VALUE
|
56
90
|
nl_node_content(VALUE self)
|
57
91
|
{
|
@@ -251,7 +285,7 @@ mark_node_orders(lxb_dom_node_t *root)
|
|
251
285
|
void sort_nodes_if_necessary(VALUE selector, lxb_dom_document_t *doc, lexbor_array_t *array)
|
252
286
|
{
|
253
287
|
// No need to sort if there's only one selector; the results are naturally in document traversal order
|
254
|
-
if (
|
288
|
+
if (strstr(RSTRING_PTR(selector), ",") != NULL)
|
255
289
|
{
|
256
290
|
int need_order = 0;
|
257
291
|
// Check if we have already marked orders, note that
|
@@ -574,6 +608,39 @@ nl_node_name(VALUE self)
|
|
574
608
|
return rb_utf8_str_new(name, len);
|
575
609
|
}
|
576
610
|
|
611
|
+
static lxb_dom_node_t *
|
612
|
+
nl_node_parse_fragment(lxb_html_document_t *doc, lxb_char_t *html, size_t size)
|
613
|
+
{
|
614
|
+
size_t tag_name_len;
|
615
|
+
lxb_char_t *tag_name = lxb_tag_name_by_id(lxb_html_document_tags(doc), LXB_TAG__UNDEF, &tag_name_len);
|
616
|
+
if (tag_name == NULL)
|
617
|
+
{
|
618
|
+
rb_raise(rb_eRuntimeError, "Error getting tag name");
|
619
|
+
}
|
620
|
+
lxb_dom_element_t *element = lxb_dom_document_create_element(doc, tag_name, tag_name_len, NULL);
|
621
|
+
if (element == NULL)
|
622
|
+
{
|
623
|
+
rb_raise(rb_eRuntimeError, "Error creating element");
|
624
|
+
}
|
625
|
+
lxb_dom_node_t *frag_root = lxb_html_document_parse_fragment(doc, element, html, size);
|
626
|
+
if (frag_root == NULL)
|
627
|
+
{
|
628
|
+
rb_raise(rb_eArgError, "Error parsing HTML");
|
629
|
+
}
|
630
|
+
return frag_root;
|
631
|
+
}
|
632
|
+
|
633
|
+
static VALUE
|
634
|
+
nl_node_fragment(VALUE self, VALUE html)
|
635
|
+
{
|
636
|
+
Check_Type(html, T_STRING);
|
637
|
+
lxb_dom_node_t *node = nl_rb_node_unwrap(self);
|
638
|
+
lxb_dom_document_t *doc = node->owner_document;
|
639
|
+
|
640
|
+
lxb_dom_node_t *frag_root = nl_node_parse_fragment(doc, RSTRING_PTR(html), RSTRING_LEN(html));
|
641
|
+
return nl_rb_node_create(frag_root, nl_rb_document_get(self));
|
642
|
+
}
|
643
|
+
|
577
644
|
static VALUE
|
578
645
|
nl_node_add_sibling(VALUE self, VALUE next_or_previous, VALUE new)
|
579
646
|
{
|
@@ -596,22 +663,7 @@ nl_node_add_sibling(VALUE self, VALUE next_or_previous, VALUE new)
|
|
596
663
|
|
597
664
|
if (TYPE(new) == T_STRING)
|
598
665
|
{
|
599
|
-
|
600
|
-
lxb_char_t *tag_name = lxb_tag_name_by_id(lxb_html_document_tags(doc), LXB_TAG__UNDEF, &tag_name_len);
|
601
|
-
if (tag_name == NULL)
|
602
|
-
{
|
603
|
-
rb_raise(rb_eRuntimeError, "Error getting tag name");
|
604
|
-
}
|
605
|
-
lxb_dom_element_t *element = lxb_dom_document_create_element(doc, tag_name, tag_name_len, NULL);
|
606
|
-
if (element == NULL)
|
607
|
-
{
|
608
|
-
rb_raise(rb_eRuntimeError, "Error creating element");
|
609
|
-
}
|
610
|
-
lxb_dom_node_t *frag_root = lxb_html_document_parse_fragment(doc, element, RSTRING_PTR(new), RSTRING_LEN(new));
|
611
|
-
if (frag_root == NULL)
|
612
|
-
{
|
613
|
-
rb_raise(rb_eArgError, "Error parsing HTML");
|
614
|
-
}
|
666
|
+
lxb_dom_node_t *frag_root = nl_node_parse_fragment(doc, RSTRING_PTR(new), RSTRING_LEN(new));
|
615
667
|
|
616
668
|
while (frag_root->first_child != NULL)
|
617
669
|
{
|
@@ -624,6 +676,7 @@ nl_node_add_sibling(VALUE self, VALUE next_or_previous, VALUE new)
|
|
624
676
|
else if (rb_obj_class(new) == cNokolexborNode)
|
625
677
|
{
|
626
678
|
lxb_dom_node_t *node_new = nl_rb_node_unwrap(new);
|
679
|
+
lxb_dom_node_remove(node_new);
|
627
680
|
insert_after ? lxb_dom_node_insert_after(node, node_new) : lxb_dom_node_insert_before(node, node_new);
|
628
681
|
}
|
629
682
|
else
|
@@ -641,22 +694,7 @@ nl_node_add_child(VALUE self, VALUE new)
|
|
641
694
|
|
642
695
|
if (TYPE(new) == T_STRING)
|
643
696
|
{
|
644
|
-
|
645
|
-
lxb_char_t *tag_name = lxb_tag_name_by_id(lxb_html_document_tags(doc), LXB_TAG__UNDEF, &tag_name_len);
|
646
|
-
if (tag_name == NULL)
|
647
|
-
{
|
648
|
-
rb_raise(rb_eRuntimeError, "Error getting tag name");
|
649
|
-
}
|
650
|
-
lxb_dom_element_t *element = lxb_dom_document_create_element(doc, tag_name, tag_name_len, NULL);
|
651
|
-
if (element == NULL)
|
652
|
-
{
|
653
|
-
rb_raise(rb_eRuntimeError, "Error creating element");
|
654
|
-
}
|
655
|
-
lxb_dom_node_t *frag_root = lxb_html_document_parse_fragment(doc, element, RSTRING_PTR(new), RSTRING_LEN(new));
|
656
|
-
if (frag_root == NULL)
|
657
|
-
{
|
658
|
-
rb_raise(rb_eArgError, "Error parsing HTML");
|
659
|
-
}
|
697
|
+
lxb_dom_node_t *frag_root = nl_node_parse_fragment(doc, RSTRING_PTR(new), RSTRING_LEN(new));
|
660
698
|
|
661
699
|
while (frag_root->first_child != NULL)
|
662
700
|
{
|
@@ -747,9 +785,20 @@ nl_node_last_element_child(VALUE self)
|
|
747
785
|
return Qnil;
|
748
786
|
}
|
749
787
|
|
788
|
+
static VALUE
|
789
|
+
nl_node_clone(VALUE self)
|
790
|
+
{
|
791
|
+
lxb_dom_node_t *node = nl_rb_node_unwrap(self);
|
792
|
+
lxb_dom_node_t *clone = lxb_dom_node_clone(node, 1);
|
793
|
+
return nl_rb_node_create(clone, nl_rb_document_get(self));
|
794
|
+
}
|
795
|
+
|
750
796
|
void Init_nl_node(void)
|
751
797
|
{
|
752
798
|
cNokolexborNode = rb_define_class_under(mNokolexbor, "Node", rb_cObject);
|
799
|
+
rb_undef_alloc_func(cNokolexborNode);
|
800
|
+
|
801
|
+
rb_define_singleton_method(cNokolexborNode, "new", nl_node_new, -1);
|
753
802
|
rb_define_method(cNokolexborNode, "content", nl_node_content, 0);
|
754
803
|
rb_define_method(cNokolexborNode, "[]", nl_node_get_attr, 1);
|
755
804
|
rb_define_method(cNokolexborNode, "[]=", nl_node_set_attr, 2);
|
@@ -773,11 +822,13 @@ void Init_nl_node(void)
|
|
773
822
|
rb_define_method(cNokolexborNode, "destroy", nl_node_destroy, 0);
|
774
823
|
rb_define_method(cNokolexborNode, "attrs", nl_node_attrs, 0);
|
775
824
|
rb_define_method(cNokolexborNode, "name", nl_node_name, 0);
|
825
|
+
rb_define_method(cNokolexborNode, "fragment", nl_node_fragment, 1);
|
776
826
|
rb_define_method(cNokolexborNode, "add_sibling", nl_node_add_sibling, 2);
|
777
827
|
rb_define_method(cNokolexborNode, "add_child", nl_node_add_child, 1);
|
778
828
|
rb_define_method(cNokolexborNode, "node_type", nl_node_get_type, 0);
|
779
829
|
rb_define_method(cNokolexborNode, "first_element_child", nl_node_first_element_child, 0);
|
780
830
|
rb_define_method(cNokolexborNode, "last_element_child", nl_node_last_element_child, 0);
|
831
|
+
rb_define_method(cNokolexborNode, "clone", nl_node_clone, 0);
|
781
832
|
|
782
833
|
rb_define_alias(cNokolexborNode, "attr", "[]");
|
783
834
|
rb_define_alias(cNokolexborNode, "set_attr", "[]=");
|
@@ -787,4 +838,5 @@ void Init_nl_node(void)
|
|
787
838
|
rb_define_alias(cNokolexborNode, "to_html", "outer_html");
|
788
839
|
rb_define_alias(cNokolexborNode, "to_s", "outer_html");
|
789
840
|
rb_define_alias(cNokolexborNode, "type", "node_type");
|
841
|
+
rb_define_alias(cNokolexborNode, "dup", "clone");
|
790
842
|
}
|
data/ext/nokolexbor/xml_tree.c
CHANGED
@@ -131,7 +131,7 @@ xmlNodeGetContent(const lxb_dom_node_t *cur)
|
|
131
131
|
* Get the root element of the document (doc->children is a list
|
132
132
|
* containing possibly comments, PIs, etc ...).
|
133
133
|
*
|
134
|
-
* Returns the #
|
134
|
+
* Returns the #lxb_dom_node_t_ptr for the root or NULL
|
135
135
|
*/
|
136
136
|
lxb_dom_node_t_ptr
|
137
137
|
xmlDocGetRootElement(const lxb_dom_document_t *doc) {
|
@@ -145,4 +145,16 @@ xmlDocGetRootElement(const lxb_dom_document_t *doc) {
|
|
145
145
|
ret = ret->next;
|
146
146
|
}
|
147
147
|
return(ret);
|
148
|
+
}
|
149
|
+
|
150
|
+
/**
|
151
|
+
* xmlFreeNodeList:
|
152
|
+
* @cur: the first node in the list
|
153
|
+
*
|
154
|
+
* Free a node and all its siblings, this is a recursive behaviour, all
|
155
|
+
* the children are freed too.
|
156
|
+
*/
|
157
|
+
void
|
158
|
+
xmlFreeNodeList(lxb_dom_node_t_ptr cur) {
|
159
|
+
// Should never be called
|
148
160
|
}
|
data/lib/nokolexbor/node.rb
CHANGED
@@ -67,6 +67,26 @@ module Nokolexbor
|
|
67
67
|
end)
|
68
68
|
end
|
69
69
|
|
70
|
+
def wrap(node)
|
71
|
+
case node
|
72
|
+
when String
|
73
|
+
new_parent = fragment(node).child
|
74
|
+
when Node
|
75
|
+
new_parent = node.dup
|
76
|
+
else
|
77
|
+
raise ArgumentError, "Requires a String or Node argument, and cannot accept a #{node.class}"
|
78
|
+
end
|
79
|
+
|
80
|
+
if parent
|
81
|
+
add_sibling(:next, new_parent)
|
82
|
+
else
|
83
|
+
new_parent.remove
|
84
|
+
end
|
85
|
+
new_parent.add_child(self)
|
86
|
+
|
87
|
+
self
|
88
|
+
end
|
89
|
+
|
70
90
|
def matches?(selector)
|
71
91
|
ancestors.last.css(selector).any? { |node| node == self }
|
72
92
|
end
|
data/lib/nokolexbor/node_set.rb
CHANGED
@@ -4,10 +4,12 @@ module Nokolexbor
|
|
4
4
|
class NodeSet < Node
|
5
5
|
include Enumerable
|
6
6
|
|
7
|
-
def
|
8
|
-
|
9
|
-
|
10
|
-
|
7
|
+
def self.new(document, list = [])
|
8
|
+
obj = allocate
|
9
|
+
obj.instance_variable_set(:@document, document)
|
10
|
+
list.each { |x| obj << x }
|
11
|
+
yield obj if block_given?
|
12
|
+
obj
|
11
13
|
end
|
12
14
|
|
13
15
|
def each
|
data/lib/nokolexbor/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: nokolexbor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.2.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Yicheng Zhou
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-12-
|
11
|
+
date: 2022-12-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake-compiler
|