nokolexbor 0.2.0 → 0.2.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/ext/nokolexbor/nl_document.c +8 -1
- data/ext/nokolexbor/nl_node.c +85 -33
- data/ext/nokolexbor/xml_tree.c +13 -1
- data/lib/nokolexbor/node.rb +20 -0
- data/lib/nokolexbor/node_set.rb +6 -4
- data/lib/nokolexbor/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 17230669785aa3a0bd2703cee73e72ccced0600d12fc52228125d4d6579b8290
|
4
|
+
data.tar.gz: 68fb22f67bcd9cfcad78eb3ecb2f860e71d72e78951987c3fae6ca8b03203c63
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 28b1fafc6889a6d020f7f7275ee9e668808334c6106fd79c68f514d233983df38169521d407726d6cc680d8e9f589b3914398bfdfc6a57a24fdda9a910741505
|
7
|
+
data.tar.gz: fd12b7be8b5f159680b652d2092144baab2fdd23e8b13c6d22cea69b6a9920019336cf07a5790148ff6fd1c7c0392d55989c2218abe876599a468397551dec9c
|
@@ -41,11 +41,18 @@ nl_document_parse(VALUE self, VALUE rb_html)
|
|
41
41
|
nl_raise_lexbor_error(status);
|
42
42
|
}
|
43
43
|
|
44
|
-
return TypedData_Wrap_Struct(cNokolexborDocument, &nl_document_type, document);
|
44
|
+
return TypedData_Wrap_Struct(cNokolexborDocument, &nl_document_type, &document->dom_document);
|
45
|
+
}
|
46
|
+
|
47
|
+
static VALUE
|
48
|
+
nl_document_new(VALUE self)
|
49
|
+
{
|
50
|
+
return nl_document_parse(self, rb_str_new("", 0));
|
45
51
|
}
|
46
52
|
|
47
53
|
void Init_nl_document(void)
|
48
54
|
{
|
49
55
|
cNokolexborDocument = rb_define_class_under(mNokolexbor, "Document", cNokolexborNode);
|
56
|
+
rb_define_singleton_method(cNokolexborDocument, "new", nl_document_new, 0);
|
50
57
|
rb_define_singleton_method(cNokolexborDocument, "parse", nl_document_parse, 1);
|
51
58
|
}
|
data/ext/nokolexbor/nl_node.c
CHANGED
@@ -52,6 +52,40 @@ nl_rb_node_unwrap(VALUE rb_node)
|
|
52
52
|
return node;
|
53
53
|
}
|
54
54
|
|
55
|
+
static VALUE
|
56
|
+
nl_node_new(int argc, VALUE *argv, VALUE klass)
|
57
|
+
{
|
58
|
+
lxb_dom_document_t *document;
|
59
|
+
lxb_dom_node_t *node;
|
60
|
+
VALUE rb_name;
|
61
|
+
VALUE rb_document;
|
62
|
+
VALUE rest;
|
63
|
+
|
64
|
+
rb_scan_args(argc, argv, "2*", &rb_name, &rb_document, &rest);
|
65
|
+
|
66
|
+
if (!rb_obj_is_kind_of(rb_document, cNokolexborDocument))
|
67
|
+
{
|
68
|
+
rb_raise(rb_eArgError, "Document must be a Nokolexbor::Document");
|
69
|
+
}
|
70
|
+
|
71
|
+
TypedData_Get_Struct(rb_document, lxb_dom_document_t, &nl_document_type, document);
|
72
|
+
|
73
|
+
lxb_dom_element_t *element = lxb_dom_document_create_element(document, StringValueCStr(rb_name), RSTRING_LEN(rb_name), NULL);
|
74
|
+
if (element == NULL)
|
75
|
+
{
|
76
|
+
rb_raise(rb_eRuntimeError, "Error creating element");
|
77
|
+
}
|
78
|
+
|
79
|
+
VALUE rb_node = nl_rb_node_create(&element->node, rb_document);
|
80
|
+
|
81
|
+
if (rb_block_given_p())
|
82
|
+
{
|
83
|
+
rb_yield(rb_node);
|
84
|
+
}
|
85
|
+
|
86
|
+
return rb_node;
|
87
|
+
}
|
88
|
+
|
55
89
|
static VALUE
|
56
90
|
nl_node_content(VALUE self)
|
57
91
|
{
|
@@ -251,7 +285,7 @@ mark_node_orders(lxb_dom_node_t *root)
|
|
251
285
|
void sort_nodes_if_necessary(VALUE selector, lxb_dom_document_t *doc, lexbor_array_t *array)
|
252
286
|
{
|
253
287
|
// No need to sort if there's only one selector; the results are naturally in document traversal order
|
254
|
-
if (
|
288
|
+
if (strstr(RSTRING_PTR(selector), ",") != NULL)
|
255
289
|
{
|
256
290
|
int need_order = 0;
|
257
291
|
// Check if we have already marked orders, note that
|
@@ -574,6 +608,39 @@ nl_node_name(VALUE self)
|
|
574
608
|
return rb_utf8_str_new(name, len);
|
575
609
|
}
|
576
610
|
|
611
|
+
static lxb_dom_node_t *
|
612
|
+
nl_node_parse_fragment(lxb_html_document_t *doc, lxb_char_t *html, size_t size)
|
613
|
+
{
|
614
|
+
size_t tag_name_len;
|
615
|
+
lxb_char_t *tag_name = lxb_tag_name_by_id(lxb_html_document_tags(doc), LXB_TAG__UNDEF, &tag_name_len);
|
616
|
+
if (tag_name == NULL)
|
617
|
+
{
|
618
|
+
rb_raise(rb_eRuntimeError, "Error getting tag name");
|
619
|
+
}
|
620
|
+
lxb_dom_element_t *element = lxb_dom_document_create_element(doc, tag_name, tag_name_len, NULL);
|
621
|
+
if (element == NULL)
|
622
|
+
{
|
623
|
+
rb_raise(rb_eRuntimeError, "Error creating element");
|
624
|
+
}
|
625
|
+
lxb_dom_node_t *frag_root = lxb_html_document_parse_fragment(doc, element, html, size);
|
626
|
+
if (frag_root == NULL)
|
627
|
+
{
|
628
|
+
rb_raise(rb_eArgError, "Error parsing HTML");
|
629
|
+
}
|
630
|
+
return frag_root;
|
631
|
+
}
|
632
|
+
|
633
|
+
static VALUE
|
634
|
+
nl_node_fragment(VALUE self, VALUE html)
|
635
|
+
{
|
636
|
+
Check_Type(html, T_STRING);
|
637
|
+
lxb_dom_node_t *node = nl_rb_node_unwrap(self);
|
638
|
+
lxb_dom_document_t *doc = node->owner_document;
|
639
|
+
|
640
|
+
lxb_dom_node_t *frag_root = nl_node_parse_fragment(doc, RSTRING_PTR(html), RSTRING_LEN(html));
|
641
|
+
return nl_rb_node_create(frag_root, nl_rb_document_get(self));
|
642
|
+
}
|
643
|
+
|
577
644
|
static VALUE
|
578
645
|
nl_node_add_sibling(VALUE self, VALUE next_or_previous, VALUE new)
|
579
646
|
{
|
@@ -596,22 +663,7 @@ nl_node_add_sibling(VALUE self, VALUE next_or_previous, VALUE new)
|
|
596
663
|
|
597
664
|
if (TYPE(new) == T_STRING)
|
598
665
|
{
|
599
|
-
|
600
|
-
lxb_char_t *tag_name = lxb_tag_name_by_id(lxb_html_document_tags(doc), LXB_TAG__UNDEF, &tag_name_len);
|
601
|
-
if (tag_name == NULL)
|
602
|
-
{
|
603
|
-
rb_raise(rb_eRuntimeError, "Error getting tag name");
|
604
|
-
}
|
605
|
-
lxb_dom_element_t *element = lxb_dom_document_create_element(doc, tag_name, tag_name_len, NULL);
|
606
|
-
if (element == NULL)
|
607
|
-
{
|
608
|
-
rb_raise(rb_eRuntimeError, "Error creating element");
|
609
|
-
}
|
610
|
-
lxb_dom_node_t *frag_root = lxb_html_document_parse_fragment(doc, element, RSTRING_PTR(new), RSTRING_LEN(new));
|
611
|
-
if (frag_root == NULL)
|
612
|
-
{
|
613
|
-
rb_raise(rb_eArgError, "Error parsing HTML");
|
614
|
-
}
|
666
|
+
lxb_dom_node_t *frag_root = nl_node_parse_fragment(doc, RSTRING_PTR(new), RSTRING_LEN(new));
|
615
667
|
|
616
668
|
while (frag_root->first_child != NULL)
|
617
669
|
{
|
@@ -624,6 +676,7 @@ nl_node_add_sibling(VALUE self, VALUE next_or_previous, VALUE new)
|
|
624
676
|
else if (rb_obj_class(new) == cNokolexborNode)
|
625
677
|
{
|
626
678
|
lxb_dom_node_t *node_new = nl_rb_node_unwrap(new);
|
679
|
+
lxb_dom_node_remove(node_new);
|
627
680
|
insert_after ? lxb_dom_node_insert_after(node, node_new) : lxb_dom_node_insert_before(node, node_new);
|
628
681
|
}
|
629
682
|
else
|
@@ -641,22 +694,7 @@ nl_node_add_child(VALUE self, VALUE new)
|
|
641
694
|
|
642
695
|
if (TYPE(new) == T_STRING)
|
643
696
|
{
|
644
|
-
|
645
|
-
lxb_char_t *tag_name = lxb_tag_name_by_id(lxb_html_document_tags(doc), LXB_TAG__UNDEF, &tag_name_len);
|
646
|
-
if (tag_name == NULL)
|
647
|
-
{
|
648
|
-
rb_raise(rb_eRuntimeError, "Error getting tag name");
|
649
|
-
}
|
650
|
-
lxb_dom_element_t *element = lxb_dom_document_create_element(doc, tag_name, tag_name_len, NULL);
|
651
|
-
if (element == NULL)
|
652
|
-
{
|
653
|
-
rb_raise(rb_eRuntimeError, "Error creating element");
|
654
|
-
}
|
655
|
-
lxb_dom_node_t *frag_root = lxb_html_document_parse_fragment(doc, element, RSTRING_PTR(new), RSTRING_LEN(new));
|
656
|
-
if (frag_root == NULL)
|
657
|
-
{
|
658
|
-
rb_raise(rb_eArgError, "Error parsing HTML");
|
659
|
-
}
|
697
|
+
lxb_dom_node_t *frag_root = nl_node_parse_fragment(doc, RSTRING_PTR(new), RSTRING_LEN(new));
|
660
698
|
|
661
699
|
while (frag_root->first_child != NULL)
|
662
700
|
{
|
@@ -747,9 +785,20 @@ nl_node_last_element_child(VALUE self)
|
|
747
785
|
return Qnil;
|
748
786
|
}
|
749
787
|
|
788
|
+
static VALUE
|
789
|
+
nl_node_clone(VALUE self)
|
790
|
+
{
|
791
|
+
lxb_dom_node_t *node = nl_rb_node_unwrap(self);
|
792
|
+
lxb_dom_node_t *clone = lxb_dom_node_clone(node, 1);
|
793
|
+
return nl_rb_node_create(clone, nl_rb_document_get(self));
|
794
|
+
}
|
795
|
+
|
750
796
|
void Init_nl_node(void)
|
751
797
|
{
|
752
798
|
cNokolexborNode = rb_define_class_under(mNokolexbor, "Node", rb_cObject);
|
799
|
+
rb_undef_alloc_func(cNokolexborNode);
|
800
|
+
|
801
|
+
rb_define_singleton_method(cNokolexborNode, "new", nl_node_new, -1);
|
753
802
|
rb_define_method(cNokolexborNode, "content", nl_node_content, 0);
|
754
803
|
rb_define_method(cNokolexborNode, "[]", nl_node_get_attr, 1);
|
755
804
|
rb_define_method(cNokolexborNode, "[]=", nl_node_set_attr, 2);
|
@@ -773,11 +822,13 @@ void Init_nl_node(void)
|
|
773
822
|
rb_define_method(cNokolexborNode, "destroy", nl_node_destroy, 0);
|
774
823
|
rb_define_method(cNokolexborNode, "attrs", nl_node_attrs, 0);
|
775
824
|
rb_define_method(cNokolexborNode, "name", nl_node_name, 0);
|
825
|
+
rb_define_method(cNokolexborNode, "fragment", nl_node_fragment, 1);
|
776
826
|
rb_define_method(cNokolexborNode, "add_sibling", nl_node_add_sibling, 2);
|
777
827
|
rb_define_method(cNokolexborNode, "add_child", nl_node_add_child, 1);
|
778
828
|
rb_define_method(cNokolexborNode, "node_type", nl_node_get_type, 0);
|
779
829
|
rb_define_method(cNokolexborNode, "first_element_child", nl_node_first_element_child, 0);
|
780
830
|
rb_define_method(cNokolexborNode, "last_element_child", nl_node_last_element_child, 0);
|
831
|
+
rb_define_method(cNokolexborNode, "clone", nl_node_clone, 0);
|
781
832
|
|
782
833
|
rb_define_alias(cNokolexborNode, "attr", "[]");
|
783
834
|
rb_define_alias(cNokolexborNode, "set_attr", "[]=");
|
@@ -787,4 +838,5 @@ void Init_nl_node(void)
|
|
787
838
|
rb_define_alias(cNokolexborNode, "to_html", "outer_html");
|
788
839
|
rb_define_alias(cNokolexborNode, "to_s", "outer_html");
|
789
840
|
rb_define_alias(cNokolexborNode, "type", "node_type");
|
841
|
+
rb_define_alias(cNokolexborNode, "dup", "clone");
|
790
842
|
}
|
data/ext/nokolexbor/xml_tree.c
CHANGED
@@ -131,7 +131,7 @@ xmlNodeGetContent(const lxb_dom_node_t *cur)
|
|
131
131
|
* Get the root element of the document (doc->children is a list
|
132
132
|
* containing possibly comments, PIs, etc ...).
|
133
133
|
*
|
134
|
-
* Returns the #
|
134
|
+
* Returns the #lxb_dom_node_t_ptr for the root or NULL
|
135
135
|
*/
|
136
136
|
lxb_dom_node_t_ptr
|
137
137
|
xmlDocGetRootElement(const lxb_dom_document_t *doc) {
|
@@ -145,4 +145,16 @@ xmlDocGetRootElement(const lxb_dom_document_t *doc) {
|
|
145
145
|
ret = ret->next;
|
146
146
|
}
|
147
147
|
return(ret);
|
148
|
+
}
|
149
|
+
|
150
|
+
/**
|
151
|
+
* xmlFreeNodeList:
|
152
|
+
* @cur: the first node in the list
|
153
|
+
*
|
154
|
+
* Free a node and all its siblings, this is a recursive behaviour, all
|
155
|
+
* the children are freed too.
|
156
|
+
*/
|
157
|
+
void
|
158
|
+
xmlFreeNodeList(lxb_dom_node_t_ptr cur) {
|
159
|
+
// Should never be called
|
148
160
|
}
|
data/lib/nokolexbor/node.rb
CHANGED
@@ -67,6 +67,26 @@ module Nokolexbor
|
|
67
67
|
end)
|
68
68
|
end
|
69
69
|
|
70
|
+
# Wrap this node inside +node+.
#
# +node+ may be a String of markup (the first child of the parsed fragment
# becomes the wrapper) or a Node (a copy of it becomes the wrapper). Any
# other argument raises ArgumentError.
#
# When this node has a parent, the wrapper is inserted as its next sibling;
# otherwise the wrapper is detached. This node is then added as a child of
# the wrapper. Returns self.
def wrap(node)
  wrapper =
    case node
    when String then fragment(node).child
    when Node then node.dup
    else
      raise ArgumentError, "Requires a String or Node argument, and cannot accept a #{node.class}"
    end

  parent ? add_sibling(:next, wrapper) : wrapper.remove
  wrapper.add_child(self)

  self
end
|
89
|
+
|
70
90
|
def matches?(selector)
|
71
91
|
ancestors.last.css(selector).any? { |node| node == self }
|
72
92
|
end
|
data/lib/nokolexbor/node_set.rb
CHANGED
@@ -4,10 +4,12 @@ module Nokolexbor
|
|
4
4
|
class NodeSet < Node
|
5
5
|
include Enumerable
|
6
6
|
|
7
|
-
def
|
8
|
-
|
9
|
-
|
10
|
-
|
7
|
+
# Build a NodeSet bound to +document+.
#
# The instance is created with +allocate+, its @document is set, and every
# element of +list+ is appended with <<. When a block is given, the new set
# is yielded to it. Returns the new set.
def self.new(document, list = [])
  allocate.tap do |set|
    set.instance_variable_set(:@document, document)
    list.each { |item| set << item }
    yield set if block_given?
  end
end
|
12
14
|
|
13
15
|
def each
|
data/lib/nokolexbor/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: nokolexbor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Yicheng Zhou
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-12-
|
11
|
+
date: 2022-12-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake-compiler
|