nokolexbor 0.2.0 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 22cb30b169c61cb01d5a5b9d81a036ab3b47a99e34ff83f480bf1bd5b5a5eb0c
4
- data.tar.gz: 12a185344aebf6e69088f65209c8750f9f946c5d62127e2f4c90ce9873a7368c
3
+ metadata.gz: 17230669785aa3a0bd2703cee73e72ccced0600d12fc52228125d4d6579b8290
4
+ data.tar.gz: 68fb22f67bcd9cfcad78eb3ecb2f860e71d72e78951987c3fae6ca8b03203c63
5
5
  SHA512:
6
- metadata.gz: 663443bf82ab4061e2c15fdf00de6c762786125731346ed5afac99fbaabe26845e850510181e4d280294c9da9e36973a3cd3bc68a06d154b7563e1fe5000956b
7
- data.tar.gz: a02a164cde0ce3209f518dd01d3d805a7c7010ef86e77fd398e136eb9d6ae60e1d4c913c7c0795fbde5f5ed57dffd824a9553a8df04122ca047ae11f84ea1df4
6
+ metadata.gz: 28b1fafc6889a6d020f7f7275ee9e668808334c6106fd79c68f514d233983df38169521d407726d6cc680d8e9f589b3914398bfdfc6a57a24fdda9a910741505
7
+ data.tar.gz: fd12b7be8b5f159680b652d2092144baab2fdd23e8b13c6d22cea69b6a9920019336cf07a5790148ff6fd1c7c0392d55989c2218abe876599a468397551dec9c
@@ -41,11 +41,18 @@ nl_document_parse(VALUE self, VALUE rb_html)
41
41
  nl_raise_lexbor_error(status);
42
42
  }
43
43
 
44
- return TypedData_Wrap_Struct(cNokolexborDocument, &nl_document_type, document);
44
+ return TypedData_Wrap_Struct(cNokolexborDocument, &nl_document_type, &document->dom_document);
45
+ }
46
+
47
+ static VALUE
48
+ nl_document_new(VALUE self)
49
+ {
50
+ return nl_document_parse(self, rb_str_new("", 0));
45
51
  }
46
52
 
47
53
  void Init_nl_document(void)
48
54
  {
49
55
  cNokolexborDocument = rb_define_class_under(mNokolexbor, "Document", cNokolexborNode);
56
+ rb_define_singleton_method(cNokolexborDocument, "new", nl_document_new, 0);
50
57
  rb_define_singleton_method(cNokolexborDocument, "parse", nl_document_parse, 1);
51
58
  }
@@ -52,6 +52,40 @@ nl_rb_node_unwrap(VALUE rb_node)
52
52
  return node;
53
53
  }
54
54
 
55
+ static VALUE
56
+ nl_node_new(int argc, VALUE *argv, VALUE klass)
57
+ {
58
+ lxb_dom_document_t *document;
59
+ lxb_dom_node_t *node;
60
+ VALUE rb_name;
61
+ VALUE rb_document;
62
+ VALUE rest;
63
+
64
+ rb_scan_args(argc, argv, "2*", &rb_name, &rb_document, &rest);
65
+
66
+ if (!rb_obj_is_kind_of(rb_document, cNokolexborDocument))
67
+ {
68
+ rb_raise(rb_eArgError, "Document must be a Nokolexbor::Document");
69
+ }
70
+
71
+ TypedData_Get_Struct(rb_document, lxb_dom_document_t, &nl_document_type, document);
72
+
73
+ lxb_dom_element_t *element = lxb_dom_document_create_element(document, StringValueCStr(rb_name), RSTRING_LEN(rb_name), NULL);
74
+ if (element == NULL)
75
+ {
76
+ rb_raise(rb_eRuntimeError, "Error creating element");
77
+ }
78
+
79
+ VALUE rb_node = nl_rb_node_create(&element->node, rb_document);
80
+
81
+ if (rb_block_given_p())
82
+ {
83
+ rb_yield(rb_node);
84
+ }
85
+
86
+ return rb_node;
87
+ }
88
+
55
89
  static VALUE
56
90
  nl_node_content(VALUE self)
57
91
  {
@@ -251,7 +285,7 @@ mark_node_orders(lxb_dom_node_t *root)
251
285
  void sort_nodes_if_necessary(VALUE selector, lxb_dom_document_t *doc, lexbor_array_t *array)
252
286
  {
253
287
  // No need to sort if there's only one selector, the results are natually in document traversal order
254
- if (strnstr(RSTRING_PTR(selector), ",", RSTRING_LEN(selector)) != NULL)
288
+ if (strstr(RSTRING_PTR(selector), ",") != NULL)
255
289
  {
256
290
  int need_order = 0;
257
291
  // Check if we have already markded orders, note that
@@ -574,6 +608,39 @@ nl_node_name(VALUE self)
574
608
  return rb_utf8_str_new(name, len);
575
609
  }
576
610
 
611
+ static lxb_dom_node_t *
612
+ nl_node_parse_fragment(lxb_html_document_t *doc, lxb_char_t *html, size_t size)
613
+ {
614
+ size_t tag_name_len;
615
+ lxb_char_t *tag_name = lxb_tag_name_by_id(lxb_html_document_tags(doc), LXB_TAG__UNDEF, &tag_name_len);
616
+ if (tag_name == NULL)
617
+ {
618
+ rb_raise(rb_eRuntimeError, "Error getting tag name");
619
+ }
620
+ lxb_dom_element_t *element = lxb_dom_document_create_element(doc, tag_name, tag_name_len, NULL);
621
+ if (element == NULL)
622
+ {
623
+ rb_raise(rb_eRuntimeError, "Error creating element");
624
+ }
625
+ lxb_dom_node_t *frag_root = lxb_html_document_parse_fragment(doc, element, html, size);
626
+ if (frag_root == NULL)
627
+ {
628
+ rb_raise(rb_eArgError, "Error parsing HTML");
629
+ }
630
+ return frag_root;
631
+ }
632
+
633
+ static VALUE
634
+ nl_node_fragment(VALUE self, VALUE html)
635
+ {
636
+ Check_Type(html, T_STRING);
637
+ lxb_dom_node_t *node = nl_rb_node_unwrap(self);
638
+ lxb_dom_document_t *doc = node->owner_document;
639
+
640
+ lxb_dom_node_t *frag_root = nl_node_parse_fragment(doc, RSTRING_PTR(html), RSTRING_LEN(html));
641
+ return nl_rb_node_create(frag_root, nl_rb_document_get(self));
642
+ }
643
+
577
644
  static VALUE
578
645
  nl_node_add_sibling(VALUE self, VALUE next_or_previous, VALUE new)
579
646
  {
@@ -596,22 +663,7 @@ nl_node_add_sibling(VALUE self, VALUE next_or_previous, VALUE new)
596
663
 
597
664
  if (TYPE(new) == T_STRING)
598
665
  {
599
- size_t tag_name_len;
600
- lxb_char_t *tag_name = lxb_tag_name_by_id(lxb_html_document_tags(doc), LXB_TAG__UNDEF, &tag_name_len);
601
- if (tag_name == NULL)
602
- {
603
- rb_raise(rb_eRuntimeError, "Error getting tag name");
604
- }
605
- lxb_dom_element_t *element = lxb_dom_document_create_element(doc, tag_name, tag_name_len, NULL);
606
- if (element == NULL)
607
- {
608
- rb_raise(rb_eRuntimeError, "Error creating element");
609
- }
610
- lxb_dom_node_t *frag_root = lxb_html_document_parse_fragment(doc, element, RSTRING_PTR(new), RSTRING_LEN(new));
611
- if (frag_root == NULL)
612
- {
613
- rb_raise(rb_eArgError, "Error parsing HTML");
614
- }
666
+ lxb_dom_node_t *frag_root = nl_node_parse_fragment(doc, RSTRING_PTR(new), RSTRING_LEN(new));
615
667
 
616
668
  while (frag_root->first_child != NULL)
617
669
  {
@@ -624,6 +676,7 @@ nl_node_add_sibling(VALUE self, VALUE next_or_previous, VALUE new)
624
676
  else if (rb_obj_class(new) == cNokolexborNode)
625
677
  {
626
678
  lxb_dom_node_t *node_new = nl_rb_node_unwrap(new);
679
+ lxb_dom_node_remove(node_new);
627
680
  insert_after ? lxb_dom_node_insert_after(node, node_new) : lxb_dom_node_insert_before(node, node_new);
628
681
  }
629
682
  else
@@ -641,22 +694,7 @@ nl_node_add_child(VALUE self, VALUE new)
641
694
 
642
695
  if (TYPE(new) == T_STRING)
643
696
  {
644
- size_t tag_name_len;
645
- lxb_char_t *tag_name = lxb_tag_name_by_id(lxb_html_document_tags(doc), LXB_TAG__UNDEF, &tag_name_len);
646
- if (tag_name == NULL)
647
- {
648
- rb_raise(rb_eRuntimeError, "Error getting tag name");
649
- }
650
- lxb_dom_element_t *element = lxb_dom_document_create_element(doc, tag_name, tag_name_len, NULL);
651
- if (element == NULL)
652
- {
653
- rb_raise(rb_eRuntimeError, "Error creating element");
654
- }
655
- lxb_dom_node_t *frag_root = lxb_html_document_parse_fragment(doc, element, RSTRING_PTR(new), RSTRING_LEN(new));
656
- if (frag_root == NULL)
657
- {
658
- rb_raise(rb_eArgError, "Error parsing HTML");
659
- }
697
+ lxb_dom_node_t *frag_root = nl_node_parse_fragment(doc, RSTRING_PTR(new), RSTRING_LEN(new));
660
698
 
661
699
  while (frag_root->first_child != NULL)
662
700
  {
@@ -747,9 +785,20 @@ nl_node_last_element_child(VALUE self)
747
785
  return Qnil;
748
786
  }
749
787
 
788
+ static VALUE
789
+ nl_node_clone(VALUE self)
790
+ {
791
+ lxb_dom_node_t *node = nl_rb_node_unwrap(self);
792
+ lxb_dom_node_t *clone = lxb_dom_node_clone(node, 1);
793
+ return nl_rb_node_create(clone, nl_rb_document_get(self));
794
+ }
795
+
750
796
  void Init_nl_node(void)
751
797
  {
752
798
  cNokolexborNode = rb_define_class_under(mNokolexbor, "Node", rb_cObject);
799
+ rb_undef_alloc_func(cNokolexborNode);
800
+
801
+ rb_define_singleton_method(cNokolexborNode, "new", nl_node_new, -1);
753
802
  rb_define_method(cNokolexborNode, "content", nl_node_content, 0);
754
803
  rb_define_method(cNokolexborNode, "[]", nl_node_get_attr, 1);
755
804
  rb_define_method(cNokolexborNode, "[]=", nl_node_set_attr, 2);
@@ -773,11 +822,13 @@ void Init_nl_node(void)
773
822
  rb_define_method(cNokolexborNode, "destroy", nl_node_destroy, 0);
774
823
  rb_define_method(cNokolexborNode, "attrs", nl_node_attrs, 0);
775
824
  rb_define_method(cNokolexborNode, "name", nl_node_name, 0);
825
+ rb_define_method(cNokolexborNode, "fragment", nl_node_fragment, 1);
776
826
  rb_define_method(cNokolexborNode, "add_sibling", nl_node_add_sibling, 2);
777
827
  rb_define_method(cNokolexborNode, "add_child", nl_node_add_child, 1);
778
828
  rb_define_method(cNokolexborNode, "node_type", nl_node_get_type, 0);
779
829
  rb_define_method(cNokolexborNode, "first_element_child", nl_node_first_element_child, 0);
780
830
  rb_define_method(cNokolexborNode, "last_element_child", nl_node_last_element_child, 0);
831
+ rb_define_method(cNokolexborNode, "clone", nl_node_clone, 0);
781
832
 
782
833
  rb_define_alias(cNokolexborNode, "attr", "[]");
783
834
  rb_define_alias(cNokolexborNode, "set_attr", "[]=");
@@ -787,4 +838,5 @@ void Init_nl_node(void)
787
838
  rb_define_alias(cNokolexborNode, "to_html", "outer_html");
788
839
  rb_define_alias(cNokolexborNode, "to_s", "outer_html");
789
840
  rb_define_alias(cNokolexborNode, "type", "node_type");
841
+ rb_define_alias(cNokolexborNode, "dup", "clone");
790
842
  }
@@ -131,7 +131,7 @@ xmlNodeGetContent(const lxb_dom_node_t *cur)
131
131
  * Get the root element of the document (doc->children is a list
132
132
  * containing possibly comments, PIs, etc ...).
133
133
  *
134
- * Returns the #xmlNodePtr for the root or NULL
134
+ * Returns the #lxb_dom_node_t_ptr for the root or NULL
135
135
  */
136
136
  lxb_dom_node_t_ptr
137
137
  xmlDocGetRootElement(const lxb_dom_document_t *doc) {
@@ -145,4 +145,16 @@ xmlDocGetRootElement(const lxb_dom_document_t *doc) {
145
145
  ret = ret->next;
146
146
  }
147
147
  return(ret);
148
+ }
149
+
150
+ /**
151
+ * xmlFreeNodeList:
152
+ * @cur: the first node in the list
153
+ *
154
+ * Free a node and all its siblings, this is a recursive behaviour, all
155
+ * the children are freed too.
156
+ */
157
+ void
158
+ xmlFreeNodeList(lxb_dom_node_t_ptr cur) {
159
+ // Should never be called
148
160
  }
@@ -67,6 +67,26 @@ module Nokolexbor
67
67
  end)
68
68
  end
69
69
 
70
+ def wrap(node)
71
+ case node
72
+ when String
73
+ new_parent = fragment(node).child
74
+ when Node
75
+ new_parent = node.dup
76
+ else
77
+ raise ArgumentError, "Requires a String or Node argument, and cannot accept a #{node.class}"
78
+ end
79
+
80
+ if parent
81
+ add_sibling(:next, new_parent)
82
+ else
83
+ new_parent.remove
84
+ end
85
+ new_parent.add_child(self)
86
+
87
+ self
88
+ end
89
+
70
90
  def matches?(selector)
71
91
  ancestors.last.css(selector).any? { |node| node == self }
72
92
  end
@@ -4,10 +4,12 @@ module Nokolexbor
4
4
  class NodeSet < Node
5
5
  include Enumerable
6
6
 
7
- def initialize(document, list = [])
8
- @document = document
9
- list.each { |x| self << x }
10
- yield self if block_given?
7
+ def self.new(document, list = [])
8
+ obj = allocate
9
+ obj.instance_variable_set(:@document, document)
10
+ list.each { |x| obj << x }
11
+ yield obj if block_given?
12
+ obj
11
13
  end
12
14
 
13
15
  def each
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Nokolexbor
4
- VERSION = '0.2.0'
4
+ VERSION = '0.2.2'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: nokolexbor
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Yicheng Zhou
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-12-14 00:00:00.000000000 Z
11
+ date: 2022-12-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake-compiler