PyPI - selectolax - Versions diffs - 0.3.27__cp39-cp39-musllinux_1_2_aarch64.whl → 0.3.29__cp39-cp39-musllinux_1_2_aarch64.whl - Mend

selectolax 0.3.27__cp39-cp39-musllinux_1_2_aarch64.whl → 0.3.29__cp39-cp39-musllinux_1_2_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of selectolax might be problematic. Click here for more details.

Files changed (19)

selectolax/__init__.py +1 -1
selectolax/lexbor/attrs.pxi +2 -1
selectolax/lexbor/node.pxi +17 -5
selectolax/lexbor/selection.pxi +12 -10
selectolax/lexbor.c +2735 -2481
selectolax/lexbor.cpython-39-aarch64-linux-gnu.so +0 -0
selectolax/lexbor.pyi +3 -3
selectolax/lexbor.pyx +8 -4
selectolax/modest/node.pxi +21 -5
selectolax/parser.c +1857 -1703
selectolax/parser.cpython-39-aarch64-linux-gnu.so +0 -0
selectolax/parser.pyi +15 -22
selectolax/parser.pyx +4 -2
{selectolax-0.3.27.dist-info → selectolax-0.3.29.dist-info}/LICENSE +1 -1
{selectolax-0.3.27.dist-info → selectolax-0.3.29.dist-info}/METADATA +7 -1
selectolax-0.3.29.dist-info/RECORD +26 -0
selectolax-0.3.27.dist-info/RECORD +0 -26
{selectolax-0.3.27.dist-info → selectolax-0.3.29.dist-info}/WHEEL +0 -0
{selectolax-0.3.27.dist-info → selectolax-0.3.29.dist-info}/top_level.txt +0 -0

selectolax/lexbor.cpython-39-aarch64-linux-gnu.so CHANGED Viewed

Binary file

selectolax/lexbor.pyi CHANGED Viewed

@@ -101,7 +101,7 @@ class LexborNode:
     def id(self) -> str | None: ...
     def iter(self, include_text: bool = False) -> Iterator[LexborNode]: ...
     def unwrap(self) -> None: ...
-    def unwrap_tags(self, tags: list[str]) -> None: ...
+    def unwrap_tags(self, tags: list[str], delete_empty : bool = False) -> None: ...
     def traverse(self, include_text: bool = False) -> Iterator[LexborNode]: ...
     def replace_with(self, value: bytes | str | LexborNode) -> None: ...
     def insert_before(self, value: bytes | str | LexborNode) -> None: ...
@@ -117,7 +117,7 @@ class LexborNode:
     def text_content(self) -> str | None: ...
 class LexborHTMLParser:
-    def __init__(self, html: str): ...
+    def __init__(self, html: str| bytes ): ...
     @property
     def selector(self) -> "LexborCSSSelector": ...
     @property
@@ -152,7 +152,7 @@ class LexborHTMLParser:
     def scripts_srcs_contain(self, queries: tuple[str]) -> bool: ...
     def css_matches(self, selector: str) -> bool: ...
     def clone(self) -> LexborHTMLParser: ...
-    def unwrap_tags(self, tags: list[str]) -> None: ...
+    def unwrap_tags(self, tags: list[str], delete_empty : bool = False) -> None: ...
 def create_tag(tag: str) -> LexborNode:
     """

selectolax/lexbor.pyx CHANGED Viewed

@@ -112,6 +112,7 @@ cdef class LexborHTMLParser:
             len(pybyte_name)
         )
         if status != 0x0000:
+            lxb_dom_collection_destroy(collection, <bint> True)
             raise SelectolaxError("Can't locate elements.")
         for i in range(lxb_dom_collection_length_noi(collection)):
@@ -226,13 +227,14 @@ cdef class LexborHTMLParser:
                 len(pybyte_name)
             )
             if status != 0x0000:
+                lxb_dom_collection_destroy(collection, <bint> True)
                 raise SelectolaxError("Can't locate elements.")
             for i in range(lxb_dom_collection_length_noi(collection)):
                 if recursive:
-                    lxb_dom_node_destroy( <lxb_dom_node_t*> lxb_dom_collection_element_noi(collection, i))
-                else:
                     lxb_dom_node_destroy_deep( <lxb_dom_node_t*> lxb_dom_collection_element_noi(collection, i))
+                else:
+                    lxb_dom_node_destroy(<lxb_dom_node_t *> lxb_dom_collection_element_noi(collection, i))
             lxb_dom_collection_destroy(collection, <bint> True)
     def select(self, query=None):
@@ -325,7 +327,7 @@ cdef class LexborHTMLParser:
         cls = LexborHTMLParser.from_document(cloned_document, self.raw_html)
         return cls
-    def unwrap_tags(self, list tags):
+    def unwrap_tags(self, list tags, delete_empty = False):
         """Unwraps specified tags from the HTML tree.
         Works the same as the ``unwrap`` method, but applied to a list of tags.
@@ -334,6 +336,8 @@ cdef class LexborHTMLParser:
         ----------
         tags : list
             List of tags to remove.
+        delete_empty : bool
+            Whenever to delete empty tags.
         Examples
         --------
@@ -344,4 +348,4 @@ cdef class LexborHTMLParser:
         '<body><div>Hello world!</div></body>'
         """
         if self.root is not None:
-            self.root.unwrap_tags(tags)
+            self.root.unwrap_tags(tags, delete_empty=delete_empty)

selectolax/modest/node.pxi CHANGED Viewed

@@ -515,9 +515,14 @@ cdef class Node:
         """An alias for the decompose method."""
         self.decompose(recursive)
-    def unwrap(self):
+    def unwrap(self, delete_empty = False):
         """Replace node with whatever is inside this node.
+        Parameters
+        ----------
+        delete_empty : bool, default False
+            Whenever to delete empty tags.
         Examples
         --------
@@ -526,8 +531,11 @@ cdef class Node:
         >>>  tree.html
         '<html><head></head><body><div>Hello world!</div></body></html>'
+        Note: by default, empty tags are ignored, set "delete_empty" to "True" to change this.
         """
         if self.node.child == NULL:
+            if delete_empty:
+                myhtml_node_delete(self.node)
             return
         cdef myhtml_tree_node_t* next_node;
         cdef myhtml_tree_node_t* current_node;
@@ -568,7 +576,7 @@ cdef class Node:
             for element in self.css(tag):
                 element.decompose(recursive=recursive)
-    def unwrap_tags(self, list tags):
+    def unwrap_tags(self, list tags, delete_empty = False):
         """Unwraps specified tags from the HTML tree.
         Works the same as the ``unwrap`` method, but applied to a list of tags.
@@ -577,6 +585,8 @@ cdef class Node:
         ----------
         tags : list
             List of tags to remove.
+        delete_empty : bool, default False
+            Whenever to delete empty tags.
         Examples
         --------
@@ -585,11 +595,13 @@ cdef class Node:
         >>> tree.body.unwrap_tags(['i','a'])
         >>> tree.body.html
         '<body><div>Hello world!</div></body>'
+        Note: by default, empty tags are ignored, set "delete_empty" to "True" to change this.
         """
         for tag in tags:
             for element in self.css(tag):
-                element.unwrap()
+                element.unwrap(delete_empty)
     def replace_with(self, str_or_Node value):
         """Replace current Node with specified value.
@@ -752,7 +764,7 @@ cdef class Node:
         else:
             raise TypeError("Expected a string or Node instance, but %s found" % type(value).__name__)
-    def unwrap_tags(self, list tags):
+    def unwrap_tags(self, list tags, delete_empty = False):
         """Unwraps specified tags from the HTML tree.
         Works the same as th ``unwrap`` method, but applied to a list of tags.
@@ -761,6 +773,8 @@ cdef class Node:
         ----------
         tags : list
             List of tags to remove.
+        delete_empty : bool, default False
+            Whenever to delete empty tags.
         Examples
         --------
@@ -769,11 +783,13 @@ cdef class Node:
         >>> tree.body.unwrap_tags(['i','a'])
         >>> tree.body.html
         '<body><div>Hello world!</div></body>'
+        Note: by default, empty tags are ignored, set "delete_empty" to "True" to change this.
         """
         for tag in tags:
             for element in self.css(tag):
-                element.unwrap()
+                element.unwrap(delete_empty)
     @property
     def raw_value(self):