PyPI - selectolax - Versions diffs - 0.3.27__cp311-cp311-musllinux_1_2_x86_64.whl → 0.3.29__cp311-cp311-musllinux_1_2_x86_64.whl - Mend

selectolax 0.3.27__cp311-cp311-musllinux_1_2_x86_64.whl → 0.3.29__cp311-cp311-musllinux_1_2_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of selectolax might be problematic. Click here for more details.

Files changed (19)

selectolax/__init__.py +1 -1
selectolax/lexbor/attrs.pxi +2 -1
selectolax/lexbor/node.pxi +17 -5
selectolax/lexbor/selection.pxi +12 -10
selectolax/lexbor.c +2735 -2481
selectolax/lexbor.cpython-311-x86_64-linux-musl.so +0 -0
selectolax/lexbor.pyi +3 -3
selectolax/lexbor.pyx +8 -4
selectolax/modest/node.pxi +21 -5
selectolax/parser.c +1857 -1703
selectolax/parser.cpython-311-x86_64-linux-musl.so +0 -0
selectolax/parser.pyi +15 -22
selectolax/parser.pyx +4 -2
{selectolax-0.3.27.dist-info → selectolax-0.3.29.dist-info}/LICENSE +1 -1
{selectolax-0.3.27.dist-info → selectolax-0.3.29.dist-info}/METADATA +7 -1
selectolax-0.3.29.dist-info/RECORD +26 -0
selectolax-0.3.27.dist-info/RECORD +0 -26
{selectolax-0.3.27.dist-info → selectolax-0.3.29.dist-info}/WHEEL +0 -0
{selectolax-0.3.27.dist-info → selectolax-0.3.29.dist-info}/top_level.txt +0 -0

selectolax/__init__.py CHANGED Viewed

@@ -3,7 +3,7 @@
 __author__ = """Artem Golubin"""
 __email__ = 'me@rushter.com'
-__version__ = '0.3.27'
+__version__ = '0.3.29'
 from . import parser
 from . import lexbor

selectolax/lexbor/attrs.pxi CHANGED Viewed

@@ -19,8 +19,9 @@ cdef class LexborAttributes:
         while attr != NULL:
             key = lxb_dom_attr_local_name_noi(attr, &str_len)
+            if key is not NULL:
+                yield key.decode(_ENCODING)
             attr = attr.next
-            yield key.decode(_ENCODING)
     def __setitem__(self, str key, value):
         value = str(value)

selectolax/lexbor/node.pxi CHANGED Viewed

@@ -95,7 +95,7 @@ cdef class LexborNode:
         text : str
         """
         cdef lexbor_str_t *lxb_str
-        cdef lxb_status_t lxb_status_t
+        cdef lxb_status_t status
         lxb_str = lexbor_str_create()
         status = lxb_html_serialize_tree_str(self.node, lxb_str)
@@ -416,9 +416,14 @@ cdef class LexborNode:
             node = node.next
-    def unwrap(self):
+    def unwrap(self, delete_empty=False):
         """Replace node with whatever is inside this node.
+        Parameters
+        ----------
+        delete_empty : bool, default False
+            If True, removes empty tags.
         Examples
         --------
@@ -426,9 +431,12 @@ cdef class LexborNode:
         >>>  tree.css_first('i').unwrap()
         >>>  tree.html
         '<html><head></head><body><div>Hello world!</div></body></html>'
+        Note: by default, empty tags are ignored, use "delete_empty" to change this.
         """
         if self.node.first_child == NULL:
+            if delete_empty:
+                lxb_dom_node_destroy(<lxb_dom_node_t *> self.node)
             return
         cdef lxb_dom_node_t* next_node;
         cdef lxb_dom_node_t* current_node;
@@ -445,7 +453,7 @@ cdef class LexborNode:
             lxb_dom_node_insert_before(self.node, self.node.first_child)
         lxb_dom_node_destroy(<lxb_dom_node_t *> self.node)
-    def unwrap_tags(self, list tags):
+    def unwrap_tags(self, list tags, delete_empty = False):
         """Unwraps specified tags from the HTML tree.
         Works the same as the ``unwrap`` method, but applied to a list of tags.
@@ -454,6 +462,8 @@ cdef class LexborNode:
         ----------
         tags : list
             List of tags to remove.
+        delete_empty : bool, default False
+            If True, removes empty tags.
         Examples
         --------
@@ -462,11 +472,13 @@ cdef class LexborNode:
         >>> tree.body.unwrap_tags(['i','a'])
         >>> tree.body.html
         '<body><div>Hello world!</div></body>'
+        Note: by default, empty tags are ignored, use "delete_empty" to change this.
         """
         for tag in tags:
             for element in self.css(tag):
-                element.unwrap()
+                element.unwrap(delete_empty)
     def traverse(self, include_text=False):

selectolax/lexbor/selection.pxi CHANGED Viewed

@@ -77,10 +77,12 @@ cdef class LexborCSSSelector:
     def __dealloc__(self):
-        lxb_selectors_destroy(self.selectors, True)
-        # lxb_css_memory_destroy(self.parser.memory, True)
-        lxb_css_parser_destroy(self.parser, True)
-        lxb_css_selectors_destroy(self.css_selectors, True)
+        if self.selectors != NULL:
+            lxb_selectors_destroy(self.selectors, True)
+        if self.parser != NULL:
+            lxb_css_parser_destroy(self.parser, True)
+        if self.css_selectors != NULL:
+            lxb_css_selectors_destroy(self.css_selectors, True)
@@ -104,16 +106,16 @@ cdef class LexborSelector:
         raise SelectolaxError("This features is not supported by the lexbor backend. Please use Modest backend.")
     @property
-    def matches(self):
+    def matches(self) -> list:
         """Returns all possible matches"""
         return self.nodes
     @property
-    def any_matches(self):
+    def any_matches(self) -> bool:
         """Returns True if there are any matches"""
         return bool(self.nodes)
-    def text_contains(self, str text, bool deep=True, str separator='', bool strip=False):
+    def text_contains(self, str text, bool deep=True, str separator='', bool strip=False) -> LexborSelector:
         """Filter all current matches given text."""
         nodes = []
         for node in self.nodes:
@@ -123,7 +125,7 @@ cdef class LexborSelector:
         self.nodes = nodes
         return self
-    def any_text_contains(self, str text, bool deep=True, str separator='', bool strip=False):
+    def any_text_contains(self, str text, bool deep=True, str separator='', bool strip=False) -> bool:
         """Returns True if any node in the current search scope contains specified text"""
         nodes = []
         for node in self.nodes:
@@ -132,7 +134,7 @@ cdef class LexborSelector:
                 return True
         return False
-    def attribute_longer_than(self, str attribute, int length, str start  = None):
+    def attribute_longer_than(self, str attribute, int length, str start  = None) -> LexborSelector:
         """Filter all current matches by attribute length.
         Similar to `string-length` in XPath.
@@ -147,7 +149,7 @@ cdef class LexborSelector:
         self.nodes = nodes
         return self
-    def any_attribute_longer_than(self, str attribute, int length, str start  = None):
+    def any_attribute_longer_than(self, str attribute, int length, str start  = None) -> bool:
         """Returns True any href attribute longer than a specified length.
         Similar to `string-length` in XPath.