PyPI - selectolax - Versions diffs - 0.3.16__cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl → 0.3.29__cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl - Mend

selectolax 0.3.16__cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl → 0.3.29__cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of selectolax might be problematic. Click here for more details.

Files changed (24)

selectolax/__init__.py +1 -1
selectolax/lexbor/attrs.pxi +2 -1
selectolax/lexbor/node.pxi +68 -5
selectolax/lexbor/selection.pxi +14 -11
selectolax/lexbor/util.pxi +19 -0
selectolax/lexbor.c +39208 -18768
selectolax/lexbor.cpython-38-aarch64-linux-gnu.so +0 -0
selectolax/lexbor.pxd +7 -1
selectolax/lexbor.pyi +89 -39
selectolax/lexbor.pyx +10 -5
selectolax/modest/node.pxi +61 -5
selectolax/modest/selection.pxi +1 -1
selectolax/modest/util.pxi +19 -0
selectolax/parser.c +33796 -14836
selectolax/parser.cpython-38-aarch64-linux-gnu.so +0 -0
selectolax/parser.pyi +86 -41
selectolax/parser.pyx +5 -4
selectolax/utils.pxi +95 -1
{selectolax-0.3.16.dist-info → selectolax-0.3.29.dist-info}/LICENSE +1 -1
{selectolax-0.3.16.dist-info → selectolax-0.3.29.dist-info}/METADATA +17 -4
selectolax-0.3.29.dist-info/RECORD +26 -0
{selectolax-0.3.16.dist-info → selectolax-0.3.29.dist-info}/WHEEL +1 -1
selectolax-0.3.16.dist-info/RECORD +0 -24
{selectolax-0.3.16.dist-info → selectolax-0.3.29.dist-info}/top_level.txt +0 -0

selectolax/__init__.py CHANGED Viewed

@@ -3,7 +3,7 @@
 __author__ = """Artem Golubin"""
 __email__ = 'me@rushter.com'
-__version__ = '0.3.16'
+__version__ = '0.3.29'
 from . import parser
 from . import lexbor

selectolax/lexbor/attrs.pxi CHANGED Viewed

@@ -19,8 +19,9 @@ cdef class LexborAttributes:
         while attr != NULL:
             key = lxb_dom_attr_local_name_noi(attr, &str_len)
+            if key is not NULL:
+                yield key.decode(_ENCODING)
             attr = attr.next
-            yield key.decode(_ENCODING)
     def __setitem__(self, str key, value):
         value = str(value)

selectolax/lexbor/node.pxi CHANGED Viewed

@@ -95,7 +95,7 @@ cdef class LexborNode:
         text : str
         """
         cdef lexbor_str_t *lxb_str
-        cdef lxb_status_t lxb_status_t
+        cdef lxb_status_t status
         lxb_str = lexbor_str_create()
         status = lxb_html_serialize_tree_str(self.node, lxb_str)
@@ -416,9 +416,14 @@ cdef class LexborNode:
             node = node.next
-    def unwrap(self):
+    def unwrap(self, delete_empty=False):
         """Replace node with whatever is inside this node.
+        Parameters
+        ----------
+        delete_empty : bool, default False
+            If True, removes empty tags.
         Examples
         --------
@@ -426,9 +431,12 @@ cdef class LexborNode:
         >>>  tree.css_first('i').unwrap()
         >>>  tree.html
         '<html><head></head><body><div>Hello world!</div></body></html>'
+        Note: by default, empty tags are ignored, use "delete_empty" to change this.
         """
         if self.node.first_child == NULL:
+            if delete_empty:
+                lxb_dom_node_destroy(<lxb_dom_node_t *> self.node)
             return
         cdef lxb_dom_node_t* next_node;
         cdef lxb_dom_node_t* current_node;
@@ -445,7 +453,7 @@ cdef class LexborNode:
             lxb_dom_node_insert_before(self.node, self.node.first_child)
         lxb_dom_node_destroy(<lxb_dom_node_t *> self.node)
-    def unwrap_tags(self, list tags):
+    def unwrap_tags(self, list tags, delete_empty = False):
         """Unwraps specified tags from the HTML tree.
         Works the same as the ``unwrap`` method, but applied to a list of tags.
@@ -454,6 +462,8 @@ cdef class LexborNode:
         ----------
         tags : list
             List of tags to remove.
+        delete_empty : bool, default False
+            If True, removes empty tags.
         Examples
         --------
@@ -462,11 +472,13 @@ cdef class LexborNode:
         >>> tree.body.unwrap_tags(['i','a'])
         >>> tree.body.html
         '<body><div>Hello world!</div></body>'
+        Note: by default, empty tags are ignored, use "delete_empty" to change this.
         """
         for tag in tags:
             for element in self.css(tag):
-                element.unwrap()
+                element.unwrap(delete_empty)
     def traverse(self, include_text=False):
@@ -655,6 +667,57 @@ cdef class LexborNode:
         else:
             raise SelectolaxError("Expected a string or LexborNode instance, but %s found" % type(value).__name__)
+    def insert_child(self, str_or_LexborNode value):
+        """
+        Insert a node inside (at the end of) the current Node.
+        Parameters
+        ----------
+        value : str, bytes or Node
+            The text or Node instance to insert inside the Node.
+            When a text string is passed, it's treated as text. All HTML tags will be escaped.
+            Convert and pass the ``Node`` object when you want to work with HTML.
+            Does not clone the ``Node`` object.
+            All future changes to the passed ``Node`` object will also be taken into account.
+        Examples
+        --------
+        >>> tree = LexborHTMLParser('<div>Get <img src=""></div>')
+        >>> div = tree.css_first('div')
+        >>> div.insert_child('Laptop')
+        >>> tree.body.child.html
+        '<div>Get <img src="">Laptop</div>'
+        >>> html_parser = LexborHTMLParser('<div>Get <span alt="Laptop"> <div>Laptop</div> </span></div>')
+        >>> html_parser2 = LexborHTMLParser('<div>Test</div>')
+        >>> span_node = html_parser.css_first('span')
+        >>> span_node.insert_child(html_parser2.body.child)
+        <div>Get <span alt="Laptop"> <div>Laptop</div> <div>Test</div> </span></div>'
+        """
+        cdef lxb_dom_node_t * new_node
+        if isinstance(value, (str, bytes, unicode)):
+            bytes_val = to_bytes(value)
+            new_node = <lxb_dom_node_t *> lxb_dom_document_create_text_node(
+                    &self.parser.document.dom_document,
+                    <lxb_char_t *> bytes_val, len(bytes_val)
+            )
+            if new_node == NULL:
+                raise SelectolaxError("Can't create a new node")
+            lxb_dom_node_insert_child(self.node,  new_node)
+        elif isinstance(value, LexborNode):
+            new_node = lxb_dom_document_import_node(
+                &self.parser.document.dom_document,
+                <lxb_dom_node_t *> value.node,
+                <bint> True
+            )
+            if new_node == NULL:
+                raise SelectolaxError("Can't create a new node")
+            lxb_dom_node_insert_child(self.node, <lxb_dom_node_t *> new_node)
+        else:
+            raise SelectolaxError("Expected a string or LexborNode instance, but %s found" % type(value).__name__)
     @property
     def raw_value(self):
         """Return the raw (unparsed, original) value of a node.

selectolax/lexbor/selection.pxi CHANGED Viewed

@@ -28,7 +28,7 @@ cdef class LexborCSSSelector:
         self.selectors = lxb_selectors_create()
         status = lxb_selectors_init(self.selectors)
+        lxb_selectors_opt_set(self.selectors, LXB_SELECTORS_OPT_MATCH_ROOT)
         if status != LXB_STATUS_OK:
             raise SelectolaxError("Can't initialize CSS selector.")
@@ -72,14 +72,17 @@ cdef class LexborCSSSelector:
             raise SelectolaxError("Can't parse CSS selector.")
         result = bool(self.results)
         self.results = []
+        lxb_css_selector_list_destroy_memory(selectors_list)
         return result
     def __dealloc__(self):
-        lxb_selectors_destroy(self.selectors, True)
-        lxb_css_parser_destroy(self.parser, True)
-        lxb_css_selectors_destroy(self.css_selectors, True)
-        # lxb_css_memory_destroy(, True)
+        if self.selectors != NULL:
+            lxb_selectors_destroy(self.selectors, True)
+        if self.parser != NULL:
+            lxb_css_parser_destroy(self.parser, True)
+        if self.css_selectors != NULL:
+            lxb_css_selectors_destroy(self.css_selectors, True)
@@ -103,16 +106,16 @@ cdef class LexborSelector:
         raise SelectolaxError("This features is not supported by the lexbor backend. Please use Modest backend.")
     @property
-    def matches(self):
+    def matches(self) -> list:
         """Returns all possible matches"""
         return self.nodes
     @property
-    def any_matches(self):
+    def any_matches(self) -> bool:
         """Returns True if there are any matches"""
         return bool(self.nodes)
-    def text_contains(self, str text, bool deep=True, str separator='', bool strip=False):
+    def text_contains(self, str text, bool deep=True, str separator='', bool strip=False) -> LexborSelector:
         """Filter all current matches given text."""
         nodes = []
         for node in self.nodes:
@@ -122,7 +125,7 @@ cdef class LexborSelector:
         self.nodes = nodes
         return self
-    def any_text_contains(self, str text, bool deep=True, str separator='', bool strip=False):
+    def any_text_contains(self, str text, bool deep=True, str separator='', bool strip=False) -> bool:
         """Returns True if any node in the current search scope contains specified text"""
         nodes = []
         for node in self.nodes:
@@ -131,7 +134,7 @@ cdef class LexborSelector:
                 return True
         return False
-    def attribute_longer_than(self, str attribute, int length, str start  = None):
+    def attribute_longer_than(self, str attribute, int length, str start  = None) -> LexborSelector:
         """Filter all current matches by attribute length.
         Similar to `string-length` in XPath.
@@ -146,7 +149,7 @@ cdef class LexborSelector:
         self.nodes = nodes
         return self
-    def any_attribute_longer_than(self, str attribute, int length, str start  = None):
+    def any_attribute_longer_than(self, str attribute, int length, str start  = None) -> bool:
         """Returns True any href attribute longer than a specified length.
         Similar to `string-length` in XPath.

selectolax/lexbor/util.pxi ADDED Viewed

@@ -0,0 +1,19 @@
+include "../utils.pxi"
+def create_tag(tag: str):
+    """
+    Given an HTML tag name, e.g. `"div"`, create a single empty node for that tag,
+    e.g. `"<div></div>"`.
+    """
+    return do_create_tag(tag, LexborHTMLParser)
+def parse_fragment(html: str):
+    """
+    Given HTML, parse it into a list of Nodes, such that the nodes
+    correspond to the given HTML.
+    For contrast, HTMLParser adds `<html>`, `<head>`, and `<body>` tags
+    if they are missing. This function does not add these tags.
+    """
+    return do_parse_fragment(html, LexborHTMLParser)