PyPI - selectolax - Versions diffs - 0.3.15__cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl → 0.3.28__cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl - Mend

selectolax 0.3.15__cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl → 0.3.28__cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of selectolax might be problematic. Click here for more details.

Files changed (24) hide show

selectolax/__init__.py +1 -1
selectolax/lexbor/attrs.pxi +2 -1
selectolax/lexbor/node.pxi +59 -1
selectolax/lexbor/selection.pxi +14 -11
selectolax/lexbor/util.pxi +19 -0
selectolax/lexbor.c +23543 -12863
selectolax/lexbor.cpython-38-aarch64-linux-gnu.so +0 -0
selectolax/lexbor.pxd +7 -1
selectolax/lexbor.pyi +91 -38
selectolax/lexbor.pyx +6 -3
selectolax/modest/node.pxi +53 -0
selectolax/modest/selection.pxi +1 -1
selectolax/modest/util.pxi +19 -0
selectolax/parser.c +16862 -6964
selectolax/parser.cpython-38-aarch64-linux-gnu.so +0 -0
selectolax/parser.pyi +101 -37
selectolax/parser.pyx +1 -2
selectolax/utils.pxi +95 -1
{selectolax-0.3.15.dist-info → selectolax-0.3.28.dist-info}/LICENSE +1 -1
{selectolax-0.3.15.dist-info → selectolax-0.3.28.dist-info}/METADATA +17 -4
selectolax-0.3.28.dist-info/RECORD +26 -0
{selectolax-0.3.15.dist-info → selectolax-0.3.28.dist-info}/WHEEL +1 -1
selectolax-0.3.15.dist-info/RECORD +0 -24
{selectolax-0.3.15.dist-info → selectolax-0.3.28.dist-info}/top_level.txt +0 -0

selectolax/__init__.py CHANGED Viewed

@@ -3,7 +3,7 @@
 __author__ = """Artem Golubin"""
 __email__ = 'me@rushter.com'
-__version__ = '0.3.15'
+__version__ = '0.3.28'
 from . import parser
 from . import lexbor

selectolax/lexbor/attrs.pxi CHANGED Viewed

@@ -19,8 +19,9 @@ cdef class LexborAttributes:
         while attr != NULL:
             key = lxb_dom_attr_local_name_noi(attr, &str_len)
+            if key is not NULL:
+                yield key.decode(_ENCODING)
             attr = attr.next
-            yield key.decode(_ENCODING)
     def __setitem__(self, str key, value):
         value = str(value)

selectolax/lexbor/node.pxi CHANGED Viewed

@@ -27,6 +27,10 @@ cdef class LexborNode:
         self.node = node
         return self
+    @property
+    def mem_id(self):
+        return <size_t> self.node
     @property
     def child(self):
         """Alias for the `first_child` property."""
@@ -91,7 +95,7 @@ cdef class LexborNode:
         text : str
         """
         cdef lexbor_str_t *lxb_str
-        cdef lxb_status_t lxb_status_t
+        cdef lxb_status_t status
         lxb_str = lexbor_str_create()
         status = lxb_html_serialize_tree_str(self.node, lxb_str)
@@ -101,6 +105,9 @@ cdef class LexborNode:
             return html
         return None
+    def __hash__(self):
+        return self.mem_id
     def text_lexbor(self):
         """Returns the text of the node including text of all its child nodes.
@@ -648,6 +655,57 @@ cdef class LexborNode:
         else:
             raise SelectolaxError("Expected a string or LexborNode instance, but %s found" % type(value).__name__)
+    def insert_child(self, str_or_LexborNode value):
+        """
+        Insert a node inside (at the end of) the current Node.
+        Parameters
+        ----------
+        value : str, bytes or Node
+            The text or Node instance to insert inside the Node.
+            When a text string is passed, it's treated as text. All HTML tags will be escaped.
+            Convert and pass the ``Node`` object when you want to work with HTML.
+            Does not clone the ``Node`` object.
+            All future changes to the passed ``Node`` object will also be taken into account.
+        Examples
+        --------
+        >>> tree = LexborHTMLParser('<div>Get <img src=""></div>')
+        >>> div = tree.css_first('div')
+        >>> div.insert_child('Laptop')
+        >>> tree.body.child.html
+        '<div>Get <img src="">Laptop</div>'
+        >>> html_parser = LexborHTMLParser('<div>Get <span alt="Laptop"> <div>Laptop</div> </span></div>')
+        >>> html_parser2 = LexborHTMLParser('<div>Test</div>')
+        >>> span_node = html_parser.css_first('span')
+        >>> span_node.insert_child(html_parser2.body.child)
+        '<div>Get <span alt="Laptop"> <div>Laptop</div> <div>Test</div> </span></div>'
+        """
+        cdef lxb_dom_node_t * new_node
+        if isinstance(value, (str, bytes, unicode)):
+            bytes_val = to_bytes(value)
+            new_node = <lxb_dom_node_t *> lxb_dom_document_create_text_node(
+                    &self.parser.document.dom_document,
+                    <lxb_char_t *> bytes_val, len(bytes_val)
+            )
+            if new_node == NULL:
+                raise SelectolaxError("Can't create a new node")
+            lxb_dom_node_insert_child(self.node,  new_node)
+        elif isinstance(value, LexborNode):
+            new_node = lxb_dom_document_import_node(
+                &self.parser.document.dom_document,
+                <lxb_dom_node_t *> value.node,
+                <bint> True
+            )
+            if new_node == NULL:
+                raise SelectolaxError("Can't create a new node")
+            lxb_dom_node_insert_child(self.node, <lxb_dom_node_t *> new_node)
+        else:
+            raise SelectolaxError("Expected a string or LexborNode instance, but %s found" % type(value).__name__)
     @property
     def raw_value(self):
         """Return the raw (unparsed, original) value of a node.

selectolax/lexbor/selection.pxi CHANGED Viewed

@@ -28,7 +28,7 @@ cdef class LexborCSSSelector:
         self.selectors = lxb_selectors_create()
         status = lxb_selectors_init(self.selectors)
+        lxb_selectors_opt_set(self.selectors, LXB_SELECTORS_OPT_MATCH_ROOT)
         if status != LXB_STATUS_OK:
             raise SelectolaxError("Can't initialize CSS selector.")
@@ -72,14 +72,17 @@ cdef class LexborCSSSelector:
             raise SelectolaxError("Can't parse CSS selector.")
         result = bool(self.results)
         self.results = []
+        lxb_css_selector_list_destroy_memory(selectors_list)
         return result
     def __dealloc__(self):
-        lxb_selectors_destroy(self.selectors, True)
-        lxb_css_parser_destroy(self.parser, True)
-        lxb_css_selectors_destroy(self.css_selectors, True)
-        # lxb_css_memory_destroy(, True)
+        if self.selectors != NULL:
+            lxb_selectors_destroy(self.selectors, True)
+        if self.parser != NULL:
+            lxb_css_parser_destroy(self.parser, True)
+        if self.css_selectors != NULL:
+            lxb_css_selectors_destroy(self.css_selectors, True)
@@ -103,16 +106,16 @@ cdef class LexborSelector:
         raise SelectolaxError("This features is not supported by the lexbor backend. Please use Modest backend.")
     @property
-    def matches(self):
+    def matches(self) -> list:
         """Returns all possible matches"""
         return self.nodes
     @property
-    def any_matches(self):
+    def any_matches(self) -> bool:
         """Returns True if there are any matches"""
         return bool(self.nodes)
-    def text_contains(self, str text, bool deep=True, str separator='', bool strip=False):
+    def text_contains(self, str text, bool deep=True, str separator='', bool strip=False) -> LexborSelector:
         """Filter all current matches given text."""
         nodes = []
         for node in self.nodes:
@@ -122,7 +125,7 @@ cdef class LexborSelector:
         self.nodes = nodes
         return self
-    def any_text_contains(self, str text, bool deep=True, str separator='', bool strip=False):
+    def any_text_contains(self, str text, bool deep=True, str separator='', bool strip=False) -> bool:
         """Returns True if any node in the current search scope contains specified text"""
         nodes = []
         for node in self.nodes:
@@ -131,7 +134,7 @@ cdef class LexborSelector:
                 return True
         return False
-    def attribute_longer_than(self, str attribute, int length, str start  = None):
+    def attribute_longer_than(self, str attribute, int length, str start  = None) -> LexborSelector:
         """Filter all current matches by attribute length.
         Similar to `string-length` in XPath.
@@ -146,7 +149,7 @@ cdef class LexborSelector:
         self.nodes = nodes
         return self
-    def any_attribute_longer_than(self, str attribute, int length, str start  = None):
+    def any_attribute_longer_than(self, str attribute, int length, str start  = None) -> bool:
         """Returns True if any href attribute is longer than a specified length.
         Similar to `string-length` in XPath.

selectolax/lexbor/util.pxi ADDED Viewed

@@ -0,0 +1,19 @@
+include "../utils.pxi"
+def create_tag(tag: str):
+    """
+    Given an HTML tag name, e.g. `"div"`, create a single empty node for that tag,
+    e.g. `"<div></div>"`.
+    """
+    return do_create_tag(tag, LexborHTMLParser)
+def parse_fragment(html: str):
+    """
+    Given HTML, parse it into a list of Nodes, such that the nodes
+    correspond to the given HTML.
+    For contrast, HTMLParser adds `<html>`, `<head>`, and `<body>` tags
+    if they are missing. This function does not add these tags.
+    """
+    return do_parse_fragment(html, LexborHTMLParser)