selectolax 0.3.29__cp312-cp312-win_amd64.whl → 0.3.31__cp312-cp312-win_amd64.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of selectolax might be problematic. Click here for more details.
- selectolax/__init__.py +1 -1
- selectolax/lexbor/attrs.pxi +27 -9
- selectolax/lexbor/node.pxi +9 -6
- selectolax/lexbor/selection.pxi +7 -0
- selectolax/lexbor.c +53277 -55311
- selectolax/lexbor.cp312-win_amd64.pyd +0 -0
- selectolax/lexbor.pxd +5 -6
- selectolax/lexbor.pyi +648 -61
- selectolax/lexbor.pyx +6 -0
- selectolax/modest/node.pxi +8 -6
- selectolax/parser.c +50957 -52325
- selectolax/parser.cp312-win_amd64.pyd +0 -0
- selectolax/parser.pyi +487 -43
- selectolax/parser.pyx +15 -4
- {selectolax-0.3.29.dist-info → selectolax-0.3.31.dist-info}/METADATA +10 -17
- selectolax-0.3.31.dist-info/RECORD +26 -0
- {selectolax-0.3.29.dist-info → selectolax-0.3.31.dist-info}/WHEEL +1 -1
- selectolax-0.3.29.dist-info/RECORD +0 -26
- {selectolax-0.3.29.dist-info → selectolax-0.3.31.dist-info}/licenses/LICENSE +0 -0
- {selectolax-0.3.29.dist-info → selectolax-0.3.31.dist-info}/top_level.txt +0 -0
selectolax/lexbor.pyx
CHANGED
|
@@ -96,6 +96,12 @@ cdef class LexborHTMLParser:
|
|
|
96
96
|
name : str (e.g. div)
|
|
97
97
|
|
|
98
98
|
"""
|
|
99
|
+
|
|
100
|
+
if not name:
|
|
101
|
+
raise ValueError("Tag name cannot be empty")
|
|
102
|
+
if len(name) > 100:
|
|
103
|
+
raise ValueError("Tag name is too long")
|
|
104
|
+
|
|
99
105
|
cdef lxb_dom_collection_t* collection = NULL
|
|
100
106
|
cdef lxb_status_t status
|
|
101
107
|
pybyte_name = name.encode('UTF-8')
|
selectolax/modest/node.pxi
CHANGED
|
@@ -14,6 +14,8 @@ cdef class Stack:
|
|
|
14
14
|
self.capacity = capacity
|
|
15
15
|
self.top = 0
|
|
16
16
|
self._stack = <myhtml_tree_node_t**> malloc(capacity * sizeof(myhtml_tree_node_t))
|
|
17
|
+
if self._stack == NULL:
|
|
18
|
+
raise MemoryError("Failed to allocate memory for stack")
|
|
17
19
|
|
|
18
20
|
def __dealloc__(self):
|
|
19
21
|
free(self._stack)
|
|
@@ -131,7 +133,7 @@ cdef class _Attributes:
|
|
|
131
133
|
|
|
132
134
|
|
|
133
135
|
ctypedef fused str_or_Node:
|
|
134
|
-
|
|
136
|
+
str
|
|
135
137
|
bytes
|
|
136
138
|
Node
|
|
137
139
|
|
|
@@ -595,7 +597,7 @@ cdef class Node:
|
|
|
595
597
|
>>> tree.body.unwrap_tags(['i','a'])
|
|
596
598
|
>>> tree.body.html
|
|
597
599
|
'<body><div>Hello world!</div></body>'
|
|
598
|
-
|
|
600
|
+
|
|
599
601
|
Note: by default, empty tags are ignored, set "delete_empty" to "True" to change this.
|
|
600
602
|
"""
|
|
601
603
|
|
|
@@ -783,7 +785,7 @@ cdef class Node:
|
|
|
783
785
|
>>> tree.body.unwrap_tags(['i','a'])
|
|
784
786
|
>>> tree.body.html
|
|
785
787
|
'<body><div>Hello world!</div></body>'
|
|
786
|
-
|
|
788
|
+
|
|
787
789
|
Note: by default, empty tags are ignored, set "delete_empty" to "True" to change this.
|
|
788
790
|
"""
|
|
789
791
|
|
|
@@ -978,8 +980,8 @@ cdef inline str append_text(str text, str node_text, str separator='', bint stri
|
|
|
978
980
|
|
|
979
981
|
cdef inline bytes to_bytes(str_or_Node value):
|
|
980
982
|
cdef bytes bytes_val
|
|
981
|
-
if isinstance(value,
|
|
982
|
-
bytes_val = value.encode(
|
|
983
|
+
if isinstance(value, unicode):
|
|
984
|
+
bytes_val = <bytes>value.encode("utf-8")
|
|
983
985
|
elif isinstance(value, bytes):
|
|
984
|
-
bytes_val =
|
|
986
|
+
bytes_val = <bytes>value
|
|
985
987
|
return bytes_val
|