selectolax 0.3.27__cp311-cp311-musllinux_1_2_x86_64.whl → 0.3.29__cp311-cp311-musllinux_1_2_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of selectolax might be problematic. Click here for more details.
- selectolax/__init__.py +1 -1
- selectolax/lexbor/attrs.pxi +2 -1
- selectolax/lexbor/node.pxi +17 -5
- selectolax/lexbor/selection.pxi +12 -10
- selectolax/lexbor.c +2735 -2481
- selectolax/lexbor.cpython-311-x86_64-linux-musl.so +0 -0
- selectolax/lexbor.pyi +3 -3
- selectolax/lexbor.pyx +8 -4
- selectolax/modest/node.pxi +21 -5
- selectolax/parser.c +1857 -1703
- selectolax/parser.cpython-311-x86_64-linux-musl.so +0 -0
- selectolax/parser.pyi +15 -22
- selectolax/parser.pyx +4 -2
- {selectolax-0.3.27.dist-info → selectolax-0.3.29.dist-info}/LICENSE +1 -1
- {selectolax-0.3.27.dist-info → selectolax-0.3.29.dist-info}/METADATA +7 -1
- selectolax-0.3.29.dist-info/RECORD +26 -0
- selectolax-0.3.27.dist-info/RECORD +0 -26
- {selectolax-0.3.27.dist-info → selectolax-0.3.29.dist-info}/WHEEL +0 -0
- {selectolax-0.3.27.dist-info → selectolax-0.3.29.dist-info}/top_level.txt +0 -0
selectolax/__init__.py
CHANGED
selectolax/lexbor/attrs.pxi
CHANGED
|
@@ -19,8 +19,9 @@ cdef class LexborAttributes:
|
|
|
19
19
|
|
|
20
20
|
while attr != NULL:
|
|
21
21
|
key = lxb_dom_attr_local_name_noi(attr, &str_len)
|
|
22
|
+
if key is not NULL:
|
|
23
|
+
yield key.decode(_ENCODING)
|
|
22
24
|
attr = attr.next
|
|
23
|
-
yield key.decode(_ENCODING)
|
|
24
25
|
|
|
25
26
|
def __setitem__(self, str key, value):
|
|
26
27
|
value = str(value)
|
selectolax/lexbor/node.pxi
CHANGED
|
@@ -95,7 +95,7 @@ cdef class LexborNode:
|
|
|
95
95
|
text : str
|
|
96
96
|
"""
|
|
97
97
|
cdef lexbor_str_t *lxb_str
|
|
98
|
-
cdef lxb_status_t
|
|
98
|
+
cdef lxb_status_t status
|
|
99
99
|
|
|
100
100
|
lxb_str = lexbor_str_create()
|
|
101
101
|
status = lxb_html_serialize_tree_str(self.node, lxb_str)
|
|
@@ -416,9 +416,14 @@ cdef class LexborNode:
|
|
|
416
416
|
node = node.next
|
|
417
417
|
|
|
418
418
|
|
|
419
|
-
def unwrap(self):
|
|
419
|
+
def unwrap(self, delete_empty=False):
|
|
420
420
|
"""Replace node with whatever is inside this node.
|
|
421
421
|
|
|
422
|
+
Parameters
|
|
423
|
+
----------
|
|
424
|
+
delete_empty : bool, default False
|
|
425
|
+
If True, removes empty tags.
|
|
426
|
+
|
|
422
427
|
Examples
|
|
423
428
|
--------
|
|
424
429
|
|
|
@@ -426,9 +431,12 @@ cdef class LexborNode:
|
|
|
426
431
|
>>> tree.css_first('i').unwrap()
|
|
427
432
|
>>> tree.html
|
|
428
433
|
'<html><head></head><body><div>Hello world!</div></body></html>'
|
|
429
|
-
|
|
434
|
+
|
|
435
|
+
Note: by default, empty tags are ignored, use "delete_empty" to change this.
|
|
430
436
|
"""
|
|
431
437
|
if self.node.first_child == NULL:
|
|
438
|
+
if delete_empty:
|
|
439
|
+
lxb_dom_node_destroy(<lxb_dom_node_t *> self.node)
|
|
432
440
|
return
|
|
433
441
|
cdef lxb_dom_node_t* next_node;
|
|
434
442
|
cdef lxb_dom_node_t* current_node;
|
|
@@ -445,7 +453,7 @@ cdef class LexborNode:
|
|
|
445
453
|
lxb_dom_node_insert_before(self.node, self.node.first_child)
|
|
446
454
|
lxb_dom_node_destroy(<lxb_dom_node_t *> self.node)
|
|
447
455
|
|
|
448
|
-
def unwrap_tags(self, list tags):
|
|
456
|
+
def unwrap_tags(self, list tags, delete_empty = False):
|
|
449
457
|
"""Unwraps specified tags from the HTML tree.
|
|
450
458
|
|
|
451
459
|
Works the same as the ``unwrap`` method, but applied to a list of tags.
|
|
@@ -454,6 +462,8 @@ cdef class LexborNode:
|
|
|
454
462
|
----------
|
|
455
463
|
tags : list
|
|
456
464
|
List of tags to remove.
|
|
465
|
+
delete_empty : bool, default False
|
|
466
|
+
If True, removes empty tags.
|
|
457
467
|
|
|
458
468
|
Examples
|
|
459
469
|
--------
|
|
@@ -462,11 +472,13 @@ cdef class LexborNode:
|
|
|
462
472
|
>>> tree.body.unwrap_tags(['i','a'])
|
|
463
473
|
>>> tree.body.html
|
|
464
474
|
'<body><div>Hello world!</div></body>'
|
|
475
|
+
|
|
476
|
+
Note: by default, empty tags are ignored, use "delete_empty" to change this.
|
|
465
477
|
"""
|
|
466
478
|
|
|
467
479
|
for tag in tags:
|
|
468
480
|
for element in self.css(tag):
|
|
469
|
-
element.unwrap()
|
|
481
|
+
element.unwrap(delete_empty)
|
|
470
482
|
|
|
471
483
|
|
|
472
484
|
def traverse(self, include_text=False):
|
selectolax/lexbor/selection.pxi
CHANGED
|
@@ -77,10 +77,12 @@ cdef class LexborCSSSelector:
|
|
|
77
77
|
|
|
78
78
|
|
|
79
79
|
def __dealloc__(self):
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
80
|
+
if self.selectors != NULL:
|
|
81
|
+
lxb_selectors_destroy(self.selectors, True)
|
|
82
|
+
if self.parser != NULL:
|
|
83
|
+
lxb_css_parser_destroy(self.parser, True)
|
|
84
|
+
if self.css_selectors != NULL:
|
|
85
|
+
lxb_css_selectors_destroy(self.css_selectors, True)
|
|
84
86
|
|
|
85
87
|
|
|
86
88
|
|
|
@@ -104,16 +106,16 @@ cdef class LexborSelector:
|
|
|
104
106
|
raise SelectolaxError("This features is not supported by the lexbor backend. Please use Modest backend.")
|
|
105
107
|
|
|
106
108
|
@property
|
|
107
|
-
def matches(self):
|
|
109
|
+
def matches(self) -> list:
|
|
108
110
|
"""Returns all possible matches"""
|
|
109
111
|
return self.nodes
|
|
110
112
|
|
|
111
113
|
@property
|
|
112
|
-
def any_matches(self):
|
|
114
|
+
def any_matches(self) -> bool:
|
|
113
115
|
"""Returns True if there are any matches"""
|
|
114
116
|
return bool(self.nodes)
|
|
115
117
|
|
|
116
|
-
def text_contains(self, str text, bool deep=True, str separator='', bool strip=False):
|
|
118
|
+
def text_contains(self, str text, bool deep=True, str separator='', bool strip=False) -> LexborSelector:
|
|
117
119
|
"""Filter all current matches given text."""
|
|
118
120
|
nodes = []
|
|
119
121
|
for node in self.nodes:
|
|
@@ -123,7 +125,7 @@ cdef class LexborSelector:
|
|
|
123
125
|
self.nodes = nodes
|
|
124
126
|
return self
|
|
125
127
|
|
|
126
|
-
def any_text_contains(self, str text, bool deep=True, str separator='', bool strip=False):
|
|
128
|
+
def any_text_contains(self, str text, bool deep=True, str separator='', bool strip=False) -> bool:
|
|
127
129
|
"""Returns True if any node in the current search scope contains specified text"""
|
|
128
130
|
nodes = []
|
|
129
131
|
for node in self.nodes:
|
|
@@ -132,7 +134,7 @@ cdef class LexborSelector:
|
|
|
132
134
|
return True
|
|
133
135
|
return False
|
|
134
136
|
|
|
135
|
-
def attribute_longer_than(self, str attribute, int length, str start = None):
|
|
137
|
+
def attribute_longer_than(self, str attribute, int length, str start = None) -> LexborSelector:
|
|
136
138
|
"""Filter all current matches by attribute length.
|
|
137
139
|
|
|
138
140
|
Similar to `string-length` in XPath.
|
|
@@ -147,7 +149,7 @@ cdef class LexborSelector:
|
|
|
147
149
|
self.nodes = nodes
|
|
148
150
|
return self
|
|
149
151
|
|
|
150
|
-
def any_attribute_longer_than(self, str attribute, int length, str start = None):
|
|
152
|
+
def any_attribute_longer_than(self, str attribute, int length, str start = None) -> bool:
|
|
151
153
|
"""Returns True any href attribute longer than a specified length.
|
|
152
154
|
|
|
153
155
|
Similar to `string-length` in XPath.
|