selectolax 0.3.29__cp39-cp39-musllinux_1_2_aarch64.whl → 0.4.0__cp39-cp39-musllinux_1_2_aarch64.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of selectolax might be problematic. Click here for more details.
- selectolax/__init__.py +3 -5
- selectolax/lexbor/attrs.pxi +26 -9
- selectolax/lexbor/node.pxi +215 -60
- selectolax/lexbor/node_remove.pxi +29 -0
- selectolax/lexbor/selection.pxi +57 -26
- selectolax/lexbor/util.pxi +1 -0
- selectolax/lexbor.c +24654 -25072
- selectolax/lexbor.cpython-39-aarch64-linux-gnu.so +0 -0
- selectolax/lexbor.pxd +44 -40
- selectolax/lexbor.pyi +847 -65
- selectolax/lexbor.pyx +94 -21
- selectolax/modest/node.pxi +49 -43
- selectolax/modest/selection.pxi +24 -22
- selectolax/modest/util.pxi +1 -0
- selectolax/parser.c +18015 -20066
- selectolax/parser.cpython-39-aarch64-linux-gnu.so +0 -0
- selectolax/parser.pxd +17 -20
- selectolax/parser.pyi +493 -46
- selectolax/parser.pyx +41 -33
- selectolax/utils.pxi +13 -3
- selectolax-0.4.0.dist-info/METADATA +32 -0
- selectolax-0.4.0.dist-info/RECORD +27 -0
- {selectolax-0.3.29.dist-info → selectolax-0.4.0.dist-info}/WHEEL +1 -1
- selectolax-0.3.29.dist-info/METADATA +0 -183
- selectolax-0.3.29.dist-info/RECORD +0 -26
- {selectolax-0.3.29.dist-info → selectolax-0.4.0.dist-info/licenses}/LICENSE +0 -0
- {selectolax-0.3.29.dist-info → selectolax-0.4.0.dist-info}/top_level.txt +0 -0
selectolax/lexbor/selection.pxi
CHANGED
|
@@ -1,4 +1,7 @@
|
|
|
1
1
|
cimport cython
|
|
2
|
+
from cpython.exc cimport PyErr_SetObject
|
|
3
|
+
from cpython.list cimport PyList_GET_SIZE
|
|
4
|
+
|
|
2
5
|
|
|
3
6
|
@cython.final
|
|
4
7
|
cdef class LexborCSSSelector:
|
|
@@ -8,21 +11,22 @@ cdef class LexborCSSSelector:
|
|
|
8
11
|
self.results = []
|
|
9
12
|
self.current_node = None
|
|
10
13
|
|
|
11
|
-
cdef _create_css_parser(self):
|
|
14
|
+
cdef int _create_css_parser(self) except -1:
|
|
12
15
|
cdef lxb_status_t status
|
|
13
16
|
|
|
14
|
-
|
|
15
17
|
self.parser = lxb_css_parser_create()
|
|
16
18
|
status = lxb_css_parser_init(self.parser, NULL)
|
|
17
19
|
|
|
18
20
|
if status != LXB_STATUS_OK:
|
|
19
|
-
|
|
21
|
+
PyErr_SetObject(SelectolaxError, "Can't initialize CSS parser.")
|
|
22
|
+
return -1
|
|
20
23
|
|
|
21
24
|
self.css_selectors = lxb_css_selectors_create()
|
|
22
25
|
status = lxb_css_selectors_init(self.css_selectors)
|
|
23
26
|
|
|
24
27
|
if status != LXB_STATUS_OK:
|
|
25
|
-
|
|
28
|
+
PyErr_SetObject(SelectolaxError, "Can't initialize CSS selector.")
|
|
29
|
+
return -1
|
|
26
30
|
|
|
27
31
|
lxb_css_parser_selectors_set(self.parser, self.css_selectors)
|
|
28
32
|
|
|
@@ -30,14 +34,24 @@ cdef class LexborCSSSelector:
|
|
|
30
34
|
status = lxb_selectors_init(self.selectors)
|
|
31
35
|
lxb_selectors_opt_set(self.selectors, LXB_SELECTORS_OPT_MATCH_ROOT)
|
|
32
36
|
if status != LXB_STATUS_OK:
|
|
33
|
-
|
|
37
|
+
PyErr_SetObject(SelectolaxError, "Can't initialize CSS selector.")
|
|
38
|
+
return -1
|
|
39
|
+
return 0
|
|
40
|
+
|
|
41
|
+
cpdef list find(self, str query, LexborNode node):
|
|
42
|
+
return self._find(query, node, 0)
|
|
34
43
|
|
|
44
|
+
cpdef list find_first(self, str query, LexborNode node):
|
|
45
|
+
return self._find(query, node, 1)
|
|
35
46
|
|
|
36
|
-
cpdef
|
|
47
|
+
cpdef list _find(self, str query, LexborNode node, bint only_first):
|
|
37
48
|
cdef lxb_css_selector_list_t* selectors
|
|
38
49
|
cdef lxb_char_t* c_selector
|
|
39
50
|
cdef lxb_css_selector_list_t * selectors_list
|
|
40
51
|
|
|
52
|
+
if not isinstance(query, str):
|
|
53
|
+
raise TypeError("Query must be a string.")
|
|
54
|
+
|
|
41
55
|
bytes_query = query.encode(_ENCODING)
|
|
42
56
|
selectors_list = lxb_css_selectors_parse(self.parser, <lxb_char_t *> bytes_query, <size_t>len(query))
|
|
43
57
|
|
|
@@ -46,36 +60,47 @@ cdef class LexborCSSSelector:
|
|
|
46
60
|
|
|
47
61
|
self.current_node = node
|
|
48
62
|
self.results = []
|
|
49
|
-
|
|
50
|
-
|
|
63
|
+
if only_first:
|
|
64
|
+
status = lxb_selectors_find(self.selectors, node.node, selectors_list,
|
|
65
|
+
<lxb_selectors_cb_f>css_finder_callback_first, <void*>self)
|
|
66
|
+
else:
|
|
67
|
+
status = lxb_selectors_find(self.selectors, node.node, selectors_list,
|
|
68
|
+
<lxb_selectors_cb_f>css_finder_callback, <void*>self)
|
|
51
69
|
results = list(self.results)
|
|
52
70
|
self.results = []
|
|
53
71
|
self.current_node = None
|
|
54
72
|
lxb_css_selector_list_destroy_memory(selectors_list)
|
|
55
73
|
return results
|
|
56
74
|
|
|
57
|
-
cpdef any_matches(self, str query, LexborNode node):
|
|
75
|
+
cpdef int any_matches(self, str query, LexborNode node) except -1:
|
|
58
76
|
cdef lxb_css_selector_list_t * selectors
|
|
59
77
|
cdef lxb_char_t * c_selector
|
|
60
78
|
cdef lxb_css_selector_list_t * selectors_list
|
|
79
|
+
cdef int result
|
|
80
|
+
|
|
81
|
+
if not isinstance(query, str):
|
|
82
|
+
raise TypeError("Query must be a string.")
|
|
61
83
|
|
|
62
84
|
bytes_query = query.encode(_ENCODING)
|
|
63
85
|
selectors_list = lxb_css_selectors_parse(self.parser, <lxb_char_t *> bytes_query, <size_t> len(query))
|
|
64
86
|
|
|
65
87
|
if selectors_list == NULL:
|
|
66
|
-
|
|
88
|
+
PyErr_SetObject(SelectolaxError, "Can't parse CSS selector.")
|
|
89
|
+
return -1
|
|
67
90
|
|
|
68
91
|
self.results = []
|
|
69
92
|
status = lxb_selectors_find(self.selectors, node.node, selectors_list,
|
|
70
93
|
<lxb_selectors_cb_f> css_matcher_callback, <void *> self)
|
|
71
94
|
if status != LXB_STATUS_OK:
|
|
72
|
-
|
|
73
|
-
|
|
95
|
+
lxb_css_selector_list_destroy_memory(selectors_list)
|
|
96
|
+
PyErr_SetObject(SelectolaxError, "Can't parse CSS selector.")
|
|
97
|
+
return -1
|
|
98
|
+
|
|
99
|
+
result = PyList_GET_SIZE(self.results) > 0
|
|
74
100
|
self.results = []
|
|
75
101
|
lxb_css_selector_list_destroy_memory(selectors_list)
|
|
76
102
|
return result
|
|
77
103
|
|
|
78
|
-
|
|
79
104
|
def __dealloc__(self):
|
|
80
105
|
if self.selectors != NULL:
|
|
81
106
|
lxb_selectors_destroy(self.selectors, True)
|
|
@@ -85,7 +110,6 @@ cdef class LexborCSSSelector:
|
|
|
85
110
|
lxb_css_selectors_destroy(self.css_selectors, True)
|
|
86
111
|
|
|
87
112
|
|
|
88
|
-
|
|
89
113
|
cdef class LexborSelector:
|
|
90
114
|
"""An advanced CSS selector that supports additional operations.
|
|
91
115
|
|
|
@@ -100,10 +124,9 @@ cdef class LexborSelector:
|
|
|
100
124
|
self.node = node
|
|
101
125
|
self.nodes = self.node.parser.selector.find(query, self.node) if query else [node, ]
|
|
102
126
|
|
|
103
|
-
|
|
104
127
|
cpdef css(self, str query):
|
|
105
128
|
"""Evaluate CSS selector against current scope."""
|
|
106
|
-
raise
|
|
129
|
+
raise NotImplementedError("This features is not supported by the lexbor backend. Please use Modest backend.")
|
|
107
130
|
|
|
108
131
|
@property
|
|
109
132
|
def matches(self) -> list:
|
|
@@ -117,7 +140,7 @@ cdef class LexborSelector:
|
|
|
117
140
|
|
|
118
141
|
def text_contains(self, str text, bool deep=True, str separator='', bool strip=False) -> LexborSelector:
|
|
119
142
|
"""Filter all current matches given text."""
|
|
120
|
-
nodes = []
|
|
143
|
+
cdef list nodes = []
|
|
121
144
|
for node in self.nodes:
|
|
122
145
|
node_text = node.text(deep=deep, separator=separator, strip=strip)
|
|
123
146
|
if node_text and text in node_text:
|
|
@@ -127,7 +150,7 @@ cdef class LexborSelector:
|
|
|
127
150
|
|
|
128
151
|
def any_text_contains(self, str text, bool deep=True, str separator='', bool strip=False) -> bool:
|
|
129
152
|
"""Returns True if any node in the current search scope contains specified text"""
|
|
130
|
-
|
|
153
|
+
cdef LexborNode node
|
|
131
154
|
for node in self.nodes:
|
|
132
155
|
node_text = node.text(deep=deep, separator=separator, strip=strip)
|
|
133
156
|
if node_text and text in node_text:
|
|
@@ -139,7 +162,7 @@ cdef class LexborSelector:
|
|
|
139
162
|
|
|
140
163
|
Similar to `string-length` in XPath.
|
|
141
164
|
"""
|
|
142
|
-
nodes = []
|
|
165
|
+
cdef list nodes = []
|
|
143
166
|
for node in self.nodes:
|
|
144
167
|
attr = node.attributes.get(attribute)
|
|
145
168
|
if attr and start and start in attr:
|
|
@@ -154,7 +177,7 @@ cdef class LexborSelector:
|
|
|
154
177
|
|
|
155
178
|
Similar to `string-length` in XPath.
|
|
156
179
|
"""
|
|
157
|
-
|
|
180
|
+
cdef LexborNode node
|
|
158
181
|
for node in self.nodes:
|
|
159
182
|
attr = node.attributes.get(attribute)
|
|
160
183
|
if attr and start and start in attr:
|
|
@@ -169,16 +192,24 @@ cdef class LexborSelector:
|
|
|
169
192
|
|
|
170
193
|
cdef lxb_status_t css_finder_callback(lxb_dom_node_t *node, lxb_css_selector_specificity_t *spec, void *ctx):
|
|
171
194
|
cdef LexborNode lxb_node
|
|
172
|
-
cdef
|
|
173
|
-
cls = <
|
|
174
|
-
lxb_node = LexborNode()
|
|
175
|
-
lxb_node._cinit(<lxb_dom_node_t *> node, cls.current_node.parser)
|
|
195
|
+
cdef LexborCSSSelector cls
|
|
196
|
+
cls = <LexborCSSSelector> ctx
|
|
197
|
+
lxb_node = LexborNode.new(<lxb_dom_node_t *> node, cls.current_node.parser)
|
|
176
198
|
cls.results.append(lxb_node)
|
|
177
199
|
return LXB_STATUS_OK
|
|
178
200
|
|
|
201
|
+
cdef lxb_status_t css_finder_callback_first(lxb_dom_node_t *node, lxb_css_selector_specificity_t *spec, void *ctx):
|
|
202
|
+
cdef LexborNode lxb_node
|
|
203
|
+
cdef LexborCSSSelector cls
|
|
204
|
+
cls = <LexborCSSSelector> ctx
|
|
205
|
+
lxb_node = LexborNode.new(<lxb_dom_node_t *> node, cls.current_node.parser)
|
|
206
|
+
cls.results.append(lxb_node)
|
|
207
|
+
return LXB_STATUS_STOP
|
|
208
|
+
|
|
209
|
+
|
|
179
210
|
cdef lxb_status_t css_matcher_callback(lxb_dom_node_t *node, lxb_css_selector_specificity_t *spec, void *ctx):
|
|
180
211
|
cdef LexborNode lxb_node
|
|
181
|
-
cdef
|
|
182
|
-
cls = <
|
|
212
|
+
cdef LexborCSSSelector cls
|
|
213
|
+
cls = <LexborCSSSelector> ctx
|
|
183
214
|
cls.results.append(True)
|
|
184
215
|
return LXB_STATUS_STOP
|