selectolax 0.3.30__cp310-cp310-win_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of selectolax might be problematic. Click here for more details.

@@ -0,0 +1,193 @@
1
+ cimport cython
2
+
3
@cython.final
cdef class CSSSelector:
    """A CSS selector parsed once with MyCSS and evaluated with the Modest finder.

    Construction parses the selector text; `find` can then be called with a
    scope node. The native objects created here are released in `__dealloc__`.
    """

    cdef char *c_selector
    cdef mycss_entry_t *css_entry
    cdef modest_finder_t *finder
    cdef mycss_selectors_list_t *selectors_list
    # Owns the UTF-8 buffer that `c_selector` points into.
    cdef bytes selector_bytes

    def __init__(self, str selector):
        """Parse `selector` and prepare a finder for it.

        Raises
        ------
        RuntimeError
            If the MyCSS object or the MyCSS entry can't be initialised.
        ValueError
            If the selector can't be parsed or is flagged as bad.
        """
        selector_pybyte = selector.encode('UTF-8')
        # `c_selector` only borrows the buffer of the bytes object, so keep the
        # object alive for as long as this instance exists.
        self.selector_bytes = selector_pybyte
        self.c_selector = selector_pybyte

        # In order to propagate errors these methods should return no value
        self._create_css_parser()
        self._prepare_selector(self.css_entry, self.c_selector, len(self.c_selector))
        self.finder = modest_finder_create_simple()

    cdef myhtml_collection_t* find(self, myhtml_tree_node_t* scope):
        """Find all possible matches.

        Returns the collection filled in by `modest_finder_by_selectors_list`;
        it stays NULL if the finder does not set it. Callers in this file
        release it with `myhtml_collection_destroy`.
        """
        cdef myhtml_collection_t *collection

        collection = NULL
        modest_finder_by_selectors_list(self.finder, scope, self.selectors_list, &collection)

        return collection

    cdef _create_css_parser(self):
        """Create and initialise the MyCSS object and its entry (`self.css_entry`)."""
        cdef mystatus_t status

        cdef mycss_t *mycss = mycss_create()
        status = mycss_init(mycss)

        if status != 0:
            raise RuntimeError("Can't init MyCSS object.")

        self.css_entry = mycss_entry_create()
        status = mycss_entry_init(mycss, self.css_entry)

        if status != 0:
            raise RuntimeError("Can't init MyCSS Entry object.")

    cdef _prepare_selector(self, mycss_entry_t *css_entry,
                           const char *selector, size_t selector_size):
        """Parse `selector` (UTF-8, `selector_size` bytes) into `self.selectors_list`.

        Raises ValueError when parsing yields no list or a list flagged as bad.
        """
        cdef mystatus_t out_status
        self.selectors_list = mycss_selectors_parse(mycss_entry_selectors(css_entry),
                                                    myencoding_t.MyENCODING_UTF_8,
                                                    selector, selector_size,
                                                    &out_status)

        # `flags` is a bit field: test the BAD bit rather than combining the two
        # values with a logical `and`, which is true for any non-zero flags.
        if (self.selectors_list == NULL) or (self.selectors_list.flags & MyCSS_SELECTORS_FLAGS_SELECTOR_BAD):
            raise ValueError("Bad CSS Selectors: %s" % self.c_selector.decode('utf-8'))

    def __dealloc__(self):
        # `__init__` may have raised part-way through (or never run), so any of
        # the native pointers can still be NULL here.
        cdef mycss_t *mycss

        if self.css_entry != NULL and self.selectors_list != NULL:
            mycss_selectors_list_destroy(mycss_entry_selectors(self.css_entry), self.selectors_list, 1)

        if self.finder != NULL:
            modest_finder_destroy(self.finder, 1)

        if self.css_entry != NULL:
            # Read the owner before the entry itself is destroyed.
            mycss = self.css_entry.mycss
            mycss_entry_destroy(self.css_entry, 1)
            mycss_destroy(mycss, 1)
68
+
69
+
70
cdef class Selector:
    """An advanced CSS selector that supports additional operations.

    Think of it as a toolkit that mimics some of the features of XPath.

    Filtering methods (`css`, `text_contains`, `attribute_longer_than`) narrow
    the current list of matches in place and return `self`, so they can be
    chained. The `any_*` methods only inspect the current matches.

    Please note, this is an experimental feature that can change in the future.
    """
    cdef Node node
    cdef list nodes

    def __init__(self, Node node, query):
        """custom init, because __cinit__ doesn't accept C types

        With a falsy `query` the initial match list is just `node` itself.
        """
        self.node = node
        self.nodes = find_nodes(node.parser, node.node, query) if query else [node, ]

    cpdef css(self, str query):
        """Evaluate CSS selector against current scope."""
        cdef Node current_node
        nodes = list()
        for node in self.nodes:
            # Typed local is needed to reach the C-level `node` attribute.
            current_node = node
            nodes.extend(find_nodes(self.node.parser, current_node.node, query))
        self.nodes = nodes
        return self

    @property
    def matches(self):
        """Returns all possible matches"""
        return self.nodes

    @property
    def any_matches(self):
        """Returns True if there are any matches"""
        return bool(self.nodes)

    def text_contains(self, str text, bool deep=True, str separator='', bool strip=False):
        """Filter all current matches given text."""
        nodes = []
        for node in self.nodes:
            node_text = node.text(deep=deep, separator=separator, strip=strip)
            if node_text and text in node_text:
                nodes.append(node)
        self.nodes = nodes
        return self

    def any_text_contains(self, str text, bool deep=True, str separator='', bool strip=False):
        """Returns True if any node in the current search scope contains specified text"""
        for node in self.nodes:
            node_text = node.text(deep=deep, separator=separator, strip=strip)
            if node_text and text in node_text:
                return True
        return False

    def attribute_longer_than(self, str attribute, int length, str start = None):
        """Filter all current matches by attribute length.

        If `start` is given and occurs in the attribute value, only the part
        after its first occurrence is measured. Nodes for which the attribute
        lookup yields None are dropped.

        Similar to `string-length` in XPath.
        """
        nodes = []
        for node in self.nodes:
            attr = node.attributes.get(attribute)
            if attr is None:
                # No value to measure; `len(None)` would raise TypeError.
                continue
            if attr and start and start in attr:
                attr = attr[attr.find(start) + len(start):]
            if len(attr) > length:
                nodes.append(node)
        self.nodes = nodes
        return self

    def any_attribute_longer_than(self, str attribute, int length, str start = None):
        """Returns True if any current match has the attribute longer than a specified length.

        If `start` is given and occurs in the attribute value, only the part
        after its first occurrence is measured. Nodes for which the attribute
        lookup yields None are ignored.

        Similar to `string-length` in XPath.
        """
        for node in self.nodes:
            attr = node.attributes.get(attribute)
            if attr is None:
                # No value to measure; `len(None)` would raise TypeError.
                continue
            if attr and start and start in attr:
                attr = attr[attr.find(start) + len(start):]
            if len(attr) > length:
                return True
        return False

    def __bool__(self):
        return bool(self.nodes)
156
+
157
cdef find_nodes(HTMLParser parser, myhtml_tree_node_t *node, str query):
    """Return a list of `Node` objects under `node` that match the CSS `query`.

    An empty list is returned when the selector search yields no collection.
    """
    cdef CSSSelector compiled = CSSSelector(query)
    cdef myhtml_collection_t *found = compiled.find(node)
    cdef Node wrapped
    cdef size_t idx

    matched = []
    if found != NULL:
        for idx in range(found.length):
            # Wrap every raw tree node in a Node bound to the same parser.
            wrapped = Node()
            wrapped._init(found.list[idx], parser)
            matched.append(wrapped)
        # The collection itself is no longer needed once the nodes are wrapped.
        myhtml_collection_destroy(found)
    return matched
173
+
174
+
175
cdef bool find_matches(HTMLParser parser, myhtml_tree_node_t *node, tuple selectors):
    """Return True as soon as any query in `selectors` matches something under `node`.

    Each query is compiled into its own `CSSSelector`; the resulting
    collection is destroyed right after its length has been read.
    """
    cdef myhtml_collection_t *found
    cdef CSSSelector compiled
    cdef int match_count

    for query in selectors:
        compiled = CSSSelector(query)
        found = compiled.find(node)
        if found != NULL:
            # Remember the size before releasing the collection.
            match_count = found.length
            myhtml_collection_destroy(found)
            if match_count > 0:
                return True
    return False
@@ -0,0 +1,19 @@
1
+ include "../utils.pxi"
2
+
3
def create_tag(tag: str):
    """
    Create a single empty node for the HTML tag named `tag`.

    For example, `"div"` produces a node corresponding to `"<div></div>"`.
    """
    new_node = do_create_tag(tag, HTMLParser)
    return new_node
9
+
10
+
11
def parse_fragment(html: str):
    """
    Parse `html` into a list of Nodes that correspond to the given HTML.

    Unlike HTMLParser, which adds `<html>`, `<head>`, and `<body>` tags
    when they are missing, this function does not add these tags.
    """
    fragment_nodes = do_parse_fragment(html, HTMLParser)
    return fragment_nodes