selectolax 0.4.4__cp310-cp310-macosx_10_9_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,195 @@
1
+ cimport cython
2
+ from cpython.exc cimport PyErr_SetObject
3
+
4
+
5
@cython.final
cdef class CSSSelector:
    """A CSS selector parsed once and reusable against many scope nodes.

    ``__init__`` creates the MyCSS parser objects, parses the selector text
    and creates the finder; ``find`` then runs the parsed selector list
    against a subtree. All native objects are released in ``__dealloc__``.
    """

    # Owns the UTF-8 buffer that ``c_selector`` points into, so the pointer
    # stays valid for as long as the instance lives.
    cdef bytes selector_bytes
    cdef char *c_selector
    cdef mycss_entry_t *css_entry
    cdef modest_finder_t *finder
    cdef mycss_selectors_list_t *selectors_list

    def __init__(self, str selector):
        """Parse ``selector`` and prepare the finder.

        Raises ``RuntimeError`` if the CSS parser objects cannot be created
        or initialised, and ``ValueError`` if the selector is rejected.
        """
        selector_pybyte = selector.encode('UTF-8')
        # Keep a reference on the instance: a ``char *`` taken from a bytes
        # object is only valid while that bytes object is alive.
        self.selector_bytes = selector_pybyte
        self.c_selector = selector_pybyte

        # In order to propagate errors these methods should return no value
        self._create_css_parser()
        self._prepare_selector(self.css_entry, self.c_selector, len(self.c_selector))
        self.finder = modest_finder_create_simple()

    cdef myhtml_collection_t* find(self, myhtml_tree_node_t* scope):
        """Find all possible matches.

        Returns the collection filled in by ``modest_finder_by_selectors_list``
        (it stays ``NULL`` if the finder does not set it). Every caller in
        this file frees a non-NULL result with ``myhtml_collection_destroy``.
        """
        cdef myhtml_collection_t *collection = NULL

        modest_finder_by_selectors_list(self.finder, scope, self.selectors_list, &collection)
        return collection

    cdef int _create_css_parser(self) except -1:
        """Create and initialise the MyCSS object and its entry.

        On success ``self.css_entry`` is ready for parsing. Raises
        ``RuntimeError`` when a native object cannot be created or
        initialised.
        """
        cdef mystatus_t status
        cdef mycss_t *mycss = mycss_create()

        if mycss == NULL:
            raise RuntimeError("Can't init MyCSS object.")

        status = mycss_init(mycss)
        if status != 0:
            # Nothing else references ``mycss`` yet, so release it here
            # instead of leaking it.
            mycss_destroy(mycss, 1)
            raise RuntimeError("Can't init MyCSS object.")

        self.css_entry = mycss_entry_create()
        if self.css_entry == NULL:
            mycss_destroy(mycss, 1)
            raise RuntimeError("Can't init MyCSS Entry object.")

        # From here on ``__dealloc__`` releases both objects through
        # ``self.css_entry``, including when the init call below fails.
        status = mycss_entry_init(mycss, self.css_entry)
        if status != 0:
            raise RuntimeError("Can't init MyCSS Entry object.")
        return 0

    cdef int _prepare_selector(self, mycss_entry_t *css_entry, const char *selector, size_t selector_size) except -1:
        """Parse ``selector`` into ``self.selectors_list``.

        Raises ``ValueError`` when the parser returns no list or marks the
        list with ``MyCSS_SELECTORS_FLAGS_SELECTOR_BAD``.
        """
        cdef mystatus_t out_status

        self.selectors_list = mycss_selectors_parse(
            mycss_entry_selectors(css_entry),
            myencoding_t.MyENCODING_UTF_8,
            selector,
            selector_size,
            &out_status,
        )

        # ``flags`` is a bit field: test the BAD bit with a bitwise AND rather
        # than the truthiness of the whole value.
        if (self.selectors_list == NULL) or (self.selectors_list.flags & MyCSS_SELECTORS_FLAGS_SELECTOR_BAD):
            raise ValueError("Bad CSS Selectors: %s" % self.c_selector.decode('utf-8'))
        return 0

    def __dealloc__(self):
        """Release every native object this instance managed to create."""
        cdef mycss_t *mycss

        # C pointer attributes start out as NULL, so any of them may still be
        # NULL here if ``__init__`` raised early or never ran.
        if self.css_entry != NULL and self.selectors_list != NULL:
            mycss_selectors_list_destroy(mycss_entry_selectors(self.css_entry), self.selectors_list, 1)
        if self.finder != NULL:
            modest_finder_destroy(self.finder, 1)

        if self.css_entry != NULL:
            # Read the owning mycss pointer before the entry is destroyed.
            mycss = self.css_entry.mycss
            mycss_entry_destroy(self.css_entry, 1)
            mycss_destroy(mycss, 1)
68
+
69
+
70
cdef class Selector:
    """An advanced CSS selector that supports additional operations.

    Think of it as a toolkit that mimics some of the features of XPath.

    Filtering methods (``css``, ``text_contains``, ``attribute_longer_than``)
    narrow the current match list in place and return ``self`` so calls can
    be chained; the ``any_*`` methods only test and leave the matches as is.

    Please note, this is an experimental feature that can change in the future.
    """
    cdef Node node
    cdef list nodes

    def __init__(self, Node node, str query):
        """custom init, because __cinit__ doesn't accept C types

        With an empty (or ``None``) ``query`` the initial match list is just
        ``node`` itself; otherwise it is the result of running ``query``
        against ``node``.
        """
        self.node = node
        self.nodes = find_nodes(node.parser, node.node, query) if query else [node, ]

    cpdef css(self, str query):
        """Evaluate CSS selector against current scope.

        Runs ``query`` under every current match and replaces the match list
        with the concatenated results.
        """
        cdef Node current_node
        cdef list matched = []
        for current_node in self.nodes:
            matched.extend(find_nodes(self.node.parser, current_node.node, query))
        self.nodes = matched
        return self

    @property
    def matches(self):
        """Returns all possible matches"""
        return self.nodes

    @property
    def any_matches(self):
        """Returns True if there are any matches"""
        return bool(self.nodes)

    def text_contains(self, str text, bool deep=True, str separator='', bool strip=False):
        """Filter all current matches given text.

        ``deep``, ``separator`` and ``strip`` are forwarded to ``Node.text``.
        """
        cdef Node node
        cdef list kept = []
        for node in self.nodes:
            node_text = node.text(deep=deep, separator=separator, strip=strip)
            if node_text and text in node_text:
                kept.append(node)
        self.nodes = kept
        return self

    def any_text_contains(self, str text, bool deep=True, str separator='', bool strip=False):
        """Returns True if any node in the current search scope contains specified text"""
        cdef Node node
        for node in self.nodes:
            node_text = node.text(deep=deep, separator=separator, strip=strip)
            if node_text and text in node_text:
                return True
        return False

    cdef int _attribute_longer_than(self, Node node, str attribute, int length, str start) except -1:
        """Return 1 if ``node``'s ``attribute`` value is longer than ``length``.

        When ``start`` is given and occurs in the value, only the part after
        its first occurrence is measured. A node whose attribute lookup
        yields ``None`` never matches.
        """
        attr = node.attributes.get(attribute)
        if attr is None:
            # ``len(None)`` would raise TypeError; a missing value has no
            # length to compare, so it simply does not match.
            return 0
        if start and start in attr:
            attr = attr[attr.find(start) + len(start):]
        return 1 if len(attr) > length else 0

    def attribute_longer_than(self, str attribute, int length, str start = None):
        """Filter all current matches by attribute length.

        Keeps the nodes whose ``attribute`` value (or, with ``start``, the
        part after the first occurrence of ``start``) is longer than
        ``length``. Nodes without a value for the attribute are dropped.

        Similar to `string-length` in XPath.
        """
        cdef Node node
        cdef list kept = []
        for node in self.nodes:
            if self._attribute_longer_than(node, attribute, length, start):
                kept.append(node)
        self.nodes = kept
        return self

    def any_attribute_longer_than(self, str attribute, int length, str start = None):
        """Returns True if any match has the attribute longer than a specified length.

        Uses the same measuring rules as ``attribute_longer_than`` but does
        not modify the current matches.

        Similar to `string-length` in XPath.
        """
        cdef Node node
        for node in self.nodes:
            if self._attribute_longer_than(node, attribute, length, start):
                return True
        return False

    def __bool__(self):
        return bool(self.nodes)
158
+
159
cdef find_nodes(HTMLParser parser, myhtml_tree_node_t *node, str query):
    """Return a list of ``Node`` objects matching ``query`` under ``node``.

    A ``CSSSelector`` is built from ``query`` (so an invalid selector raises
    from its constructor), run against ``node``, and every entry of the
    resulting collection is wrapped with ``Node.new``. An empty list is
    returned when the finder produces no collection.
    """
    cdef myhtml_collection_t *collection
    cdef CSSSelector selector = CSSSelector(query)
    cdef list result = []
    cdef size_t i

    collection = selector.find(node)
    if collection == NULL:
        return result

    # Free the native collection even if wrapping a node raises, so an
    # exception part-way through the loop does not leak it.
    try:
        for i in range(collection.length):
            result.append(Node.new(collection.list[i], parser))
    finally:
        myhtml_collection_destroy(collection)
    return result
174
+
175
+
176
cdef bool find_matches(HTMLParser parser, myhtml_tree_node_t *node, tuple selectors):
    """Tell whether at least one selector in ``selectors`` matches under ``node``.

    Selectors are tried in order and the search stops at the first one that
    yields a non-empty collection. ``parser`` is accepted but not used here.
    """
    cdef myhtml_collection_t *found
    cdef CSSSelector compiled
    cdef int match_count
    cdef str query

    for query in selectors:
        compiled = CSSSelector(query)
        found = compiled.find(node)

        if found != NULL:
            # Copy the size out first: the collection is freed before the
            # count is inspected.
            match_count = found.length
            myhtml_collection_destroy(found)
            if match_count > 0:
                return True
    return False
@@ -0,0 +1,20 @@
1
+ include "../utils.pxi"
2
+
3
+
4
def create_tag(tag: str):
    """
    Build a single empty node for the HTML tag name ``tag``.

    For instance, passing `"div"` produces the node for `"<div></div>"`.
    The work is delegated to ``do_create_tag`` using ``HTMLParser``.
    """
    new_node = do_create_tag(tag, HTMLParser)
    return new_node
10
+
11
+
12
def parse_fragment(html: str):
    """
    Parse ``html`` into a list of Nodes that correspond to the given markup.

    Unlike HTMLParser, which inserts `<html>`, `<head>`, and `<body>` tags
    when they are missing, this function does not add them. The work is
    delegated to ``do_parse_fragment`` using ``HTMLParser``.
    """
    fragment_nodes = do_parse_fragment(html, HTMLParser)
    return fragment_nodes