selectolax 0.3.31__cp313-cp313-macosx_11_0_arm64.whl → 0.3.33__cp313-cp313-macosx_11_0_arm64.whl

This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Potentially problematic release.


This version of selectolax might be problematic. See the release's advisory page for more details.

@@ -1,4 +1,5 @@
1
1
  cimport cython
2
+ from cpython.exc cimport PyErr_NoMemory
2
3
 
3
4
  from libc.stdlib cimport free
4
5
  from libc.stdlib cimport malloc
@@ -8,6 +9,7 @@ from libc.string cimport memcpy
8
9
  DEF _STACK_SIZE = 100
9
10
  DEF _ENCODING = 'UTF-8'
10
11
 
12
+
11
13
  @cython.final
12
14
  cdef class Stack:
13
15
  def __cinit__(self, size_t capacity=25):
@@ -23,9 +25,10 @@ cdef class Stack:
23
25
  cdef bint is_empty(self):
24
26
  return self.top <= 0
25
27
 
26
- cdef push(self, myhtml_tree_node_t* res):
28
+ cdef int push(self, myhtml_tree_node_t* res) except -1:
27
29
  if self.top >= self.capacity:
28
- self.resize()
30
+ if self.resize() < 0:
31
+ return -1
29
32
  self._stack[self.top] = res
30
33
  self.top += 1
31
34
 
@@ -33,10 +36,13 @@ cdef class Stack:
33
36
  self.top = self.top - 1
34
37
  return self._stack[self.top]
35
38
 
36
- cdef resize(self):
39
+ cdef int resize(self) except -1:
37
40
  self.capacity *= 2
38
41
  self._stack = <myhtml_tree_node_t**> realloc(<void*> self._stack, self.capacity * sizeof(myhtml_tree_node_t))
39
-
42
+ if self._stack == NULL:
43
+ PyErr_NoMemory()
44
+ return -1
45
+ return 0
40
46
 
41
47
  cdef class _Attributes:
42
48
  """A dict-like object that represents attributes."""
@@ -130,25 +136,24 @@ cdef class _Attributes:
130
136
  tag_name = c_text.decode(_ENCODING, 'ignore') if c_text != NULL else 'unknown'
131
137
  return "<%s attributes, %s items>" % (tag_name, len(self))
132
138
 
133
-
134
-
135
139
  ctypedef fused str_or_Node:
136
140
  str
137
141
  bytes
138
142
  Node
139
143
 
140
-
141
144
  cdef class Node:
142
145
  """A class that represents HTML node (element)."""
143
146
  cdef myhtml_tree_node_t *node
144
147
  cdef public HTMLParser parser
145
148
 
146
-
147
- cdef _init(self, myhtml_tree_node_t *node, HTMLParser parser):
148
- # custom init, because __cinit__ doesn't accept C types
149
- self.node = node
149
+ @staticmethod
150
+ cdef Node new(myhtml_tree_node_t *node, HTMLParser parser):
151
+ # custom __init__ for C, because __cinit__ doesn't accept C types
152
+ cdef Node cls = Node.__new__(Node)
153
+ cls.node = node
150
154
  # Keep reference to the selector object, so myhtml structures will not be garbage collected prematurely
151
- self.parser = parser
155
+ cls.parser = parser
156
+ return cls
152
157
 
153
158
  @property
154
159
  def attributes(self):
@@ -288,7 +293,7 @@ cdef class Node:
288
293
  cdef inline _text_deep(self, myhtml_tree_node_t *node, separator='', strip=False):
289
294
  text = ""
290
295
  cdef Stack stack = Stack(_STACK_SIZE)
291
- cdef myhtml_tree_node_t* current_node = NULL;
296
+ cdef myhtml_tree_node_t* current_node = NULL
292
297
 
293
298
  if node.tag_id == MyHTML_TAG__TEXT:
294
299
  c_text = myhtml_node_text(node, NULL)
@@ -341,12 +346,10 @@ cdef class Node:
341
346
  node = node.next
342
347
  continue
343
348
 
344
- next_node = Node()
345
- next_node._init(node, self.parser)
349
+ next_node = Node.new(node, self.parser)
346
350
  yield next_node
347
351
  node = node.next
348
352
 
349
-
350
353
  def traverse(self, include_text=False):
351
354
  """Iterate over all child and next nodes starting from the current level.
352
355
 
@@ -360,16 +363,15 @@ cdef class Node:
360
363
  node
361
364
  """
362
365
  cdef Stack stack = Stack(_STACK_SIZE)
363
- cdef myhtml_tree_node_t* current_node = NULL;
364
- cdef Node next_node;
366
+ cdef myhtml_tree_node_t* current_node = NULL
367
+ cdef Node next_node
365
368
 
366
369
  stack.push(self.node)
367
370
 
368
371
  while not stack.is_empty():
369
372
  current_node = stack.pop()
370
373
  if current_node != NULL and not (current_node.tag_id == MyHTML_TAG__TEXT and not include_text):
371
- next_node = Node()
372
- next_node._init(current_node, self.parser)
374
+ next_node = Node.new(current_node, self.parser)
373
375
  yield next_node
374
376
 
375
377
  if current_node.next is not NULL:
@@ -398,8 +400,7 @@ cdef class Node:
398
400
  """Return the child node."""
399
401
  cdef Node node
400
402
  if self.node.child:
401
- node = Node()
402
- node._init(self.node.child, self.parser)
403
+ node = Node.new(self.node.child, self.parser)
403
404
  return node
404
405
  return None
405
406
 
@@ -408,8 +409,7 @@ cdef class Node:
408
409
  """Return the parent node."""
409
410
  cdef Node node
410
411
  if self.node.parent:
411
- node = Node()
412
- node._init(self.node.parent, self.parser)
412
+ node = Node.new(self.node.parent, self.parser)
413
413
  return node
414
414
  return None
415
415
 
@@ -418,8 +418,7 @@ cdef class Node:
418
418
  """Return next node."""
419
419
  cdef Node node
420
420
  if self.node.next:
421
- node = Node()
422
- node._init(self.node.next, self.parser)
421
+ node = Node.new(self.node.next, self.parser)
423
422
  return node
424
423
  return None
425
424
 
@@ -428,8 +427,7 @@ cdef class Node:
428
427
  """Return previous node."""
429
428
  cdef Node node
430
429
  if self.node.prev:
431
- node = Node()
432
- node._init(self.node.prev, self.parser)
430
+ node = Node.new(self.node.prev, self.parser)
433
431
  return node
434
432
  return None
435
433
 
@@ -438,8 +436,7 @@ cdef class Node:
438
436
  """Return last child node."""
439
437
  cdef Node node
440
438
  if self.node.last_child:
441
- node = Node()
442
- node._init(self.node.last_child, self.parser)
439
+ node = Node.new(self.node.last_child, self.parser)
443
440
  return node
444
441
  return None
445
442
 
@@ -539,8 +536,8 @@ cdef class Node:
539
536
  if delete_empty:
540
537
  myhtml_node_delete(self.node)
541
538
  return
542
- cdef myhtml_tree_node_t* next_node;
543
- cdef myhtml_tree_node_t* current_node;
539
+ cdef myhtml_tree_node_t* next_node
540
+ cdef myhtml_tree_node_t* current_node
544
541
 
545
542
  if self.node.child.next != NULL:
546
543
  current_node = self.node.child
@@ -574,6 +571,8 @@ cdef class Node:
574
571
  '<html><body><div>Hello world!</div></body></html>'
575
572
 
576
573
  """
574
+ # ensure cython can recast element to a Node so that decompose will be called sooner.
575
+ cdef Node element
577
576
  for tag in tags:
578
577
  for element in self.css(tag):
579
578
  element.decompose(recursive=recursive)
@@ -600,7 +599,7 @@ cdef class Node:
600
599
 
601
600
  Note: by default, empty tags are ignored, set "delete_empty" to "True" to change this.
602
601
  """
603
-
602
+ cdef Node element
604
603
  for tag in tags:
605
604
  for element in self.css(tag):
606
605
  element.unwrap(delete_empty)
@@ -788,7 +787,7 @@ cdef class Node:
788
787
 
789
788
  Note: by default, empty tags are ignored, set "delete_empty" to "True" to change this.
790
789
  """
791
-
790
+ cdef Node element
792
791
  for tag in tags:
793
792
  for element in self.css(tag):
794
793
  element.unwrap(delete_empty)
@@ -847,6 +846,7 @@ cdef class Node:
847
846
  The query to check.
848
847
 
849
848
  """
849
+ cdef Node node
850
850
  if self.parser.cached_script_texts is None:
851
851
  nodes = find_nodes(self.parser, self.node, 'script')
852
852
  text_nodes = []
@@ -895,6 +895,7 @@ cdef class Node:
895
895
  if not isinstance(other, Node):
896
896
  return False
897
897
  return self.html == other.html
898
+
898
899
  @property
899
900
  def text_content(self):
900
901
  """Returns the text of the node if it is a text node.
@@ -948,8 +949,8 @@ cdef class Node:
948
949
  while not stack.is_empty():
949
950
  current_node = stack.pop()
950
951
 
951
- if current_node.tag_id == MyHTML_TAG__TEXT and current_node.prev and \
952
- current_node.prev.tag_id == MyHTML_TAG__TEXT:
952
+ if (current_node.tag_id == MyHTML_TAG__TEXT and current_node.prev and
953
+ current_node.prev.tag_id == MyHTML_TAG__TEXT):
953
954
  left_text = myhtml_node_text(current_node.prev, &left_length)
954
955
  right_text = myhtml_node_text(current_node, &right_length)
955
956
  if left_text and right_text:
@@ -1,4 +1,6 @@
1
1
  cimport cython
2
+ from cpython.exc cimport PyErr_SetObject
3
+
2
4
 
3
5
  @cython.final
4
6
  cdef class CSSSelector:
@@ -28,35 +30,33 @@ cdef class CSSSelector:
28
30
 
29
31
  return collection
30
32
 
31
-
32
- cdef _create_css_parser(self):
33
+ cdef int _create_css_parser(self) except -1:
33
34
  cdef mystatus_t status
34
35
 
35
36
  cdef mycss_t *mycss = mycss_create()
36
37
  status = mycss_init(mycss)
37
38
 
38
39
  if status != 0:
39
- raise RuntimeError("Can't init MyCSS object.")
40
- # return
40
+ PyErr_SetObject(RuntimeError, "Can't init MyCSS object.")
41
+ return -1
41
42
 
42
43
  self.css_entry = mycss_entry_create()
43
44
  status = mycss_entry_init(mycss, self.css_entry)
44
45
 
45
46
  if status != 0:
46
- raise RuntimeError("Can't init MyCSS Entry object.")
47
-
48
-
47
+ PyErr_SetObject(RuntimeError, "Can't init MyCSS Entry object.")
48
+ return -1
49
+ return 0
49
50
 
50
- cdef _prepare_selector(self, mycss_entry_t *css_entry,
51
- const char *selector, size_t selector_size):
52
- cdef mystatus_t out_status;
53
- self.selectors_list = mycss_selectors_parse(mycss_entry_selectors(css_entry),
54
- myencoding_t.MyENCODING_UTF_8,
55
- selector, selector_size,
56
- &out_status)
51
+ cdef int _prepare_selector(self, mycss_entry_t *css_entry, const char *selector, size_t selector_size) except -1:
52
+ cdef mystatus_t out_status
53
+ self.selectors_list = mycss_selectors_parse(mycss_entry_selectors(css_entry), myencoding_t.MyENCODING_UTF_8,
54
+ selector, selector_size, &out_status)
57
55
 
58
56
  if (self.selectors_list == NULL) or (self.selectors_list.flags and MyCSS_SELECTORS_FLAGS_SELECTOR_BAD):
59
- raise ValueError("Bad CSS Selectors: %s" % self.c_selector.decode('utf-8'))
57
+ PyErr_SetObject(ValueError, "Bad CSS Selectors: %s" % self.c_selector.decode('utf-8'))
58
+ return -1
59
+ return 0
60
60
 
61
61
  def __dealloc__(self):
62
62
  mycss_selectors_list_destroy(mycss_entry_selectors(self.css_entry), self.selectors_list, 1)
@@ -77,12 +77,11 @@ cdef class Selector:
77
77
  cdef Node node
78
78
  cdef list nodes
79
79
 
80
- def __init__(self, Node node, query):
80
+ def __init__(self, Node node, str query):
81
81
  """custom init, because __cinit__ doesn't accept C types"""
82
82
  self.node = node
83
83
  self.nodes = find_nodes(node.parser, node.node, query) if query else [node, ]
84
84
 
85
-
86
85
  cpdef css(self, str query):
87
86
  """Evaluate CSS selector against current scope."""
88
87
  cdef Node current_node
@@ -106,6 +105,7 @@ cdef class Selector:
106
105
  def text_contains(self, str text, bool deep=True, str separator='', bool strip=False):
107
106
  """Filter all current matches given text."""
108
107
  nodes = []
108
+ cdef Node node
109
109
  for node in self.nodes:
110
110
  node_text = node.text(deep=deep, separator=separator, strip=strip)
111
111
  if node_text and text in node_text:
@@ -116,6 +116,7 @@ cdef class Selector:
116
116
  def any_text_contains(self, str text, bool deep=True, str separator='', bool strip=False):
117
117
  """Returns True if any node in the current search scope contains specified text"""
118
118
  nodes = []
119
+ cdef Node node
119
120
  for node in self.nodes:
120
121
  node_text = node.text(deep=deep, separator=separator, strip=strip)
121
122
  if node_text and text in node_text:
@@ -142,7 +143,8 @@ cdef class Selector:
142
143
 
143
144
  Similar to `string-length` in XPath.
144
145
  """
145
- nodes = []
146
+ cdef list nodes = []
147
+ cdef Node node
146
148
  for node in self.nodes:
147
149
  attr = node.attributes.get(attribute)
148
150
  if attr and start and start in attr:
@@ -157,16 +159,15 @@ cdef class Selector:
157
159
  cdef find_nodes(HTMLParser parser, myhtml_tree_node_t *node, str query):
158
160
  cdef myhtml_collection_t *collection
159
161
  cdef CSSSelector selector = CSSSelector(query)
160
-
161
- result = list()
162
+ cdef Node n
163
+ cdef list result = []
162
164
  collection = selector.find(node)
163
165
 
164
166
  if collection == NULL:
165
167
  return result
166
168
 
167
169
  for i in range(collection.length):
168
- n = Node()
169
- n._init(collection.list[i], parser)
170
+ n = Node.new(collection.list[i], parser)
170
171
  result.append(n)
171
172
  myhtml_collection_destroy(collection)
172
173
  return result
@@ -176,6 +177,7 @@ cdef bool find_matches(HTMLParser parser, myhtml_tree_node_t *node, tuple select
176
177
  cdef myhtml_collection_t *collection
177
178
  cdef CSSSelector selector
178
179
  cdef int collection_size
180
+ cdef str query
179
181
 
180
182
  for query in selectors:
181
183
  selector = CSSSelector(query)
@@ -1,5 +1,6 @@
1
1
  include "../utils.pxi"
2
2
 
3
+
3
4
  def create_tag(tag: str):
4
5
  """
5
6
  Given an HTML tag name, e.g. `"div"`, create a single empty node for that tag,