selectolax 0.3.28__cp312-cp312-win32.whl → 0.3.34__cp312-cp312-win32.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of selectolax might be problematic. Click here for more details.

selectolax/__init__.py CHANGED
@@ -2,9 +2,7 @@
2
2
 
3
3
 
4
4
  __author__ = """Artem Golubin"""
5
- __email__ = 'me@rushter.com'
6
- __version__ = '0.3.28'
5
+ __email__ = "me@rushter.com"
6
+ __version__ = "0.3.34"
7
7
 
8
- from . import parser
9
- from . import lexbor
10
- from . import modest
8
+ from . import lexbor, modest, parser
@@ -1,5 +1,6 @@
1
1
  cimport cython
2
2
 
3
+
3
4
  @cython.final
4
5
  cdef class LexborAttributes:
5
6
  """A dict-like object that represents attributes."""
@@ -23,16 +24,32 @@ cdef class LexborAttributes:
23
24
  yield key.decode(_ENCODING)
24
25
  attr = attr.next
25
26
 
26
- def __setitem__(self, str key, value):
27
- value = str(value)
27
+ def __setitem__(self, str key, object value):
28
+ value = value
28
29
  bytes_key = key.encode(_ENCODING)
29
- bytes_value = value.encode(_ENCODING)
30
-
31
- lxb_dom_element_set_attribute(
32
- <lxb_dom_element_t *> self.node,
33
- <lxb_char_t *> bytes_key, len(bytes_key),
34
- <lxb_char_t *> bytes_value, len(bytes_value),
35
- )
30
+ bytes_value = value.encode(_ENCODING) if value else b""
31
+ cdef lxb_dom_attr_t *attr
32
+ cdef lxb_dom_document_t *doc
33
+
34
+ if value is None:
35
+ # N.B. This is suboptimal, but there is not API to set empty attributes
36
+ attr = lxb_dom_element_set_attribute(
37
+ <lxb_dom_element_t *> self.node,
38
+ <lxb_char_t *> bytes_key, len(bytes_key),
39
+ NULL, 0
40
+ )
41
+ doc = (<lxb_dom_node_t*>attr).owner_document
42
+ lexbor_str_destroy(attr.value, doc.text, 0)
43
+ attr.value = NULL
44
+
45
+ elif isinstance(value, str) or isinstance(value, unicode) :
46
+ lxb_dom_element_set_attribute(
47
+ <lxb_dom_element_t *> self.node,
48
+ <lxb_char_t *> bytes_key, len(bytes_key),
49
+ <lxb_char_t *> bytes_value, len(bytes_value),
50
+ )
51
+ else:
52
+ raise TypeError("Expected str or unicode, got %s" % type(value))
36
53
 
37
54
  def __delitem__(self, key):
38
55
  try:
@@ -1,4 +1,5 @@
1
1
  cimport cython
2
+ from cpython.exc cimport PyErr_SetNone
2
3
 
3
4
  _TAG_TO_NAME = {
4
5
  0x0005: "- doctype",
@@ -6,26 +7,29 @@ _TAG_TO_NAME = {
6
7
  0x0004: "-comment",
7
8
  }
8
9
  ctypedef fused str_or_LexborNode:
9
- basestring
10
+ str
10
11
  bytes
11
12
  LexborNode
12
13
 
13
14
  cdef inline bytes to_bytes(str_or_LexborNode value):
14
15
  cdef bytes bytes_val
15
- if isinstance(value, (str, unicode)):
16
- bytes_val = value.encode(_ENCODING)
16
+ if isinstance(value, unicode):
17
+ bytes_val = <bytes>value.encode("utf-8")
17
18
  elif isinstance(value, bytes):
18
- bytes_val = <char*> value
19
+ bytes_val = <bytes>value
19
20
  return bytes_val
20
21
 
22
+
21
23
  @cython.final
22
24
  cdef class LexborNode:
23
25
  """A class that represents HTML node (element)."""
24
26
 
25
- cdef _cinit(self, lxb_dom_node_t *node, LexborHTMLParser parser):
26
- self.parser = parser
27
- self.node = node
28
- return self
27
+ @staticmethod
28
+ cdef LexborNode new(lxb_dom_node_t *node, LexborHTMLParser parser):
29
+ cdef LexborNode lxbnode = LexborNode.__new__(LexborNode)
30
+ lxbnode.node = node
31
+ lxbnode.parser = parser
32
+ return lxbnode
29
33
 
30
34
  @property
31
35
  def mem_id(self):
@@ -41,8 +45,7 @@ cdef class LexborNode:
41
45
  """Return the first child node."""
42
46
  cdef LexborNode node
43
47
  if self.node.first_child:
44
- node = LexborNode()
45
- node._cinit(<lxb_dom_node_t *> self.node.first_child, self.parser)
48
+ node = LexborNode.new(<lxb_dom_node_t *> self.node.first_child, self.parser)
46
49
  return node
47
50
  return None
48
51
 
@@ -50,9 +53,8 @@ cdef class LexborNode:
50
53
  def parent(self):
51
54
  """Return the parent node."""
52
55
  cdef LexborNode node
53
- if self.node.parent:
54
- node = LexborNode()
55
- node._cinit(<lxb_dom_node_t *> self.node.parent, self.parser)
56
+ if self.node.parent != NULL:
57
+ node = LexborNode.new(<lxb_dom_node_t *> self.node.parent, self.parser)
56
58
  return node
57
59
  return None
58
60
 
@@ -60,9 +62,8 @@ cdef class LexborNode:
60
62
  def next(self):
61
63
  """Return next node."""
62
64
  cdef LexborNode node
63
- if self.node.next:
64
- node = LexborNode()
65
- node._cinit(<lxb_dom_node_t *> self.node.next, self.parser)
65
+ if self.node.next != NULL:
66
+ node = LexborNode.new(<lxb_dom_node_t *> self.node.next, self.parser)
66
67
  return node
67
68
  return None
68
69
 
@@ -70,9 +71,8 @@ cdef class LexborNode:
70
71
  def prev(self):
71
72
  """Return previous node."""
72
73
  cdef LexborNode node
73
- if self.node.prev:
74
- node = LexborNode()
75
- node._cinit(<lxb_dom_node_t *> self.node.prev, self.parser)
74
+ if self.node.prev != NULL:
75
+ node = LexborNode.new(<lxb_dom_node_t *> self.node.prev, self.parser)
76
76
  return node
77
77
  return None
78
78
 
@@ -80,9 +80,8 @@ cdef class LexborNode:
80
80
  def last_child(self):
81
81
  """Return last child node."""
82
82
  cdef LexborNode node
83
- if self.node.last_child:
84
- node = LexborNode()
85
- node._cinit(<lxb_dom_node_t *> self.node.last_child, self.parser)
83
+ if self.node.last_child != NULL:
84
+ node = LexborNode.new(<lxb_dom_node_t *> self.node.last_child, self.parser)
86
85
  return node
87
86
  return None
88
87
 
@@ -181,6 +180,12 @@ cdef class LexborNode:
181
180
  Matches pattern `query` against HTML tree.
182
181
  `CSS selectors reference <https://www.w3schools.com/cssref/css_selectors.asp>`_.
183
182
 
183
+ Special selectors:
184
+
185
+ - parser.css('p:lexbor-contains("awesome" i)') -- case-insensitive contains
186
+ - parser.css('p:lexbor-contains("awesome")') -- case-sensitive contains
187
+
188
+
184
189
  Parameters
185
190
  ----------
186
191
  query : str
@@ -256,7 +261,6 @@ cdef class LexborNode:
256
261
  text = c_text.decode(_ENCODING)
257
262
  return text
258
263
 
259
-
260
264
  def decompose(self, bool recursive=True):
261
265
  """Remove the current node from the tree.
262
266
 
@@ -273,6 +277,9 @@ cdef class LexborNode:
273
277
  >>> tag.decompose()
274
278
 
275
279
  """
280
+ if self.node == <lxb_dom_node_t *> lxb_dom_document_root(&self.parser.document.dom_document):
281
+ raise SelectolaxError("Decomposing the root node is not allowed.")
282
+
276
283
  if recursive:
277
284
  lxb_dom_node_destroy_deep(<lxb_dom_node_t *> self.node)
278
285
  else:
@@ -298,11 +305,11 @@ cdef class LexborNode:
298
305
  '<html><body><div>Hello world!</div></body></html>'
299
306
 
300
307
  """
308
+ cdef LexborNode element
301
309
  for tag in tags:
302
310
  for element in self.css(tag):
303
311
  element.decompose(recursive=recursive)
304
312
 
305
-
306
313
  @property
307
314
  def attributes(self):
308
315
  """Get all attributes that belong to the current node.
@@ -410,15 +417,18 @@ cdef class LexborNode:
410
417
  node = node.next
411
418
  continue
412
419
 
413
- next_node = LexborNode()
414
- next_node._cinit(<lxb_dom_node_t *> node, self.parser)
420
+ next_node = LexborNode.new(<lxb_dom_node_t *> node, self.parser)
415
421
  yield next_node
416
422
  node = node.next
417
423
 
418
-
419
- def unwrap(self):
424
+ def unwrap(self, bint delete_empty=False):
420
425
  """Replace node with whatever is inside this node.
421
426
 
427
+ Parameters
428
+ ----------
429
+ delete_empty : bool, default False
430
+ If True, removes empty tags.
431
+
422
432
  Examples
423
433
  --------
424
434
 
@@ -427,11 +437,14 @@ cdef class LexborNode:
427
437
  >>> tree.html
428
438
  '<html><head></head><body><div>Hello world!</div></body></html>'
429
439
 
440
+ Note: by default, empty tags are ignored, use "delete_empty" to change this.
430
441
  """
431
442
  if self.node.first_child == NULL:
443
+ if delete_empty:
444
+ lxb_dom_node_destroy(<lxb_dom_node_t *> self.node)
432
445
  return
433
- cdef lxb_dom_node_t* next_node;
434
- cdef lxb_dom_node_t* current_node;
446
+ cdef lxb_dom_node_t* next_node
447
+ cdef lxb_dom_node_t* current_node
435
448
 
436
449
  if self.node.first_child.next != NULL:
437
450
  current_node = self.node.first_child
@@ -445,7 +458,7 @@ cdef class LexborNode:
445
458
  lxb_dom_node_insert_before(self.node, self.node.first_child)
446
459
  lxb_dom_node_destroy(<lxb_dom_node_t *> self.node)
447
460
 
448
- def unwrap_tags(self, list tags):
461
+ def unwrap_tags(self, list tags, bint delete_empty = False):
449
462
  """Unwraps specified tags from the HTML tree.
450
463
 
451
464
  Works the same as the ``unwrap`` method, but applied to a list of tags.
@@ -454,6 +467,8 @@ cdef class LexborNode:
454
467
  ----------
455
468
  tags : list
456
469
  List of tags to remove.
470
+ delete_empty : bool, default False
471
+ If True, removes empty tags.
457
472
 
458
473
  Examples
459
474
  --------
@@ -462,12 +477,50 @@ cdef class LexborNode:
462
477
  >>> tree.body.unwrap_tags(['i','a'])
463
478
  >>> tree.body.html
464
479
  '<body><div>Hello world!</div></body>'
465
- """
466
480
 
481
+ Note: by default, empty tags are ignored, use "delete_empty" to change this.
482
+ """
483
+ cdef LexborNode element
467
484
  for tag in tags:
468
485
  for element in self.css(tag):
469
- element.unwrap()
486
+ element.unwrap(delete_empty)
487
+
488
+ def merge_text_nodes(self):
489
+ """Iterates over all text nodes and merges all text nodes that are close to each other.
470
490
 
491
+ This is useful for text extraction.
492
+ Use it when you need to strip HTML tags and merge "dangling" text.
493
+
494
+ Examples
495
+ --------
496
+
497
+ >>> tree = LexborHTMLParser("<div><p><strong>J</strong>ohn</p><p>Doe</p></div>")
498
+ >>> node = tree.css_first('div')
499
+ >>> tree.unwrap_tags(["strong"])
500
+ >>> tree.text(deep=True, separator=" ", strip=True)
501
+ "J ohn Doe" # Text extraction produces an extra space because the strong tag was removed.
502
+ >>> node.merge_text_nodes()
503
+ >>> tree.text(deep=True, separator=" ", strip=True)
504
+ "John Doe"
505
+ """
506
+ cdef lxb_dom_node_t *node = self.node.first_child
507
+ cdef lxb_dom_node_t *next_node
508
+ cdef lxb_char_t *left_text
509
+ cdef lxb_char_t *right_text
510
+ cdef size_t left_length, right_length
511
+
512
+ while node != NULL:
513
+ next_node = node.next
514
+ if node.type == LXB_DOM_NODE_TYPE_TEXT and node.prev and node.prev.type == LXB_DOM_NODE_TYPE_TEXT:
515
+ left_text = lxb_dom_node_text_content(node.prev, &left_length)
516
+ right_text = lxb_dom_node_text_content(node, &right_length)
517
+ if left_text and right_text:
518
+ combined = (<bytes>left_text[:left_length]) + (<bytes>right_text[:right_length])
519
+ lxb_dom_node_text_content_set(node, combined, len(combined))
520
+ lxb_dom_node_remove(node.prev)
521
+ if node.first_child:
522
+ LexborNode.new(node, self.parser).merge_text_nodes()
523
+ node = next_node
471
524
 
472
525
  def traverse(self, include_text=False):
473
526
  """Iterate over all child and next nodes starting from the current level.
@@ -487,8 +540,7 @@ cdef class LexborNode:
487
540
 
488
541
  while node != NULL:
489
542
  if not (not include_text and node.type == LXB_DOM_NODE_TYPE_TEXT):
490
- lxb_node = LexborNode()
491
- lxb_node._cinit(<lxb_dom_node_t *> node, self.parser)
543
+ lxb_node = LexborNode.new(<lxb_dom_node_t *> node, self.parser)
492
544
  yield lxb_node
493
545
 
494
546
  if node.first_child != NULL:
@@ -552,7 +604,6 @@ cdef class LexborNode:
552
604
  else:
553
605
  raise SelectolaxError("Expected a string or LexborNode instance, but %s found" % type(value).__name__)
554
606
 
555
-
556
607
  def insert_before(self, str_or_LexborNode value):
557
608
  """
558
609
  Insert a node before the current Node.
@@ -727,7 +778,7 @@ cdef class LexborNode:
727
778
  >>> selector.child.raw_value
728
779
  b'&#x3C;test&#x3E;'
729
780
  """
730
- raise SelectolaxError("This features is not supported by the lexbor backend. Please use Modest backend.")
781
+ raise NotImplementedError("This features is not supported by the lexbor backend. Please use Modest backend.")
731
782
 
732
783
  def scripts_contain(self, str query):
733
784
  """Returns True if any of the script tags contain specified text.
@@ -740,6 +791,7 @@ cdef class LexborNode:
740
791
  The query to check.
741
792
 
742
793
  """
794
+ cdef LexborNode node
743
795
  if self.parser.cached_script_texts is None:
744
796
  nodes = self.parser.selector.find('script', self)
745
797
  text_nodes = []
@@ -764,6 +816,7 @@ cdef class LexborNode:
764
816
  queries : tuple of str
765
817
 
766
818
  """
819
+ cdef LexborNode node
767
820
  if self.parser.cached_script_srcs is None:
768
821
  nodes = self.parser.selector.find('script', self)
769
822
  src_nodes = []
@@ -819,31 +872,44 @@ cdef class LexborNode:
819
872
  """
820
873
  cdef unsigned char * text
821
874
  cdef lxb_dom_node_t* node = <lxb_dom_node_t*> self.node.first_child
822
-
823
- container = TextContainer()
875
+ cdef TextContainer container
824
876
  if self.node == NULL or self.node.type != LXB_DOM_NODE_TYPE_TEXT:
825
877
  return None
878
+
826
879
  text = <unsigned char *> lexbor_str_data_noi(&(<lxb_dom_character_data_t *> self.node).data)
827
880
  if text != NULL:
881
+ container = TextContainer.new_with_defaults()
828
882
  py_text = text.decode(_ENCODING)
829
883
  container.append(py_text)
830
884
  return container.text
885
+
886
+
887
+ @cython.internal
831
888
  @cython.final
832
889
  cdef class TextContainer:
833
890
  cdef str _text
834
- cdef public str separator
835
- cdef public bool strip
891
+ cdef str separator
892
+ cdef bint strip
893
+
894
+ @staticmethod
895
+ cdef TextContainer new_with_defaults():
896
+ cdef TextContainer cls = TextContainer.__new__(TextContainer)
897
+ cls._text = ''
898
+ cls.separator = ''
899
+ cls.strip = False
900
+ return cls
836
901
 
837
902
  def __init__(self, str separator = '', bool strip = False):
838
903
  self._text = ""
839
904
  self.separator = separator
840
905
  self.strip = strip
841
906
 
842
- def append(self, node_text):
907
+ def append(self, str node_text):
843
908
  if self.strip:
844
909
  self._text += node_text.strip() + self.separator
845
910
  else:
846
911
  self._text += node_text + self.separator
912
+
847
913
  @property
848
914
  def text(self):
849
915
  if self.separator and self._text and self._text.endswith(self.separator):
@@ -852,7 +918,7 @@ cdef class TextContainer:
852
918
 
853
919
 
854
920
  cdef lexbor_action_t text_callback(lxb_dom_node_t *node, void *ctx):
855
- cdef unsigned char *text;
921
+ cdef unsigned char *text
856
922
  cdef lxb_tag_id_t tag_id = lxb_dom_node_tag_id_noi(node)
857
923
  if tag_id != LXB_TAG__TEXT:
858
924
  return LEXBOR_ACTION_OK
@@ -860,8 +926,15 @@ cdef lexbor_action_t text_callback(lxb_dom_node_t *node, void *ctx):
860
926
  text = <unsigned char*> lexbor_str_data_noi(&(<lxb_dom_text_t *> node).char_data.data)
861
927
  if not text:
862
928
  return LEXBOR_ACTION_OK
863
- py_str = text.decode(_ENCODING)
864
- cdef object cls
865
- cls = <object> ctx
929
+
930
+ try:
931
+ py_str = text.decode(_ENCODING)
932
+
933
+ except Exception as e:
934
+ PyErr_SetNone(e)
935
+ return LEXBOR_ACTION_STOP
936
+
937
+ cdef TextContainer cls
938
+ cls = <TextContainer> ctx
866
939
  cls.append(py_str)
867
940
  return LEXBOR_ACTION_OK
@@ -1,4 +1,7 @@
1
1
  cimport cython
2
+ from cpython.exc cimport PyErr_SetObject
3
+ from cpython.list cimport PyList_GET_SIZE
4
+
2
5
 
3
6
  @cython.final
4
7
  cdef class LexborCSSSelector:
@@ -8,21 +11,22 @@ cdef class LexborCSSSelector:
8
11
  self.results = []
9
12
  self.current_node = None
10
13
 
11
- cdef _create_css_parser(self):
14
+ cdef int _create_css_parser(self) except -1:
12
15
  cdef lxb_status_t status
13
16
 
14
-
15
17
  self.parser = lxb_css_parser_create()
16
18
  status = lxb_css_parser_init(self.parser, NULL)
17
19
 
18
20
  if status != LXB_STATUS_OK:
19
- raise SelectolaxError("Can't initialize CSS parser.")
21
+ PyErr_SetObject(SelectolaxError, "Can't initialize CSS parser.")
22
+ return -1
20
23
 
21
24
  self.css_selectors = lxb_css_selectors_create()
22
25
  status = lxb_css_selectors_init(self.css_selectors)
23
26
 
24
27
  if status != LXB_STATUS_OK:
25
- raise SelectolaxError("Can't initialize CSS selector.")
28
+ PyErr_SetObject(SelectolaxError, "Can't initialize CSS selector.")
29
+ return -1
26
30
 
27
31
  lxb_css_parser_selectors_set(self.parser, self.css_selectors)
28
32
 
@@ -30,14 +34,18 @@ cdef class LexborCSSSelector:
30
34
  status = lxb_selectors_init(self.selectors)
31
35
  lxb_selectors_opt_set(self.selectors, LXB_SELECTORS_OPT_MATCH_ROOT)
32
36
  if status != LXB_STATUS_OK:
33
- raise SelectolaxError("Can't initialize CSS selector.")
34
-
37
+ PyErr_SetObject(SelectolaxError, "Can't initialize CSS selector.")
38
+ return -1
39
+ return 0
35
40
 
36
- cpdef find(self, str query, LexborNode node):
41
+ cpdef list find(self, str query, LexborNode node):
37
42
  cdef lxb_css_selector_list_t* selectors
38
43
  cdef lxb_char_t* c_selector
39
44
  cdef lxb_css_selector_list_t * selectors_list
40
45
 
46
+ if not isinstance(query, str):
47
+ raise TypeError("Query must be a string.")
48
+
41
49
  bytes_query = query.encode(_ENCODING)
42
50
  selectors_list = lxb_css_selectors_parse(self.parser, <lxb_char_t *> bytes_query, <size_t>len(query))
43
51
 
@@ -54,28 +62,32 @@ cdef class LexborCSSSelector:
54
62
  lxb_css_selector_list_destroy_memory(selectors_list)
55
63
  return results
56
64
 
57
- cpdef any_matches(self, str query, LexborNode node):
65
+ cpdef int any_matches(self, str query, LexborNode node) except -1:
58
66
  cdef lxb_css_selector_list_t * selectors
59
67
  cdef lxb_char_t * c_selector
60
68
  cdef lxb_css_selector_list_t * selectors_list
69
+ cdef int result
70
+
71
+ if not isinstance(query, str):
72
+ raise TypeError("Query must be a string.")
61
73
 
62
74
  bytes_query = query.encode(_ENCODING)
63
75
  selectors_list = lxb_css_selectors_parse(self.parser, <lxb_char_t *> bytes_query, <size_t> len(query))
64
76
 
65
77
  if selectors_list == NULL:
66
- raise SelectolaxError("Can't parse CSS selector.")
78
+ PyErr_SetObject(SelectolaxError, "Can't parse CSS selector.")
67
79
 
68
80
  self.results = []
69
81
  status = lxb_selectors_find(self.selectors, node.node, selectors_list,
70
82
  <lxb_selectors_cb_f> css_matcher_callback, <void *> self)
71
83
  if status != LXB_STATUS_OK:
72
- raise SelectolaxError("Can't parse CSS selector.")
73
- result = bool(self.results)
84
+ lxb_css_selector_list_destroy_memory(selectors_list)
85
+ PyErr_SetObject(SelectolaxError, "Can't parse CSS selector.")
86
+ result = PyList_GET_SIZE(self.results) > 0
74
87
  self.results = []
75
88
  lxb_css_selector_list_destroy_memory(selectors_list)
76
89
  return result
77
90
 
78
-
79
91
  def __dealloc__(self):
80
92
  if self.selectors != NULL:
81
93
  lxb_selectors_destroy(self.selectors, True)
@@ -85,7 +97,6 @@ cdef class LexborCSSSelector:
85
97
  lxb_css_selectors_destroy(self.css_selectors, True)
86
98
 
87
99
 
88
-
89
100
  cdef class LexborSelector:
90
101
  """An advanced CSS selector that supports additional operations.
91
102
 
@@ -100,10 +111,9 @@ cdef class LexborSelector:
100
111
  self.node = node
101
112
  self.nodes = self.node.parser.selector.find(query, self.node) if query else [node, ]
102
113
 
103
-
104
114
  cpdef css(self, str query):
105
115
  """Evaluate CSS selector against current scope."""
106
- raise SelectolaxError("This features is not supported by the lexbor backend. Please use Modest backend.")
116
+ raise NotImplementedError("This features is not supported by the lexbor backend. Please use Modest backend.")
107
117
 
108
118
  @property
109
119
  def matches(self) -> list:
@@ -117,7 +127,7 @@ cdef class LexborSelector:
117
127
 
118
128
  def text_contains(self, str text, bool deep=True, str separator='', bool strip=False) -> LexborSelector:
119
129
  """Filter all current matches given text."""
120
- nodes = []
130
+ cdef list nodes = []
121
131
  for node in self.nodes:
122
132
  node_text = node.text(deep=deep, separator=separator, strip=strip)
123
133
  if node_text and text in node_text:
@@ -127,7 +137,7 @@ cdef class LexborSelector:
127
137
 
128
138
  def any_text_contains(self, str text, bool deep=True, str separator='', bool strip=False) -> bool:
129
139
  """Returns True if any node in the current search scope contains specified text"""
130
- nodes = []
140
+ cdef LexborNode node
131
141
  for node in self.nodes:
132
142
  node_text = node.text(deep=deep, separator=separator, strip=strip)
133
143
  if node_text and text in node_text:
@@ -139,7 +149,7 @@ cdef class LexborSelector:
139
149
 
140
150
  Similar to `string-length` in XPath.
141
151
  """
142
- nodes = []
152
+ cdef list nodes = []
143
153
  for node in self.nodes:
144
154
  attr = node.attributes.get(attribute)
145
155
  if attr and start and start in attr:
@@ -154,7 +164,7 @@ cdef class LexborSelector:
154
164
 
155
165
  Similar to `string-length` in XPath.
156
166
  """
157
- nodes = []
167
+ cdef LexborNode node
158
168
  for node in self.nodes:
159
169
  attr = node.attributes.get(attribute)
160
170
  if attr and start and start in attr:
@@ -169,16 +179,15 @@ cdef class LexborSelector:
169
179
 
170
180
  cdef lxb_status_t css_finder_callback(lxb_dom_node_t *node, lxb_css_selector_specificity_t *spec, void *ctx):
171
181
  cdef LexborNode lxb_node
172
- cdef object cls
173
- cls = <object> ctx
174
- lxb_node = LexborNode()
175
- lxb_node._cinit(<lxb_dom_node_t *> node, cls.current_node.parser)
182
+ cdef LexborCSSSelector cls
183
+ cls = <LexborCSSSelector> ctx
184
+ lxb_node = LexborNode.new(<lxb_dom_node_t *> node, cls.current_node.parser)
176
185
  cls.results.append(lxb_node)
177
186
  return LXB_STATUS_OK
178
187
 
179
188
  cdef lxb_status_t css_matcher_callback(lxb_dom_node_t *node, lxb_css_selector_specificity_t *spec, void *ctx):
180
189
  cdef LexborNode lxb_node
181
- cdef object cls
182
- cls = <object> ctx
190
+ cdef LexborCSSSelector cls
191
+ cls = <LexborCSSSelector> ctx
183
192
  cls.results.append(True)
184
193
  return LXB_STATUS_STOP
@@ -1,5 +1,6 @@
1
1
  include "../utils.pxi"
2
2
 
3
+
3
4
  def create_tag(tag: str):
4
5
  """
5
6
  Given an HTML tag name, e.g. `"div"`, create a single empty node for that tag,