selectolax 0.3.29__cp310-cp310-win32.whl → 0.3.34__cp310-cp310-win32.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of selectolax might be problematic. Click here for more details.

selectolax/lexbor.pyx CHANGED
@@ -1,4 +1,5 @@
1
- from cpython cimport bool
1
+ from cpython.bool cimport bool
2
+ from cpython.exc cimport PyErr_SetObject
2
3
 
3
4
  _ENCODING = 'UTF-8'
4
5
 
@@ -24,10 +25,8 @@ cdef class LexborHTMLParser:
24
25
  html : str (unicode) or bytes
25
26
  """
26
27
  def __init__(self, html):
27
-
28
28
  cdef size_t html_len
29
- cdef char* html_chars
30
-
29
+ cdef object bytes_html
31
30
  bytes_html, html_len = preprocess_input(html)
32
31
  self._parse_html(bytes_html, html_len)
33
32
  self.raw_html = bytes_html
@@ -39,22 +38,27 @@ cdef class LexborHTMLParser:
39
38
  self._selector = LexborCSSSelector()
40
39
  return self._selector
41
40
 
42
-
43
- cdef _parse_html(self, char *html, size_t html_len):
41
+ cdef int _parse_html(self, char *html, size_t html_len) except -1:
44
42
  cdef lxb_status_t status
45
43
 
46
44
  with nogil:
47
45
  self.document = lxb_html_document_create()
48
46
 
49
47
  if self.document == NULL:
50
- raise SelectolaxError("Failed to initialize object for HTML Document.")
48
+ PyErr_SetObject(SelectolaxError, "Failed to initialize object for HTML Document.")
49
+ return -1
51
50
 
52
51
  with nogil:
53
52
  status = lxb_html_document_parse(self.document, <lxb_char_t *> html, html_len)
53
+
54
54
  if status != 0x0000:
55
- raise SelectolaxError("Can't parse HTML.")
55
+ PyErr_SetObject(SelectolaxError, "Can't parse HTML.")
56
+ return -1
56
57
 
57
- assert self.document != NULL
58
+ if self.document == NULL:
59
+ PyErr_SetObject(RuntimeError, "document is NULL even after html was parsed correctly")
60
+ return -1
61
+ return 0
58
62
 
59
63
  def __dealloc__(self):
60
64
  if self.document != NULL:
@@ -68,7 +72,7 @@ cdef class LexborHTMLParser:
68
72
  """Returns root node."""
69
73
  if self.document == NULL:
70
74
  return None
71
- return LexborNode()._cinit(<lxb_dom_node_t *> lxb_dom_document_root(&self.document.dom_document), self)
75
+ return LexborNode.new(<lxb_dom_node_t *> lxb_dom_document_root(&self.document.dom_document), self)
72
76
 
73
77
  @property
74
78
  def body(self):
@@ -77,7 +81,7 @@ cdef class LexborHTMLParser:
77
81
  body = lxb_html_document_body_element_noi(self.document)
78
82
  if body == NULL:
79
83
  return None
80
- return LexborNode()._cinit(<lxb_dom_node_t *> body, self)
84
+ return LexborNode.new(<lxb_dom_node_t *> body, self)
81
85
 
82
86
  @property
83
87
  def head(self):
@@ -86,7 +90,7 @@ cdef class LexborHTMLParser:
86
90
  head = lxb_html_document_head_element_noi(self.document)
87
91
  if head == NULL:
88
92
  return None
89
- return LexborNode()._cinit(<lxb_dom_node_t *> head, self)
93
+ return LexborNode.new(<lxb_dom_node_t *> head, self)
90
94
 
91
95
  def tags(self, str name):
92
96
  """Returns a list of tags that match specified name.
@@ -96,6 +100,12 @@ cdef class LexborHTMLParser:
96
100
  name : str (e.g. div)
97
101
 
98
102
  """
103
+
104
+ if not name:
105
+ raise ValueError("Tag name cannot be empty")
106
+ if len(name) > 100:
107
+ raise ValueError("Tag name is too long")
108
+
99
109
  cdef lxb_dom_collection_t* collection = NULL
100
110
  cdef lxb_status_t status
101
111
  pybyte_name = name.encode('UTF-8')
@@ -116,7 +126,7 @@ cdef class LexborHTMLParser:
116
126
  raise SelectolaxError("Can't locate elements.")
117
127
 
118
128
  for i in range(lxb_dom_collection_length_noi(collection)):
119
- node = LexborNode()._cinit(
129
+ node = LexborNode.new(
120
130
  <lxb_dom_node_t*> lxb_dom_collection_element_noi(collection, i),
121
131
  self
122
132
  )
@@ -150,7 +160,7 @@ cdef class LexborHTMLParser:
150
160
  """Return HTML representation of the page."""
151
161
  if self.document == NULL:
152
162
  return None
153
- node = LexborNode()._cinit(<lxb_dom_node_t *> &self.document.dom_document, self)
163
+ node = LexborNode.new(<lxb_dom_node_t *> &self.document.dom_document, self)
154
164
  return node.html
155
165
 
156
166
  def css(self, str query):
@@ -159,6 +169,11 @@ cdef class LexborHTMLParser:
159
169
  Matches pattern `query` against HTML tree.
160
170
  `CSS selectors reference <https://www.w3schools.com/cssref/css_selectors.asp>`_.
161
171
 
172
+ Special selectors:
173
+
174
+ - parser.css('p:lexbor-contains("awesome" i)') -- case-insensitive contains
175
+ - parser.css('p:lexbor-contains("awesome")') -- case-sensitive contains
176
+
162
177
  Parameters
163
178
  ----------
164
179
  query : str
@@ -232,7 +247,7 @@ cdef class LexborHTMLParser:
232
247
 
233
248
  for i in range(lxb_dom_collection_length_noi(collection)):
234
249
  if recursive:
235
- lxb_dom_node_destroy_deep( <lxb_dom_node_t*> lxb_dom_collection_element_noi(collection, i))
250
+ lxb_dom_node_destroy_deep(<lxb_dom_node_t*> lxb_dom_collection_element_noi(collection, i))
236
251
  else:
237
252
  lxb_dom_node_destroy(<lxb_dom_node_t *> lxb_dom_collection_element_noi(collection, i))
238
253
  lxb_dom_collection_destroy(collection, <bint> True)
@@ -273,7 +288,6 @@ cdef class LexborHTMLParser:
273
288
  """
274
289
  return self.root.scripts_contain(query)
275
290
 
276
-
277
291
  def script_srcs_contain(self, tuple queries):
278
292
  """Returns True if any of the script SRCs attributes contain on of the specified text.
279
293
 
@@ -289,6 +303,26 @@ cdef class LexborHTMLParser:
289
303
  def css_matches(self, str selector):
290
304
  return self.root.css_matches(selector)
291
305
 
306
+ def merge_text_nodes(self):
307
+ """Iterates over all text nodes and merges all text nodes that are close to each other.
308
+
309
+ This is useful for text extraction.
310
+ Use it when you need to strip HTML tags and merge "dangling" text.
311
+
312
+ Examples
313
+ --------
314
+
315
+ >>> tree = LexborHTMLParser("<div><p><strong>J</strong>ohn</p><p>Doe</p></div>")
316
+ >>> node = tree.css_first('div')
317
+ >>> tree.unwrap_tags(["strong"])
318
+ >>> tree.text(deep=True, separator=" ", strip=True)
319
+ "J ohn Doe" # Text extraction produces an extra space because the strong tag was removed.
320
+ >>> node.merge_text_nodes()
321
+ >>> tree.text(deep=True, separator=" ", strip=True)
322
+ "John Doe"
323
+ """
324
+ return self.root.merge_text_nodes()
325
+
292
326
  @staticmethod
293
327
  cdef LexborHTMLParser from_document(lxb_html_document_t *document, bytes raw_html):
294
328
  obj = <LexborHTMLParser> LexborHTMLParser.__new__(LexborHTMLParser)
@@ -303,6 +337,7 @@ cdef class LexborHTMLParser:
303
337
  """Clone the current tree."""
304
338
  cdef lxb_html_document_t* cloned_document
305
339
  cdef lxb_dom_node_t* cloned_node
340
+ cdef LexborHTMLParser cls
306
341
 
307
342
  with nogil:
308
343
  cloned_document = lxb_html_document_create()
@@ -327,6 +362,7 @@ cdef class LexborHTMLParser:
327
362
 
328
363
  cls = LexborHTMLParser.from_document(cloned_document, self.raw_html)
329
364
  return cls
365
+
330
366
  def unwrap_tags(self, list tags, delete_empty = False):
331
367
  """Unwraps specified tags from the HTML tree.
332
368
 
@@ -347,5 +383,6 @@ cdef class LexborHTMLParser:
347
383
  >>> tree.body.html
348
384
  '<body><div>Hello world!</div></body>'
349
385
  """
350
- if self.root is not None:
386
+ # faster to check if the document is empty which should determine if we have a root
387
+ if self.document != NULL:
351
388
  self.root.unwrap_tags(tags, delete_empty=delete_empty)
@@ -1,4 +1,5 @@
1
1
  cimport cython
2
+ from cpython.exc cimport PyErr_NoMemory
2
3
 
3
4
  from libc.stdlib cimport free
4
5
  from libc.stdlib cimport malloc
@@ -8,12 +9,15 @@ from libc.string cimport memcpy
8
9
  DEF _STACK_SIZE = 100
9
10
  DEF _ENCODING = 'UTF-8'
10
11
 
12
+
11
13
  @cython.final
12
14
  cdef class Stack:
13
15
  def __cinit__(self, size_t capacity=25):
14
16
  self.capacity = capacity
15
17
  self.top = 0
16
18
  self._stack = <myhtml_tree_node_t**> malloc(capacity * sizeof(myhtml_tree_node_t))
19
+ if self._stack == NULL:
20
+ raise MemoryError("Failed to allocate memory for stack")
17
21
 
18
22
  def __dealloc__(self):
19
23
  free(self._stack)
@@ -21,9 +25,10 @@ cdef class Stack:
21
25
  cdef bint is_empty(self):
22
26
  return self.top <= 0
23
27
 
24
- cdef push(self, myhtml_tree_node_t* res):
28
+ cdef int push(self, myhtml_tree_node_t* res) except -1:
25
29
  if self.top >= self.capacity:
26
- self.resize()
30
+ if self.resize() < 0:
31
+ return -1
27
32
  self._stack[self.top] = res
28
33
  self.top += 1
29
34
 
@@ -31,10 +36,13 @@ cdef class Stack:
31
36
  self.top = self.top - 1
32
37
  return self._stack[self.top]
33
38
 
34
- cdef resize(self):
39
+ cdef int resize(self) except -1:
35
40
  self.capacity *= 2
36
41
  self._stack = <myhtml_tree_node_t**> realloc(<void*> self._stack, self.capacity * sizeof(myhtml_tree_node_t))
37
-
42
+ if self._stack == NULL:
43
+ PyErr_NoMemory()
44
+ return -1
45
+ return 0
38
46
 
39
47
  cdef class _Attributes:
40
48
  """A dict-like object that represents attributes."""
@@ -128,25 +136,24 @@ cdef class _Attributes:
128
136
  tag_name = c_text.decode(_ENCODING, 'ignore') if c_text != NULL else 'unknown'
129
137
  return "<%s attributes, %s items>" % (tag_name, len(self))
130
138
 
131
-
132
-
133
139
  ctypedef fused str_or_Node:
134
- basestring
140
+ str
135
141
  bytes
136
142
  Node
137
143
 
138
-
139
144
  cdef class Node:
140
145
  """A class that represents HTML node (element)."""
141
146
  cdef myhtml_tree_node_t *node
142
147
  cdef public HTMLParser parser
143
148
 
144
-
145
- cdef _init(self, myhtml_tree_node_t *node, HTMLParser parser):
146
- # custom init, because __cinit__ doesn't accept C types
147
- self.node = node
149
+ @staticmethod
150
+ cdef Node new(myhtml_tree_node_t *node, HTMLParser parser):
151
+ # custom __init__ for C, because __cinit__ doesn't accept C types
152
+ cdef Node cls = Node.__new__(Node)
153
+ cls.node = node
148
154
  # Keep reference to the selector object, so myhtml structures will not be garbage collected prematurely
149
- self.parser = parser
155
+ cls.parser = parser
156
+ return cls
150
157
 
151
158
  @property
152
159
  def attributes(self):
@@ -286,7 +293,7 @@ cdef class Node:
286
293
  cdef inline _text_deep(self, myhtml_tree_node_t *node, separator='', strip=False):
287
294
  text = ""
288
295
  cdef Stack stack = Stack(_STACK_SIZE)
289
- cdef myhtml_tree_node_t* current_node = NULL;
296
+ cdef myhtml_tree_node_t* current_node = NULL
290
297
 
291
298
  if node.tag_id == MyHTML_TAG__TEXT:
292
299
  c_text = myhtml_node_text(node, NULL)
@@ -339,12 +346,10 @@ cdef class Node:
339
346
  node = node.next
340
347
  continue
341
348
 
342
- next_node = Node()
343
- next_node._init(node, self.parser)
349
+ next_node = Node.new(node, self.parser)
344
350
  yield next_node
345
351
  node = node.next
346
352
 
347
-
348
353
  def traverse(self, include_text=False):
349
354
  """Iterate over all child and next nodes starting from the current level.
350
355
 
@@ -358,16 +363,15 @@ cdef class Node:
358
363
  node
359
364
  """
360
365
  cdef Stack stack = Stack(_STACK_SIZE)
361
- cdef myhtml_tree_node_t* current_node = NULL;
362
- cdef Node next_node;
366
+ cdef myhtml_tree_node_t* current_node = NULL
367
+ cdef Node next_node
363
368
 
364
369
  stack.push(self.node)
365
370
 
366
371
  while not stack.is_empty():
367
372
  current_node = stack.pop()
368
373
  if current_node != NULL and not (current_node.tag_id == MyHTML_TAG__TEXT and not include_text):
369
- next_node = Node()
370
- next_node._init(current_node, self.parser)
374
+ next_node = Node.new(current_node, self.parser)
371
375
  yield next_node
372
376
 
373
377
  if current_node.next is not NULL:
@@ -396,8 +400,7 @@ cdef class Node:
396
400
  """Return the child node."""
397
401
  cdef Node node
398
402
  if self.node.child:
399
- node = Node()
400
- node._init(self.node.child, self.parser)
403
+ node = Node.new(self.node.child, self.parser)
401
404
  return node
402
405
  return None
403
406
 
@@ -406,8 +409,7 @@ cdef class Node:
406
409
  """Return the parent node."""
407
410
  cdef Node node
408
411
  if self.node.parent:
409
- node = Node()
410
- node._init(self.node.parent, self.parser)
412
+ node = Node.new(self.node.parent, self.parser)
411
413
  return node
412
414
  return None
413
415
 
@@ -416,8 +418,7 @@ cdef class Node:
416
418
  """Return next node."""
417
419
  cdef Node node
418
420
  if self.node.next:
419
- node = Node()
420
- node._init(self.node.next, self.parser)
421
+ node = Node.new(self.node.next, self.parser)
421
422
  return node
422
423
  return None
423
424
 
@@ -426,8 +427,7 @@ cdef class Node:
426
427
  """Return previous node."""
427
428
  cdef Node node
428
429
  if self.node.prev:
429
- node = Node()
430
- node._init(self.node.prev, self.parser)
430
+ node = Node.new(self.node.prev, self.parser)
431
431
  return node
432
432
  return None
433
433
 
@@ -436,8 +436,7 @@ cdef class Node:
436
436
  """Return last child node."""
437
437
  cdef Node node
438
438
  if self.node.last_child:
439
- node = Node()
440
- node._init(self.node.last_child, self.parser)
439
+ node = Node.new(self.node.last_child, self.parser)
441
440
  return node
442
441
  return None
443
442
 
@@ -537,8 +536,8 @@ cdef class Node:
537
536
  if delete_empty:
538
537
  myhtml_node_delete(self.node)
539
538
  return
540
- cdef myhtml_tree_node_t* next_node;
541
- cdef myhtml_tree_node_t* current_node;
539
+ cdef myhtml_tree_node_t* next_node
540
+ cdef myhtml_tree_node_t* current_node
542
541
 
543
542
  if self.node.child.next != NULL:
544
543
  current_node = self.node.child
@@ -572,6 +571,8 @@ cdef class Node:
572
571
  '<html><body><div>Hello world!</div></body></html>'
573
572
 
574
573
  """
574
+ # ensure cython can recast element to a Node so that decompose will be called sooner.
575
+ cdef Node element
575
576
  for tag in tags:
576
577
  for element in self.css(tag):
577
578
  element.decompose(recursive=recursive)
@@ -595,10 +596,10 @@ cdef class Node:
595
596
  >>> tree.body.unwrap_tags(['i','a'])
596
597
  >>> tree.body.html
597
598
  '<body><div>Hello world!</div></body>'
598
-
599
+
599
600
  Note: by default, empty tags are ignored, set "delete_empty" to "True" to change this.
600
601
  """
601
-
602
+ cdef Node element
602
603
  for tag in tags:
603
604
  for element in self.css(tag):
604
605
  element.unwrap(delete_empty)
@@ -783,10 +784,10 @@ cdef class Node:
783
784
  >>> tree.body.unwrap_tags(['i','a'])
784
785
  >>> tree.body.html
785
786
  '<body><div>Hello world!</div></body>'
786
-
787
+
787
788
  Note: by default, empty tags are ignored, set "delete_empty" to "True" to change this.
788
789
  """
789
-
790
+ cdef Node element
790
791
  for tag in tags:
791
792
  for element in self.css(tag):
792
793
  element.unwrap(delete_empty)
@@ -845,6 +846,7 @@ cdef class Node:
845
846
  The query to check.
846
847
 
847
848
  """
849
+ cdef Node node
848
850
  if self.parser.cached_script_texts is None:
849
851
  nodes = find_nodes(self.parser, self.node, 'script')
850
852
  text_nodes = []
@@ -893,6 +895,7 @@ cdef class Node:
893
895
  if not isinstance(other, Node):
894
896
  return False
895
897
  return self.html == other.html
898
+
896
899
  @property
897
900
  def text_content(self):
898
901
  """Returns the text of the node if it is a text node.
@@ -946,8 +949,8 @@ cdef class Node:
946
949
  while not stack.is_empty():
947
950
  current_node = stack.pop()
948
951
 
949
- if current_node.tag_id == MyHTML_TAG__TEXT and current_node.prev and \
950
- current_node.prev.tag_id == MyHTML_TAG__TEXT:
952
+ if (current_node.tag_id == MyHTML_TAG__TEXT and current_node.prev and
953
+ current_node.prev.tag_id == MyHTML_TAG__TEXT):
951
954
  left_text = myhtml_node_text(current_node.prev, &left_length)
952
955
  right_text = myhtml_node_text(current_node, &right_length)
953
956
  if left_text and right_text:
@@ -978,8 +981,8 @@ cdef inline str append_text(str text, str node_text, str separator='', bint stri
978
981
 
979
982
  cdef inline bytes to_bytes(str_or_Node value):
980
983
  cdef bytes bytes_val
981
- if isinstance(value, (str, unicode)):
982
- bytes_val = value.encode(_ENCODING)
984
+ if isinstance(value, unicode):
985
+ bytes_val = <bytes>value.encode("utf-8")
983
986
  elif isinstance(value, bytes):
984
- bytes_val = <char*> value
987
+ bytes_val = <bytes>value
985
988
  return bytes_val
@@ -1,4 +1,6 @@
1
1
  cimport cython
2
+ from cpython.exc cimport PyErr_SetObject
3
+
2
4
 
3
5
  @cython.final
4
6
  cdef class CSSSelector:
@@ -28,35 +30,33 @@ cdef class CSSSelector:
28
30
 
29
31
  return collection
30
32
 
31
-
32
- cdef _create_css_parser(self):
33
+ cdef int _create_css_parser(self) except -1:
33
34
  cdef mystatus_t status
34
35
 
35
36
  cdef mycss_t *mycss = mycss_create()
36
37
  status = mycss_init(mycss)
37
38
 
38
39
  if status != 0:
39
- raise RuntimeError("Can't init MyCSS object.")
40
- # return
40
+ PyErr_SetObject(RuntimeError, "Can't init MyCSS object.")
41
+ return -1
41
42
 
42
43
  self.css_entry = mycss_entry_create()
43
44
  status = mycss_entry_init(mycss, self.css_entry)
44
45
 
45
46
  if status != 0:
46
- raise RuntimeError("Can't init MyCSS Entry object.")
47
-
48
-
47
+ PyErr_SetObject(RuntimeError, "Can't init MyCSS Entry object.")
48
+ return -1
49
+ return 0
49
50
 
50
- cdef _prepare_selector(self, mycss_entry_t *css_entry,
51
- const char *selector, size_t selector_size):
52
- cdef mystatus_t out_status;
53
- self.selectors_list = mycss_selectors_parse(mycss_entry_selectors(css_entry),
54
- myencoding_t.MyENCODING_UTF_8,
55
- selector, selector_size,
56
- &out_status)
51
+ cdef int _prepare_selector(self, mycss_entry_t *css_entry, const char *selector, size_t selector_size) except -1:
52
+ cdef mystatus_t out_status
53
+ self.selectors_list = mycss_selectors_parse(mycss_entry_selectors(css_entry), myencoding_t.MyENCODING_UTF_8,
54
+ selector, selector_size, &out_status)
57
55
 
58
56
  if (self.selectors_list == NULL) or (self.selectors_list.flags and MyCSS_SELECTORS_FLAGS_SELECTOR_BAD):
59
- raise ValueError("Bad CSS Selectors: %s" % self.c_selector.decode('utf-8'))
57
+ PyErr_SetObject(ValueError, "Bad CSS Selectors: %s" % self.c_selector.decode('utf-8'))
58
+ return -1
59
+ return 0
60
60
 
61
61
  def __dealloc__(self):
62
62
  mycss_selectors_list_destroy(mycss_entry_selectors(self.css_entry), self.selectors_list, 1)
@@ -77,12 +77,11 @@ cdef class Selector:
77
77
  cdef Node node
78
78
  cdef list nodes
79
79
 
80
- def __init__(self, Node node, query):
80
+ def __init__(self, Node node, str query):
81
81
  """custom init, because __cinit__ doesn't accept C types"""
82
82
  self.node = node
83
83
  self.nodes = find_nodes(node.parser, node.node, query) if query else [node, ]
84
84
 
85
-
86
85
  cpdef css(self, str query):
87
86
  """Evaluate CSS selector against current scope."""
88
87
  cdef Node current_node
@@ -106,6 +105,7 @@ cdef class Selector:
106
105
  def text_contains(self, str text, bool deep=True, str separator='', bool strip=False):
107
106
  """Filter all current matches given text."""
108
107
  nodes = []
108
+ cdef Node node
109
109
  for node in self.nodes:
110
110
  node_text = node.text(deep=deep, separator=separator, strip=strip)
111
111
  if node_text and text in node_text:
@@ -116,6 +116,7 @@ cdef class Selector:
116
116
  def any_text_contains(self, str text, bool deep=True, str separator='', bool strip=False):
117
117
  """Returns True if any node in the current search scope contains specified text"""
118
118
  nodes = []
119
+ cdef Node node
119
120
  for node in self.nodes:
120
121
  node_text = node.text(deep=deep, separator=separator, strip=strip)
121
122
  if node_text and text in node_text:
@@ -142,7 +143,8 @@ cdef class Selector:
142
143
 
143
144
  Similar to `string-length` in XPath.
144
145
  """
145
- nodes = []
146
+ cdef list nodes = []
147
+ cdef Node node
146
148
  for node in self.nodes:
147
149
  attr = node.attributes.get(attribute)
148
150
  if attr and start and start in attr:
@@ -157,16 +159,15 @@ cdef class Selector:
157
159
  cdef find_nodes(HTMLParser parser, myhtml_tree_node_t *node, str query):
158
160
  cdef myhtml_collection_t *collection
159
161
  cdef CSSSelector selector = CSSSelector(query)
160
-
161
- result = list()
162
+ cdef Node n
163
+ cdef list result = []
162
164
  collection = selector.find(node)
163
165
 
164
166
  if collection == NULL:
165
167
  return result
166
168
 
167
169
  for i in range(collection.length):
168
- n = Node()
169
- n._init(collection.list[i], parser)
170
+ n = Node.new(collection.list[i], parser)
170
171
  result.append(n)
171
172
  myhtml_collection_destroy(collection)
172
173
  return result
@@ -176,6 +177,7 @@ cdef bool find_matches(HTMLParser parser, myhtml_tree_node_t *node, tuple select
176
177
  cdef myhtml_collection_t *collection
177
178
  cdef CSSSelector selector
178
179
  cdef int collection_size
180
+ cdef str query
179
181
 
180
182
  for query in selectors:
181
183
  selector = CSSSelector(query)
@@ -1,5 +1,6 @@
1
1
  include "../utils.pxi"
2
2
 
3
+
3
4
  def create_tag(tag: str):
4
5
  """
5
6
  Given an HTML tag name, e.g. `"div"`, create a single empty node for that tag,