justhtml 0.6.0__py3-none-any.whl → 0.33.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
justhtml/selector.py CHANGED
@@ -1,6 +1,11 @@
1
1
  # CSS Selector implementation for JustHTML
2
2
  # Supports a subset of CSS selectors for querying the DOM
3
3
 
4
+ from __future__ import annotations
5
+
6
+ from functools import lru_cache
7
+ from typing import Any
8
+
4
9
 
5
10
  class SelectorError(ValueError):
6
11
  """Raised when a CSS selector is invalid."""
@@ -8,30 +13,33 @@ class SelectorError(ValueError):
8
13
 
9
14
  # Token types for the CSS selector lexer
10
15
  class TokenType:
11
- TAG = "TAG" # div, span, etc.
12
- ID = "ID" # #foo
13
- CLASS = "CLASS" # .bar
14
- UNIVERSAL = "UNIVERSAL" # *
15
- ATTR_START = "ATTR_START" # [
16
- ATTR_END = "ATTR_END" # ]
17
- ATTR_OP = "ATTR_OP" # =, ~=, |=, ^=, $=, *=
18
- STRING = "STRING" # "value" or 'value' or unquoted
19
- COMBINATOR = "COMBINATOR" # >, +, ~, or whitespace (descendant)
20
- COMMA = "COMMA" # ,
21
- COLON = "COLON" # :
22
- PAREN_OPEN = "PAREN_OPEN" # (
23
- PAREN_CLOSE = "PAREN_CLOSE" # )
24
- EOF = "EOF"
16
+ TAG: str = "TAG" # div, span, etc.
17
+ ID: str = "ID" # #foo
18
+ CLASS: str = "CLASS" # .bar
19
+ UNIVERSAL: str = "UNIVERSAL" # *
20
+ ATTR_START: str = "ATTR_START" # [
21
+ ATTR_END: str = "ATTR_END" # ]
22
+ ATTR_OP: str = "ATTR_OP" # =, ~=, |=, ^=, $=, *=
23
+ STRING: str = "STRING" # "value" or 'value' or unquoted
24
+ COMBINATOR: str = "COMBINATOR" # >, +, ~, or whitespace (descendant)
25
+ COMMA: str = "COMMA" # ,
26
+ COLON: str = "COLON" # :
27
+ PAREN_OPEN: str = "PAREN_OPEN" # (
28
+ PAREN_CLOSE: str = "PAREN_CLOSE" # )
29
+ EOF: str = "EOF"
25
30
 
26
31
 
27
32
  class Token:
28
33
  __slots__ = ("type", "value")
29
34
 
30
- def __init__(self, token_type, value=None):
35
+ type: str
36
+ value: str | None
37
+
38
+ def __init__(self, token_type: str, value: str | None = None) -> None:
31
39
  self.type = token_type
32
40
  self.value = value
33
41
 
34
- def __repr__(self):
42
+ def __repr__(self) -> str:
35
43
  return f"Token({self.type}, {self.value!r})"
36
44
 
37
45
 
@@ -40,45 +48,49 @@ class SelectorTokenizer:
40
48
 
41
49
  __slots__ = ("length", "pos", "selector")
42
50
 
43
- def __init__(self, selector):
51
+ selector: str
52
+ pos: int
53
+ length: int
54
+
55
+ def __init__(self, selector: str) -> None:
44
56
  self.selector = selector
45
57
  self.pos = 0
46
58
  self.length = len(selector)
47
59
 
48
- def _peek(self, offset=0):
60
+ def _peek(self, offset: int = 0) -> str:
49
61
  pos = self.pos + offset
50
62
  if pos < self.length:
51
63
  return self.selector[pos]
52
64
  return ""
53
65
 
54
- def _advance(self):
66
+ def _advance(self) -> str:
55
67
  ch = self._peek()
56
68
  self.pos += 1
57
69
  return ch
58
70
 
59
- def _skip_whitespace(self):
71
+ def _skip_whitespace(self) -> None:
60
72
  while self.pos < self.length and self.selector[self.pos] in " \t\n\r\f":
61
73
  self.pos += 1
62
74
 
63
- def _is_name_start(self, ch):
75
+ def _is_name_start(self, ch: str) -> bool:
64
76
  # CSS identifier start: letter, underscore, hyphen, or non-ASCII
65
77
  return ch.isalpha() or ch == "_" or ch == "-" or ord(ch) > 127
66
78
 
67
- def _is_name_char(self, ch):
79
+ def _is_name_char(self, ch: str) -> bool:
68
80
  # CSS identifier continuation: name-start or digit
69
81
  return self._is_name_start(ch) or ch.isdigit()
70
82
 
71
- def _read_name(self):
83
+ def _read_name(self) -> str:
72
84
  start = self.pos
73
85
  while self.pos < self.length and self._is_name_char(self.selector[self.pos]):
74
86
  self.pos += 1
75
87
  return self.selector[start : self.pos]
76
88
 
77
- def _read_string(self, quote):
89
+ def _read_string(self, quote: str) -> str:
78
90
  # Skip opening quote
79
91
  self.pos += 1
80
92
  start = self.pos
81
- parts = []
93
+ parts: list[str] = []
82
94
 
83
95
  while self.pos < self.length:
84
96
  ch = self.selector[self.pos]
@@ -105,7 +117,7 @@ class SelectorTokenizer:
105
117
 
106
118
  raise SelectorError(f"Unterminated string in selector: {self.selector!r}")
107
119
 
108
- def _read_unquoted_attr_value(self):
120
+ def _read_unquoted_attr_value(self) -> str:
109
121
  # Read an unquoted attribute value (CSS identifier)
110
122
  start = self.pos
111
123
  while self.pos < self.length:
@@ -115,8 +127,8 @@ class SelectorTokenizer:
115
127
  self.pos += 1
116
128
  return self.selector[start : self.pos]
117
129
 
118
- def tokenize(self):
119
- tokens = []
130
+ def tokenize(self) -> list[Token]:
131
+ tokens: list[Token] = []
120
132
  pending_whitespace = False
121
133
 
122
134
  while self.pos < self.length:
@@ -284,21 +296,34 @@ class SimpleSelector:
284
296
 
285
297
  __slots__ = ("arg", "name", "operator", "type", "value")
286
298
 
287
- TYPE_TAG = "tag"
288
- TYPE_ID = "id"
289
- TYPE_CLASS = "class"
290
- TYPE_UNIVERSAL = "universal"
291
- TYPE_ATTR = "attr"
292
- TYPE_PSEUDO = "pseudo"
293
-
294
- def __init__(self, selector_type, name=None, operator=None, value=None, arg=None):
299
+ TYPE_TAG: str = "tag"
300
+ TYPE_ID: str = "id"
301
+ TYPE_CLASS: str = "class"
302
+ TYPE_UNIVERSAL: str = "universal"
303
+ TYPE_ATTR: str = "attr"
304
+ TYPE_PSEUDO: str = "pseudo"
305
+
306
+ type: str
307
+ name: str | None
308
+ operator: str | None
309
+ value: str | None
310
+ arg: str | None
311
+
312
+ def __init__(
313
+ self,
314
+ selector_type: str,
315
+ name: str | None = None,
316
+ operator: str | None = None,
317
+ value: str | None = None,
318
+ arg: str | None = None,
319
+ ) -> None:
295
320
  self.type = selector_type
296
321
  self.name = name
297
322
  self.operator = operator
298
323
  self.value = value
299
324
  self.arg = arg # For :not() and :nth-child()
300
325
 
301
- def __repr__(self):
326
+ def __repr__(self) -> str:
302
327
  parts = [f"SimpleSelector({self.type!r}"]
303
328
  if self.name:
304
329
  parts.append(f", name={self.name!r}")
@@ -317,10 +342,12 @@ class CompoundSelector:
317
342
 
318
343
  __slots__ = ("selectors",)
319
344
 
320
- def __init__(self, selectors=None):
345
+ selectors: list[SimpleSelector]
346
+
347
+ def __init__(self, selectors: list[SimpleSelector] | None = None) -> None:
321
348
  self.selectors = selectors or []
322
349
 
323
- def __repr__(self):
350
+ def __repr__(self) -> str:
324
351
  return f"CompoundSelector({self.selectors!r})"
325
352
 
326
353
 
@@ -329,12 +356,14 @@ class ComplexSelector:
329
356
 
330
357
  __slots__ = ("parts",)
331
358
 
332
- def __init__(self):
359
+ parts: list[tuple[str | None, CompoundSelector]]
360
+
361
+ def __init__(self) -> None:
333
362
  # List of (combinator, compound_selector) tuples
334
363
  # First item has combinator=None
335
364
  self.parts = []
336
365
 
337
- def __repr__(self):
366
+ def __repr__(self) -> str:
338
367
  return f"ComplexSelector({self.parts!r})"
339
368
 
340
369
 
@@ -343,43 +372,55 @@ class SelectorList:
343
372
 
344
373
  __slots__ = ("selectors",)
345
374
 
346
- def __init__(self, selectors=None):
375
+ selectors: list[ComplexSelector]
376
+
377
+ def __init__(self, selectors: list[ComplexSelector] | None = None) -> None:
347
378
  self.selectors = selectors or []
348
379
 
349
- def __repr__(self):
380
+ def __repr__(self) -> str:
350
381
  return f"SelectorList({self.selectors!r})"
351
382
 
352
383
 
384
+ # Type alias for parsed selectors
385
+ ParsedSelector = ComplexSelector | SelectorList
386
+
387
+
353
388
  class SelectorParser:
354
389
  """Parses a list of tokens into a selector AST."""
355
390
 
356
391
  __slots__ = ("pos", "tokens")
357
392
 
358
- def __init__(self, tokens):
393
+ tokens: list[Token]
394
+ pos: int
395
+
396
+ def __init__(self, tokens: list[Token]) -> None:
359
397
  self.tokens = tokens
360
398
  self.pos = 0
361
399
 
362
- def _peek(self):
400
+ def _peek(self) -> Token:
363
401
  if self.pos < len(self.tokens):
364
402
  return self.tokens[self.pos]
365
403
  return Token(TokenType.EOF)
366
404
 
367
- def _advance(self):
405
+ def _advance(self) -> Token:
368
406
  token = self._peek()
369
407
  self.pos += 1
370
408
  return token
371
409
 
372
- def _expect(self, token_type):
410
+ def _expect(self, token_type: str) -> Token:
373
411
  token = self._peek()
374
412
  if token.type != token_type:
375
413
  raise SelectorError(f"Expected {token_type}, got {token.type}")
376
414
  return self._advance()
377
415
 
378
- def parse(self):
416
+ def parse(self) -> ParsedSelector:
379
417
  """Parse a complete selector (possibly comma-separated list)."""
380
- selectors = []
418
+ selectors: list[ComplexSelector] = []
381
419
  # parse_selector() and query() reject empty input before parsing, so the first selector is expected to exist
382
- selectors.append(self._parse_complex_selector())
420
+ first = self._parse_complex_selector()
421
+ if first is None: # pragma: no cover
422
+ raise SelectorError("Empty selector")
423
+ selectors.append(first)
383
424
 
384
425
  while self._peek().type == TokenType.COMMA:
385
426
  self._advance() # consume comma
@@ -394,7 +435,7 @@ class SelectorParser:
394
435
  return selectors[0]
395
436
  return SelectorList(selectors)
396
437
 
397
- def _parse_complex_selector(self):
438
+ def _parse_complex_selector(self) -> ComplexSelector | None:
398
439
  """Parse a complex selector (compound selectors with combinators)."""
399
440
  complex_sel = ComplexSelector()
400
441
 
@@ -414,9 +455,9 @@ class SelectorParser:
414
455
 
415
456
  return complex_sel
416
457
 
417
- def _parse_compound_selector(self):
458
+ def _parse_compound_selector(self) -> CompoundSelector | None:
418
459
  """Parse a compound selector (sequence of simple selectors)."""
419
- simple_selectors = []
460
+ simple_selectors: list[SimpleSelector] = []
420
461
 
421
462
  while True:
422
463
  token = self._peek()
@@ -450,7 +491,7 @@ class SelectorParser:
450
491
  return None
451
492
  return CompoundSelector(simple_selectors)
452
493
 
453
- def _parse_attribute_selector(self):
494
+ def _parse_attribute_selector(self) -> SimpleSelector:
454
495
  """Parse an attribute selector [attr], [attr=value], etc."""
455
496
  self._expect(TokenType.ATTR_START)
456
497
 
@@ -467,7 +508,7 @@ class SelectorParser:
467
508
 
468
509
  return SimpleSelector(SimpleSelector.TYPE_ATTR, name=attr_name, operator=operator, value=value)
469
510
 
470
- def _parse_pseudo_selector(self):
511
+ def _parse_pseudo_selector(self) -> SimpleSelector:
471
512
  """Parse a pseudo-class selector like :first-child or :not(selector)."""
472
513
  self._expect(TokenType.COLON)
473
514
  name = self._expect(TokenType.TAG).value
@@ -475,7 +516,7 @@ class SelectorParser:
475
516
  # Functional pseudo-class
476
517
  if self._peek().type == TokenType.PAREN_OPEN:
477
518
  self._advance()
478
- arg = None
519
+ arg: str | None = None
479
520
  if self._peek().type == TokenType.STRING:
480
521
  arg = self._advance().value
481
522
  self._expect(TokenType.PAREN_CLOSE)
@@ -489,7 +530,15 @@ class SelectorMatcher:
489
530
 
490
531
  __slots__ = ()
491
532
 
492
- def matches(self, node, selector):
533
+ def _unquote_pseudo_arg(self, arg: str) -> str:
534
+ arg = arg.strip()
535
+ if len(arg) >= 2 and arg[0] == arg[-1] and arg[0] in ('"', "'"):
536
+ quote = arg[0]
537
+ # Minimal unescaping for common cases like :contains("click me")
538
+ return arg[1:-1].replace("\\" + quote, quote).replace("\\\\", "\\")
539
+ return arg
540
+
541
+ def matches(self, node: Any, selector: ParsedSelector | CompoundSelector | SimpleSelector) -> bool:
493
542
  """Check if a node matches a parsed selector."""
494
543
  if isinstance(selector, SelectorList):
495
544
  return any(self.matches(node, sel) for sel in selector.selectors)
@@ -501,7 +550,7 @@ class SelectorMatcher:
501
550
  return self._matches_simple(node, selector)
502
551
  return False
503
552
 
504
- def _matches_complex(self, node, selector):
553
+ def _matches_complex(self, node: Any, selector: ComplexSelector) -> bool:
505
554
  """Match a complex selector (with combinators)."""
506
555
  # Work backwards from the rightmost compound selector
507
556
  parts = selector.parts
@@ -557,11 +606,11 @@ class SelectorMatcher:
557
606
 
558
607
  return True
559
608
 
560
- def _matches_compound(self, node, compound):
609
+ def _matches_compound(self, node: Any, compound: CompoundSelector) -> bool:
561
610
  """Match a compound selector (all simple selectors must match)."""
562
611
  return all(self._matches_simple(node, simple) for simple in compound.selectors)
563
612
 
564
- def _matches_simple(self, node, selector):
613
+ def _matches_simple(self, node: Any, selector: SimpleSelector) -> bool:
565
614
  """Match a simple selector against a node."""
566
615
  # Text nodes and other non-element nodes don't match element selectors
567
616
  if not hasattr(node, "name") or node.name.startswith("#"):
@@ -574,7 +623,7 @@ class SelectorMatcher:
574
623
 
575
624
  if sel_type == SimpleSelector.TYPE_TAG:
576
625
  # HTML tag names are case-insensitive
577
- return node.name.lower() == selector.name.lower()
626
+ return bool(node.name.lower() == (selector.name.lower() if selector.name else ""))
578
627
 
579
628
  if sel_type == SimpleSelector.TYPE_ID:
580
629
  node_id = node.attrs.get("id", "") if node.attrs else ""
@@ -593,13 +642,13 @@ class SelectorMatcher:
593
642
 
594
643
  return False
595
644
 
596
- def _matches_attribute(self, node, selector):
645
+ def _matches_attribute(self, node: Any, selector: SimpleSelector) -> bool:
597
646
  """Match an attribute selector."""
598
647
  attrs = node.attrs or {}
599
- attr_name = selector.name.lower() # Attribute names are case-insensitive in HTML
648
+ attr_name = (selector.name or "").lower() # Attribute names are case-insensitive in HTML
600
649
 
601
650
  # Check if attribute exists (for any case)
602
- attr_value = None
651
+ attr_value: str | None = None
603
652
  for name, value in attrs.items():
604
653
  if name.lower() == attr_name:
605
654
  attr_value = value
@@ -612,7 +661,7 @@ class SelectorMatcher:
612
661
  if selector.operator is None:
613
662
  return True
614
663
 
615
- value = selector.value
664
+ value = selector.value or ""
616
665
  op = selector.operator
617
666
 
618
667
  if op == "=":
@@ -641,9 +690,9 @@ class SelectorMatcher:
641
690
 
642
691
  return False
643
692
 
644
- def _matches_pseudo(self, node, selector):
693
+ def _matches_pseudo(self, node: Any, selector: SimpleSelector) -> bool:
645
694
  """Match a pseudo-class selector."""
646
- name = selector.name.lower()
695
+ name = (selector.name or "").lower()
647
696
 
648
697
  if name == "first-child":
649
698
  return self._is_first_child(node)
@@ -684,6 +733,17 @@ class SelectorMatcher:
684
733
  return parent.name in ("#document", "#document-fragment")
685
734
  return False
686
735
 
736
+ if name == "contains":
737
+ if selector.arg is None:
738
+ raise SelectorError(":contains() requires a string argument")
739
+ needle = self._unquote_pseudo_arg(selector.arg)
740
+ if needle == "":
741
+ return True
742
+ # Non-standard (jQuery-style) pseudo-class: match elements whose descendant
743
+ # text contains the substring. We use `to_text()` to approximate textContent.
744
+ haystack: str = node.to_text(separator=" ", strip=True)
745
+ return needle in haystack
746
+
687
747
  if name == "first-of-type":
688
748
  return self._is_first_of_type(node)
689
749
 
@@ -699,43 +759,43 @@ class SelectorMatcher:
699
759
  # Unknown pseudo-class - raise instead of silently failing to match
700
760
  raise SelectorError(f"Unsupported pseudo-class: :{name}")
701
761
 
702
- def _get_element_children(self, parent):
762
+ def _get_element_children(self, parent: Any) -> list[Any]:
703
763
  """Get only element children (exclude text, comments, etc.)."""
704
764
  if not parent or not parent.has_child_nodes():
705
765
  return []
706
- return [c for c in parent.children if hasattr(c, "name") and not c.name.startswith("#")]
766
+ return [c for c in parent.children if not c.name.startswith("#")]
707
767
 
708
- def _get_previous_sibling(self, node):
768
+ def _get_previous_sibling(self, node: Any) -> Any | None:
709
769
  """Get the previous element sibling. Returns None if node is first or not found."""
710
770
  parent = node.parent
711
771
  if not parent:
712
772
  return None
713
773
 
714
- prev = None
774
+ prev: Any | None = None
715
775
  for child in parent.children:
716
776
  if child is node:
717
777
  return prev
718
- if hasattr(child, "name") and not child.name.startswith("#"):
778
+ if not child.name.startswith("#"):
719
779
  prev = child
720
780
  return None # node not in parent.children (detached)
721
781
 
722
- def _is_first_child(self, node):
782
+ def _is_first_child(self, node: Any) -> bool:
723
783
  """Check if node is the first element child of its parent."""
724
784
  parent = node.parent
725
785
  if not parent:
726
786
  return False
727
787
  elements = self._get_element_children(parent)
728
- return elements and elements[0] is node
788
+ return bool(elements) and elements[0] is node
729
789
 
730
- def _is_last_child(self, node):
790
+ def _is_last_child(self, node: Any) -> bool:
731
791
  """Check if node is the last element child of its parent."""
732
792
  parent = node.parent
733
793
  if not parent:
734
794
  return False
735
795
  elements = self._get_element_children(parent)
736
- return elements and elements[-1] is node
796
+ return bool(elements) and elements[-1] is node
737
797
 
738
- def _is_first_of_type(self, node):
798
+ def _is_first_of_type(self, node: Any) -> bool:
739
799
  """Check if node is the first sibling of its type."""
740
800
  parent = node.parent
741
801
  if not parent:
@@ -746,19 +806,19 @@ class SelectorMatcher:
746
806
  return child is node
747
807
  return False
748
808
 
749
- def _is_last_of_type(self, node):
809
+ def _is_last_of_type(self, node: Any) -> bool:
750
810
  """Check if node is the last sibling of its type."""
751
811
  parent = node.parent
752
812
  if not parent:
753
813
  return False
754
814
  node_name = node.name.lower()
755
- last_of_type = None
815
+ last_of_type: Any | None = None
756
816
  for child in self._get_element_children(parent):
757
817
  if child.name.lower() == node_name:
758
818
  last_of_type = child
759
819
  return last_of_type is node
760
820
 
761
- def _parse_nth_expression(self, expr):
821
+ def _parse_nth_expression(self, expr: str | None) -> tuple[int, int] | None:
762
822
  """Parse an nth-child expression like '2n+1', 'odd', 'even', '3'."""
763
823
  if not expr:
764
824
  return None
@@ -807,7 +867,7 @@ class SelectorMatcher:
807
867
 
808
868
  return (a, b)
809
869
 
810
- def _matches_nth(self, index, a, b):
870
+ def _matches_nth(self, index: int, a: int, b: int) -> bool:
811
871
  """Check if 1-based index matches An+B formula."""
812
872
  if a == 0:
813
873
  return index == b
@@ -819,7 +879,7 @@ class SelectorMatcher:
819
879
  # a < 0: need diff <= 0 and diff divisible by abs(a)
820
880
  return diff <= 0 and diff % a == 0
821
881
 
822
- def _matches_nth_child(self, node, arg):
882
+ def _matches_nth_child(self, node: Any, arg: str | None) -> bool:
823
883
  """Match :nth-child(An+B)."""
824
884
  parent = node.parent
825
885
  if not parent:
@@ -836,7 +896,7 @@ class SelectorMatcher:
836
896
  return self._matches_nth(i + 1, a, b)
837
897
  return False
838
898
 
839
- def _matches_nth_of_type(self, node, arg):
899
+ def _matches_nth_of_type(self, node: Any, arg: str | None) -> bool:
840
900
  """Match :nth-of-type(An+B)."""
841
901
  parent = node.parent
842
902
  if not parent:
@@ -858,22 +918,72 @@ class SelectorMatcher:
858
918
  return False
859
919
 
860
920
 
861
- def parse_selector(selector_string):
921
+ def parse_selector(selector_string: str) -> ParsedSelector:
862
922
  """Parse a CSS selector string into an AST."""
863
923
  if not selector_string or not selector_string.strip():
864
924
  raise SelectorError("Empty selector")
865
925
 
866
- tokenizer = SelectorTokenizer(selector_string.strip())
926
+ return _parse_selector_cached(selector_string.strip())
927
+
928
+
929
+ @lru_cache(maxsize=512)
930
+ def _parse_selector_cached(selector_string: str) -> ParsedSelector:
931
+ tokenizer = SelectorTokenizer(selector_string)
867
932
  tokens = tokenizer.tokenize()
868
933
  parser = SelectorParser(tokens)
869
934
  return parser.parse()
870
935
 
871
936
 
872
937
  # Global matcher instance
873
- _matcher = SelectorMatcher()
938
+ _matcher: SelectorMatcher = SelectorMatcher()
939
+
940
+
941
+ def _is_simple_tag_selector(selector: str) -> bool:
942
+ if not selector:
943
+ return False
944
+ ch0 = selector[0]
945
+ if not (ch0.isalpha() or ch0 == "_" or ch0 == "-" or ord(ch0) > 127):
946
+ return False
947
+ for ch in selector[1:]:
948
+ if ch.isalnum() or ch == "_" or ch == "-" or ord(ch) > 127:
949
+ continue
950
+ return False
951
+ return True
874
952
 
875
953
 
876
- def query(root, selector_string):
954
+ def _query_descendants_tag(node: Any, tag_lower: str, results: list[Any]) -> None:
955
+ results_append = results.append
956
+
957
+ stack: list[Any] = []
958
+
959
+ root_children = node.children
960
+ if root_children:
961
+ stack.extend(reversed(root_children))
962
+
963
+ if node.name == "template" and node.namespace == "html":
964
+ template_content = node.template_content
965
+ if template_content:
966
+ stack.append(template_content)
967
+
968
+ while stack:
969
+ current = stack.pop()
970
+
971
+ name = current.name
972
+ if not name.startswith("#"):
973
+ if name == tag_lower or name.lower() == tag_lower:
974
+ results_append(current)
975
+
976
+ children = current.children
977
+ if children:
978
+ stack.extend(reversed(children))
979
+
980
+ if name == "template" and current.namespace == "html":
981
+ template_content = current.template_content
982
+ if template_content:
983
+ stack.append(template_content)
984
+
985
+
986
+ def query(root: Any, selector_string: str) -> list[Any]:
877
987
  """
878
988
  Query the DOM tree starting from root, returning all matching elements.
879
989
 
@@ -887,30 +997,56 @@ def query(root, selector_string):
887
997
  Returns:
888
998
  A list of matching nodes
889
999
  """
890
- selector = parse_selector(selector_string)
891
- results = []
1000
+ selector_string = selector_string.strip()
1001
+ if not selector_string:
1002
+ raise SelectorError("Empty selector")
1003
+
1004
+ results: list[Any] = []
1005
+
1006
+ if _is_simple_tag_selector(selector_string):
1007
+ _query_descendants_tag(root, selector_string.lower(), results)
1008
+ return results
1009
+
1010
+ selector = _parse_selector_cached(selector_string)
892
1011
  _query_descendants(root, selector, results)
893
1012
  return results
894
1013
 
895
1014
 
896
- def _query_descendants(node, selector, results):
897
- """Recursively search for matching nodes in descendants."""
898
- # Only recurse into children (not the node itself)
899
- if node.has_child_nodes():
900
- for child in node.children:
901
- # Check if this child matches
902
- if hasattr(child, "name") and not child.name.startswith("#"):
903
- if _matcher.matches(child, selector):
904
- results.append(child)
905
- # Recurse into child's descendants
906
- _query_descendants(child, selector, results)
1015
+ def _query_descendants(node: Any, selector: ParsedSelector, results: list[Any]) -> None:
1016
+ """Search for matching nodes in descendants."""
1017
+ matcher_matches = _matcher.matches
1018
+ results_append = results.append
1019
+
1020
+ # querySelectorAll searches descendants of root, not including root itself.
1021
+ stack: list[Any] = []
1022
+
1023
+ root_children = node.children
1024
+ if root_children:
1025
+ stack.extend(reversed(root_children))
1026
+
1027
+ if node.name == "template" and node.namespace == "html":
1028
+ template_content = node.template_content
1029
+ if template_content:
1030
+ stack.append(template_content)
1031
+
1032
+ while stack:
1033
+ current = stack.pop()
1034
+
1035
+ name = current.name
1036
+ if not name.startswith("#") and matcher_matches(current, selector):
1037
+ results_append(current)
1038
+
1039
+ children = current.children
1040
+ if children:
1041
+ stack.extend(reversed(children))
907
1042
 
908
- # Also check template content if present
909
- if hasattr(node, "template_content") and node.template_content:
910
- _query_descendants(node.template_content, selector, results)
1043
+ if name == "template" and current.namespace == "html":
1044
+ template_content = current.template_content
1045
+ if template_content:
1046
+ stack.append(template_content)
911
1047
 
912
1048
 
913
- def matches(node, selector_string):
1049
+ def matches(node: Any, selector_string: str) -> bool:
914
1050
  """
915
1051
  Check if a node matches a CSS selector.
916
1052