justhtml 0.12.0__py3-none-any.whl → 0.38.0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of justhtml might be problematic. Click here for more details.

@@ -3,23 +3,30 @@
3
3
 
4
4
  from __future__ import annotations
5
5
 
6
- from typing import Any
6
+ from typing import TYPE_CHECKING, Any, Literal
7
7
 
8
8
  from .constants import (
9
+ FORMAT_MARKER,
9
10
  FORMATTING_ELEMENTS,
10
11
  HEADING_ELEMENTS,
11
12
  )
12
13
  from .node import SimpleDomNode, TemplateNode
13
- from .tokens import CharacterTokens, CommentToken, EOFToken, Tag, TokenSinkResult
14
+ from .tokens import AnyToken, CharacterTokens, CommentToken, DoctypeToken, EOFToken, Tag, TokenSinkResult
14
15
  from .treebuilder_utils import (
15
16
  InsertionMode,
16
17
  doctype_error_and_quirks,
17
18
  is_all_whitespace,
18
19
  )
19
20
 
21
+ if TYPE_CHECKING:
22
+ from collections.abc import Callable
23
+
24
+ ModeResultTuple = tuple[str, InsertionMode, AnyToken] | tuple[str, InsertionMode, AnyToken, bool]
25
+ "Result is (instruction, mode, token) or (instruction, mode, token, force_html)"
26
+
20
27
 
21
28
  class TreeBuilderModesMixin:
22
- def _handle_doctype(self, token: Any) -> Any:
29
+ def _handle_doctype(self, token: DoctypeToken) -> Literal[0]:
23
30
  if self.mode != InsertionMode.INITIAL:
24
31
  self._parse_error("unexpected-doctype")
25
32
  return TokenSinkResult.Continue
@@ -37,7 +44,7 @@ class TreeBuilderModesMixin:
37
44
  self.mode = InsertionMode.BEFORE_HTML
38
45
  return TokenSinkResult.Continue
39
46
 
40
- def _mode_initial(self, token: Any) -> Any:
47
+ def _mode_initial(self, token: Any) -> ModeResultTuple | None:
41
48
  if isinstance(token, CharacterTokens):
42
49
  if is_all_whitespace(token.data):
43
50
  return None
@@ -54,13 +61,13 @@ class TreeBuilderModesMixin:
54
61
  return ("reprocess", InsertionMode.BEFORE_HTML, token)
55
62
  # Only Tags remain - no DOCTYPE seen, so quirks mode
56
63
  if token.kind == Tag.START:
57
- self._parse_error("expected-doctype-but-got-start-tag", tag_name=token.name, token=token)
64
+ self._parse_error("expected-doctype-but-got-start-tag", tag_name=token.name)
58
65
  else:
59
- self._parse_error("expected-doctype-but-got-end-tag", tag_name=token.name, token=token)
66
+ self._parse_error("expected-doctype-but-got-end-tag", tag_name=token.name)
60
67
  self._set_quirks_mode("quirks")
61
68
  return ("reprocess", InsertionMode.BEFORE_HTML, token)
62
69
 
63
- def _mode_before_html(self, token: Any) -> Any:
70
+ def _mode_before_html(self, token: AnyToken) -> ModeResultTuple | None:
64
71
  if isinstance(token, CharacterTokens) and is_all_whitespace(token.data):
65
72
  return None
66
73
  if isinstance(token, CommentToken):
@@ -93,7 +100,7 @@ class TreeBuilderModesMixin:
93
100
  self.mode = InsertionMode.BEFORE_HEAD
94
101
  return ("reprocess", InsertionMode.BEFORE_HEAD, token)
95
102
 
96
- def _mode_before_head(self, token: Any) -> Any:
103
+ def _mode_before_head(self, token: AnyToken) -> ModeResultTuple | None:
97
104
  if isinstance(token, CharacterTokens):
98
105
  data = token.data or ""
99
106
  if "\x00" in data:
@@ -136,7 +143,7 @@ class TreeBuilderModesMixin:
136
143
  self.mode = InsertionMode.IN_HEAD
137
144
  return ("reprocess", InsertionMode.IN_HEAD, token)
138
145
 
139
- def _mode_in_head(self, token: Any) -> Any:
146
+ def _mode_in_head(self, token: AnyToken) -> ModeResultTuple | None:
140
147
  if isinstance(token, CharacterTokens):
141
148
  if is_all_whitespace(token.data):
142
149
  self._append_text(token.data)
@@ -212,7 +219,7 @@ class TreeBuilderModesMixin:
212
219
  self.mode = InsertionMode.AFTER_HEAD
213
220
  return ("reprocess", InsertionMode.AFTER_HEAD, token)
214
221
 
215
- def _mode_in_head_noscript(self, token: Any) -> Any:
222
+ def _mode_in_head_noscript(self, token: AnyToken) -> ModeResultTuple | None:
216
223
  """Handle tokens in 'in head noscript' insertion mode (scripting disabled)."""
217
224
  if isinstance(token, CharacterTokens):
218
225
  data = token.data or ""
@@ -261,15 +268,11 @@ class TreeBuilderModesMixin:
261
268
  # All token types are handled above - CharacterTokens, CommentToken, Tag, EOFToken
262
269
  return None # pragma: no cover
263
270
 
264
- def _mode_after_head(self, token: Any) -> Any:
271
+ def _mode_after_head(self, token: AnyToken) -> ModeResultTuple | None:
265
272
  if isinstance(token, CharacterTokens):
266
273
  data = token.data or ""
267
274
  if "\x00" in data:
268
- self._parse_error("invalid-codepoint-in-body")
269
275
  data = data.replace("\x00", "")
270
- if "\x0c" in data:
271
- self._parse_error("invalid-codepoint-in-body")
272
- data = data.replace("\x0c", "")
273
276
  if not data or is_all_whitespace(data):
274
277
  if data:
275
278
  self._append_text(data)
@@ -331,6 +334,10 @@ class TreeBuilderModesMixin:
331
334
  self.mode = InsertionMode.IN_HEAD
332
335
  return ("reprocess", InsertionMode.IN_HEAD, token)
333
336
  if token.kind == Tag.END and token.name == "template":
337
+ has_template = any(node.name == "template" for node in self.open_elements)
338
+ if not has_template:
339
+ self._parse_error("unexpected-end-tag", tag_name=token.name)
340
+ return None
334
341
  return self._mode_in_head(token)
335
342
  if token.kind == Tag.END and token.name == "body":
336
343
  self._insert_body_if_missing()
@@ -350,7 +357,7 @@ class TreeBuilderModesMixin:
350
357
  self._insert_body_if_missing()
351
358
  return ("reprocess", InsertionMode.IN_BODY, token)
352
359
 
353
- def _mode_text(self, token: Any) -> Any:
360
+ def _mode_text(self, token: AnyToken) -> ModeResultTuple | None:
354
361
  if isinstance(token, CharacterTokens):
355
362
  self._append_text(token.data)
356
363
  return None
@@ -366,11 +373,11 @@ class TreeBuilderModesMixin:
366
373
  self.mode = self.original_mode or InsertionMode.IN_BODY
367
374
  return None
368
375
 
369
- def _mode_in_body(self, token: Any) -> Any:
376
+ def _mode_in_body(self, token: Any) -> ModeResultTuple | None:
370
377
  handler = self._BODY_TOKEN_HANDLERS.get(type(token))
371
378
  return handler(self, token) if handler else None
372
379
 
373
- def _handle_characters_in_body(self, token: Any) -> Any:
380
+ def _handle_characters_in_body(self, token: CharacterTokens) -> None:
374
381
  data = token.data or ""
375
382
  if "\x00" in data:
376
383
  self._parse_error("invalid-codepoint")
@@ -384,11 +391,11 @@ class TreeBuilderModesMixin:
384
391
  self._append_text(data)
385
392
  return
386
393
 
387
- def _handle_comment_in_body(self, token: Any) -> Any:
394
+ def _handle_comment_in_body(self, token: CommentToken) -> None:
388
395
  self._append_comment(token.data)
389
396
  return
390
397
 
391
- def _handle_tag_in_body(self, token: Any) -> Any:
398
+ def _handle_tag_in_body(self, token: Tag) -> ModeResultTuple | None:
392
399
  if token.kind == Tag.START:
393
400
  handler = self._BODY_START_HANDLERS.get(token.name)
394
401
  if handler:
@@ -412,7 +419,7 @@ class TreeBuilderModesMixin:
412
419
  self._any_other_end_tag(token.name)
413
420
  return None
414
421
 
415
- def _handle_eof_in_body(self, token: Any) -> Any:
422
+ def _handle_eof_in_body(self, token: EOFToken) -> ModeResultTuple | None:
416
423
  # If we're in a template, handle EOF in template mode first
417
424
  if self.template_modes:
418
425
  return self._mode_in_template(token)
@@ -447,17 +454,19 @@ class TreeBuilderModesMixin:
447
454
  # Body mode start tag handlers
448
455
  # ---------------------
449
456
 
450
- def _handle_body_start_html(self, token: Any) -> Any:
457
+ def _handle_body_start_html(self, token: Tag) -> None:
451
458
  if self.template_modes:
452
459
  self._parse_error("unexpected-start-tag", tag_name=token.name)
453
460
  return
461
+ # Per spec: parse error; merge attributes onto existing <html>.
462
+ self._parse_error("unexpected-start-tag", tag_name=token.name)
454
463
  # In IN_BODY mode, html element is always at open_elements[0]
455
464
  if self.open_elements: # pragma: no branch
456
465
  html = self.open_elements[0]
457
466
  self._add_missing_attributes(html, token.attrs)
458
467
  return
459
468
 
460
- def _handle_body_start_body(self, token: Any) -> Any:
469
+ def _handle_body_start_body(self, token: Tag) -> None:
461
470
  if self.template_modes:
462
471
  self._parse_error("unexpected-start-tag", tag_name=token.name)
463
472
  return
@@ -471,19 +480,19 @@ class TreeBuilderModesMixin:
471
480
  self.frameset_ok = False
472
481
  return
473
482
 
474
- def _handle_body_start_head(self, token: Any) -> Any:
483
+ def _handle_body_start_head(self, token: Tag) -> None:
475
484
  self._parse_error("unexpected-start-tag", tag_name=token.name)
476
485
  return
477
486
 
478
- def _handle_body_start_in_head(self, token: Any) -> Any:
487
+ def _handle_body_start_in_head(self, token: Tag) -> ModeResultTuple | None:
479
488
  return self._mode_in_head(token)
480
489
 
481
- def _handle_body_start_block_with_p(self, token: Any) -> Any:
490
+ def _handle_body_start_block_with_p(self, token: Tag) -> None:
482
491
  self._close_p_element()
483
492
  self._insert_element(token, push=True)
484
493
  return
485
494
 
486
- def _handle_body_start_heading(self, token: Any) -> Any:
495
+ def _handle_body_start_heading(self, token: Tag) -> None:
487
496
  self._close_p_element()
488
497
  if self.open_elements and self.open_elements[-1].name in HEADING_ELEMENTS:
489
498
  self._parse_error("unexpected-start-tag", tag_name=token.name)
@@ -492,14 +501,14 @@ class TreeBuilderModesMixin:
492
501
  self.frameset_ok = False
493
502
  return
494
503
 
495
- def _handle_body_start_pre_listing(self, token: Any) -> Any:
504
+ def _handle_body_start_pre_listing(self, token: Tag) -> None:
496
505
  self._close_p_element()
497
506
  self._insert_element(token, push=True)
498
507
  self.ignore_lf = True
499
508
  self.frameset_ok = False
500
509
  return
501
510
 
502
- def _handle_body_start_form(self, token: Any) -> Any:
511
+ def _handle_body_start_form(self, token: Tag) -> None:
503
512
  if self.form_element is not None:
504
513
  self._parse_error("unexpected-start-tag", tag_name=token.name)
505
514
  return
@@ -509,7 +518,7 @@ class TreeBuilderModesMixin:
509
518
  self.frameset_ok = False
510
519
  return
511
520
 
512
- def _handle_body_start_button(self, token: Any) -> Any:
521
+ def _handle_body_start_button(self, token: Tag) -> None:
513
522
  if self._has_in_scope("button"):
514
523
  self._parse_error("unexpected-start-tag-implies-end-tag", tag_name=token.name)
515
524
  self._close_element_by_name("button")
@@ -517,19 +526,19 @@ class TreeBuilderModesMixin:
517
526
  self.frameset_ok = False
518
527
  return
519
528
 
520
- def _handle_body_start_paragraph(self, token: Any) -> Any:
529
+ def _handle_body_start_paragraph(self, token: Tag) -> None:
521
530
  self._close_p_element()
522
531
  self._insert_element(token, push=True)
523
532
  return
524
533
 
525
- def _handle_body_start_math(self, token: Any) -> Any:
534
+ def _handle_body_start_math(self, token: Tag) -> None:
526
535
  self._reconstruct_active_formatting_elements()
527
536
  attrs = self._prepare_foreign_attributes("math", token.attrs)
528
537
  new_tag = Tag(Tag.START, token.name, attrs, token.self_closing)
529
538
  self._insert_element(new_tag, push=not token.self_closing, namespace="math")
530
539
  return
531
540
 
532
- def _handle_body_start_svg(self, token: Any) -> Any:
541
+ def _handle_body_start_svg(self, token: Tag) -> None:
533
542
  self._reconstruct_active_formatting_elements()
534
543
  adjusted_name = self._adjust_svg_tag_name(token.name)
535
544
  attrs = self._prepare_foreign_attributes("svg", token.attrs)
@@ -537,7 +546,7 @@ class TreeBuilderModesMixin:
537
546
  self._insert_element(new_tag, push=not token.self_closing, namespace="svg")
538
547
  return
539
548
 
540
- def _handle_body_start_li(self, token: Any) -> Any:
549
+ def _handle_body_start_li(self, token: Tag) -> None:
541
550
  self.frameset_ok = False
542
551
  self._close_p_element()
543
552
  if self._has_in_list_item_scope("li"):
@@ -545,7 +554,7 @@ class TreeBuilderModesMixin:
545
554
  self._insert_element(token, push=True)
546
555
  return
547
556
 
548
- def _handle_body_start_dd_dt(self, token: Any) -> Any:
557
+ def _handle_body_start_dd_dt(self, token: Tag) -> None:
549
558
  self.frameset_ok = False
550
559
  self._close_p_element()
551
560
  name = token.name
@@ -574,6 +583,10 @@ class TreeBuilderModesMixin:
574
583
  # 3. Find formatting element
575
584
  formatting_element_index = self._find_active_formatting_index(subject)
576
585
  if formatting_element_index is None:
586
+ # html5lib reports a parse error when an end tag for a formatting
587
+ # element triggers the adoption agency algorithm but no matching
588
+ # active formatting entry exists.
589
+ self._parse_error("adoption-agency-1.3")
577
590
  return
578
591
 
579
592
  formatting_element_entry = self.active_formatting[formatting_element_index]
@@ -607,7 +620,7 @@ class TreeBuilderModesMixin:
607
620
  if furthest_block is None:
608
621
  # formatting_element is known to be on the stack
609
622
  while True:
610
- popped = self.open_elements.pop()
623
+ popped = self._pop_current()
611
624
  if popped is formatting_element:
612
625
  break
613
626
  self._remove_formatting_entry(formatting_element_index)
@@ -651,6 +664,10 @@ class TreeBuilderModesMixin:
651
664
  # 10.4 Replace entry with new element
652
665
  entry = self.active_formatting[node_formatting_index]
653
666
  new_element = self._create_element(entry["name"], entry["node"].namespace, entry["attrs"])
667
+ if self.tokenizer is not None and self.tokenizer.track_node_locations:
668
+ new_element._origin_pos = entry["node"].origin_offset
669
+ new_element._origin_line = entry["node"].origin_line
670
+ new_element._origin_col = entry["node"].origin_col
654
671
  entry["node"] = new_element
655
672
  self.open_elements[self.open_elements.index(node)] = new_element
656
673
  node = new_element
@@ -684,6 +701,10 @@ class TreeBuilderModesMixin:
684
701
  # 12. Create new formatting element
685
702
  entry = self.active_formatting[formatting_element_index]
686
703
  new_formatting_element = self._create_element(entry["name"], entry["node"].namespace, entry["attrs"])
704
+ if self.tokenizer is not None and self.tokenizer.track_node_locations:
705
+ new_formatting_element._origin_pos = entry["node"].origin_offset
706
+ new_formatting_element._origin_line = entry["node"].origin_line
707
+ new_formatting_element._origin_col = entry["node"].origin_col
687
708
  entry["node"] = new_formatting_element
688
709
 
689
710
  # 13. Move children of furthest block
@@ -706,8 +727,9 @@ class TreeBuilderModesMixin:
706
727
  furthest_block_index = self.open_elements.index(furthest_block)
707
728
  self.open_elements.insert(furthest_block_index + 1, new_formatting_element)
708
729
 
709
- def _handle_body_start_a(self, token: Any) -> Any:
730
+ def _handle_body_start_a(self, token: Tag) -> None:
710
731
  if self._has_active_formatting_entry("a"):
732
+ self._parse_error("unexpected-start-tag-implies-end-tag", tag_name=token.name)
711
733
  self._adoption_agency("a")
712
734
  self._remove_last_active_formatting_by_name("a")
713
735
  self._remove_last_open_element_by_name("a")
@@ -716,7 +738,7 @@ class TreeBuilderModesMixin:
716
738
  self._append_active_formatting_entry("a", token.attrs, node)
717
739
  return
718
740
 
719
- def _handle_body_start_formatting(self, token: Any) -> Any:
741
+ def _handle_body_start_formatting(self, token: Tag) -> None:
720
742
  name = token.name
721
743
  if name == "nobr" and self._in_scope("nobr"):
722
744
  self._adoption_agency("nobr")
@@ -730,21 +752,21 @@ class TreeBuilderModesMixin:
730
752
  self._append_active_formatting_entry(name, token.attrs, node)
731
753
  return
732
754
 
733
- def _handle_body_start_applet_like(self, token: Any) -> Any:
755
+ def _handle_body_start_applet_like(self, token: Tag) -> None:
734
756
  self._reconstruct_active_formatting_elements()
735
757
  self._insert_element(token, push=True)
736
758
  self._push_formatting_marker()
737
759
  self.frameset_ok = False
738
760
  return
739
761
 
740
- def _handle_body_start_br(self, token: Any) -> Any:
762
+ def _handle_body_start_br(self, token: Tag) -> None:
741
763
  self._close_p_element()
742
764
  self._reconstruct_active_formatting_elements()
743
765
  self._insert_element(token, push=False)
744
766
  self.frameset_ok = False
745
767
  return
746
768
 
747
- def _handle_body_start_frameset(self, token: Any) -> Any:
769
+ def _handle_body_start_frameset(self, token: Tag) -> None:
748
770
  if not self.frameset_ok:
749
771
  self._parse_error("unexpected-start-tag-ignored", tag_name=token.name)
750
772
  return
@@ -769,17 +791,17 @@ class TreeBuilderModesMixin:
769
791
  # Body mode end tag handlers
770
792
  # ---------------------
771
793
 
772
- def _handle_body_end_body(self, token: Any) -> Any:
794
+ def _handle_body_end_body(self, token: Tag) -> None:
773
795
  if self._in_scope("body"):
774
796
  self.mode = InsertionMode.AFTER_BODY
775
797
  return
776
798
 
777
- def _handle_body_end_html(self, token: Any) -> Any:
799
+ def _handle_body_end_html(self, token: Tag) -> ModeResultTuple | None:
778
800
  if self._in_scope("body"):
779
801
  return ("reprocess", InsertionMode.AFTER_BODY, token)
780
802
  return None
781
803
 
782
- def _handle_body_end_p(self, token: Any) -> Any:
804
+ def _handle_body_end_p(self, token: Tag) -> None:
783
805
  if not self._close_p_element():
784
806
  self._parse_error("unexpected-end-tag", tag_name=token.name)
785
807
  phantom = Tag(Tag.START, "p", {}, False)
@@ -787,21 +809,21 @@ class TreeBuilderModesMixin:
787
809
  self._close_p_element()
788
810
  return
789
811
 
790
- def _handle_body_end_li(self, token: Any) -> Any:
812
+ def _handle_body_end_li(self, token: Tag) -> None:
791
813
  if not self._has_in_list_item_scope("li"):
792
814
  self._parse_error("unexpected-end-tag", tag_name=token.name)
793
815
  return
794
816
  self._pop_until_any_inclusive({"li"})
795
817
  return
796
818
 
797
- def _handle_body_end_dd_dt(self, token: Any) -> Any:
819
+ def _handle_body_end_dd_dt(self, token: Tag) -> None:
798
820
  name = token.name
799
821
  if not self._has_in_definition_scope(name):
800
822
  self._parse_error("unexpected-end-tag", tag_name=name)
801
823
  return
802
824
  self._pop_until_any_inclusive({"dd", "dt"})
803
825
 
804
- def _handle_body_end_form(self, token: Any) -> Any:
826
+ def _handle_body_end_form(self, token: Tag) -> None:
805
827
  if self.form_element is None:
806
828
  self._parse_error("unexpected-end-tag", tag_name=token.name)
807
829
  return
@@ -811,20 +833,20 @@ class TreeBuilderModesMixin:
811
833
  self._parse_error("unexpected-end-tag", tag_name=token.name)
812
834
  return
813
835
 
814
- def _handle_body_end_applet_like(self, token: Any) -> Any:
836
+ def _handle_body_end_applet_like(self, token: Tag) -> None:
815
837
  name = token.name
816
838
  if not self._in_scope(name):
817
839
  self._parse_error("unexpected-end-tag", tag_name=name)
818
840
  return
819
841
  # Element verified in scope above
820
842
  while self.open_elements: # pragma: no branch
821
- popped = self.open_elements.pop()
843
+ popped = self._pop_current()
822
844
  if popped.name == name:
823
845
  break
824
846
  self._clear_active_formatting_up_to_marker()
825
847
  return
826
848
 
827
- def _handle_body_end_heading(self, token: Any) -> Any:
849
+ def _handle_body_end_heading(self, token: Tag) -> None:
828
850
  name = token.name
829
851
  if not self._has_any_in_scope(HEADING_ELEMENTS):
830
852
  self._parse_error("unexpected-end-tag", tag_name=name)
@@ -834,12 +856,12 @@ class TreeBuilderModesMixin:
834
856
  self._parse_error("end-tag-too-early", tag_name=name)
835
857
  # Heading verified in scope by caller
836
858
  while self.open_elements: # pragma: no branch
837
- popped = self.open_elements.pop()
859
+ popped = self._pop_current()
838
860
  if popped.name in HEADING_ELEMENTS:
839
861
  break
840
862
  return
841
863
 
842
- def _handle_body_end_block(self, token: Any) -> Any:
864
+ def _handle_body_end_block(self, token: Tag) -> None:
843
865
  name = token.name
844
866
  if not self._in_scope(name):
845
867
  self._parse_error("unexpected-end-tag", tag_name=name)
@@ -850,9 +872,10 @@ class TreeBuilderModesMixin:
850
872
  self._pop_until_any_inclusive({name})
851
873
  return
852
874
 
853
- def _handle_body_end_template(self, token: Any) -> Any:
875
+ def _handle_body_end_template(self, token: Tag) -> None:
854
876
  has_template = any(node.name == "template" for node in self.open_elements)
855
877
  if not has_template:
878
+ self._parse_error("unexpected-end-tag", tag_name=token.name)
856
879
  return
857
880
  self._generate_implied_end_tags()
858
881
  self._pop_until_inclusive("template")
@@ -863,18 +886,18 @@ class TreeBuilderModesMixin:
863
886
  self._reset_insertion_mode()
864
887
  return
865
888
 
866
- def _handle_body_start_structure_ignored(self, token: Any) -> Any:
889
+ def _handle_body_start_structure_ignored(self, token: Tag) -> None:
867
890
  self._parse_error("unexpected-start-tag-ignored", tag_name=token.name)
868
891
  return
869
892
 
870
- def _handle_body_start_col_or_frame(self, token: Any) -> Any:
893
+ def _handle_body_start_col_or_frame(self, token: Tag) -> None:
871
894
  if self.fragment_context is None:
872
895
  self._parse_error("unexpected-start-tag-ignored", tag_name=token.name)
873
896
  return
874
897
  self._insert_element(token, push=False)
875
898
  return
876
899
 
877
- def _handle_body_start_image(self, token: Any) -> Any:
900
+ def _handle_body_start_image(self, token: Tag) -> None:
878
901
  self._parse_error("image-start-tag", tag_name=token.name)
879
902
  img_token = Tag(Tag.START, "img", token.attrs, token.self_closing)
880
903
  self._reconstruct_active_formatting_elements()
@@ -882,17 +905,17 @@ class TreeBuilderModesMixin:
882
905
  self.frameset_ok = False
883
906
  return
884
907
 
885
- def _handle_body_start_void_with_formatting(self, token: Any) -> Any:
908
+ def _handle_body_start_void_with_formatting(self, token: Tag) -> None:
886
909
  self._reconstruct_active_formatting_elements()
887
910
  self._insert_element(token, push=False)
888
911
  self.frameset_ok = False
889
912
  return
890
913
 
891
- def _handle_body_start_simple_void(self, token: Any) -> Any:
914
+ def _handle_body_start_simple_void(self, token: Tag) -> None:
892
915
  self._insert_element(token, push=False)
893
916
  return
894
917
 
895
- def _handle_body_start_input(self, token: Any) -> Any:
918
+ def _handle_body_start_input(self, token: Tag) -> None:
896
919
  input_type = None
897
920
  for name, value in token.attrs.items():
898
921
  if name == "type":
@@ -903,7 +926,7 @@ class TreeBuilderModesMixin:
903
926
  self.frameset_ok = False
904
927
  return
905
928
 
906
- def _handle_body_start_table(self, token: Any) -> Any:
929
+ def _handle_body_start_table(self, token: Tag) -> None:
907
930
  if self.quirks_mode != "quirks":
908
931
  self._close_p_element()
909
932
  self._insert_element(token, push=True)
@@ -911,7 +934,7 @@ class TreeBuilderModesMixin:
911
934
  self.mode = InsertionMode.IN_TABLE
912
935
  return
913
936
 
914
- def _handle_body_start_plaintext_xmp(self, token: Any) -> Any:
937
+ def _handle_body_start_plaintext_xmp(self, token: Tag) -> None:
915
938
  self._close_p_element()
916
939
  self._insert_element(token, push=True)
917
940
  self.frameset_ok = False
@@ -923,66 +946,88 @@ class TreeBuilderModesMixin:
923
946
  self.mode = InsertionMode.TEXT
924
947
  return
925
948
 
926
- def _handle_body_start_textarea(self, token: Any) -> Any:
949
+ def _handle_body_start_textarea(self, token: Tag) -> None:
927
950
  self._insert_element(token, push=True)
928
951
  self.ignore_lf = True
929
952
  self.frameset_ok = False
930
953
  return
931
954
 
932
- def _handle_body_start_select(self, token: Any) -> Any:
955
+ def _handle_body_start_select(self, token: Tag) -> None:
933
956
  self._reconstruct_active_formatting_elements()
934
957
  self._insert_element(token, push=True)
935
958
  self.frameset_ok = False
936
959
  self._reset_insertion_mode()
937
960
  return
938
961
 
939
- def _handle_body_start_option(self, token: Any) -> Any:
962
+ def _handle_body_start_option(self, token: Tag) -> None:
940
963
  if self.open_elements and self.open_elements[-1].name == "option":
941
- self.open_elements.pop()
964
+ self._pop_current()
942
965
  self._reconstruct_active_formatting_elements()
943
966
  self._insert_element(token, push=True)
944
967
  return
945
968
 
946
- def _handle_body_start_optgroup(self, token: Any) -> Any:
969
+ def _handle_body_start_optgroup(self, token: Tag) -> None:
947
970
  if self.open_elements and self.open_elements[-1].name == "option":
948
- self.open_elements.pop()
971
+ self._pop_current()
949
972
  self._reconstruct_active_formatting_elements()
950
973
  self._insert_element(token, push=True)
951
974
  return
952
975
 
953
- def _handle_body_start_rp_rt(self, token: Any) -> Any:
976
+ def _handle_body_start_rp_rt(self, token: Tag) -> None:
954
977
  self._generate_implied_end_tags(exclude="rtc")
955
978
  self._insert_element(token, push=True)
956
979
  return
957
980
 
958
- def _handle_body_start_rb_rtc(self, token: Any) -> Any:
981
+ def _handle_body_start_rb_rtc(self, token: Tag) -> None:
959
982
  if self.open_elements and self.open_elements[-1].name in {"rb", "rp", "rt", "rtc"}:
960
983
  self._generate_implied_end_tags()
961
984
  self._insert_element(token, push=True)
962
985
  return
963
986
 
964
- def _handle_body_start_table_parse_error(self, token: Any) -> Any:
987
+ def _handle_body_start_table_parse_error(self, token: Tag) -> None:
965
988
  self._parse_error("unexpected-start-tag", tag_name=token.name)
966
989
  return
967
990
 
968
- def _handle_body_start_default(self, token: Any) -> Any:
991
+ def _handle_body_start_default(self, token: Tag) -> ModeResultTuple | None:
969
992
  self._reconstruct_active_formatting_elements()
970
993
  self._insert_element(token, push=True)
971
994
  if token.self_closing:
972
995
  self._parse_error("non-void-html-element-start-tag-with-trailing-solidus", tag_name=token.name)
973
996
  # Elements reaching here have no handler - never in FRAMESET_NEUTRAL/FORMATTING_ELEMENTS
974
997
  self.frameset_ok = False
975
- return
998
+ return None
976
999
 
977
- def _mode_in_table(self, token: Any) -> Any:
1000
+ def _mode_in_table(self, token: AnyToken) -> ModeResultTuple | None:
978
1001
  if isinstance(token, CharacterTokens):
979
1002
  data = token.data or ""
980
1003
  if "\x00" in data:
981
- self._parse_error("unexpected-null-character")
982
1004
  data = data.replace("\x00", "")
983
1005
  if not data:
984
1006
  return None
985
1007
  token = CharacterTokens(data)
1008
+
1009
+ if is_all_whitespace(data):
1010
+ self._append_text(data)
1011
+ return None
1012
+
1013
+ # html5lib-tests expect that some table foster-parenting text triggered by a
1014
+ # misnested formatting element (<a>) only produces an implied-end-tag error
1015
+ # when the table closes, not an additional character-in-table error.
1016
+ suppress_table_char_error = False
1017
+ if self.active_formatting:
1018
+ for idx in range(len(self.active_formatting) - 1, -1, -1):
1019
+ entry = self.active_formatting[idx]
1020
+ if entry is FORMAT_MARKER:
1021
+ break
1022
+ if entry["name"] == "a":
1023
+ if entry["node"] not in self.open_elements:
1024
+ suppress_table_char_error = True
1025
+ break
1026
+
1027
+ if not suppress_table_char_error:
1028
+ self.pending_table_text_should_error = True
1029
+ else:
1030
+ self.pending_table_text_should_error = False
986
1031
  self.pending_table_text = []
987
1032
  self.table_text_original_mode = self.mode
988
1033
  self.mode = InsertionMode.IN_TABLE_TEXT
@@ -1046,16 +1091,16 @@ class TreeBuilderModesMixin:
1046
1091
  if input_type == "hidden":
1047
1092
  self._parse_error("unexpected-hidden-input-in-table")
1048
1093
  self._insert_element(token, push=True)
1049
- self.open_elements.pop() # push=True always adds to stack
1094
+ self._pop_current() # push=True always adds to stack
1050
1095
  return None
1051
1096
  if name == "form":
1052
1097
  self._parse_error("unexpected-form-in-table")
1053
1098
  if self.form_element is None:
1054
1099
  node = self._insert_element(token, push=True)
1055
1100
  self.form_element = node
1056
- self.open_elements.pop() # push=True always adds to stack
1101
+ self._pop_current() # push=True always adds to stack
1057
1102
  return None
1058
- self._parse_error("unexpected-start-tag-implies-table-voodoo", tag_name=name)
1103
+ self._parse_error("foster-parenting-start-tag", tag_name=name)
1059
1104
  previous = self.insert_from_table
1060
1105
  self.insert_from_table = True
1061
1106
  try:
@@ -1082,26 +1127,40 @@ class TreeBuilderModesMixin:
1082
1127
  if self.template_modes:
1083
1128
  return self._mode_in_template(token)
1084
1129
  if self._has_in_table_scope("table"):
1085
- self._parse_error("expected-closing-tag-but-got-eof", tag_name="table")
1130
+ self._parse_error("eof-in-table")
1086
1131
  return None
1087
1132
 
1088
- def _mode_in_table_text(self, token: Any) -> Any:
1133
+ def _mode_in_table_text(self, token: AnyToken) -> ModeResultTuple | None:
1089
1134
  if isinstance(token, CharacterTokens):
1090
1135
  # IN_TABLE mode guarantees non-empty data
1091
1136
  data = token.data
1092
- if "\x0c" in data:
1093
- self._parse_error("invalid-codepoint-in-table-text")
1094
- data = data.replace("\x0c", "")
1095
- if data:
1096
- self.pending_table_text.append(data)
1137
+ self.pending_table_text.append(data)
1097
1138
  return None
1139
+
1140
+ if (
1141
+ self.pending_table_text
1142
+ and isinstance(token, Tag)
1143
+ and token.kind == Tag.END
1144
+ and token.name == "table"
1145
+ and not is_all_whitespace("".join(self.pending_table_text))
1146
+ ):
1147
+ # If a misnested <a> exists only in the active formatting list, html5lib
1148
+ # reports the implied close when the table ends.
1149
+ if self.active_formatting:
1150
+ for idx in range(len(self.active_formatting) - 1, -1, -1):
1151
+ entry = self.active_formatting[idx]
1152
+ if entry is FORMAT_MARKER:
1153
+ break
1154
+ if entry["name"] == "a" and entry["node"] not in self.open_elements:
1155
+ self._parse_error("unexpected-implied-end-tag-in-table-view")
1156
+ break
1098
1157
  self._flush_pending_table_text()
1099
1158
  original = self.table_text_original_mode or InsertionMode.IN_TABLE
1100
1159
  self.table_text_original_mode = None
1101
1160
  self.mode = original
1102
1161
  return ("reprocess", original, token)
1103
1162
 
1104
- def _mode_in_caption(self, token: Any) -> Any:
1163
+ def _mode_in_caption(self, token: AnyToken) -> ModeResultTuple | None:
1105
1164
  if isinstance(token, CharacterTokens):
1106
1165
  return self._mode_in_body(token)
1107
1166
  if isinstance(token, CommentToken):
@@ -1147,14 +1206,14 @@ class TreeBuilderModesMixin:
1147
1206
  self._generate_implied_end_tags()
1148
1207
  # Caption verified in scope above
1149
1208
  while self.open_elements: # pragma: no branch
1150
- node = self.open_elements.pop()
1209
+ node = self._pop_current()
1151
1210
  if node.name == "caption":
1152
1211
  break
1153
1212
  self._clear_active_formatting_up_to_marker()
1154
1213
  self.mode = InsertionMode.IN_TABLE
1155
1214
  return True
1156
1215
 
1157
- def _mode_in_column_group(self, token: Any) -> Any:
1216
+ def _mode_in_column_group(self, token: AnyToken) -> ModeResultTuple | None:
1158
1217
  current = self.open_elements[-1] if self.open_elements else None
1159
1218
  if isinstance(token, CharacterTokens):
1160
1219
  data = token.data or ""
@@ -1191,7 +1250,7 @@ class TreeBuilderModesMixin:
1191
1250
  return self._mode_in_body(token)
1192
1251
  if name == "col":
1193
1252
  self._insert_element(token, push=True)
1194
- self.open_elements.pop() # push=True always adds to stack
1253
+ self._pop_current() # push=True always adds to stack
1195
1254
  return None
1196
1255
  if name == "template":
1197
1256
  # Template is handled by delegating to IN_HEAD
@@ -1249,7 +1308,7 @@ class TreeBuilderModesMixin:
1249
1308
  return None
1250
1309
  # Per spec: EOF when current is html - implicit None return
1251
1310
 
1252
- def _mode_in_table_body(self, token: Any) -> Any:
1311
+ def _mode_in_table_body(self, token: AnyToken) -> ModeResultTuple | None:
1253
1312
  if isinstance(token, CharacterTokens) or isinstance(token, CommentToken):
1254
1313
  return self._mode_in_table(token)
1255
1314
  if isinstance(token, Tag):
@@ -1284,7 +1343,7 @@ class TreeBuilderModesMixin:
1284
1343
  return None
1285
1344
  # Pop tbody/tfoot/thead (stack always has elements here in normal parsing)
1286
1345
  if self.open_elements:
1287
- self.open_elements.pop()
1346
+ self._pop_current()
1288
1347
  self.mode = InsertionMode.IN_TABLE
1289
1348
  return ("reprocess", InsertionMode.IN_TABLE, token)
1290
1349
  # Empty stack edge case - go directly to IN_TABLE without reprocess
@@ -1315,7 +1374,7 @@ class TreeBuilderModesMixin:
1315
1374
  self._parse_error("unexpected-end-tag", tag_name=token.name)
1316
1375
  return None
1317
1376
  if current and current.name in {"tbody", "tfoot", "thead"}:
1318
- self.open_elements.pop()
1377
+ self._pop_current()
1319
1378
  self.mode = InsertionMode.IN_TABLE
1320
1379
  return ("reprocess", InsertionMode.IN_TABLE, token)
1321
1380
  if name in {"caption", "col", "colgroup", "td", "th", "tr"}:
@@ -1325,7 +1384,7 @@ class TreeBuilderModesMixin:
1325
1384
  assert isinstance(token, EOFToken), f"Unexpected token type: {type(token)}"
1326
1385
  return self._mode_in_table(token)
1327
1386
 
1328
- def _mode_in_row(self, token: Any) -> Any:
1387
+ def _mode_in_row(self, token: AnyToken) -> ModeResultTuple | None:
1329
1388
  if isinstance(token, CharacterTokens) or isinstance(token, CommentToken):
1330
1389
  return self._mode_in_table(token)
1331
1390
  if isinstance(token, Tag):
@@ -1378,14 +1437,14 @@ class TreeBuilderModesMixin:
1378
1437
  self._clear_stack_until({"tr", "template", "html"})
1379
1438
  # Pop tr if on top (may not be if stack was exhausted)
1380
1439
  if self.open_elements and self.open_elements[-1].name == "tr":
1381
- self.open_elements.pop()
1440
+ self._pop_current()
1382
1441
  # When in a template, restore template mode; otherwise use IN_TABLE_BODY
1383
1442
  if self.template_modes:
1384
1443
  self.mode = self.template_modes[-1]
1385
1444
  else:
1386
1445
  self.mode = InsertionMode.IN_TABLE_BODY
1387
1446
 
1388
- def _mode_in_cell(self, token: Any) -> Any:
1447
+ def _mode_in_cell(self, token: AnyToken) -> ModeResultTuple | None:
1389
1448
  if isinstance(token, CharacterTokens):
1390
1449
  previous = self.insert_from_table
1391
1450
  self.insert_from_table = False
@@ -1439,15 +1498,11 @@ class TreeBuilderModesMixin:
1439
1498
  return ("reprocess", self.mode, token)
1440
1499
  return self._mode_in_table(token)
1441
1500
 
1442
- def _mode_in_select(self, token: Any) -> Any:
1501
+ def _mode_in_select(self, token: AnyToken) -> ModeResultTuple | None:
1443
1502
  if isinstance(token, CharacterTokens):
1444
1503
  data = token.data or ""
1445
1504
  if "\x00" in data:
1446
- self._parse_error("invalid-codepoint-in-select")
1447
1505
  data = data.replace("\x00", "")
1448
- if "\x0c" in data:
1449
- self._parse_error("invalid-codepoint-in-select")
1450
- data = data.replace("\x0c", "")
1451
1506
  if data:
1452
1507
  self._reconstruct_active_formatting_elements()
1453
1508
  self._append_text(data)
@@ -1462,26 +1517,26 @@ class TreeBuilderModesMixin:
1462
1517
  return ("reprocess", InsertionMode.IN_BODY, token)
1463
1518
  if name == "option":
1464
1519
  if self.open_elements and self.open_elements[-1].name == "option":
1465
- self.open_elements.pop()
1520
+ self._pop_current()
1466
1521
  self._reconstruct_active_formatting_elements()
1467
1522
  self._insert_element(token, push=True)
1468
1523
  return None
1469
1524
  if name == "optgroup":
1470
1525
  if self.open_elements and self.open_elements[-1].name == "option":
1471
- self.open_elements.pop()
1526
+ self._pop_current()
1472
1527
  if self.open_elements and self.open_elements[-1].name == "optgroup":
1473
- self.open_elements.pop()
1528
+ self._pop_current()
1474
1529
  self._reconstruct_active_formatting_elements()
1475
1530
  self._insert_element(token, push=True)
1476
1531
  return None
1477
1532
  if name == "select":
1478
- self._parse_error("unexpected-start-tag-implies-end-tag", tag_name=name)
1533
+ self._parse_error("unexpected-select-in-select")
1479
1534
  # select is always in scope in IN_SELECT mode
1480
1535
  self._pop_until_any_inclusive({"select"})
1481
1536
  self._reset_insertion_mode()
1482
1537
  return None
1483
1538
  if name in {"input", "textarea"}:
1484
- self._parse_error("unexpected-start-tag-implies-end-tag", tag_name=name)
1539
+ self._parse_error("unexpected-start-tag-in-select", tag_name=name)
1485
1540
  # select is always in scope in IN_SELECT mode
1486
1541
  self._pop_until_any_inclusive({"select"})
1487
1542
  self._reset_insertion_mode()
@@ -1491,7 +1546,7 @@ class TreeBuilderModesMixin:
1491
1546
  self._insert_element(token, push=False)
1492
1547
  return None
1493
1548
  if name in {"caption", "col", "colgroup", "tbody", "td", "tfoot", "th", "thead", "tr", "table"}:
1494
- self._parse_error("unexpected-start-tag-implies-end-tag", tag_name=name)
1549
+ self._parse_error("unexpected-start-tag-in-select", tag_name=name)
1495
1550
  # select is always in scope in IN_SELECT mode
1496
1551
  self._pop_until_any_inclusive({"select"})
1497
1552
  self._reset_insertion_mode()
@@ -1509,45 +1564,53 @@ class TreeBuilderModesMixin:
1509
1564
  self._append_active_formatting_entry(name, token.attrs, node)
1510
1565
  return None
1511
1566
  if name == "hr":
1567
+ self._parse_error("unexpected-start-tag-in-select", tag_name=name)
1512
1568
  # Per spec: pop option and optgroup before inserting hr (makes hr sibling, not child)
1513
1569
  if self.open_elements and self.open_elements[-1].name == "option":
1514
- self.open_elements.pop()
1570
+ self._pop_current()
1515
1571
  if self.open_elements and self.open_elements[-1].name == "optgroup":
1516
- self.open_elements.pop()
1572
+ self._pop_current()
1517
1573
  self._reconstruct_active_formatting_elements()
1518
1574
  self._insert_element(token, push=False)
1519
1575
  return None
1520
1576
  if name == "menuitem":
1577
+ self._parse_error("unexpected-start-tag-in-select", tag_name=name)
1521
1578
  self._reconstruct_active_formatting_elements()
1522
1579
  self._insert_element(token, push=True)
1523
1580
  return None
1524
1581
  # Allow common HTML elements in select (newer spec)
1525
1582
  if name in {"p", "div", "span", "button", "datalist", "selectedcontent"}:
1583
+ self._parse_error("unexpected-start-tag-in-select", tag_name=name)
1526
1584
  self._reconstruct_active_formatting_elements()
1527
1585
  self._insert_element(token, push=not token.self_closing)
1528
1586
  return None
1529
1587
  if name in {"br", "img"}:
1588
+ self._parse_error("unexpected-start-tag-in-select", tag_name=name)
1530
1589
  self._reconstruct_active_formatting_elements()
1531
1590
  self._insert_element(token, push=False)
1532
1591
  return None
1533
1592
  if name == "plaintext":
1534
1593
  # Per spec: plaintext element is inserted in select (consumes all remaining text)
1594
+ self._parse_error("unexpected-start-tag-in-select", tag_name=name)
1535
1595
  self._reconstruct_active_formatting_elements()
1536
1596
  self._insert_element(token, push=True)
1597
+ return None
1598
+ # Any other start tag: parse error, ignore.
1599
+ self._parse_error("unexpected-start-tag-in-select", tag_name=name)
1537
1600
  return None
1538
1601
  if name == "optgroup":
1539
1602
  if self.open_elements and self.open_elements[-1].name == "option":
1540
- self.open_elements.pop()
1603
+ self._pop_current()
1541
1604
  if self.open_elements and self.open_elements[-1].name == "optgroup":
1542
- self.open_elements.pop()
1605
+ self._pop_current()
1543
1606
  else:
1544
- self._parse_error("unexpected-end-tag", tag_name=token.name)
1607
+ self._parse_error("unexpected-end-tag-in-select", tag_name=token.name)
1545
1608
  return None
1546
1609
  if name == "option":
1547
1610
  if self.open_elements and self.open_elements[-1].name == "option":
1548
- self.open_elements.pop()
1611
+ self._pop_current()
1549
1612
  else:
1550
- self._parse_error("unexpected-end-tag", tag_name=token.name)
1613
+ self._parse_error("unexpected-end-tag-in-select", tag_name=token.name)
1551
1614
  return None
1552
1615
  if name == "select":
1553
1616
  # In IN_SELECT mode, select is always in scope - pop to it
@@ -1559,17 +1622,20 @@ class TreeBuilderModesMixin:
1559
1622
  # select is always on stack in IN_SELECT mode
1560
1623
  select_node = self._find_last_on_stack("select")
1561
1624
  fmt_index = self._find_active_formatting_index(name)
1562
- if fmt_index is not None:
1563
- target = self.active_formatting[fmt_index]["node"]
1564
- if target in self.open_elements: # pragma: no branch
1565
- select_index = self.open_elements.index(select_node)
1566
- target_index = self.open_elements.index(target)
1567
- if target_index < select_index:
1568
- self._parse_error("unexpected-end-tag", tag_name=name)
1569
- return None
1625
+ if fmt_index is None:
1626
+ self._parse_error("unexpected-end-tag-in-select", tag_name=name)
1627
+ return None
1628
+ target = self.active_formatting[fmt_index]["node"]
1629
+ if target in self.open_elements: # pragma: no branch
1630
+ select_index = self.open_elements.index(select_node)
1631
+ target_index = self.open_elements.index(target)
1632
+ if target_index < select_index:
1633
+ self._parse_error("unexpected-end-tag-in-select", tag_name=name)
1634
+ return None
1570
1635
  self._adoption_agency(name)
1571
1636
  return None
1572
1637
  if name in {"p", "div", "span", "button", "datalist", "selectedcontent"}:
1638
+ self._parse_error("unexpected-end-tag-in-select", tag_name=name)
1573
1639
  # Per HTML5 spec: these end tags in select mode close the element if it's on the stack.
1574
1640
  # But we must not pop across the select boundary (i.e., don't pop elements BEFORE select).
1575
1641
  select_idx = None
@@ -1583,14 +1649,12 @@ class TreeBuilderModesMixin:
1583
1649
  # i.e., the target is inside the select or there's no select
1584
1650
  if target_idx is not None and (select_idx is None or target_idx > select_idx):
1585
1651
  while True:
1586
- popped = self.open_elements.pop()
1652
+ popped = self._pop_current()
1587
1653
  if popped.name == name:
1588
1654
  break
1589
- else:
1590
- self._parse_error("unexpected-end-tag", tag_name=name)
1591
1655
  return None
1592
1656
  if name in {"caption", "col", "colgroup", "tbody", "td", "tfoot", "th", "thead", "tr", "table"}:
1593
- self._parse_error("unexpected-end-tag", tag_name=name)
1657
+ self._parse_error("unexpected-end-tag-in-select", tag_name=name)
1594
1658
  # select is always in scope in IN_SELECT mode
1595
1659
  self._pop_until_any_inclusive({"select"})
1596
1660
  self._reset_insertion_mode()
@@ -1601,7 +1665,7 @@ class TreeBuilderModesMixin:
1601
1665
  assert isinstance(token, EOFToken), f"Unexpected token type: {type(token)}"
1602
1666
  return self._mode_in_body(token)
1603
1667
 
1604
- def _mode_in_template(self, token: Any) -> Any:
1668
+ def _mode_in_template(self, token: AnyToken) -> ModeResultTuple | None:
1605
1669
  # § The "in template" insertion mode
1606
1670
  # https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-intemplate
1607
1671
  if isinstance(token, CharacterTokens):
@@ -1680,7 +1744,7 @@ class TreeBuilderModesMixin:
1680
1744
  return ("reprocess", self.mode, token)
1681
1745
  return None
1682
1746
 
1683
- def _mode_after_body(self, token: Any) -> Any:
1747
+ def _mode_after_body(self, token: AnyToken) -> ModeResultTuple | None:
1684
1748
  if isinstance(token, CharacterTokens):
1685
1749
  if is_all_whitespace(token.data):
1686
1750
  # Whitespace is processed using InBody rules (appended to body)
@@ -1701,7 +1765,7 @@ class TreeBuilderModesMixin:
1701
1765
  assert isinstance(token, EOFToken), f"Unexpected token type: {type(token)}"
1702
1766
  return None
1703
1767
 
1704
- def _mode_after_after_body(self, token: Any) -> Any:
1768
+ def _mode_after_after_body(self, token: AnyToken) -> ModeResultTuple | None:
1705
1769
  if isinstance(token, CharacterTokens):
1706
1770
  if is_all_whitespace(token.data):
1707
1771
  # Per spec: whitespace characters are inserted using the rules for the "in body" mode
@@ -1728,7 +1792,7 @@ class TreeBuilderModesMixin:
1728
1792
  assert isinstance(token, EOFToken), f"Unexpected token type: {type(token)}"
1729
1793
  return None
1730
1794
 
1731
- def _mode_in_frameset(self, token: Any) -> Any:
1795
+ def _mode_in_frameset(self, token: AnyToken) -> ModeResultTuple | None:
1732
1796
  # Per HTML5 spec §13.2.6.4.16: In frameset insertion mode
1733
1797
  if isinstance(token, CharacterTokens):
1734
1798
  # Only whitespace characters allowed; ignore all others
@@ -1749,13 +1813,13 @@ class TreeBuilderModesMixin:
1749
1813
  if self.open_elements and self.open_elements[-1].name == "html":
1750
1814
  self._parse_error("unexpected-end-tag", tag_name=token.name)
1751
1815
  return None
1752
- self.open_elements.pop()
1816
+ self._pop_current()
1753
1817
  if self.open_elements and self.open_elements[-1].name != "frameset":
1754
1818
  self.mode = InsertionMode.AFTER_FRAMESET
1755
1819
  return None
1756
1820
  if token.kind == Tag.START and token.name == "frame":
1757
1821
  self._insert_element(token, push=True)
1758
- self.open_elements.pop()
1822
+ self._pop_current()
1759
1823
  return None
1760
1824
  if token.kind == Tag.START and token.name == "noframes":
1761
1825
  # Per spec: use IN_HEAD rules but preserve current mode for TEXT restoration
@@ -1770,11 +1834,14 @@ class TreeBuilderModesMixin:
1770
1834
  self._parse_error("unexpected-token-in-frameset")
1771
1835
  return None
1772
1836
 
1773
- def _mode_after_frameset(self, token: Any) -> Any:
1837
+ def _mode_after_frameset(self, token: AnyToken) -> ModeResultTuple | None:
1774
1838
  # Per HTML5 spec §13.2.6.4.17: After frameset insertion mode
1775
1839
  if isinstance(token, CharacterTokens):
1776
- # Only whitespace characters allowed; ignore all others
1777
- whitespace = "".join(ch for ch in token.data if ch in "\t\n\f\r ")
1840
+ # Only whitespace characters allowed; non-whitespace is a parse error.
1841
+ data = token.data or ""
1842
+ whitespace = "".join(ch for ch in data if ch in "\t\n\f\r ")
1843
+ if any(ch not in "\t\n\f\r " for ch in data):
1844
+ self._parse_error("unexpected-token-after-frameset")
1778
1845
  if whitespace:
1779
1846
  self._append_text(whitespace)
1780
1847
  return None
@@ -1787,6 +1854,9 @@ class TreeBuilderModesMixin:
1787
1854
  if token.kind == Tag.END and token.name == "html":
1788
1855
  self.mode = InsertionMode.AFTER_AFTER_FRAMESET
1789
1856
  return None
1857
+ if token.kind == Tag.END and token.name == "frameset":
1858
+ self._parse_error("unexpected-token-after-frameset")
1859
+ return None
1790
1860
  if token.kind == Tag.START and token.name == "noframes":
1791
1861
  # Insert noframes element directly and switch to TEXT mode
1792
1862
  self._insert_element(token, push=True)
@@ -1799,7 +1869,7 @@ class TreeBuilderModesMixin:
1799
1869
  self.mode = InsertionMode.IN_FRAMESET
1800
1870
  return ("reprocess", InsertionMode.IN_FRAMESET, token)
1801
1871
 
1802
- def _mode_after_after_frameset(self, token: Any) -> Any:
1872
+ def _mode_after_after_frameset(self, token: AnyToken) -> ModeResultTuple | None:
1803
1873
  # Per HTML5 spec §13.2.6.4.18: After after frameset insertion mode
1804
1874
  if isinstance(token, CharacterTokens):
1805
1875
  # Whitespace is processed using InBody rules
@@ -1830,7 +1900,7 @@ class TreeBuilderModesMixin:
1830
1900
 
1831
1901
  # Helpers ----------------------------------------------------------------
1832
1902
 
1833
- _MODE_HANDLERS = [
1903
+ _MODE_HANDLERS: list[Callable[[TreeBuilderModesMixin, AnyToken], ModeResultTuple | None]] = [
1834
1904
  _mode_initial,
1835
1905
  _mode_before_html,
1836
1906
  _mode_before_head,
@@ -1855,14 +1925,14 @@ class TreeBuilderModesMixin:
1855
1925
  _mode_in_template,
1856
1926
  ]
1857
1927
 
1858
- _BODY_TOKEN_HANDLERS = {
1928
+ _BODY_TOKEN_HANDLERS: dict[type[AnyToken], Callable[[TreeBuilderModesMixin, Any], ModeResultTuple | None]] = {
1859
1929
  CharacterTokens: _handle_characters_in_body,
1860
1930
  CommentToken: _handle_comment_in_body,
1861
1931
  Tag: _handle_tag_in_body,
1862
1932
  EOFToken: _handle_eof_in_body,
1863
1933
  }
1864
1934
 
1865
- _BODY_START_HANDLERS = {
1935
+ _BODY_START_HANDLERS: dict[str, Callable[[TreeBuilderModesMixin, Tag], ModeResultTuple | None]] = {
1866
1936
  "a": _handle_body_start_a,
1867
1937
  "address": _handle_body_start_block_with_p,
1868
1938
  "applet": _handle_body_start_applet_like,
@@ -1967,7 +2037,7 @@ class TreeBuilderModesMixin:
1967
2037
  "wbr": _handle_body_start_void_with_formatting,
1968
2038
  "xmp": _handle_body_start_plaintext_xmp,
1969
2039
  }
1970
- _BODY_END_HANDLERS = {
2040
+ _BODY_END_HANDLERS: dict[str, Callable[[TreeBuilderModesMixin, Tag], ModeResultTuple | None]] = {
1971
2041
  "address": _handle_body_end_block,
1972
2042
  "applet": _handle_body_end_applet_like,
1973
2043
  "article": _handle_body_end_block,