justhtml 0.6.0__py3-none-any.whl → 0.33.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,21 +1,32 @@
1
1
  # ruff: noqa: S101, RUF012
2
+ # mypy: disable-error-code="attr-defined, has-type, var-annotated, assignment"
2
3
 
4
+ from __future__ import annotations
5
+
6
+ from typing import TYPE_CHECKING, Any, Literal
3
7
 
4
8
  from .constants import (
9
+ FORMAT_MARKER,
5
10
  FORMATTING_ELEMENTS,
6
11
  HEADING_ELEMENTS,
7
12
  )
8
13
  from .node import SimpleDomNode, TemplateNode
9
- from .tokens import CharacterTokens, CommentToken, EOFToken, Tag, TokenSinkResult
14
+ from .tokens import AnyToken, CharacterTokens, CommentToken, DoctypeToken, EOFToken, Tag, TokenSinkResult
10
15
  from .treebuilder_utils import (
11
16
  InsertionMode,
12
17
  doctype_error_and_quirks,
13
18
  is_all_whitespace,
14
19
  )
15
20
 
21
+ if TYPE_CHECKING:
22
+ from collections.abc import Callable
23
+
24
+ ModeResultTuple = tuple[str, InsertionMode, AnyToken] | tuple[str, InsertionMode, AnyToken, bool]
25
+ "Result is (instruction, mode, token) or (instruction, mode, token, force_html)"
26
+
16
27
 
17
28
  class TreeBuilderModesMixin:
18
- def _handle_doctype(self, token):
29
+ def _handle_doctype(self, token: DoctypeToken) -> Literal[0]:
19
30
  if self.mode != InsertionMode.INITIAL:
20
31
  self._parse_error("unexpected-doctype")
21
32
  return TokenSinkResult.Continue
@@ -33,7 +44,7 @@ class TreeBuilderModesMixin:
33
44
  self.mode = InsertionMode.BEFORE_HTML
34
45
  return TokenSinkResult.Continue
35
46
 
36
- def _mode_initial(self, token):
47
+ def _mode_initial(self, token: Any) -> ModeResultTuple | None:
37
48
  if isinstance(token, CharacterTokens):
38
49
  if is_all_whitespace(token.data):
39
50
  return None
@@ -50,13 +61,13 @@ class TreeBuilderModesMixin:
50
61
  return ("reprocess", InsertionMode.BEFORE_HTML, token)
51
62
  # Only Tags remain - no DOCTYPE seen, so quirks mode
52
63
  if token.kind == Tag.START:
53
- self._parse_error("expected-doctype-but-got-start-tag", tag_name=token.name, token=token)
64
+ self._parse_error("expected-doctype-but-got-start-tag", tag_name=token.name)
54
65
  else:
55
- self._parse_error("expected-doctype-but-got-end-tag", tag_name=token.name, token=token)
66
+ self._parse_error("expected-doctype-but-got-end-tag", tag_name=token.name)
56
67
  self._set_quirks_mode("quirks")
57
68
  return ("reprocess", InsertionMode.BEFORE_HTML, token)
58
69
 
59
- def _mode_before_html(self, token):
70
+ def _mode_before_html(self, token: AnyToken) -> ModeResultTuple | None:
60
71
  if isinstance(token, CharacterTokens) and is_all_whitespace(token.data):
61
72
  return None
62
73
  if isinstance(token, CommentToken):
@@ -89,7 +100,7 @@ class TreeBuilderModesMixin:
89
100
  self.mode = InsertionMode.BEFORE_HEAD
90
101
  return ("reprocess", InsertionMode.BEFORE_HEAD, token)
91
102
 
92
- def _mode_before_head(self, token):
103
+ def _mode_before_head(self, token: AnyToken) -> ModeResultTuple | None:
93
104
  if isinstance(token, CharacterTokens):
94
105
  data = token.data or ""
95
106
  if "\x00" in data:
@@ -132,7 +143,7 @@ class TreeBuilderModesMixin:
132
143
  self.mode = InsertionMode.IN_HEAD
133
144
  return ("reprocess", InsertionMode.IN_HEAD, token)
134
145
 
135
- def _mode_in_head(self, token):
146
+ def _mode_in_head(self, token: AnyToken) -> ModeResultTuple | None:
136
147
  if isinstance(token, CharacterTokens):
137
148
  if is_all_whitespace(token.data):
138
149
  self._append_text(token.data)
@@ -208,7 +219,7 @@ class TreeBuilderModesMixin:
208
219
  self.mode = InsertionMode.AFTER_HEAD
209
220
  return ("reprocess", InsertionMode.AFTER_HEAD, token)
210
221
 
211
- def _mode_in_head_noscript(self, token):
222
+ def _mode_in_head_noscript(self, token: AnyToken) -> ModeResultTuple | None:
212
223
  """Handle tokens in 'in head noscript' insertion mode (scripting disabled)."""
213
224
  if isinstance(token, CharacterTokens):
214
225
  data = token.data or ""
@@ -257,15 +268,11 @@ class TreeBuilderModesMixin:
257
268
  # All token types are handled above - CharacterTokens, CommentToken, Tag, EOFToken
258
269
  return None # pragma: no cover
259
270
 
260
- def _mode_after_head(self, token):
271
+ def _mode_after_head(self, token: AnyToken) -> ModeResultTuple | None:
261
272
  if isinstance(token, CharacterTokens):
262
273
  data = token.data or ""
263
274
  if "\x00" in data:
264
- self._parse_error("invalid-codepoint-in-body")
265
275
  data = data.replace("\x00", "")
266
- if "\x0c" in data:
267
- self._parse_error("invalid-codepoint-in-body")
268
- data = data.replace("\x0c", "")
269
276
  if not data or is_all_whitespace(data):
270
277
  if data:
271
278
  self._append_text(data)
@@ -327,6 +334,10 @@ class TreeBuilderModesMixin:
327
334
  self.mode = InsertionMode.IN_HEAD
328
335
  return ("reprocess", InsertionMode.IN_HEAD, token)
329
336
  if token.kind == Tag.END and token.name == "template":
337
+ has_template = any(node.name == "template" for node in self.open_elements)
338
+ if not has_template:
339
+ self._parse_error("unexpected-end-tag", tag_name=token.name)
340
+ return None
330
341
  return self._mode_in_head(token)
331
342
  if token.kind == Tag.END and token.name == "body":
332
343
  self._insert_body_if_missing()
@@ -346,7 +357,7 @@ class TreeBuilderModesMixin:
346
357
  self._insert_body_if_missing()
347
358
  return ("reprocess", InsertionMode.IN_BODY, token)
348
359
 
349
- def _mode_text(self, token):
360
+ def _mode_text(self, token: AnyToken) -> ModeResultTuple | None:
350
361
  if isinstance(token, CharacterTokens):
351
362
  self._append_text(token.data)
352
363
  return None
@@ -362,11 +373,11 @@ class TreeBuilderModesMixin:
362
373
  self.mode = self.original_mode or InsertionMode.IN_BODY
363
374
  return None
364
375
 
365
- def _mode_in_body(self, token):
376
+ def _mode_in_body(self, token: Any) -> ModeResultTuple | None:
366
377
  handler = self._BODY_TOKEN_HANDLERS.get(type(token))
367
378
  return handler(self, token) if handler else None
368
379
 
369
- def _handle_characters_in_body(self, token):
380
+ def _handle_characters_in_body(self, token: CharacterTokens) -> None:
370
381
  data = token.data or ""
371
382
  if "\x00" in data:
372
383
  self._parse_error("invalid-codepoint")
@@ -380,11 +391,11 @@ class TreeBuilderModesMixin:
380
391
  self._append_text(data)
381
392
  return
382
393
 
383
- def _handle_comment_in_body(self, token):
394
+ def _handle_comment_in_body(self, token: CommentToken) -> None:
384
395
  self._append_comment(token.data)
385
396
  return
386
397
 
387
- def _handle_tag_in_body(self, token):
398
+ def _handle_tag_in_body(self, token: Tag) -> ModeResultTuple | None:
388
399
  if token.kind == Tag.START:
389
400
  handler = self._BODY_START_HANDLERS.get(token.name)
390
401
  if handler:
@@ -408,7 +419,7 @@ class TreeBuilderModesMixin:
408
419
  self._any_other_end_tag(token.name)
409
420
  return None
410
421
 
411
- def _handle_eof_in_body(self, token):
422
+ def _handle_eof_in_body(self, token: EOFToken) -> ModeResultTuple | None:
412
423
  # If we're in a template, handle EOF in template mode first
413
424
  if self.template_modes:
414
425
  return self._mode_in_template(token)
@@ -443,17 +454,19 @@ class TreeBuilderModesMixin:
443
454
  # Body mode start tag handlers
444
455
  # ---------------------
445
456
 
446
- def _handle_body_start_html(self, token):
457
+ def _handle_body_start_html(self, token: Tag) -> None:
447
458
  if self.template_modes:
448
459
  self._parse_error("unexpected-start-tag", tag_name=token.name)
449
460
  return
461
+ # Per spec: parse error; merge attributes onto existing <html>.
462
+ self._parse_error("unexpected-start-tag", tag_name=token.name)
450
463
  # In IN_BODY mode, html element is always at open_elements[0]
451
464
  if self.open_elements: # pragma: no branch
452
465
  html = self.open_elements[0]
453
466
  self._add_missing_attributes(html, token.attrs)
454
467
  return
455
468
 
456
- def _handle_body_start_body(self, token):
469
+ def _handle_body_start_body(self, token: Tag) -> None:
457
470
  if self.template_modes:
458
471
  self._parse_error("unexpected-start-tag", tag_name=token.name)
459
472
  return
@@ -467,19 +480,19 @@ class TreeBuilderModesMixin:
467
480
  self.frameset_ok = False
468
481
  return
469
482
 
470
- def _handle_body_start_head(self, token):
483
+ def _handle_body_start_head(self, token: Tag) -> None:
471
484
  self._parse_error("unexpected-start-tag", tag_name=token.name)
472
485
  return
473
486
 
474
- def _handle_body_start_in_head(self, token):
487
+ def _handle_body_start_in_head(self, token: Tag) -> ModeResultTuple | None:
475
488
  return self._mode_in_head(token)
476
489
 
477
- def _handle_body_start_block_with_p(self, token):
490
+ def _handle_body_start_block_with_p(self, token: Tag) -> None:
478
491
  self._close_p_element()
479
492
  self._insert_element(token, push=True)
480
493
  return
481
494
 
482
- def _handle_body_start_heading(self, token):
495
+ def _handle_body_start_heading(self, token: Tag) -> None:
483
496
  self._close_p_element()
484
497
  if self.open_elements and self.open_elements[-1].name in HEADING_ELEMENTS:
485
498
  self._parse_error("unexpected-start-tag", tag_name=token.name)
@@ -488,14 +501,14 @@ class TreeBuilderModesMixin:
488
501
  self.frameset_ok = False
489
502
  return
490
503
 
491
- def _handle_body_start_pre_listing(self, token):
504
+ def _handle_body_start_pre_listing(self, token: Tag) -> None:
492
505
  self._close_p_element()
493
506
  self._insert_element(token, push=True)
494
507
  self.ignore_lf = True
495
508
  self.frameset_ok = False
496
509
  return
497
510
 
498
- def _handle_body_start_form(self, token):
511
+ def _handle_body_start_form(self, token: Tag) -> None:
499
512
  if self.form_element is not None:
500
513
  self._parse_error("unexpected-start-tag", tag_name=token.name)
501
514
  return
@@ -505,7 +518,7 @@ class TreeBuilderModesMixin:
505
518
  self.frameset_ok = False
506
519
  return
507
520
 
508
- def _handle_body_start_button(self, token):
521
+ def _handle_body_start_button(self, token: Tag) -> None:
509
522
  if self._has_in_scope("button"):
510
523
  self._parse_error("unexpected-start-tag-implies-end-tag", tag_name=token.name)
511
524
  self._close_element_by_name("button")
@@ -513,19 +526,19 @@ class TreeBuilderModesMixin:
513
526
  self.frameset_ok = False
514
527
  return
515
528
 
516
- def _handle_body_start_paragraph(self, token):
529
+ def _handle_body_start_paragraph(self, token: Tag) -> None:
517
530
  self._close_p_element()
518
531
  self._insert_element(token, push=True)
519
532
  return
520
533
 
521
- def _handle_body_start_math(self, token):
534
+ def _handle_body_start_math(self, token: Tag) -> None:
522
535
  self._reconstruct_active_formatting_elements()
523
536
  attrs = self._prepare_foreign_attributes("math", token.attrs)
524
537
  new_tag = Tag(Tag.START, token.name, attrs, token.self_closing)
525
538
  self._insert_element(new_tag, push=not token.self_closing, namespace="math")
526
539
  return
527
540
 
528
- def _handle_body_start_svg(self, token):
541
+ def _handle_body_start_svg(self, token: Tag) -> None:
529
542
  self._reconstruct_active_formatting_elements()
530
543
  adjusted_name = self._adjust_svg_tag_name(token.name)
531
544
  attrs = self._prepare_foreign_attributes("svg", token.attrs)
@@ -533,7 +546,7 @@ class TreeBuilderModesMixin:
533
546
  self._insert_element(new_tag, push=not token.self_closing, namespace="svg")
534
547
  return
535
548
 
536
- def _handle_body_start_li(self, token):
549
+ def _handle_body_start_li(self, token: Tag) -> None:
537
550
  self.frameset_ok = False
538
551
  self._close_p_element()
539
552
  if self._has_in_list_item_scope("li"):
@@ -541,7 +554,7 @@ class TreeBuilderModesMixin:
541
554
  self._insert_element(token, push=True)
542
555
  return
543
556
 
544
- def _handle_body_start_dd_dt(self, token):
557
+ def _handle_body_start_dd_dt(self, token: Tag) -> None:
545
558
  self.frameset_ok = False
546
559
  self._close_p_element()
547
560
  name = token.name
@@ -558,7 +571,7 @@ class TreeBuilderModesMixin:
558
571
  self._insert_element(token, push=True)
559
572
  return
560
573
 
561
- def _adoption_agency(self, subject):
574
+ def _adoption_agency(self, subject: Any) -> None:
562
575
  # 1. If the current node is the subject, and it is not in the active formatting elements list...
563
576
  if self.open_elements and self.open_elements[-1].name == subject:
564
577
  if not self._has_active_formatting_entry(subject):
@@ -570,6 +583,10 @@ class TreeBuilderModesMixin:
570
583
  # 3. Find formatting element
571
584
  formatting_element_index = self._find_active_formatting_index(subject)
572
585
  if formatting_element_index is None:
586
+ # html5lib reports a parse error when an end tag for a formatting
587
+ # element triggers the adoption agency algorithm but no matching
588
+ # active formatting entry exists.
589
+ self._parse_error("adoption-agency-1.3")
573
590
  return
574
591
 
575
592
  formatting_element_entry = self.active_formatting[formatting_element_index]
@@ -647,6 +664,10 @@ class TreeBuilderModesMixin:
647
664
  # 10.4 Replace entry with new element
648
665
  entry = self.active_formatting[node_formatting_index]
649
666
  new_element = self._create_element(entry["name"], entry["node"].namespace, entry["attrs"])
667
+ if self.tokenizer is not None and self.tokenizer.track_node_locations:
668
+ new_element._origin_pos = entry["node"].origin_offset
669
+ new_element._origin_line = entry["node"].origin_line
670
+ new_element._origin_col = entry["node"].origin_col
650
671
  entry["node"] = new_element
651
672
  self.open_elements[self.open_elements.index(node)] = new_element
652
673
  node = new_element
@@ -680,6 +701,10 @@ class TreeBuilderModesMixin:
680
701
  # 12. Create new formatting element
681
702
  entry = self.active_formatting[formatting_element_index]
682
703
  new_formatting_element = self._create_element(entry["name"], entry["node"].namespace, entry["attrs"])
704
+ if self.tokenizer is not None and self.tokenizer.track_node_locations:
705
+ new_formatting_element._origin_pos = entry["node"].origin_offset
706
+ new_formatting_element._origin_line = entry["node"].origin_line
707
+ new_formatting_element._origin_col = entry["node"].origin_col
683
708
  entry["node"] = new_formatting_element
684
709
 
685
710
  # 13. Move children of furthest block
@@ -702,8 +727,9 @@ class TreeBuilderModesMixin:
702
727
  furthest_block_index = self.open_elements.index(furthest_block)
703
728
  self.open_elements.insert(furthest_block_index + 1, new_formatting_element)
704
729
 
705
- def _handle_body_start_a(self, token):
730
+ def _handle_body_start_a(self, token: Tag) -> None:
706
731
  if self._has_active_formatting_entry("a"):
732
+ self._parse_error("unexpected-start-tag-implies-end-tag", tag_name=token.name)
707
733
  self._adoption_agency("a")
708
734
  self._remove_last_active_formatting_by_name("a")
709
735
  self._remove_last_open_element_by_name("a")
@@ -712,7 +738,7 @@ class TreeBuilderModesMixin:
712
738
  self._append_active_formatting_entry("a", token.attrs, node)
713
739
  return
714
740
 
715
- def _handle_body_start_formatting(self, token):
741
+ def _handle_body_start_formatting(self, token: Tag) -> None:
716
742
  name = token.name
717
743
  if name == "nobr" and self._in_scope("nobr"):
718
744
  self._adoption_agency("nobr")
@@ -726,21 +752,21 @@ class TreeBuilderModesMixin:
726
752
  self._append_active_formatting_entry(name, token.attrs, node)
727
753
  return
728
754
 
729
- def _handle_body_start_applet_like(self, token):
755
+ def _handle_body_start_applet_like(self, token: Tag) -> None:
730
756
  self._reconstruct_active_formatting_elements()
731
757
  self._insert_element(token, push=True)
732
758
  self._push_formatting_marker()
733
759
  self.frameset_ok = False
734
760
  return
735
761
 
736
- def _handle_body_start_br(self, token):
762
+ def _handle_body_start_br(self, token: Tag) -> None:
737
763
  self._close_p_element()
738
764
  self._reconstruct_active_formatting_elements()
739
765
  self._insert_element(token, push=False)
740
766
  self.frameset_ok = False
741
767
  return
742
768
 
743
- def _handle_body_start_frameset(self, token):
769
+ def _handle_body_start_frameset(self, token: Tag) -> None:
744
770
  if not self.frameset_ok:
745
771
  self._parse_error("unexpected-start-tag-ignored", tag_name=token.name)
746
772
  return
@@ -765,17 +791,17 @@ class TreeBuilderModesMixin:
765
791
  # Body mode end tag handlers
766
792
  # ---------------------
767
793
 
768
- def _handle_body_end_body(self, token):
794
+ def _handle_body_end_body(self, token: Tag) -> None:
769
795
  if self._in_scope("body"):
770
796
  self.mode = InsertionMode.AFTER_BODY
771
797
  return
772
798
 
773
- def _handle_body_end_html(self, token):
799
+ def _handle_body_end_html(self, token: Tag) -> ModeResultTuple | None:
774
800
  if self._in_scope("body"):
775
801
  return ("reprocess", InsertionMode.AFTER_BODY, token)
776
802
  return None
777
803
 
778
- def _handle_body_end_p(self, token):
804
+ def _handle_body_end_p(self, token: Tag) -> None:
779
805
  if not self._close_p_element():
780
806
  self._parse_error("unexpected-end-tag", tag_name=token.name)
781
807
  phantom = Tag(Tag.START, "p", {}, False)
@@ -783,21 +809,21 @@ class TreeBuilderModesMixin:
783
809
  self._close_p_element()
784
810
  return
785
811
 
786
- def _handle_body_end_li(self, token):
812
+ def _handle_body_end_li(self, token: Tag) -> None:
787
813
  if not self._has_in_list_item_scope("li"):
788
814
  self._parse_error("unexpected-end-tag", tag_name=token.name)
789
815
  return
790
816
  self._pop_until_any_inclusive({"li"})
791
817
  return
792
818
 
793
- def _handle_body_end_dd_dt(self, token):
819
+ def _handle_body_end_dd_dt(self, token: Tag) -> None:
794
820
  name = token.name
795
821
  if not self._has_in_definition_scope(name):
796
822
  self._parse_error("unexpected-end-tag", tag_name=name)
797
823
  return
798
824
  self._pop_until_any_inclusive({"dd", "dt"})
799
825
 
800
- def _handle_body_end_form(self, token):
826
+ def _handle_body_end_form(self, token: Tag) -> None:
801
827
  if self.form_element is None:
802
828
  self._parse_error("unexpected-end-tag", tag_name=token.name)
803
829
  return
@@ -807,7 +833,7 @@ class TreeBuilderModesMixin:
807
833
  self._parse_error("unexpected-end-tag", tag_name=token.name)
808
834
  return
809
835
 
810
- def _handle_body_end_applet_like(self, token):
836
+ def _handle_body_end_applet_like(self, token: Tag) -> None:
811
837
  name = token.name
812
838
  if not self._in_scope(name):
813
839
  self._parse_error("unexpected-end-tag", tag_name=name)
@@ -820,7 +846,7 @@ class TreeBuilderModesMixin:
820
846
  self._clear_active_formatting_up_to_marker()
821
847
  return
822
848
 
823
- def _handle_body_end_heading(self, token):
849
+ def _handle_body_end_heading(self, token: Tag) -> None:
824
850
  name = token.name
825
851
  if not self._has_any_in_scope(HEADING_ELEMENTS):
826
852
  self._parse_error("unexpected-end-tag", tag_name=name)
@@ -835,7 +861,7 @@ class TreeBuilderModesMixin:
835
861
  break
836
862
  return
837
863
 
838
- def _handle_body_end_block(self, token):
864
+ def _handle_body_end_block(self, token: Tag) -> None:
839
865
  name = token.name
840
866
  if not self._in_scope(name):
841
867
  self._parse_error("unexpected-end-tag", tag_name=name)
@@ -846,9 +872,10 @@ class TreeBuilderModesMixin:
846
872
  self._pop_until_any_inclusive({name})
847
873
  return
848
874
 
849
- def _handle_body_end_template(self, token):
875
+ def _handle_body_end_template(self, token: Tag) -> None:
850
876
  has_template = any(node.name == "template" for node in self.open_elements)
851
877
  if not has_template:
878
+ self._parse_error("unexpected-end-tag", tag_name=token.name)
852
879
  return
853
880
  self._generate_implied_end_tags()
854
881
  self._pop_until_inclusive("template")
@@ -859,18 +886,18 @@ class TreeBuilderModesMixin:
859
886
  self._reset_insertion_mode()
860
887
  return
861
888
 
862
- def _handle_body_start_structure_ignored(self, token):
889
+ def _handle_body_start_structure_ignored(self, token: Tag) -> None:
863
890
  self._parse_error("unexpected-start-tag-ignored", tag_name=token.name)
864
891
  return
865
892
 
866
- def _handle_body_start_col_or_frame(self, token):
893
+ def _handle_body_start_col_or_frame(self, token: Tag) -> None:
867
894
  if self.fragment_context is None:
868
895
  self._parse_error("unexpected-start-tag-ignored", tag_name=token.name)
869
896
  return
870
897
  self._insert_element(token, push=False)
871
898
  return
872
899
 
873
- def _handle_body_start_image(self, token):
900
+ def _handle_body_start_image(self, token: Tag) -> None:
874
901
  self._parse_error("image-start-tag", tag_name=token.name)
875
902
  img_token = Tag(Tag.START, "img", token.attrs, token.self_closing)
876
903
  self._reconstruct_active_formatting_elements()
@@ -878,17 +905,17 @@ class TreeBuilderModesMixin:
878
905
  self.frameset_ok = False
879
906
  return
880
907
 
881
- def _handle_body_start_void_with_formatting(self, token):
908
+ def _handle_body_start_void_with_formatting(self, token: Tag) -> None:
882
909
  self._reconstruct_active_formatting_elements()
883
910
  self._insert_element(token, push=False)
884
911
  self.frameset_ok = False
885
912
  return
886
913
 
887
- def _handle_body_start_simple_void(self, token):
914
+ def _handle_body_start_simple_void(self, token: Tag) -> None:
888
915
  self._insert_element(token, push=False)
889
916
  return
890
917
 
891
- def _handle_body_start_input(self, token):
918
+ def _handle_body_start_input(self, token: Tag) -> None:
892
919
  input_type = None
893
920
  for name, value in token.attrs.items():
894
921
  if name == "type":
@@ -899,7 +926,7 @@ class TreeBuilderModesMixin:
899
926
  self.frameset_ok = False
900
927
  return
901
928
 
902
- def _handle_body_start_table(self, token):
929
+ def _handle_body_start_table(self, token: Tag) -> None:
903
930
  if self.quirks_mode != "quirks":
904
931
  self._close_p_element()
905
932
  self._insert_element(token, push=True)
@@ -907,7 +934,7 @@ class TreeBuilderModesMixin:
907
934
  self.mode = InsertionMode.IN_TABLE
908
935
  return
909
936
 
910
- def _handle_body_start_plaintext_xmp(self, token):
937
+ def _handle_body_start_plaintext_xmp(self, token: Tag) -> None:
911
938
  self._close_p_element()
912
939
  self._insert_element(token, push=True)
913
940
  self.frameset_ok = False
@@ -919,66 +946,88 @@ class TreeBuilderModesMixin:
919
946
  self.mode = InsertionMode.TEXT
920
947
  return
921
948
 
922
- def _handle_body_start_textarea(self, token):
949
+ def _handle_body_start_textarea(self, token: Tag) -> None:
923
950
  self._insert_element(token, push=True)
924
951
  self.ignore_lf = True
925
952
  self.frameset_ok = False
926
953
  return
927
954
 
928
- def _handle_body_start_select(self, token):
955
+ def _handle_body_start_select(self, token: Tag) -> None:
929
956
  self._reconstruct_active_formatting_elements()
930
957
  self._insert_element(token, push=True)
931
958
  self.frameset_ok = False
932
959
  self._reset_insertion_mode()
933
960
  return
934
961
 
935
- def _handle_body_start_option(self, token):
962
+ def _handle_body_start_option(self, token: Tag) -> None:
936
963
  if self.open_elements and self.open_elements[-1].name == "option":
937
964
  self.open_elements.pop()
938
965
  self._reconstruct_active_formatting_elements()
939
966
  self._insert_element(token, push=True)
940
967
  return
941
968
 
942
- def _handle_body_start_optgroup(self, token):
969
+ def _handle_body_start_optgroup(self, token: Tag) -> None:
943
970
  if self.open_elements and self.open_elements[-1].name == "option":
944
971
  self.open_elements.pop()
945
972
  self._reconstruct_active_formatting_elements()
946
973
  self._insert_element(token, push=True)
947
974
  return
948
975
 
949
- def _handle_body_start_rp_rt(self, token):
976
+ def _handle_body_start_rp_rt(self, token: Tag) -> None:
950
977
  self._generate_implied_end_tags(exclude="rtc")
951
978
  self._insert_element(token, push=True)
952
979
  return
953
980
 
954
- def _handle_body_start_rb_rtc(self, token):
981
+ def _handle_body_start_rb_rtc(self, token: Tag) -> None:
955
982
  if self.open_elements and self.open_elements[-1].name in {"rb", "rp", "rt", "rtc"}:
956
983
  self._generate_implied_end_tags()
957
984
  self._insert_element(token, push=True)
958
985
  return
959
986
 
960
- def _handle_body_start_table_parse_error(self, token):
987
+ def _handle_body_start_table_parse_error(self, token: Tag) -> None:
961
988
  self._parse_error("unexpected-start-tag", tag_name=token.name)
962
989
  return
963
990
 
964
- def _handle_body_start_default(self, token):
991
+ def _handle_body_start_default(self, token: Tag) -> ModeResultTuple | None:
965
992
  self._reconstruct_active_formatting_elements()
966
993
  self._insert_element(token, push=True)
967
994
  if token.self_closing:
968
995
  self._parse_error("non-void-html-element-start-tag-with-trailing-solidus", tag_name=token.name)
969
996
  # Elements reaching here have no handler - never in FRAMESET_NEUTRAL/FORMATTING_ELEMENTS
970
997
  self.frameset_ok = False
971
- return
998
+ return None
972
999
 
973
- def _mode_in_table(self, token):
1000
+ def _mode_in_table(self, token: AnyToken) -> ModeResultTuple | None:
974
1001
  if isinstance(token, CharacterTokens):
975
1002
  data = token.data or ""
976
1003
  if "\x00" in data:
977
- self._parse_error("unexpected-null-character")
978
1004
  data = data.replace("\x00", "")
979
1005
  if not data:
980
1006
  return None
981
1007
  token = CharacterTokens(data)
1008
+
1009
+ if is_all_whitespace(data):
1010
+ self._append_text(data)
1011
+ return None
1012
+
1013
+ # html5lib-tests expect that some table foster-parenting text triggered by a
1014
+ # misnested formatting element (<a>) only produces an implied-end-tag error
1015
+ # when the table closes, not an additional character-in-table error.
1016
+ suppress_table_char_error = False
1017
+ if self.active_formatting:
1018
+ for idx in range(len(self.active_formatting) - 1, -1, -1):
1019
+ entry = self.active_formatting[idx]
1020
+ if entry is FORMAT_MARKER:
1021
+ break
1022
+ if entry["name"] == "a":
1023
+ if entry["node"] not in self.open_elements:
1024
+ suppress_table_char_error = True
1025
+ break
1026
+
1027
+ if not suppress_table_char_error:
1028
+ self.pending_table_text_should_error = True
1029
+ else:
1030
+ self.pending_table_text_should_error = False
982
1031
  self.pending_table_text = []
983
1032
  self.table_text_original_mode = self.mode
984
1033
  self.mode = InsertionMode.IN_TABLE_TEXT
@@ -1051,7 +1100,7 @@ class TreeBuilderModesMixin:
1051
1100
  self.form_element = node
1052
1101
  self.open_elements.pop() # push=True always adds to stack
1053
1102
  return None
1054
- self._parse_error("unexpected-start-tag-implies-table-voodoo", tag_name=name)
1103
+ self._parse_error("foster-parenting-start-tag", tag_name=name)
1055
1104
  previous = self.insert_from_table
1056
1105
  self.insert_from_table = True
1057
1106
  try:
@@ -1078,26 +1127,40 @@ class TreeBuilderModesMixin:
1078
1127
  if self.template_modes:
1079
1128
  return self._mode_in_template(token)
1080
1129
  if self._has_in_table_scope("table"):
1081
- self._parse_error("expected-closing-tag-but-got-eof", tag_name="table")
1130
+ self._parse_error("eof-in-table")
1082
1131
  return None
1083
1132
 
1084
- def _mode_in_table_text(self, token):
1133
+ def _mode_in_table_text(self, token: AnyToken) -> ModeResultTuple | None:
1085
1134
  if isinstance(token, CharacterTokens):
1086
1135
  # IN_TABLE mode guarantees non-empty data
1087
1136
  data = token.data
1088
- if "\x0c" in data:
1089
- self._parse_error("invalid-codepoint-in-table-text")
1090
- data = data.replace("\x0c", "")
1091
- if data:
1092
- self.pending_table_text.append(data)
1137
+ self.pending_table_text.append(data)
1093
1138
  return None
1139
+
1140
+ if (
1141
+ self.pending_table_text
1142
+ and isinstance(token, Tag)
1143
+ and token.kind == Tag.END
1144
+ and token.name == "table"
1145
+ and not is_all_whitespace("".join(self.pending_table_text))
1146
+ ):
1147
+ # If a misnested <a> exists only in the active formatting list, html5lib
1148
+ # reports the implied close when the table ends.
1149
+ if self.active_formatting:
1150
+ for idx in range(len(self.active_formatting) - 1, -1, -1):
1151
+ entry = self.active_formatting[idx]
1152
+ if entry is FORMAT_MARKER:
1153
+ break
1154
+ if entry["name"] == "a" and entry["node"] not in self.open_elements:
1155
+ self._parse_error("unexpected-implied-end-tag-in-table-view")
1156
+ break
1094
1157
  self._flush_pending_table_text()
1095
1158
  original = self.table_text_original_mode or InsertionMode.IN_TABLE
1096
1159
  self.table_text_original_mode = None
1097
1160
  self.mode = original
1098
1161
  return ("reprocess", original, token)
1099
1162
 
1100
- def _mode_in_caption(self, token):
1163
+ def _mode_in_caption(self, token: AnyToken) -> ModeResultTuple | None:
1101
1164
  if isinstance(token, CharacterTokens):
1102
1165
  return self._mode_in_body(token)
1103
1166
  if isinstance(token, CommentToken):
@@ -1136,7 +1199,7 @@ class TreeBuilderModesMixin:
1136
1199
  assert isinstance(token, EOFToken), f"Unexpected token type: {type(token)}"
1137
1200
  return self._mode_in_body(token)
1138
1201
 
1139
- def _close_caption_element(self):
1202
+ def _close_caption_element(self) -> bool:
1140
1203
  if not self._has_in_table_scope("caption"):
1141
1204
  self._parse_error("unexpected-end-tag", tag_name="caption")
1142
1205
  return False
@@ -1150,7 +1213,7 @@ class TreeBuilderModesMixin:
1150
1213
  self.mode = InsertionMode.IN_TABLE
1151
1214
  return True
1152
1215
 
1153
- def _mode_in_column_group(self, token):
1216
+ def _mode_in_column_group(self, token: AnyToken) -> ModeResultTuple | None:
1154
1217
  current = self.open_elements[-1] if self.open_elements else None
1155
1218
  if isinstance(token, CharacterTokens):
1156
1219
  data = token.data or ""
@@ -1245,7 +1308,7 @@ class TreeBuilderModesMixin:
1245
1308
  return None
1246
1309
  # Per spec: EOF when current is html - implicit None return
1247
1310
 
1248
- def _mode_in_table_body(self, token):
1311
+ def _mode_in_table_body(self, token: AnyToken) -> ModeResultTuple | None:
1249
1312
  if isinstance(token, CharacterTokens) or isinstance(token, CommentToken):
1250
1313
  return self._mode_in_table(token)
1251
1314
  if isinstance(token, Tag):
@@ -1321,7 +1384,7 @@ class TreeBuilderModesMixin:
1321
1384
  assert isinstance(token, EOFToken), f"Unexpected token type: {type(token)}"
1322
1385
  return self._mode_in_table(token)
1323
1386
 
1324
- def _mode_in_row(self, token):
1387
+ def _mode_in_row(self, token: AnyToken) -> ModeResultTuple | None:
1325
1388
  if isinstance(token, CharacterTokens) or isinstance(token, CommentToken):
1326
1389
  return self._mode_in_table(token)
1327
1390
  if isinstance(token, Tag):
@@ -1370,7 +1433,7 @@ class TreeBuilderModesMixin:
1370
1433
  assert isinstance(token, EOFToken), f"Unexpected token type: {type(token)}"
1371
1434
  return self._mode_in_table(token)
1372
1435
 
1373
- def _end_tr_element(self):
1436
+ def _end_tr_element(self) -> None:
1374
1437
  self._clear_stack_until({"tr", "template", "html"})
1375
1438
  # Pop tr if on top (may not be if stack was exhausted)
1376
1439
  if self.open_elements and self.open_elements[-1].name == "tr":
@@ -1381,7 +1444,7 @@ class TreeBuilderModesMixin:
1381
1444
  else:
1382
1445
  self.mode = InsertionMode.IN_TABLE_BODY
1383
1446
 
1384
- def _mode_in_cell(self, token):
1447
+ def _mode_in_cell(self, token: AnyToken) -> ModeResultTuple | None:
1385
1448
  if isinstance(token, CharacterTokens):
1386
1449
  previous = self.insert_from_table
1387
1450
  self.insert_from_table = False
@@ -1435,15 +1498,11 @@ class TreeBuilderModesMixin:
1435
1498
  return ("reprocess", self.mode, token)
1436
1499
  return self._mode_in_table(token)
1437
1500
 
1438
- def _mode_in_select(self, token):
1501
+ def _mode_in_select(self, token: AnyToken) -> ModeResultTuple | None:
1439
1502
  if isinstance(token, CharacterTokens):
1440
1503
  data = token.data or ""
1441
1504
  if "\x00" in data:
1442
- self._parse_error("invalid-codepoint-in-select")
1443
1505
  data = data.replace("\x00", "")
1444
- if "\x0c" in data:
1445
- self._parse_error("invalid-codepoint-in-select")
1446
- data = data.replace("\x0c", "")
1447
1506
  if data:
1448
1507
  self._reconstruct_active_formatting_elements()
1449
1508
  self._append_text(data)
@@ -1471,13 +1530,13 @@ class TreeBuilderModesMixin:
1471
1530
  self._insert_element(token, push=True)
1472
1531
  return None
1473
1532
  if name == "select":
1474
- self._parse_error("unexpected-start-tag-implies-end-tag", tag_name=name)
1533
+ self._parse_error("unexpected-select-in-select")
1475
1534
  # select is always in scope in IN_SELECT mode
1476
1535
  self._pop_until_any_inclusive({"select"})
1477
1536
  self._reset_insertion_mode()
1478
1537
  return None
1479
1538
  if name in {"input", "textarea"}:
1480
- self._parse_error("unexpected-start-tag-implies-end-tag", tag_name=name)
1539
+ self._parse_error("unexpected-start-tag-in-select", tag_name=name)
1481
1540
  # select is always in scope in IN_SELECT mode
1482
1541
  self._pop_until_any_inclusive({"select"})
1483
1542
  self._reset_insertion_mode()
@@ -1487,7 +1546,7 @@ class TreeBuilderModesMixin:
1487
1546
  self._insert_element(token, push=False)
1488
1547
  return None
1489
1548
  if name in {"caption", "col", "colgroup", "tbody", "td", "tfoot", "th", "thead", "tr", "table"}:
1490
- self._parse_error("unexpected-start-tag-implies-end-tag", tag_name=name)
1549
+ self._parse_error("unexpected-start-tag-in-select", tag_name=name)
1491
1550
  # select is always in scope in IN_SELECT mode
1492
1551
  self._pop_until_any_inclusive({"select"})
1493
1552
  self._reset_insertion_mode()
@@ -1505,6 +1564,7 @@ class TreeBuilderModesMixin:
1505
1564
  self._append_active_formatting_entry(name, token.attrs, node)
1506
1565
  return None
1507
1566
  if name == "hr":
1567
+ self._parse_error("unexpected-start-tag-in-select", tag_name=name)
1508
1568
  # Per spec: pop option and optgroup before inserting hr (makes hr sibling, not child)
1509
1569
  if self.open_elements and self.open_elements[-1].name == "option":
1510
1570
  self.open_elements.pop()
@@ -1514,22 +1574,29 @@ class TreeBuilderModesMixin:
1514
1574
  self._insert_element(token, push=False)
1515
1575
  return None
1516
1576
  if name == "menuitem":
1577
+ self._parse_error("unexpected-start-tag-in-select", tag_name=name)
1517
1578
  self._reconstruct_active_formatting_elements()
1518
1579
  self._insert_element(token, push=True)
1519
1580
  return None
1520
1581
  # Allow common HTML elements in select (newer spec)
1521
1582
  if name in {"p", "div", "span", "button", "datalist", "selectedcontent"}:
1583
+ self._parse_error("unexpected-start-tag-in-select", tag_name=name)
1522
1584
  self._reconstruct_active_formatting_elements()
1523
1585
  self._insert_element(token, push=not token.self_closing)
1524
1586
  return None
1525
1587
  if name in {"br", "img"}:
1588
+ self._parse_error("unexpected-start-tag-in-select", tag_name=name)
1526
1589
  self._reconstruct_active_formatting_elements()
1527
1590
  self._insert_element(token, push=False)
1528
1591
  return None
1529
1592
  if name == "plaintext":
1530
1593
  # Per spec: plaintext element is inserted in select (consumes all remaining text)
1594
+ self._parse_error("unexpected-start-tag-in-select", tag_name=name)
1531
1595
  self._reconstruct_active_formatting_elements()
1532
1596
  self._insert_element(token, push=True)
1597
+ return None
1598
+ # Any other start tag: parse error, ignore.
1599
+ self._parse_error("unexpected-start-tag-in-select", tag_name=name)
1533
1600
  return None
1534
1601
  if name == "optgroup":
1535
1602
  if self.open_elements and self.open_elements[-1].name == "option":
@@ -1537,13 +1604,13 @@ class TreeBuilderModesMixin:
1537
1604
  if self.open_elements and self.open_elements[-1].name == "optgroup":
1538
1605
  self.open_elements.pop()
1539
1606
  else:
1540
- self._parse_error("unexpected-end-tag", tag_name=token.name)
1607
+ self._parse_error("unexpected-end-tag-in-select", tag_name=token.name)
1541
1608
  return None
1542
1609
  if name == "option":
1543
1610
  if self.open_elements and self.open_elements[-1].name == "option":
1544
1611
  self.open_elements.pop()
1545
1612
  else:
1546
- self._parse_error("unexpected-end-tag", tag_name=token.name)
1613
+ self._parse_error("unexpected-end-tag-in-select", tag_name=token.name)
1547
1614
  return None
1548
1615
  if name == "select":
1549
1616
  # In IN_SELECT mode, select is always in scope - pop to it
@@ -1555,17 +1622,20 @@ class TreeBuilderModesMixin:
1555
1622
  # select is always on stack in IN_SELECT mode
1556
1623
  select_node = self._find_last_on_stack("select")
1557
1624
  fmt_index = self._find_active_formatting_index(name)
1558
- if fmt_index is not None:
1559
- target = self.active_formatting[fmt_index]["node"]
1560
- if target in self.open_elements: # pragma: no branch
1561
- select_index = self.open_elements.index(select_node)
1562
- target_index = self.open_elements.index(target)
1563
- if target_index < select_index:
1564
- self._parse_error("unexpected-end-tag", tag_name=name)
1565
- return None
1625
+ if fmt_index is None:
1626
+ self._parse_error("unexpected-end-tag-in-select", tag_name=name)
1627
+ return None
1628
+ target = self.active_formatting[fmt_index]["node"]
1629
+ if target in self.open_elements: # pragma: no branch
1630
+ select_index = self.open_elements.index(select_node)
1631
+ target_index = self.open_elements.index(target)
1632
+ if target_index < select_index:
1633
+ self._parse_error("unexpected-end-tag-in-select", tag_name=name)
1634
+ return None
1566
1635
  self._adoption_agency(name)
1567
1636
  return None
1568
1637
  if name in {"p", "div", "span", "button", "datalist", "selectedcontent"}:
1638
+ self._parse_error("unexpected-end-tag-in-select", tag_name=name)
1569
1639
  # Per HTML5 spec: these end tags in select mode close the element if it's on the stack.
1570
1640
  # But we must not pop across the select boundary (i.e., don't pop elements BEFORE select).
1571
1641
  select_idx = None
@@ -1582,11 +1652,9 @@ class TreeBuilderModesMixin:
1582
1652
  popped = self.open_elements.pop()
1583
1653
  if popped.name == name:
1584
1654
  break
1585
- else:
1586
- self._parse_error("unexpected-end-tag", tag_name=name)
1587
1655
  return None
1588
1656
  if name in {"caption", "col", "colgroup", "tbody", "td", "tfoot", "th", "thead", "tr", "table"}:
1589
- self._parse_error("unexpected-end-tag", tag_name=name)
1657
+ self._parse_error("unexpected-end-tag-in-select", tag_name=name)
1590
1658
  # select is always in scope in IN_SELECT mode
1591
1659
  self._pop_until_any_inclusive({"select"})
1592
1660
  self._reset_insertion_mode()
@@ -1597,7 +1665,7 @@ class TreeBuilderModesMixin:
1597
1665
  assert isinstance(token, EOFToken), f"Unexpected token type: {type(token)}"
1598
1666
  return self._mode_in_body(token)
1599
1667
 
1600
- def _mode_in_template(self, token):
1668
+ def _mode_in_template(self, token: AnyToken) -> ModeResultTuple | None:
1601
1669
  # § The "in template" insertion mode
1602
1670
  # https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-intemplate
1603
1671
  if isinstance(token, CharacterTokens):
@@ -1676,7 +1744,7 @@ class TreeBuilderModesMixin:
1676
1744
  return ("reprocess", self.mode, token)
1677
1745
  return None
1678
1746
 
1679
- def _mode_after_body(self, token):
1747
+ def _mode_after_body(self, token: AnyToken) -> ModeResultTuple | None:
1680
1748
  if isinstance(token, CharacterTokens):
1681
1749
  if is_all_whitespace(token.data):
1682
1750
  # Whitespace is processed using InBody rules (appended to body)
@@ -1697,7 +1765,7 @@ class TreeBuilderModesMixin:
1697
1765
  assert isinstance(token, EOFToken), f"Unexpected token type: {type(token)}"
1698
1766
  return None
1699
1767
 
1700
- def _mode_after_after_body(self, token):
1768
+ def _mode_after_after_body(self, token: AnyToken) -> ModeResultTuple | None:
1701
1769
  if isinstance(token, CharacterTokens):
1702
1770
  if is_all_whitespace(token.data):
1703
1771
  # Per spec: whitespace characters are inserted using the rules for the "in body" mode
@@ -1724,7 +1792,7 @@ class TreeBuilderModesMixin:
1724
1792
  assert isinstance(token, EOFToken), f"Unexpected token type: {type(token)}"
1725
1793
  return None
1726
1794
 
1727
- def _mode_in_frameset(self, token):
1795
+ def _mode_in_frameset(self, token: AnyToken) -> ModeResultTuple | None:
1728
1796
  # Per HTML5 spec §13.2.6.4.16: In frameset insertion mode
1729
1797
  if isinstance(token, CharacterTokens):
1730
1798
  # Only whitespace characters allowed; ignore all others
@@ -1766,11 +1834,14 @@ class TreeBuilderModesMixin:
1766
1834
  self._parse_error("unexpected-token-in-frameset")
1767
1835
  return None
1768
1836
 
1769
- def _mode_after_frameset(self, token):
1837
+ def _mode_after_frameset(self, token: AnyToken) -> ModeResultTuple | None:
1770
1838
  # Per HTML5 spec §13.2.6.4.17: After frameset insertion mode
1771
1839
  if isinstance(token, CharacterTokens):
1772
- # Only whitespace characters allowed; ignore all others
1773
- whitespace = "".join(ch for ch in token.data if ch in "\t\n\f\r ")
1840
+ # Only whitespace characters allowed; non-whitespace is a parse error.
1841
+ data = token.data or ""
1842
+ whitespace = "".join(ch for ch in data if ch in "\t\n\f\r ")
1843
+ if any(ch not in "\t\n\f\r " for ch in data):
1844
+ self._parse_error("unexpected-token-after-frameset")
1774
1845
  if whitespace:
1775
1846
  self._append_text(whitespace)
1776
1847
  return None
@@ -1783,6 +1854,9 @@ class TreeBuilderModesMixin:
1783
1854
  if token.kind == Tag.END and token.name == "html":
1784
1855
  self.mode = InsertionMode.AFTER_AFTER_FRAMESET
1785
1856
  return None
1857
+ if token.kind == Tag.END and token.name == "frameset":
1858
+ self._parse_error("unexpected-token-after-frameset")
1859
+ return None
1786
1860
  if token.kind == Tag.START and token.name == "noframes":
1787
1861
  # Insert noframes element directly and switch to TEXT mode
1788
1862
  self._insert_element(token, push=True)
@@ -1795,7 +1869,7 @@ class TreeBuilderModesMixin:
1795
1869
  self.mode = InsertionMode.IN_FRAMESET
1796
1870
  return ("reprocess", InsertionMode.IN_FRAMESET, token)
1797
1871
 
1798
- def _mode_after_after_frameset(self, token):
1872
+ def _mode_after_after_frameset(self, token: AnyToken) -> ModeResultTuple | None:
1799
1873
  # Per HTML5 spec §13.2.6.4.18: After after frameset insertion mode
1800
1874
  if isinstance(token, CharacterTokens):
1801
1875
  # Whitespace is processed using InBody rules
@@ -1826,7 +1900,7 @@ class TreeBuilderModesMixin:
1826
1900
 
1827
1901
  # Helpers ----------------------------------------------------------------
1828
1902
 
1829
- _MODE_HANDLERS = [
1903
+ _MODE_HANDLERS: list[Callable[[TreeBuilderModesMixin, AnyToken], ModeResultTuple | None]] = [
1830
1904
  _mode_initial,
1831
1905
  _mode_before_html,
1832
1906
  _mode_before_head,
@@ -1851,14 +1925,14 @@ class TreeBuilderModesMixin:
1851
1925
  _mode_in_template,
1852
1926
  ]
1853
1927
 
1854
- _BODY_TOKEN_HANDLERS = {
1928
+ _BODY_TOKEN_HANDLERS: dict[type[AnyToken], Callable[[TreeBuilderModesMixin, Any], ModeResultTuple | None]] = {
1855
1929
  CharacterTokens: _handle_characters_in_body,
1856
1930
  CommentToken: _handle_comment_in_body,
1857
1931
  Tag: _handle_tag_in_body,
1858
1932
  EOFToken: _handle_eof_in_body,
1859
1933
  }
1860
1934
 
1861
- _BODY_START_HANDLERS = {
1935
+ _BODY_START_HANDLERS: dict[str, Callable[[TreeBuilderModesMixin, Tag], ModeResultTuple | None]] = {
1862
1936
  "a": _handle_body_start_a,
1863
1937
  "address": _handle_body_start_block_with_p,
1864
1938
  "applet": _handle_body_start_applet_like,
@@ -1963,7 +2037,7 @@ class TreeBuilderModesMixin:
1963
2037
  "wbr": _handle_body_start_void_with_formatting,
1964
2038
  "xmp": _handle_body_start_plaintext_xmp,
1965
2039
  }
1966
- _BODY_END_HANDLERS = {
2040
+ _BODY_END_HANDLERS: dict[str, Callable[[TreeBuilderModesMixin, Tag], ModeResultTuple | None]] = {
1967
2041
  "address": _handle_body_end_block,
1968
2042
  "applet": _handle_body_end_applet_like,
1969
2043
  "article": _handle_body_end_block,