justhtml 0.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of justhtml might be problematic. Click here for more details.

@@ -0,0 +1,2016 @@
1
+ # ruff: noqa: S101, RUF012
2
+ # mypy: disable-error-code="attr-defined, has-type, var-annotated, assignment"
3
+
4
+ from __future__ import annotations
5
+
6
+ from typing import Any
7
+
8
+ from .constants import (
9
+ FORMATTING_ELEMENTS,
10
+ HEADING_ELEMENTS,
11
+ )
12
+ from .node import SimpleDomNode, TemplateNode
13
+ from .tokens import CharacterTokens, CommentToken, EOFToken, Tag, TokenSinkResult
14
+ from .treebuilder_utils import (
15
+ InsertionMode,
16
+ doctype_error_and_quirks,
17
+ is_all_whitespace,
18
+ )
19
+
20
+
21
+ class TreeBuilderModesMixin:
22
def _handle_doctype(self, token: Any) -> Any:
    """Handle a DOCTYPE token.

    Outside the INITIAL insertion mode the token only produces an
    "unexpected-doctype" parse error. In INITIAL mode a "!doctype" node is
    appended to the document, the quirks mode reported by
    ``doctype_error_and_quirks`` is applied and the mode advances to
    BEFORE_HTML.

    Returns:
        Always ``TokenSinkResult.Continue``.
    """
    if self.mode == InsertionMode.INITIAL:
        doctype_data = token.doctype
        has_error, quirks = doctype_error_and_quirks(doctype_data, self.iframe_srcdoc)

        self.document.append_child(SimpleDomNode("!doctype", data=doctype_data))

        if has_error:
            self._parse_error("unknown-doctype")

        self._set_quirks_mode(quirks)
        self.mode = InsertionMode.BEFORE_HTML
    else:
        self._parse_error("unexpected-doctype")
    return TokenSinkResult.Continue
39
+
40
def _mode_initial(self, token: Any) -> Any:
    """Process a token in the INITIAL insertion mode (no DOCTYPE seen yet).

    Whitespace-only text is dropped and comments are attached to the
    document. Every other token reports an "expected-doctype-but-got-*"
    parse error, switches to quirks mode and is handed on to BEFORE_HTML.

    Returns:
        ``None`` when the token was consumed here, otherwise a
        ``("reprocess", InsertionMode.BEFORE_HTML, token)`` tuple.
    """
    reached_eof = False
    if isinstance(token, CharacterTokens):
        if is_all_whitespace(token.data):
            return None
        self._parse_error("expected-doctype-but-got-chars")
    elif isinstance(token, CommentToken):
        self._append_comment_to_document(token.data)
        return None
    elif isinstance(token, EOFToken):
        reached_eof = True
        self._parse_error("expected-doctype-but-got-eof")
    else:
        # Only Tags remain at this point.
        if token.kind == Tag.START:
            error_code = "expected-doctype-but-got-start-tag"
        else:
            error_code = "expected-doctype-but-got-end-tag"
        self._parse_error(error_code, tag_name=token.name, token=token)

    # No DOCTYPE was seen, so the document is parsed in quirks mode.
    self._set_quirks_mode("quirks")
    if reached_eof:
        self.mode = InsertionMode.BEFORE_HTML
    return ("reprocess", InsertionMode.BEFORE_HTML, token)
62
+
63
def _mode_before_html(self, token: Any) -> Any:
    """Process a token in the BEFORE_HTML insertion mode.

    An explicit ``<html>`` start tag creates the root from its attributes.
    Whitespace-only text and comments are consumed, and end tags other than
    ``head``/``body``/``html``/``br`` are ignored with a parse error. Any
    other token creates an attribute-less root and is reprocessed in
    BEFORE_HEAD (text tokens first lose their leading whitespace).

    Returns:
        ``None`` when the token was consumed here, otherwise a
        ``("reprocess", InsertionMode.BEFORE_HEAD, token)`` tuple.
    """
    is_text = isinstance(token, CharacterTokens)
    if is_text and is_all_whitespace(token.data):
        return None

    if isinstance(token, CommentToken):
        self._append_comment_to_document(token.data)
        return None

    if isinstance(token, Tag):
        if token.kind == Tag.START:
            if token.name == "html":
                self._create_root(token.attrs)
                self.mode = InsertionMode.BEFORE_HEAD
                return None
        elif token.kind == Tag.END and token.name not in {"head", "body", "html", "br"}:
            # Ignore other end tags
            self._parse_error("unexpected-end-tag-before-html", tag_name=token.name)
            return None
    elif is_text:
        trimmed = token.data.lstrip("\t\n\f\r ")
        if len(trimmed) != len(token.data):
            token = CharacterTokens(trimmed)

    # Everything that reaches this point implies an <html> element.
    self._create_root({})
    self.mode = InsertionMode.BEFORE_HEAD
    return ("reprocess", InsertionMode.BEFORE_HEAD, token)
95
+
96
def _mode_before_head(self, token: Any) -> Any:
    """Process a token in the BEFORE_HEAD insertion mode.

    A ``<head>`` start tag opens the head element and a duplicate ``<html>``
    start tag merges its attributes into ``open_elements[0]``. Comments,
    whitespace-only text and stray end tags are consumed. Anything else
    inserts a phantom head and is reprocessed in IN_HEAD.

    Returns:
        ``None`` when the token was consumed here, otherwise a
        ``("reprocess", InsertionMode.IN_HEAD, token)`` tuple.
    """
    if isinstance(token, CharacterTokens):
        text = token.data or ""
        if "\x00" in text:
            self._parse_error("invalid-codepoint-before-head")
            text = text.replace("\x00", "")
            if not text:
                return None
        if is_all_whitespace(text):
            return None
        token = CharacterTokens(text)
    elif isinstance(token, CommentToken):
        self._append_comment(token.data)
        return None
    elif isinstance(token, Tag):
        tag_name = token.name
        if token.kind == Tag.START:
            if tag_name == "html":
                # Duplicate html tag - add attributes to existing html element
                # Note: open_elements[0] is always html at this point (created in BEFORE_HTML mode)
                self._add_missing_attributes(self.open_elements[0], token.attrs)
                return None
            if tag_name == "head":
                self.head_element = self._insert_element(token, push=True)
                self.mode = InsertionMode.IN_HEAD
                return None
        elif token.kind == Tag.END and tag_name not in {"head", "body", "html", "br"}:
            # Ignore other end tags
            self._parse_error("unexpected-end-tag-before-head", tag_name=tag_name)
            return None

    # Remaining tokens (including EOF) imply a head element.
    self.head_element = self._insert_phantom("head")
    self.mode = InsertionMode.IN_HEAD
    return ("reprocess", InsertionMode.IN_HEAD, token)
138
+
139
def _mode_in_head(self, token: Any) -> Any:
    """Process a token in the IN_HEAD insertion mode.

    Head-level start tags (metadata elements, ``template``, raw-text style
    elements, ``noscript``) and the ``</template>`` / ``</head>`` end tags
    are handled here. Every other token pops the current node, switches to
    AFTER_HEAD and is reprocessed there.

    Returns:
        ``None`` when the token was consumed here, otherwise a
        ``("reprocess", InsertionMode.AFTER_HEAD, token)`` tuple.
    """
    if isinstance(token, CharacterTokens):
        if is_all_whitespace(token.data):
            self._append_text(token.data)
            return None
        text = token.data or ""
        rest = text.lstrip("\t\n\f\r ")
        leading_ws = text[: len(text) - len(rest)]
        if leading_ws:
            current = self.open_elements[-1] if self.open_elements else None
            # Leading whitespace is only kept when the current node already has children.
            if current is not None and current.has_child_nodes():
                self._append_text(leading_ws)
        self._pop_current()
        self.mode = InsertionMode.AFTER_HEAD
        return ("reprocess", InsertionMode.AFTER_HEAD, CharacterTokens(rest))

    if isinstance(token, CommentToken):
        self._append_comment(token.data)
        return None

    if isinstance(token, Tag):
        tag_name = token.name
        if token.kind == Tag.START:
            if tag_name in {"base", "basefont", "bgsound", "link", "meta"}:
                self._insert_element(token, push=False)
                return None
            if tag_name == "template":
                self._insert_element(token, push=True)
                self._push_formatting_marker()
                self.frameset_ok = False
                self.mode = InsertionMode.IN_TEMPLATE
                self.template_modes.append(InsertionMode.IN_TEMPLATE)
                return None
            if tag_name in {"title", "style", "script", "noframes"}:
                self._insert_element(token, push=True)
                self.original_mode = self.mode
                self.mode = InsertionMode.TEXT
                return None
            if tag_name == "noscript":
                # Scripting is disabled: parse noscript content as HTML
                self._insert_element(token, push=True)
                self.mode = InsertionMode.IN_HEAD_NOSCRIPT
                return None
            # <html> and all other start tags use the fallback below.
        elif token.kind == Tag.END:
            if tag_name == "template":
                # Check if template is on the stack (don't use scope check as table blocks it)
                if not any(node.name == "template" for node in self.open_elements):
                    return None
                self._generate_implied_end_tags()
                self._pop_until_inclusive("template")
                self._clear_active_formatting_up_to_marker()
                # template_modes always non-empty here since we passed has_template check
                self.template_modes.pop()
                self._reset_insertion_mode()
                return None
            if tag_name == "head":
                self._pop_current()
                self.mode = InsertionMode.AFTER_HEAD
                return None
            # </body>, </html>, </br> and other end tags use the fallback below.

    # Fallback (also used for EOF): leave the head and reprocess.
    self._pop_current()
    self.mode = InsertionMode.AFTER_HEAD
    return ("reprocess", InsertionMode.AFTER_HEAD, token)
214
+
215
def _mode_in_head_noscript(self, token: Any) -> Any:
    """Handle tokens in 'in head noscript' insertion mode (scripting disabled).

    Whitespace, comments and a fixed set of head-level start tags are
    delegated to ``_mode_in_head``; ``<html>`` is delegated to
    ``_mode_in_body``. ``</noscript>`` pops the element and returns to
    IN_HEAD. Non-whitespace text, other start tags, ``</br>`` and EOF report
    a parse error, pop the noscript element and are reprocessed in IN_HEAD.
    Remaining end tags and nested ``head``/``noscript`` start tags are
    ignored with a parse error.

    Returns:
        The delegate's result, ``None`` when the token was consumed or
        ignored, or a ``("reprocess", InsertionMode.IN_HEAD, token)`` tuple.
    """
    if isinstance(token, CharacterTokens):
        text = token.data or ""
        # Whitespace: process using in head rules
        if is_all_whitespace(text):
            return self._mode_in_head(token)
        self._parse_error("unexpected-start-tag", tag_name="text")
    elif isinstance(token, CommentToken):
        return self._mode_in_head(token)
    elif isinstance(token, Tag):
        tag_name = token.name
        if token.kind == Tag.START:
            if tag_name == "html":
                return self._mode_in_body(token)
            if tag_name in {"basefont", "bgsound", "link", "meta", "noframes", "style"}:
                return self._mode_in_head(token)
            self._parse_error("unexpected-start-tag", tag_name=tag_name)
            if tag_name in {"head", "noscript"}:
                return None  # Ignore
        else:
            # token.kind == Tag.END
            if tag_name == "noscript":
                self._pop_current()  # Pop noscript
                self.mode = InsertionMode.IN_HEAD
                return None
            self._parse_error("unexpected-end-tag", tag_name=tag_name)
            if tag_name != "br":
                # Any other end tag is ignored
                return None
    elif isinstance(token, EOFToken):
        self._parse_error("expected-closing-tag-but-got-eof", tag_name="noscript")
    else:
        # All token types are handled above - CharacterTokens, CommentToken, Tag, EOFToken
        return None  # pragma: no cover

    # Shared tail: pop noscript and reprocess using the in head rules.
    self._pop_current()
    self.mode = InsertionMode.IN_HEAD
    return ("reprocess", InsertionMode.IN_HEAD, token)
263
+
264
def _mode_after_head(self, token: Any) -> Any:
    """Process a token in the AFTER_HEAD insertion mode.

    ``<body>`` and ``<frameset>`` open their elements directly. Head-level
    start tags are processed with the in head rules while the head element
    is temporarily back on the stack. Whitespace, comments, hidden inputs
    and stray end tags are consumed. All other tokens make sure a body
    exists and are reprocessed in IN_BODY.

    Returns:
        ``None`` when the token was consumed here, the result of
        ``_mode_in_head`` for delegated tokens, or a ``("reprocess", mode,
        token)`` tuple.
    """
    if isinstance(token, CharacterTokens):
        text = token.data or ""
        # NUL and form feed are each reported and then stripped, in this order.
        for dropped in ("\x00", "\x0c"):
            if dropped in text:
                self._parse_error("invalid-codepoint-in-body")
                text = text.replace(dropped, "")
        if not text:
            return None
        if is_all_whitespace(text):
            self._append_text(text)
            return None
        self._insert_body_if_missing()
        return ("reprocess", InsertionMode.IN_BODY, CharacterTokens(text))

    if isinstance(token, CommentToken):
        self._append_comment(token.data)
        return None

    if isinstance(token, Tag):
        tag_name = token.name
        if token.kind == Tag.START:
            if tag_name == "body":
                self._insert_element(token, push=True)
                self.mode = InsertionMode.IN_BODY
                self.frameset_ok = False
                return None
            if tag_name == "frameset":
                self._insert_element(token, push=True)
                self.mode = InsertionMode.IN_FRAMESET
                return None
            # Special handling: input type="hidden" doesn't create body or affect frameset_ok
            if tag_name == "input" and (token.attrs.get("type") or "").lower() == "hidden":
                # Parse error but ignore - don't create body, don't insert element
                self._parse_error("unexpected-hidden-input-after-head")
                return None
            if tag_name in {
                "base",
                "basefont",
                "bgsound",
                "link",
                "meta",
                "title",
                "style",
                "script",
                "noscript",
            }:
                self.open_elements.append(self.head_element)
                outcome = self._mode_in_head(token)
                # Remove the head element from wherever it is in the stack
                # (it might not be at the end if we inserted other elements like <title>)
                self.open_elements.remove(self.head_element)
                return outcome
            if tag_name == "template":
                # Template in after-head needs special handling:
                # Process in IN_HEAD mode, which will switch to IN_TEMPLATE
                # Don't remove head from stack - let normal processing continue
                self.open_elements.append(self.head_element)
                self.mode = InsertionMode.IN_HEAD
                return ("reprocess", InsertionMode.IN_HEAD, token)
            # <html>, non-hidden <input> and other start tags use the fallback below.
        elif token.kind == Tag.END:
            if tag_name == "template":
                return self._mode_in_head(token)
            if tag_name not in {"body", "html", "br"}:
                # Ignore other end tags
                self._parse_error("unexpected-end-tag-after-head", tag_name=tag_name)
                return None
    elif isinstance(token, EOFToken):
        self._insert_body_if_missing()
        self.mode = InsertionMode.IN_BODY
        return ("reprocess", InsertionMode.IN_BODY, token)

    # Fallback: create the body if needed and reprocess in body.
    self._insert_body_if_missing()
    return ("reprocess", InsertionMode.IN_BODY, token)
352
+
353
def _mode_text(self, token: Any) -> Any:
    """Process a token in the TEXT insertion mode.

    Character data is appended as text. Any other token pops the current
    node and restores ``original_mode`` (IN_BODY when that is unset). EOF
    additionally reports the unclosed element and is reprocessed in the
    restored mode.

    Returns:
        ``None``, or ``("reprocess", self.mode, token)`` for EOF.
    """
    if isinstance(token, CharacterTokens):
        self._append_text(token.data)
        return None

    hit_eof = isinstance(token, EOFToken)
    if hit_eof:
        # Get the tag name of the unclosed element
        unclosed = self.open_elements[-1].name if self.open_elements else None
        self._parse_error("expected-named-closing-tag-but-got-eof", tag_name=unclosed)

    # EOF and end tags both close the element and leave TEXT mode.
    self._pop_current()
    self.mode = self.original_mode or InsertionMode.IN_BODY
    if hit_eof:
        return ("reprocess", self.mode, token)
    return None
368
+
369
def _mode_in_body(self, token: Any) -> Any:
    """Dispatch *token* to the handler registered for its exact type.

    Looks ``type(token)`` up in ``self._BODY_TOKEN_HANDLERS`` and calls the
    handler as ``handler(self, token)``.

    Returns:
        The handler's result, or ``None`` when no handler is registered.
    """
    dispatch = self._BODY_TOKEN_HANDLERS.get(type(token))
    if not dispatch:
        return None
    return dispatch(self, token)
372
+
373
def _handle_characters_in_body(self, token: Any) -> Any:
    """Append character data while in body.

    NUL characters are reported ("invalid-codepoint") and removed. The
    active formatting elements are reconstructed before the text is
    appended. Text that is not all whitespace also clears ``frameset_ok``.

    Returns:
        ``None``.
    """
    text = token.data or ""
    if "\x00" in text:
        self._parse_error("invalid-codepoint")
        text = text.replace("\x00", "")
    whitespace_only = is_all_whitespace(text)
    self._reconstruct_active_formatting_elements()
    if not whitespace_only:
        self.frameset_ok = False
    self._append_text(text)
386
+
387
def _handle_comment_in_body(self, token: Any) -> Any:
    """Append the comment carried by *token* via ``_append_comment``; returns ``None``."""
    comment_text = token.data
    self._append_comment(comment_text)
390
+
391
def _handle_tag_in_body(self, token: Any) -> Any:
    """Route a start or end tag to its in-body handler.

    Start tags go through ``_BODY_START_HANDLERS`` with
    ``_handle_body_start_default`` as fallback. For end tags, ``</br>`` is
    re-dispatched as a ``<br>`` start tag, formatting elements run the
    adoption agency algorithm, and the rest go through
    ``_BODY_END_HANDLERS`` with ``_any_other_end_tag`` as fallback.

    Returns:
        Whatever the selected handler returns, or ``None``.
    """
    tag_name = token.name
    if token.kind == Tag.START:
        on_start = self._BODY_START_HANDLERS.get(tag_name)
        if on_start:
            return on_start(self, token)
        return self._handle_body_start_default(token)

    # Special case: </br> end tag is treated as <br> start tag
    if tag_name == "br":
        self._parse_error("unexpected-end-tag", tag_name=tag_name, token=token)
        return self._mode_in_body(Tag(Tag.START, "br", {}, False))

    if tag_name in FORMATTING_ELEMENTS:
        self._adoption_agency(tag_name)
        return None

    on_end = self._BODY_END_HANDLERS.get(tag_name)
    if on_end:
        return on_end(self, token)

    # Any other end tag
    self._any_other_end_tag(tag_name)
    return None
414
+
415
def _handle_eof_in_body(self, token: Any) -> Any:
    """Handle EOF while in body.

    With a template mode active the token is delegated to
    ``_mode_in_template``. Otherwise the first open element whose end tag
    may not be left implicit is reported with
    "expected-closing-tag-but-got-eof", and the token is reprocessed in
    AFTER_BODY.

    Returns:
        The template handler's result, or a
        ``("reprocess", InsertionMode.AFTER_BODY, token)`` tuple.
    """
    # If we're in a template, handle EOF in template mode first
    if self.template_modes:
        return self._mode_in_template(token)

    # Elements that may legitimately still be open at EOF.
    may_stay_open = {
        "dd",
        "dt",
        "li",
        "optgroup",
        "option",
        "p",
        "rb",
        "rp",
        "rt",
        "rtc",
        "tbody",
        "td",
        "tfoot",
        "th",
        "thead",
        "tr",
        "body",
        "html",
    }
    # Only the first offending element is reported.
    first_unclosed = next(
        (elem for elem in self.open_elements if elem.name not in may_stay_open),
        None,
    )
    if first_unclosed is not None:
        self._parse_error("expected-closing-tag-but-got-eof", tag_name=first_unclosed.name)

    self.mode = InsertionMode.AFTER_BODY
    return ("reprocess", InsertionMode.AFTER_BODY, token)
445
+
446
+ # ---------------------
447
+ # Body mode start tag handlers
448
+ # ---------------------
449
+
450
def _handle_body_start_html(self, token: Any) -> Any:
    """Handle a repeated ``<html>`` start tag while in body.

    Inside a template the tag is only reported as a parse error. Otherwise
    its attributes are merged into the first open element through
    ``_add_missing_attributes``.

    Returns:
        ``None``.
    """
    if self.template_modes:
        self._parse_error("unexpected-start-tag", tag_name=token.name)
    elif self.open_elements:  # pragma: no branch
        # In IN_BODY mode, html element is always at open_elements[0]
        self._add_missing_attributes(self.open_elements[0], token.attrs)
459
+
460
def _handle_body_start_body(self, token: Any) -> Any:
    """Handle a repeated ``<body>`` start tag while in body.

    Inside a template the tag is only reported as a parse error. Otherwise,
    when more than one element is open, a parse error is reported and the
    attributes are merged into the second open element if that element is a
    ``body``. ``frameset_ok`` is cleared in every non-template case.

    Returns:
        ``None``.
    """
    if self.template_modes:
        self._parse_error("unexpected-start-tag", tag_name=token.name)
        return

    stack = self.open_elements
    if len(stack) > 1:
        self._parse_error("unexpected-start-tag", tag_name=token.name)
        second = stack[1]
        if second and second.name == "body":
            self._add_missing_attributes(second, token.attrs)
    self.frameset_ok = False
473
+
474
def _handle_body_start_head(self, token: Any) -> Any:
    """Report the start tag as an "unexpected-start-tag" parse error and otherwise ignore it."""
    unexpected = token.name
    self._parse_error("unexpected-start-tag", tag_name=unexpected)
477
+
478
def _handle_body_start_in_head(self, token: Any) -> Any:
    """Process the start tag with the in head rules and return that handler's result."""
    outcome = self._mode_in_head(token)
    return outcome
480
+
481
def _handle_body_start_block_with_p(self, token: Any) -> Any:
    """Insert a block-level element after running ``_close_p_element``.

    The new element is pushed onto the stack of open elements.

    Returns:
        ``None``.
    """
    # The paragraph is closed first, then the element is inserted.
    self._close_p_element()
    self._insert_element(token, push=True)
485
+
486
def _handle_body_start_heading(self, token: Any) -> Any:
    """Insert a heading element while in body.

    ``_close_p_element`` runs first. If the current node is itself a heading
    it is reported ("unexpected-start-tag") and popped before the new
    heading is inserted. ``frameset_ok`` is cleared.

    Returns:
        ``None``.
    """
    self._close_p_element()
    current = self.open_elements[-1] if self.open_elements else None
    if current is not None and current.name in HEADING_ELEMENTS:
        # Headings do not nest: drop the one that is still open.
        self._parse_error("unexpected-start-tag", tag_name=token.name)
        self._pop_current()
    self._insert_element(token, push=True)
    self.frameset_ok = False
494
+
495
def _handle_body_start_pre_listing(self, token: Any) -> Any:
    """Insert a pre/listing-style element while in body.

    Runs ``_close_p_element``, inserts and pushes the element, then sets
    ``ignore_lf`` and clears ``frameset_ok``.

    Returns:
        ``None``.
    """
    self._close_p_element()
    self._insert_element(token, push=True)
    # Flag updates happen after the insertion.
    self.ignore_lf, self.frameset_ok = True, False
501
+
502
def _handle_body_start_form(self, token: Any) -> Any:
    """Insert a ``<form>`` element unless a form element is already recorded.

    When ``form_element`` is set the tag is reported as a parse error and
    ignored. Otherwise ``_close_p_element`` runs, the element is inserted
    and remembered in ``form_element``, and ``frameset_ok`` is cleared.

    Returns:
        ``None``.
    """
    if self.form_element is None:
        self._close_p_element()
        self.form_element = self._insert_element(token, push=True)
        self.frameset_ok = False
    else:
        self._parse_error("unexpected-start-tag", tag_name=token.name)
511
+
512
def _handle_body_start_button(self, token: Any) -> Any:
    """Insert a ``<button>`` element while in body.

    If a button is already in scope it is reported
    ("unexpected-start-tag-implies-end-tag") and closed first. The active
    formatting elements are then reconstructed, as the HTML standard's
    "in body" rules for a button start tag require, before the new element
    is inserted and ``frameset_ok`` is cleared.

    Returns:
        ``None``.
    """
    if self._has_in_scope("button"):
        self._parse_error("unexpected-start-tag-implies-end-tag", tag_name=token.name)
        self._close_element_by_name("button")
    # Without this step a formatting element that was closed implicitly
    # (still listed as active but no longer open) would not wrap the button.
    self._reconstruct_active_formatting_elements()
    self._insert_element(token, push=True)
    self.frameset_ok = False
519
+
520
def _handle_body_start_paragraph(self, token: Any) -> Any:
    """Run ``_close_p_element``, then insert and push the new element.

    Returns:
        ``None``.
    """
    # Close first, then open the new element.
    self._close_p_element()
    self._insert_element(token, push=True)
524
+
525
def _handle_body_start_math(self, token: Any) -> Any:
    """Insert an element in the "math" namespace while in body.

    The active formatting elements are reconstructed, the attributes are
    passed through ``_prepare_foreign_attributes`` and the element is
    inserted. It is pushed onto the stack only when the tag is not
    self-closing.

    Returns:
        ``None``.
    """
    self._reconstruct_active_formatting_elements()
    foreign_attrs = self._prepare_foreign_attributes("math", token.attrs)
    foreign_tag = Tag(Tag.START, token.name, foreign_attrs, token.self_closing)
    keep_open = not token.self_closing
    self._insert_element(foreign_tag, push=keep_open, namespace="math")
531
+
532
def _handle_body_start_svg(self, token: Any) -> Any:
    """Insert an element in the "svg" namespace while in body.

    The active formatting elements are reconstructed, the tag name is passed
    through ``_adjust_svg_tag_name`` and the attributes through
    ``_prepare_foreign_attributes``. The element is pushed onto the stack
    only when the tag is not self-closing.

    Returns:
        ``None``.
    """
    self._reconstruct_active_formatting_elements()
    svg_name = self._adjust_svg_tag_name(token.name)
    foreign_attrs = self._prepare_foreign_attributes("svg", token.attrs)
    foreign_tag = Tag(Tag.START, svg_name, foreign_attrs, token.self_closing)
    keep_open = not token.self_closing
    self._insert_element(foreign_tag, push=keep_open, namespace="svg")
539
+
540
def _handle_body_start_li(self, token: Any) -> Any:
    """Insert an ``<li>`` element while in body.

    Clears ``frameset_ok``, runs ``_close_p_element``, pops an ``li`` that is
    in list-item scope, and finally inserts and pushes the new element.

    Returns:
        ``None``.
    """
    self.frameset_ok = False
    self._close_p_element()
    li_is_open = self._has_in_list_item_scope("li")
    if li_is_open:
        # An li that is still open is closed implicitly.
        self._pop_until_any_inclusive({"li"})
    self._insert_element(token, push=True)
547
+
548
def _handle_body_start_dd_dt(self, token: Any) -> Any:
    """Insert a ``<dd>`` or ``<dt>`` element while in body.

    Clears ``frameset_ok`` and runs ``_close_p_element``. Open ``dd`` and
    ``dt`` elements in definition scope are then popped, checking the
    token's own name first and the sibling name second, before the new
    element is inserted and pushed.

    Returns:
        ``None``.
    """
    self.frameset_ok = False
    self._close_p_element()
    # The element matching the token is checked first, then its sibling.
    close_order = ("dd", "dt") if token.name == "dd" else ("dt", "dd")
    for candidate in close_order:
        if self._has_in_definition_scope(candidate):
            self._pop_until_any_inclusive({candidate})
    self._insert_element(token, push=True)
564
+
565
def _adoption_agency(self, subject: Any) -> None:
    """Run the adoption agency algorithm for the formatting tag named *subject*.

    The numbered comments below follow the steps the code implements. The
    method mutates ``self.open_elements`` and ``self.active_formatting`` and
    reparents DOM nodes in place.

    Args:
        subject: Tag name; it is compared against ``node.name`` and used to
            look up entries in the active formatting list.

    Returns:
        ``None``.
    """
    # 1. If the current node is the subject, and it is not in the active formatting elements list...
    if self.open_elements and self.open_elements[-1].name == subject:
        if not self._has_active_formatting_entry(subject):
            self._pop_until_inclusive(subject)
            return

    # 2. Outer loop
    # Runs at most 8 times; each iteration either returns or restructures
    # one formatting element / furthest block pair.
    for _ in range(8):
        # 3. Find formatting element
        formatting_element_index = self._find_active_formatting_index(subject)
        if formatting_element_index is None:
            return

        formatting_element_entry = self.active_formatting[formatting_element_index]
        formatting_element = formatting_element_entry["node"]

        # 4. If formatting element is not in open elements
        if formatting_element not in self.open_elements:
            self._parse_error("adoption-agency-1.3")
            self._remove_formatting_entry(formatting_element_index)
            return

        # 5. If formatting element is in open elements but not in scope
        if not self._has_element_in_scope(formatting_element.name):
            self._parse_error("adoption-agency-1.3")
            return

        # 6. If formatting element is not the current node
        # Reported only; processing continues.
        if formatting_element is not self.open_elements[-1]:
            self._parse_error("adoption-agency-1.3")

        # 7. Find furthest block
        # First "special" element below the formatting element on the stack.
        furthest_block = None
        formatting_element_in_open_index = self.open_elements.index(formatting_element)

        for i in range(formatting_element_in_open_index + 1, len(self.open_elements)):
            node = self.open_elements[i]
            if self._is_special_element(node):
                furthest_block = node
                break

        if furthest_block is None:
            # formatting_element is known to be on the stack
            while True:
                popped = self.open_elements.pop()
                if popped is formatting_element:
                    break
            self._remove_formatting_entry(formatting_element_index)
            return

        # 8. Bookmark
        # Position in active_formatting where the replacement entry will go.
        bookmark = formatting_element_index + 1

        # 9. Node and Last Node
        node = furthest_block
        last_node = furthest_block

        # 10. Inner loop
        # Walks up the stack from the furthest block to the formatting element.
        inner_loop_counter = 0
        while True:
            inner_loop_counter += 1

            # 10.1 Node = element above node
            node_index = self.open_elements.index(node)
            node = self.open_elements[node_index - 1]

            # 10.2 If node is formatting element, break
            if node is formatting_element:
                break

            # 10.3 Find active formatting entry for node
            node_formatting_index = self._find_active_formatting_index_by_node(node)

            # After three iterations, formatting entries are dropped instead of cloned.
            if inner_loop_counter > 3 and node_formatting_index is not None:
                self._remove_formatting_entry(node_formatting_index)
                if node_formatting_index < bookmark:
                    bookmark -= 1
                node_formatting_index = None

            if node_formatting_index is None:
                # Remove node from the stack and continue from the element
                # that now occupies its slot.
                node_index = self.open_elements.index(node)
                self.open_elements.remove(node)
                node = self.open_elements[node_index]
                continue

            # 10.4 Replace entry with new element
            entry = self.active_formatting[node_formatting_index]
            new_element = self._create_element(entry["name"], entry["node"].namespace, entry["attrs"])
            entry["node"] = new_element
            self.open_elements[self.open_elements.index(node)] = new_element
            node = new_element

            # 10.5 If last node is furthest block, update bookmark
            if last_node is furthest_block:
                bookmark = node_formatting_index + 1

            # 10.6 Reparent last_node
            if last_node.parent:
                last_node.parent.remove_child(last_node)
            node.append_child(last_node)

            # 10.7
            last_node = node

        # 11. Insert last_node into common ancestor
        # The common ancestor is the element just above the formatting element.
        common_ancestor = self.open_elements[formatting_element_in_open_index - 1]
        if last_node.parent:
            last_node.parent.remove_child(last_node)

        if self._should_foster_parenting(common_ancestor, for_tag=last_node.name):
            parent, position = self._appropriate_insertion_location(common_ancestor, foster_parenting=True)
            self._insert_node_at(parent, position, last_node)
        else:
            if type(common_ancestor) is TemplateNode and common_ancestor.template_content:
                common_ancestor.template_content.append_child(last_node)
            else:
                common_ancestor.append_child(last_node)

        # 12. Create new formatting element
        entry = self.active_formatting[formatting_element_index]
        new_formatting_element = self._create_element(entry["name"], entry["node"].namespace, entry["attrs"])
        entry["node"] = new_formatting_element

        # 13. Move children of furthest block
        while furthest_block.has_child_nodes():
            child = furthest_block.children[0]
            furthest_block.remove_child(child)
            new_formatting_element.append_child(child)

        furthest_block.append_child(new_formatting_element)

        # 14. Remove formatting element from active formatting and insert new at bookmark
        # Per spec, bookmark is always > formatting_element_index (starts at fmt_idx+1,
        # can only be set to higher values or decremented when entries above fmt_idx are removed)
        self._remove_formatting_entry(formatting_element_index)
        bookmark -= 1
        self.active_formatting.insert(bookmark, entry)

        # 15. Remove formatting element from open elements and insert new one
        self.open_elements.remove(formatting_element)
        furthest_block_index = self.open_elements.index(furthest_block)
        self.open_elements.insert(furthest_block_index + 1, new_formatting_element)
708
+
709
def _handle_body_start_a(self, token: Any) -> Any:
    """Insert an ``<a>`` element while in body.

    If an "a" entry is still active, the adoption agency algorithm runs for
    it and any leftover entry or open element named "a" is removed. The
    active formatting elements are then reconstructed and the new element is
    inserted and recorded as an active formatting entry.

    Returns:
        ``None``.
    """
    anchor_is_active = self._has_active_formatting_entry("a")
    if anchor_is_active:
        self._adoption_agency("a")
        self._remove_last_active_formatting_by_name("a")
        self._remove_last_open_element_by_name("a")
    self._reconstruct_active_formatting_elements()
    anchor = self._insert_element(token, push=True)
    self._append_active_formatting_entry("a", token.attrs, anchor)
718
+
719
def _handle_body_start_formatting(self, token: Any) -> Any:
    """Insert a formatting element while in body.

    A ``nobr`` that is already in scope is first closed through the adoption
    agency algorithm. After the active formatting elements are
    reconstructed, an entry reported by ``_find_active_formatting_duplicate``
    is removed and the new element is inserted and recorded as active.

    Returns:
        ``None``.
    """
    tag_name = token.name
    if tag_name == "nobr" and self._in_scope("nobr"):
        self._adoption_agency("nobr")
        self._remove_last_active_formatting_by_name("nobr")
        self._remove_last_open_element_by_name("nobr")

    self._reconstruct_active_formatting_elements()

    stale_index = self._find_active_formatting_duplicate(tag_name, token.attrs)
    if stale_index is not None:
        self._remove_formatting_entry(stale_index)

    inserted = self._insert_element(token, push=True)
    self._append_active_formatting_entry(tag_name, token.attrs, inserted)
732
+
733
def _handle_body_start_applet_like(self, token: Any) -> Any:
    """Insert an applet-like element while in body.

    Reconstructs the active formatting elements, inserts and pushes the
    element, pushes a formatting marker and clears ``frameset_ok``.

    Returns:
        ``None``.
    """
    self._reconstruct_active_formatting_elements()
    self._insert_element(token, push=True)
    # The marker is pushed only after the element has been inserted.
    self._push_formatting_marker()
    self.frameset_ok = False
739
+
740
def _handle_body_start_br(self, token: Any) -> Any:
    """Insert the element for *token* without pushing it onto the stack.

    Runs ``_close_p_element``, reconstructs the active formatting elements,
    inserts the element with ``push=False`` and clears ``frameset_ok``.

    Returns:
        ``None``.
    """
    self._close_p_element()
    self._reconstruct_active_formatting_elements()
    # push=False: the element is not left on the stack of open elements.
    self._insert_element(token, push=False)
    self.frameset_ok = False
746
+
747
def _handle_body_start_frameset(self, token: Any) -> Any:
    """Replace the body with a ``<frameset>`` while in body.

    The tag is ignored with an "unexpected-start-tag-ignored" parse error
    when ``frameset_ok`` is false or when no ``body`` is on the stack of open
    elements. Otherwise the body is detached from its parent (if it has
    one), the stack is truncated to the elements above the body, the
    frameset is inserted and the mode switches to IN_FRAMESET.

    Returns:
        ``None``.
    """
    if not self.frameset_ok:
        self._parse_error("unexpected-start-tag-ignored", tag_name=token.name)
        return

    # Find body element on the stack (may not exist if already in frameset)
    body_index = next(
        (i for i, elem in enumerate(self.open_elements) if elem.name == "body"),
        None,
    )
    if body_index is None:
        # No body on stack (e.g., nested frameset after mode reset), ignore
        self._parse_error("unexpected-start-tag-ignored", tag_name=token.name)
        return

    body_elem = self.open_elements[body_index]
    # The standard removes the body from its parent only "if it has one";
    # a detached body previously raised AttributeError here.
    if body_elem.parent is not None:
        body_elem.parent.remove_child(body_elem)
    self.open_elements = self.open_elements[:body_index]
    self._insert_element(token, push=True)
    self.mode = InsertionMode.IN_FRAMESET
767
+
768
+ # ---------------------
769
+ # Body mode end tag handlers
770
+ # ---------------------
771
+
772
def _handle_body_end_body(self, token: Any) -> Any:
    """Switch to AFTER_BODY when a ``body`` element is in scope; otherwise do nothing.

    Returns:
        ``None``.
    """
    body_in_scope = self._in_scope("body")
    if body_in_scope:
        self.mode = InsertionMode.AFTER_BODY
776
+
777
def _handle_body_end_html(self, token: Any) -> Any:
    """Hand ``</html>`` on to AFTER_BODY when a ``body`` element is in scope.

    Returns:
        A ``("reprocess", InsertionMode.AFTER_BODY, token)`` tuple, or
        ``None`` when no body is in scope.
    """
    if not self._in_scope("body"):
        return None
    return ("reprocess", InsertionMode.AFTER_BODY, token)
781
+
782
def _handle_body_end_p(self, token: Any) -> Any:
    """Handle ``</p>`` while in body.

    If ``_close_p_element`` reports that nothing was closed, a parse error
    is raised, an attribute-less ``<p>`` is inserted and that paragraph is
    closed immediately.

    Returns:
        ``None``.
    """
    if self._close_p_element():
        return
    # No paragraph was closed: synthesize an empty one.
    self._parse_error("unexpected-end-tag", tag_name=token.name)
    self._insert_element(Tag(Tag.START, "p", {}, False), push=True)
    self._close_p_element()
789
+
790
def _handle_body_end_li(self, token: Any) -> Any:
    """Handle ``</li>`` while in body.

    Pops elements up to and including the ``li`` when one is in list-item
    scope; otherwise reports "unexpected-end-tag" and ignores the tag.

    Returns:
        ``None``.
    """
    if self._has_in_list_item_scope("li"):
        self._pop_until_any_inclusive({"li"})
    else:
        self._parse_error("unexpected-end-tag", tag_name=token.name)
796
+
797
def _handle_body_end_dd_dt(self, token: Any) -> Any:
    """Handle ``</dd>`` or ``</dt>`` while in body.

    When no element with the token's name is in definition scope the tag is
    reported ("unexpected-end-tag") and ignored. Otherwise elements are
    popped up to and including the element with the same name as the token.

    Returns:
        ``None``.
    """
    name = token.name
    if not self._has_in_definition_scope(name):
        self._parse_error("unexpected-end-tag", tag_name=name)
        return
    # Pop until the element named by the token itself. Stopping at either
    # dd or dt could halt on the sibling element and leave the target open.
    self._pop_until_any_inclusive({name})
803
+
804
def _handle_body_end_form(self, token: Any) -> Any:
    """Handle ``</form>`` while in body.

    Without a recorded ``form_element`` the tag is reported and ignored.
    Otherwise the recorded form is removed from the stack of open elements
    and the pointer is cleared. A parse error is reported if the form was
    not on the stack.

    Returns:
        ``None``.
    """
    current_form = self.form_element
    if current_form is None:
        self._parse_error("unexpected-end-tag", tag_name=token.name)
        return
    was_open = self._remove_from_open_elements(current_form)
    self.form_element = None
    if not was_open:
        self._parse_error("unexpected-end-tag", tag_name=token.name)
813
+
814
def _handle_body_end_applet_like(self, token: Any) -> Any:
    """Handle an end tag dispatched to the "applet-like" handler.

    When the named element is in scope, pop the stack through it and clear
    the active formatting list back to the last marker; otherwise record
    an ``unexpected-end-tag`` error.
    """
    tag_name = token.name
    if not self._in_scope(tag_name):
        self._parse_error("unexpected-end-tag", tag_name=tag_name)
        return None
    stack = self.open_elements
    # Pop through the matching element (known to be in scope).
    while stack and stack.pop().name != tag_name:
        pass
    self._clear_active_formatting_up_to_marker()
    return None
826
+
827
def _handle_body_end_heading(self, token: Any) -> Any:
    """Handle a heading end tag in body mode.

    Any heading element in scope satisfies the end tag: implied end tags
    are generated, a mismatch between the current node and the token name
    is reported as ``end-tag-too-early``, and the stack is popped through
    the nearest heading element.
    """
    tag_name = token.name
    if not self._has_any_in_scope(HEADING_ELEMENTS):
        self._parse_error("unexpected-end-tag", tag_name=tag_name)
        return None
    self._generate_implied_end_tags()
    stack = self.open_elements
    if stack and stack[-1].name != tag_name:
        self._parse_error("end-tag-too-early", tag_name=tag_name)
    # A heading was verified to be in scope above, so this stops at one.
    while stack and stack.pop().name not in HEADING_ELEMENTS:
        pass
    return None
841
+
842
def _handle_body_end_block(self, token: Any) -> Any:
    """Handle the end tag of a block-level element in body mode.

    Requires the element to be in scope; then generates implied end tags,
    reports ``end-tag-too-early`` if the current node is a different
    element, and pops the stack through the named element.
    """
    tag_name = token.name
    if not self._in_scope(tag_name):
        self._parse_error("unexpected-end-tag", tag_name=tag_name)
        return None
    self._generate_implied_end_tags()
    current = self.open_elements[-1] if self.open_elements else None
    if current is not None and current.name != tag_name:
        self._parse_error("end-tag-too-early", tag_name=tag_name)
    self._pop_until_any_inclusive({tag_name})
    return None
852
+
853
def _handle_body_end_template(self, token: Any) -> Any:
    """Handle ``</template>``.

    Ignored when no ``template`` is on the open-element stack. Otherwise:
    generate implied end tags, pop through the template, clear active
    formatting back to the last marker, drop the innermost template
    insertion mode (if any) and reset the insertion mode.
    """
    for open_node in self.open_elements:
        if open_node.name == "template":
            break
    else:
        # No template anywhere on the stack: nothing to close.
        return None
    self._generate_implied_end_tags()
    self._pop_until_inclusive("template")
    self._clear_active_formatting_up_to_marker()
    # Pop template mode if available
    if self.template_modes:  # pragma: no branch
        self.template_modes.pop()
    self._reset_insertion_mode()
    return None
865
+
866
def _handle_body_start_structure_ignored(self, token: Any) -> Any:
    """Report a start tag as ``unexpected-start-tag-ignored`` and drop it."""
    ignored_name = token.name
    self._parse_error("unexpected-start-tag-ignored", tag_name=ignored_name)
    return None
869
+
870
def _handle_body_start_col_or_frame(self, token: Any) -> Any:
    """Handle a start tag dispatched to the col/frame handler in body mode.

    In full-document parsing (no fragment context) the tag is reported and
    ignored; in fragment parsing the element is inserted without being
    pushed onto the open-element stack.
    """
    if self.fragment_context is not None:
        self._insert_element(token, push=False)
    else:
        self._parse_error("unexpected-start-tag-ignored", tag_name=token.name)
    return None
876
+
877
def _handle_body_start_image(self, token: Any) -> Any:
    """Handle the non-standard ``<image>`` start tag by treating it as ``<img>``.

    Records an ``image-start-tag`` error, builds an ``img`` start tag that
    carries the original attributes and self-closing flag, inserts it as a
    void element and clears ``frameset_ok``.
    """
    self._parse_error("image-start-tag", tag_name=token.name)
    replacement = Tag(Tag.START, "img", token.attrs, token.self_closing)
    self._reconstruct_active_formatting_elements()
    self._insert_element(replacement, push=False)
    self.frameset_ok = False
    return None
884
+
885
def _handle_body_start_void_with_formatting(self, token: Any) -> Any:
    """Insert a void element after reconstructing active formatting.

    The element is not pushed on the open-element stack, and
    ``frameset_ok`` is cleared.
    """
    self._reconstruct_active_formatting_elements()
    self._insert_element(token, push=False)
    self.frameset_ok = False
    return None
890
+
891
def _handle_body_start_simple_void(self, token: Any) -> Any:
    """Insert a void element without pushing it on the open-element stack."""
    self._insert_element(token, push=False)
    return None
894
+
895
def _handle_body_start_input(self, token: Any) -> Any:
    """Handle ``<input>`` in body mode.

    The element is inserted as a void element. ``frameset_ok`` is cleared
    unless the ``type`` attribute is present and equals ``hidden``
    (compared case-insensitively; a valueless attribute counts as ``""``).
    """
    attributes = token.attrs
    type_value = None
    if "type" in attributes:
        type_value = (attributes["type"] or "").lower()
    self._insert_element(token, push=False)
    if type_value != "hidden":
        self.frameset_ok = False
    return None
905
+
906
def _handle_body_start_table(self, token: Any) -> Any:
    """Handle ``<table>`` in body mode.

    Outside quirks mode an open paragraph is closed first. The table is
    then inserted and pushed, ``frameset_ok`` is cleared and the insertion
    mode becomes IN_TABLE.
    """
    in_quirks = self.quirks_mode == "quirks"
    if not in_quirks:
        self._close_p_element()
    self._insert_element(token, push=True)
    self.frameset_ok = False
    self.mode = InsertionMode.IN_TABLE
    return None
913
+
914
def _handle_body_start_plaintext_xmp(self, token: Any) -> Any:
    """Handle ``<plaintext>`` and the raw-text style start tags routed here.

    Closes an open paragraph, inserts and pushes the element and clears
    ``frameset_ok``. For ``plaintext`` a tokenizer state override is
    requested; for every other tag the current mode is saved in
    ``original_mode`` and the builder switches to TEXT mode.
    """
    self._close_p_element()
    self._insert_element(token, push=True)
    self.frameset_ok = False
    if token.name != "plaintext":
        # xmp, iframe, noembed, noframes, noscript (scripting disabled)
        self.original_mode = self.mode
        self.mode = InsertionMode.TEXT
    else:
        self.tokenizer_state_override = TokenSinkResult.Plaintext
    return None
925
+
926
def _handle_body_start_textarea(self, token: Any) -> Any:
    """Handle ``<textarea>``: insert and push it, set ``ignore_lf`` and clear ``frameset_ok``."""
    self._insert_element(token, push=True)
    self.ignore_lf, self.frameset_ok = True, False
    return None
931
+
932
def _handle_body_start_select(self, token: Any) -> Any:
    """Handle ``<select>`` in body mode.

    Reconstructs active formatting, inserts and pushes the element, clears
    ``frameset_ok`` and lets ``_reset_insertion_mode`` choose the next mode.
    """
    self._reconstruct_active_formatting_elements()
    self._insert_element(token, push=True)
    self.frameset_ok = False
    self._reset_insertion_mode()
    return None
938
+
939
def _handle_body_start_option(self, token: Any) -> Any:
    """Handle ``<option>`` in body mode.

    An ``option`` that is the current node is popped first, so options
    become siblings; then active formatting is reconstructed and the new
    element is inserted and pushed.
    """
    stack = self.open_elements
    if stack and stack[-1].name == "option":
        stack.pop()
    self._reconstruct_active_formatting_elements()
    self._insert_element(token, push=True)
    return None
945
+
946
def _handle_body_start_optgroup(self, token: Any) -> Any:
    """Handle ``<optgroup>`` in body mode.

    Pops an ``option`` that is the current node, reconstructs active
    formatting, then inserts and pushes the new element.
    """
    stack = self.open_elements
    current_is_option = bool(stack) and stack[-1].name == "option"
    if current_is_option:
        stack.pop()
    self._reconstruct_active_formatting_elements()
    self._insert_element(token, push=True)
    return None
952
+
953
def _handle_body_start_rp_rt(self, token: Any) -> Any:
    """Handle ``<rp>``/``<rt>``: generate implied end tags except ``rtc``, then insert and push."""
    self._generate_implied_end_tags(exclude="rtc")
    self._insert_element(token, push=True)
    return None
957
+
958
def _handle_body_start_rb_rtc(self, token: Any) -> Any:
    """Handle ``<rb>``/``<rtc>`` in body mode.

    Implied end tags are generated only when the current node is itself a
    ruby annotation element (``rb``, ``rp``, ``rt`` or ``rtc``); the new
    element is then inserted and pushed.
    """
    stack = self.open_elements
    current_name = stack[-1].name if stack else None
    if current_name in {"rb", "rp", "rt", "rtc"}:
        self._generate_implied_end_tags()
    self._insert_element(token, push=True)
    return None
963
+
964
def _handle_body_start_table_parse_error(self, token: Any) -> Any:
    """Report the start tag as ``unexpected-start-tag`` and ignore it."""
    offending = token.name
    self._parse_error("unexpected-start-tag", tag_name=offending)
    return None
967
+
968
def _handle_body_start_default(self, token: Any) -> Any:
    """Fallback handler for start tags without a dedicated body-mode handler.

    Reconstructs active formatting, inserts and pushes the element, reports
    a trailing solidus on this non-void element, and clears ``frameset_ok``.
    """
    self._reconstruct_active_formatting_elements()
    self._insert_element(token, push=True)
    has_trailing_solidus = token.self_closing
    if has_trailing_solidus:
        self._parse_error("non-void-html-element-start-tag-with-trailing-solidus", tag_name=token.name)
    # Elements reaching here have no handler - never in FRAMESET_NEUTRAL/FORMATTING_ELEMENTS
    self.frameset_ok = False
    return None
976
+
977
def _mode_in_table(self, token: Any) -> Any:
    """Process one token under the "in table" insertion mode.

    Returns ``None`` when the token is fully handled, a
    ``("reprocess", mode, token)`` tuple when it must be handled again in
    another mode, or whatever the delegated mode handler returns.
    """
    if isinstance(token, CharacterTokens):
        data = token.data or ""
        if "\x00" in data:
            # NUL characters are reported and stripped before buffering.
            self._parse_error("unexpected-null-character")
            data = data.replace("\x00", "")
            if not data:
                return None
            token = CharacterTokens(data)
        # Start a fresh table-text buffer and remember which mode to resume.
        self.pending_table_text = []
        self.table_text_original_mode = self.mode
        self.mode = InsertionMode.IN_TABLE_TEXT
        return ("reprocess", InsertionMode.IN_TABLE_TEXT, token)
    if isinstance(token, CommentToken):
        self._append_comment(token.data)
        return None
    if isinstance(token, Tag):
        name = token.name
        if token.kind == Tag.START:
            if name == "caption":
                self._clear_stack_until({"table", "template", "html"})
                self._push_formatting_marker()
                self._insert_element(token, push=True)
                self.mode = InsertionMode.IN_CAPTION
                return None
            if name == "colgroup":
                self._clear_stack_until({"table", "template", "html"})
                self._insert_element(token, push=True)
                self.mode = InsertionMode.IN_COLUMN_GROUP
                return None
            if name == "col":
                # A bare <col> implies an enclosing <colgroup>.
                self._clear_stack_until({"table", "template", "html"})
                implied = Tag(Tag.START, "colgroup", {}, False)
                self._insert_element(implied, push=True)
                self.mode = InsertionMode.IN_COLUMN_GROUP
                return ("reprocess", InsertionMode.IN_COLUMN_GROUP, token)
            if name in {"tbody", "tfoot", "thead"}:
                self._clear_stack_until({"table", "template", "html"})
                self._insert_element(token, push=True)
                self.mode = InsertionMode.IN_TABLE_BODY
                return None
            if name in {"td", "th", "tr"}:
                # A row or cell directly in a table implies a <tbody>.
                self._clear_stack_until({"table", "template", "html"})
                implied = Tag(Tag.START, "tbody", {}, False)
                self._insert_element(implied, push=True)
                self.mode = InsertionMode.IN_TABLE_BODY
                return ("reprocess", InsertionMode.IN_TABLE_BODY, token)
            if name == "table":
                # A nested <table> start tag acts as </table> followed by <table>.
                self._parse_error("unexpected-start-tag-implies-end-tag", tag_name=name)
                closed = self._close_table_element()
                if closed:
                    return ("reprocess", self.mode, token)
                return None
            if name in {"style", "script"}:
                # Per HTML5 spec: style and script are inserted directly into the table
                # (not processed as in-head which would move them)
                self._insert_element(token, push=True)
                self.original_mode = self.mode
                self.mode = InsertionMode.TEXT
                return None
            if name == "template":
                # Template is handled by delegating to IN_HEAD
                return self._mode_in_head(token)
            if name == "input":
                input_type = None
                for attr_name, attr_value in token.attrs.items():
                    if attr_name == "type":
                        input_type = (attr_value or "").lower()
                        break
                if input_type == "hidden":
                    self._parse_error("unexpected-hidden-input-in-table")
                    self._insert_element(token, push=True)
                    self.open_elements.pop()  # push=True always adds to stack
                    return None
                # Non-hidden inputs fall through to the generic handling below.
            if name == "form":
                self._parse_error("unexpected-form-in-table")
                if self.form_element is None:
                    node = self._insert_element(token, push=True)
                    self.form_element = node
                    self.open_elements.pop()  # push=True always adds to stack
                return None
            # Anything else: handle with the in-body rules while
            # insert_from_table is set; the previous flag value is restored.
            self._parse_error("unexpected-start-tag-implies-table-voodoo", tag_name=name)
            previous = self.insert_from_table
            self.insert_from_table = True
            try:
                return self._mode_in_body(token)
            finally:
                self.insert_from_table = previous
        else:
            if name == "table":
                self._close_table_element()
                return None
            if name in {"body", "caption", "col", "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr"}:
                self._parse_error("unexpected-end-tag", tag_name=name)
                return None
            # Any other end tag: same in-body delegation as for start tags.
            self._parse_error("unexpected-end-tag-implies-table-voodoo", tag_name=name)
            previous = self.insert_from_table
            self.insert_from_table = True
            try:
                return self._mode_in_body(token)
            finally:
                self.insert_from_table = previous
    # Per spec, only CharacterTokens, CommentToken, Tag, and EOFToken exist
    assert isinstance(token, EOFToken), f"Unexpected token type: {type(token)}"
    # If we're in a template, handle EOF in template mode first
    if self.template_modes:
        return self._mode_in_template(token)
    if self._has_in_table_scope("table"):
        self._parse_error("expected-closing-tag-but-got-eof", tag_name="table")
    return None
1087
+
1088
def _mode_in_table_text(self, token: Any) -> Any:
    """Process one token under the "in table text" insertion mode.

    Character tokens are buffered in ``pending_table_text`` after form-feed
    characters are reported and removed. Any other token flushes the
    buffer, restores the saved mode (IN_TABLE when none was saved) and is
    reprocessed in that mode.
    """
    if not isinstance(token, CharacterTokens):
        self._flush_pending_table_text()
        resume_mode = self.table_text_original_mode or InsertionMode.IN_TABLE
        self.table_text_original_mode = None
        self.mode = resume_mode
        return ("reprocess", resume_mode, token)

    # IN_TABLE mode guarantees non-empty data
    text = token.data
    if "\x0c" in text:
        self._parse_error("invalid-codepoint-in-table-text")
        text = text.replace("\x0c", "")
    if text:
        self.pending_table_text.append(text)
    return None
1103
+
1104
def _mode_in_caption(self, token: Any) -> Any:
    """Process one token under the "in caption" insertion mode.

    Table-structure tags close the caption and are reprocessed in IN_TABLE;
    everything else is handled with the in-body rules. Returns ``None``, a
    ``("reprocess", mode, token)`` tuple, or the in-body handler's result.
    """
    if isinstance(token, CharacterTokens):
        return self._mode_in_body(token)
    if isinstance(token, CommentToken):
        self._append_comment(token.data)
        return None
    if isinstance(token, Tag):
        name = token.name
        if token.kind == Tag.START:
            if name in {"caption", "col", "colgroup", "tbody", "tfoot", "thead", "tr", "td", "th"}:
                self._parse_error("unexpected-start-tag-implies-end-tag", tag_name=name)
                if self._close_caption_element():
                    return ("reprocess", InsertionMode.IN_TABLE, token)
                # Fragment parsing with caption context: caption not on stack, ignore table structure elements
                return None
            if name == "table":
                self._parse_error("unexpected-start-tag-implies-end-tag", tag_name=name)
                if self._close_caption_element():
                    return ("reprocess", InsertionMode.IN_TABLE, token)
                # Fragment parsing: no caption on stack - handle in body mode
                return self._mode_in_body(token)
            return self._mode_in_body(token)
        # End tags from here on.
        if name == "caption":
            # Both outcomes return None; the helper records the parse error
            # itself when no caption is in table scope.
            if not self._close_caption_element():
                return None
            return None
        if name == "table":
            if self._close_caption_element():
                return ("reprocess", InsertionMode.IN_TABLE, token)
            return None
        if name in {"tbody", "tfoot", "thead"}:
            # These elements are never in table scope when in caption -
            # caption closes any open tbody/tfoot/thead when created
            self._parse_error("unexpected-end-tag", tag_name=name)
            return None
        return self._mode_in_body(token)
    assert isinstance(token, EOFToken), f"Unexpected token type: {type(token)}"
    return self._mode_in_body(token)
1142
+
1143
def _close_caption_element(self) -> bool:
    """Close the open ``caption`` element, if one is in table scope.

    Returns ``False`` (after recording ``unexpected-end-tag``) when no
    caption is in table scope. Otherwise generates implied end tags, pops
    through the caption, clears active formatting back to the last marker,
    switches to IN_TABLE and returns ``True``.
    """
    if not self._has_in_table_scope("caption"):
        self._parse_error("unexpected-end-tag", tag_name="caption")
        return False
    self._generate_implied_end_tags()
    stack = self.open_elements
    # Caption verified in scope above
    while stack and stack.pop().name != "caption":
        pass
    self._clear_active_formatting_up_to_marker()
    self.mode = InsertionMode.IN_TABLE
    return True
1156
+
1157
def _mode_in_column_group(self, token: Any) -> Any:
    """Process one token under the "in column group" insertion mode.

    Returns ``None`` when the token is fully handled, a
    ``("reprocess", mode, token)`` tuple when it must be handled again in
    another mode, or whatever the delegated mode handler returns.

    Two deviations from the HTML parsing algorithm are corrected here:

    * A character token made up only of whitespace is inserted and then
      finished. Previously the now-empty remainder still ran the
      non-whitespace path, which recorded a parse error and closed the
      ``colgroup``.
    * For an end tag not handled explicitly, the current node is popped
      only when it is a ``colgroup``; otherwise the token is a parse error
      and is ignored. Previously any current node other than ``html``
      (including a ``template``) was popped.
    """
    current = self.open_elements[-1] if self.open_elements else None
    if isinstance(token, CharacterTokens):
        data = token.data or ""
        # Find first non-whitespace character
        stripped = data.lstrip(" \t\n\r\f")

        if len(stripped) < len(data):
            # Has leading whitespace - insert it
            ws = data[: len(data) - len(stripped)]
            self._append_text(ws)

        if not stripped:
            # Whitespace-only (or empty) data: nothing is left to process.
            return None

        # Continue processing non-whitespace with a new token
        non_ws_token = CharacterTokens(stripped)
        if current and current.name == "html":
            # Fragment parsing with colgroup context: drop non-whitespace characters
            # (This is the only way html can be current in IN_COLUMN_GROUP mode)
            self._parse_error("unexpected-characters-in-column-group")
            return None
        # In a template, non-whitespace characters are parse errors - ignore them
        if current and current.name == "template":
            self._parse_error("unexpected-characters-in-template-column-group")
            return None
        self._parse_error("unexpected-characters-in-column-group")
        self._pop_current()
        self.mode = InsertionMode.IN_TABLE
        return ("reprocess", InsertionMode.IN_TABLE, non_ws_token)
    if isinstance(token, CommentToken):
        self._append_comment(token.data)
        return None
    if isinstance(token, Tag):
        name = token.name
        if token.kind == Tag.START:
            if name == "html":
                return self._mode_in_body(token)
            if name == "col":
                self._insert_element(token, push=True)
                self.open_elements.pop()  # push=True always adds to stack
                return None
            if name == "template":
                # Template is handled by delegating to IN_HEAD
                return self._mode_in_head(token)
            if name == "colgroup":
                self._parse_error("unexpected-start-tag-implies-end-tag", tag_name=name)
                # Don't pop template element - only pop actual colgroup
                if current and current.name == "colgroup":
                    self._pop_current()
                    self.mode = InsertionMode.IN_TABLE
                    return ("reprocess", InsertionMode.IN_TABLE, token)
                return None
            if (
                self.fragment_context
                and self.fragment_context.tag_name.lower() == "colgroup"
                and not self._has_in_table_scope("table")
            ):
                self._parse_error("unexpected-start-tag-in-column-group", tag_name=name)
                return None
            # Anything else: if we're in a colgroup, pop it and switch to IN_TABLE
            if current and current.name == "colgroup":
                self._pop_current()
                self.mode = InsertionMode.IN_TABLE
                return ("reprocess", InsertionMode.IN_TABLE, token)
            # In template column group context (via <col> in template), ignore non-column content
            # At this point current is template - the only other case after colgroup fragment
            # and colgroup element are handled
            self._parse_error("unexpected-start-tag-in-template-column-group", tag_name=name)
            return None
        # End tags from here on.
        if name == "colgroup":
            if current and current.name == "colgroup":
                self._pop_current()
                self.mode = InsertionMode.IN_TABLE
            else:
                self._parse_error("unexpected-end-tag", tag_name=token.name)
            return None
        if name == "col":
            self._parse_error("unexpected-end-tag", tag_name=name)
            return None
        if name == "template":
            # Template end tag needs proper handling
            return self._mode_in_head(token)
        # Any other end tag: only a real colgroup may be closed implicitly.
        if current and current.name == "colgroup":
            self._pop_current()
            self.mode = InsertionMode.IN_TABLE
            return ("reprocess", InsertionMode.IN_TABLE, token)
        # Current node is html (fragment) or template: ignore the token.
        self._parse_error("unexpected-end-tag", tag_name=name)
        return None
    assert isinstance(token, EOFToken), f"Unexpected token type: {type(token)}"
    if current and current.name == "colgroup":
        self._pop_current()
        self.mode = InsertionMode.IN_TABLE
        return ("reprocess", InsertionMode.IN_TABLE, token)
    if current and current.name == "template":
        # In template, delegate EOF handling to IN_TEMPLATE
        return self._mode_in_template(token)
    # Per spec: EOF when current is html - nothing more to do
    return None
1251
+
1252
def _mode_in_table_body(self, token: Any) -> Any:
    """Process one token under the "in table body" insertion mode.

    Handles row and cell start tags and the tags that close the current
    ``tbody``/``tfoot``/``thead``; characters, comments, EOF and all other
    tags are delegated to ``_mode_in_table``.
    """
    if isinstance(token, CharacterTokens) or isinstance(token, CommentToken):
        return self._mode_in_table(token)
    if isinstance(token, Tag):
        name = token.name
        if token.kind == Tag.START:
            if name == "tr":
                self._clear_stack_until({"tbody", "tfoot", "thead", "template", "html"})
                self._insert_element(token, push=True)
                self.mode = InsertionMode.IN_ROW
                return None
            if name in {"td", "th"}:
                # A cell directly inside a table body implies a <tr>.
                self._parse_error("unexpected-cell-in-table-body")
                self._clear_stack_until({"tbody", "tfoot", "thead", "template", "html"})
                implied = Tag(Tag.START, "tr", {}, False)
                self._insert_element(implied, push=True)
                self.mode = InsertionMode.IN_ROW
                return ("reprocess", InsertionMode.IN_ROW, token)
            if name in {"caption", "col", "colgroup", "tbody", "tfoot", "thead", "table"}:
                current = self.open_elements[-1] if self.open_elements else None
                # When in a template, these tags create invalid structure - treat as "anything else"
                if current and current.name == "template":
                    self._parse_error("unexpected-start-tag-in-template-table-context", tag_name=name)
                    return None
                # In fragment parsing with tbody/tfoot/thead context and no tbody on stack, ignore these tags
                if (
                    self.fragment_context
                    and current
                    and current.name == "html"
                    and self.fragment_context.tag_name.lower() in {"tbody", "tfoot", "thead"}
                ):
                    self._parse_error("unexpected-start-tag")
                    return None
                # Pop tbody/tfoot/thead (stack always has elements here in normal parsing)
                if self.open_elements:
                    self.open_elements.pop()
                    self.mode = InsertionMode.IN_TABLE
                    return ("reprocess", InsertionMode.IN_TABLE, token)
                # Empty stack edge case - go directly to IN_TABLE without reprocess
                self.mode = InsertionMode.IN_TABLE  # pragma: no cover
                return None  # pragma: no cover
            return self._mode_in_table(token)
        # End tags from here on.
        if name in {"tbody", "tfoot", "thead"}:
            if not self._has_in_table_scope(name):
                self._parse_error("unexpected-end-tag", tag_name=name)
                return None
            self._clear_stack_until({"tbody", "tfoot", "thead", "template", "html"})
            self._pop_current()
            self.mode = InsertionMode.IN_TABLE
            return None
        if name == "table":
            current = self.open_elements[-1] if self.open_elements else None
            # In a template, reject </table> as there's no table element
            if current and current.name == "template":
                self._parse_error("unexpected-end-tag", tag_name=token.name)
                return None
            # In fragment parsing with tbody/tfoot/thead context and no tbody on stack, ignore </table>
            if (
                self.fragment_context
                and current
                and current.name == "html"
                and self.fragment_context.tag_name.lower() in {"tbody", "tfoot", "thead"}
            ):
                self._parse_error("unexpected-end-tag", tag_name=token.name)
                return None
            # Close the current section if there is one, then let IN_TABLE
            # handle the </table> itself.
            if current and current.name in {"tbody", "tfoot", "thead"}:
                self.open_elements.pop()
            self.mode = InsertionMode.IN_TABLE
            return ("reprocess", InsertionMode.IN_TABLE, token)
        if name in {"caption", "col", "colgroup", "td", "th", "tr"}:
            self._parse_error("unexpected-end-tag", tag_name=name)
            return None
        return self._mode_in_table(token)
    assert isinstance(token, EOFToken), f"Unexpected token type: {type(token)}"
    return self._mode_in_table(token)
1327
+
1328
def _mode_in_row(self, token: Any) -> Any:
    """Process one token under the "in row" insertion mode.

    Returns ``None`` when the token is fully handled, a
    ``("reprocess", mode, token)`` tuple when it must be handled again
    after the row has been closed, or the result of the delegated handler.

    The set of end tags that are reported and ignored previously contained
    the misspelling ``"group"``. It now contains ``"colgroup"``, matching
    the HTML parsing algorithm and the sibling table modes, so
    ``</colgroup>`` is rejected here directly.
    """
    if isinstance(token, CharacterTokens) or isinstance(token, CommentToken):
        return self._mode_in_table(token)
    if isinstance(token, Tag):
        name = token.name
        if token.kind == Tag.START:
            if name in {"td", "th"}:
                self._clear_stack_until({"tr", "template", "html"})
                self._insert_element(token, push=True)
                self._push_formatting_marker()
                self.mode = InsertionMode.IN_CELL
                return None
            if name in {"caption", "col", "colgroup", "tbody", "tfoot", "thead", "tr", "table"}:
                if not self._has_in_table_scope("tr"):
                    self._parse_error("unexpected-start-tag-implies-end-tag", tag_name=name)
                    return None
                # Close the row, then handle the tag in whatever mode results.
                self._end_tr_element()
                return ("reprocess", self.mode, token)
            # Anything else: in-body rules while insert_from_table is set;
            # the previous flag value is restored afterwards.
            previous = self.insert_from_table
            self.insert_from_table = True
            try:
                return self._mode_in_body(token)
            finally:
                self.insert_from_table = previous
        else:
            if name == "tr":
                if not self._has_in_table_scope("tr"):
                    self._parse_error("unexpected-end-tag", tag_name=name)
                    return None
                self._end_tr_element()
                return None
            if name in {"table", "tbody", "tfoot", "thead"}:
                if self._has_in_table_scope(name):
                    self._end_tr_element()
                    return ("reprocess", self.mode, token)
                self._parse_error("unexpected-end-tag", tag_name=name)
                return None
            if name in {"caption", "col", "colgroup", "td", "th"}:
                self._parse_error("unexpected-end-tag", tag_name=name)
                return None
            previous = self.insert_from_table
            self.insert_from_table = True
            try:
                return self._mode_in_body(token)
            finally:
                self.insert_from_table = previous
    assert isinstance(token, EOFToken), f"Unexpected token type: {type(token)}"
    return self._mode_in_table(token)
1376
+
1377
def _end_tr_element(self) -> None:
    """Close the current table row and pick the follow-up insertion mode.

    Clears the stack back to a ``tr``/``template``/``html`` node, pops the
    ``tr`` if it is on top, then resumes the innermost template mode when
    inside a template and IN_TABLE_BODY otherwise.
    """
    self._clear_stack_until({"tr", "template", "html"})
    stack = self.open_elements
    # Pop tr if on top (may not be if stack was exhausted)
    if stack and stack[-1].name == "tr":
        stack.pop()
    # When in a template, restore template mode; otherwise use IN_TABLE_BODY
    self.mode = self.template_modes[-1] if self.template_modes else InsertionMode.IN_TABLE_BODY
1387
+
1388
def _mode_in_cell(self, token: Any) -> Any:
    """Process one token under the "in cell" insertion mode.

    Cell content is handled with the in-body rules while
    ``insert_from_table`` is temporarily set to ``False``. Table-structure
    tags close the cell first and are then reprocessed in the resulting
    mode.
    """
    if isinstance(token, CharacterTokens):
        previous = self.insert_from_table
        self.insert_from_table = False
        try:
            return self._mode_in_body(token)
        finally:
            # Restore the flag even if the in-body handler raises.
            self.insert_from_table = previous
    if isinstance(token, CommentToken):
        self._append_comment(token.data)
        return None
    if isinstance(token, Tag):
        name = token.name
        if token.kind == Tag.START:
            if name in {"caption", "col", "colgroup", "tbody", "td", "tfoot", "th", "thead", "tr"}:
                if self._close_table_cell():
                    return ("reprocess", self.mode, token)
                # Per spec: if we reach here in IN_CELL mode with no cell to close,
                # we're in a fragment context with td/th as context element and no table structure.
                # Issue parse error and ignore the token.
                self._parse_error("unexpected-start-tag-in-cell-fragment", tag_name=name)
                return None
            previous = self.insert_from_table
            self.insert_from_table = False
            try:
                return self._mode_in_body(token)
            finally:
                self.insert_from_table = previous
        else:
            if name in {"td", "th"}:
                if not self._has_in_table_scope(name):
                    self._parse_error("unexpected-end-tag", tag_name=name)
                    return None
                self._end_table_cell(name)
                return None
            if name in {"table", "tbody", "tfoot", "thead", "tr"}:
                # Per HTML5 spec: only close cell if the element is actually in scope
                # Otherwise it's a parse error and we ignore the token
                if not self._has_in_table_scope(name):
                    self._parse_error("unexpected-end-tag", tag_name=name)
                    return None
                self._close_table_cell()
                return ("reprocess", self.mode, token)
            previous = self.insert_from_table
            self.insert_from_table = False
            try:
                return self._mode_in_body(token)
            finally:
                self.insert_from_table = previous
    assert isinstance(token, EOFToken), f"Unexpected token type: {type(token)}"
    # At EOF, close an open cell and reprocess; with no cell to close, let
    # IN_TABLE handle the EOF.
    if self._close_table_cell():
        return ("reprocess", self.mode, token)
    return self._mode_in_table(token)
1441
+
1442
def _mode_in_select(self, token: Any) -> Any:
    """Process one token under the "in select" insertion mode.

    Returns ``None`` when the token is fully handled, a
    ``("reprocess", mode, token)`` tuple when it must be handled again in
    another mode, or whatever the delegated mode handler returns.
    """
    if isinstance(token, CharacterTokens):
        data = token.data or ""
        # NUL and form-feed characters are reported and stripped.
        if "\x00" in data:
            self._parse_error("invalid-codepoint-in-select")
            data = data.replace("\x00", "")
        if "\x0c" in data:
            self._parse_error("invalid-codepoint-in-select")
            data = data.replace("\x0c", "")
        if data:
            self._reconstruct_active_formatting_elements()
            self._append_text(data)
        return None
    if isinstance(token, CommentToken):
        self._append_comment(token.data)
        return None
    if isinstance(token, Tag):
        name = token.name
        if token.kind == Tag.START:
            if name == "html":
                return ("reprocess", InsertionMode.IN_BODY, token)
            if name == "option":
                if self.open_elements and self.open_elements[-1].name == "option":
                    self.open_elements.pop()
                self._reconstruct_active_formatting_elements()
                self._insert_element(token, push=True)
                return None
            if name == "optgroup":
                if self.open_elements and self.open_elements[-1].name == "option":
                    self.open_elements.pop()
                if self.open_elements and self.open_elements[-1].name == "optgroup":
                    self.open_elements.pop()
                self._reconstruct_active_formatting_elements()
                self._insert_element(token, push=True)
                return None
            if name == "select":
                # A nested <select> start tag closes the open select.
                self._parse_error("unexpected-start-tag-implies-end-tag", tag_name=name)
                # select is always in scope in IN_SELECT mode
                self._pop_until_any_inclusive({"select"})
                self._reset_insertion_mode()
                return None
            if name in {"input", "textarea"}:
                self._parse_error("unexpected-start-tag-implies-end-tag", tag_name=name)
                # select is always in scope in IN_SELECT mode
                self._pop_until_any_inclusive({"select"})
                self._reset_insertion_mode()
                return ("reprocess", self.mode, token)
            if name == "keygen":
                self._reconstruct_active_formatting_elements()
                self._insert_element(token, push=False)
                return None
            if name in {"caption", "col", "colgroup", "tbody", "td", "tfoot", "th", "thead", "tr", "table"}:
                self._parse_error("unexpected-start-tag-implies-end-tag", tag_name=name)
                # select is always in scope in IN_SELECT mode
                self._pop_until_any_inclusive({"select"})
                self._reset_insertion_mode()
                return ("reprocess", self.mode, token)
            if name in {"script", "template"}:
                return self._mode_in_head(token)
            if name in {"svg", "math"}:
                # For foreign elements, honor the self-closing flag
                self._reconstruct_active_formatting_elements()
                self._insert_element(token, push=not token.self_closing, namespace=name)
                return None
            if name in FORMATTING_ELEMENTS:
                self._reconstruct_active_formatting_elements()
                node = self._insert_element(token, push=True)
                self._append_active_formatting_entry(name, token.attrs, node)
                return None
            if name == "hr":
                # Per spec: pop option and optgroup before inserting hr (makes hr sibling, not child)
                if self.open_elements and self.open_elements[-1].name == "option":
                    self.open_elements.pop()
                if self.open_elements and self.open_elements[-1].name == "optgroup":
                    self.open_elements.pop()
                self._reconstruct_active_formatting_elements()
                self._insert_element(token, push=False)
                return None
            if name == "menuitem":
                self._reconstruct_active_formatting_elements()
                self._insert_element(token, push=True)
                return None
            # Allow common HTML elements in select (newer spec)
            if name in {"p", "div", "span", "button", "datalist", "selectedcontent"}:
                self._reconstruct_active_formatting_elements()
                self._insert_element(token, push=not token.self_closing)
                return None
            if name in {"br", "img"}:
                self._reconstruct_active_formatting_elements()
                self._insert_element(token, push=False)
                return None
            if name == "plaintext":
                # Per spec: plaintext element is inserted in select (consumes all remaining text)
                self._reconstruct_active_formatting_elements()
                self._insert_element(token, push=True)
                return None
        # NOTE(review): there is no else-branch here, so a start tag that
        # matched none of the cases above also runs through the checks below
        # and ends at the final "unexpected-end-tag" error - confirm that this
        # error code is intended for start tags.
        if name == "optgroup":
            if self.open_elements and self.open_elements[-1].name == "option":
                self.open_elements.pop()
            if self.open_elements and self.open_elements[-1].name == "optgroup":
                self.open_elements.pop()
            else:
                self._parse_error("unexpected-end-tag", tag_name=token.name)
            return None
        if name == "option":
            if self.open_elements and self.open_elements[-1].name == "option":
                self.open_elements.pop()
            else:
                self._parse_error("unexpected-end-tag", tag_name=token.name)
            return None
        if name == "select":
            # In IN_SELECT mode, select is always in scope - pop to it
            self._pop_until_any_inclusive({"select"})
            self._reset_insertion_mode()
            return None
        # Handle end tags for allowed HTML elements in select
        if name == "a" or name in FORMATTING_ELEMENTS:
            # select is always on stack in IN_SELECT mode
            select_node = self._find_last_on_stack("select")
            fmt_index = self._find_active_formatting_index(name)
            if fmt_index is not None:
                target = self.active_formatting[fmt_index]["node"]
                if target in self.open_elements:  # pragma: no branch
                    select_index = self.open_elements.index(select_node)
                    target_index = self.open_elements.index(target)
                    # A formatting element opened before the select must not
                    # be closed from inside it.
                    if target_index < select_index:
                        self._parse_error("unexpected-end-tag", tag_name=name)
                        return None
            self._adoption_agency(name)
            return None
        if name in {"p", "div", "span", "button", "datalist", "selectedcontent"}:
            # Per HTML5 spec: these end tags in select mode close the element if it's on the stack.
            # But we must not pop across the select boundary (i.e., don't pop elements BEFORE select).
            select_idx = None
            target_idx = None
            for i, node in enumerate(self.open_elements):
                if node.name == "select" and select_idx is None:
                    select_idx = i
                if node.name == name:
                    target_idx = i  # Track the LAST occurrence
            # Only pop if target exists and is AFTER (or at same level as) select
            # i.e., the target is inside the select or there's no select
            if target_idx is not None and (select_idx is None or target_idx > select_idx):
                while True:
                    popped = self.open_elements.pop()
                    if popped.name == name:
                        break
            else:
                self._parse_error("unexpected-end-tag", tag_name=name)
            return None
        if name in {"caption", "col", "colgroup", "tbody", "td", "tfoot", "th", "thead", "tr", "table"}:
            self._parse_error("unexpected-end-tag", tag_name=name)
            # select is always in scope in IN_SELECT mode
            self._pop_until_any_inclusive({"select"})
            self._reset_insertion_mode()
            return ("reprocess", self.mode, token)
        # Any other end tag: parse error, ignore
        self._parse_error("unexpected-end-tag", tag_name=name)
        return None
    assert isinstance(token, EOFToken), f"Unexpected token type: {type(token)}"
    return self._mode_in_body(token)
1603
+
1604
def _mode_in_template(self, token: Any) -> Any:
    """Dispatch one token while in the "in template" insertion mode.

    https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-intemplate

    Text and comments are handed to the in-body handler. A start tag selects
    the mode the template contents are parsed in: the top entry of
    ``self.template_modes`` is replaced, ``self.mode`` is updated and the token
    is returned for reprocessing. Head-level tags and ``</template>`` are
    delegated to the in-head handler. At EOF an unclosed template is popped
    and the token is reprocessed in the mode picked by
    ``_reset_insertion_mode``.

    Returns the delegated handler's result, a ``("reprocess", mode, token)``
    tuple, or ``None`` when the token is dropped.
    """
    if isinstance(token, (CharacterTokens, CommentToken)):
        return self._mode_in_body(token)

    if isinstance(token, Tag):
        tag_name = token.name
        handled_in_head = tag_name in (
            "base",
            "basefont",
            "bgsound",
            "link",
            "meta",
            "noframes",
            "script",
            "style",
            "template",
            "title",
        )

        if token.kind == Tag.START:
            # Work out which mode this start tag implies for the template body.
            if tag_name in ("caption", "colgroup", "tbody", "tfoot", "thead"):
                next_mode = InsertionMode.IN_TABLE
            elif tag_name == "col":
                next_mode = InsertionMode.IN_COLUMN_GROUP
            elif tag_name == "tr":
                next_mode = InsertionMode.IN_TABLE_BODY
            elif tag_name in ("td", "th"):
                next_mode = InsertionMode.IN_ROW
            elif not handled_in_head:
                # Every other start tag is ordinary body content.
                next_mode = InsertionMode.IN_BODY
            else:
                next_mode = None

            if next_mode is not None:
                # Swap the current template mode for the chosen one and retry.
                self.template_modes.pop()
                self.template_modes.append(next_mode)
                self.mode = next_mode
                return ("reprocess", next_mode, token)

        if token.kind == Tag.END and tag_name == "template":
            return self._mode_in_head(token)
        if handled_in_head:
            return self._mode_in_head(token)
        # NOTE(review): any other end tag is dropped silently here; the spec
        # text for this mode describes a parse error for that case - confirm
        # whether the omission is intentional.
        return None

    if isinstance(token, EOFToken):
        # Walk the stack directly instead of using a scope helper, because a
        # table element would act as a scope boundary and hide the template.
        for open_node in self.open_elements:
            if open_node.name == "template":
                break
        else:
            return None

        self._parse_error("expected-closing-tag-but-got-eof", tag_name="template")
        # Close the template, then let the EOF be handled by the outer mode.
        self._pop_until_inclusive("template")
        self._clear_active_formatting_up_to_marker()
        # A template on the stack implies template_modes is non-empty.
        self.template_modes.pop()
        self._reset_insertion_mode()
        return ("reprocess", self.mode, token)

    return None
1682
+
1683
def _mode_after_body(self, token: Any) -> Any:
    """Dispatch one token while in the "after body" insertion mode.

    Whitespace is processed with the in-body handler without leaving this
    mode, comments are attached to the first element on the open-elements
    stack, and ``</html>`` moves the parser to AFTER_AFTER_BODY. Any other
    text or tag is handed back for reprocessing in IN_BODY.

    Returns ``None`` when the token was consumed, otherwise a
    ``("reprocess", InsertionMode.IN_BODY, token)`` tuple.
    """
    back_to_body = ("reprocess", InsertionMode.IN_BODY, token)

    if isinstance(token, CharacterTokens):
        if not is_all_whitespace(token.data):
            return back_to_body
        # In-body rules place the whitespace; the mode deliberately stays put.
        self._mode_in_body(token)
        return None

    if isinstance(token, CommentToken):
        root_element = self.open_elements[0]
        self._append_comment(token.data, parent=root_element)
        return None

    if isinstance(token, Tag):
        closes_html = token.kind == Tag.END and token.name == "html"
        if not closes_html:
            # Covers <html> start tags as well as every other tag.
            return back_to_body
        self.mode = InsertionMode.AFTER_AFTER_BODY
        return None

    assert isinstance(token, EOFToken), f"Unexpected token type: {type(token)}"
    return None
1703
+
1704
def _mode_after_after_body(self, token: Any) -> Any:
    """Dispatch one token while in the "after after body" insertion mode.

    Whitespace is processed with the in-body handler without leaving this
    mode. Comments go to the document, or to the ``html`` element on the stack
    when parsing a fragment. Everything else except EOF is handed back for
    reprocessing in IN_BODY; a parse error is recorded first unless the token
    is an ``<html>`` start tag.

    Returns ``None`` when the token was consumed, otherwise a
    ``("reprocess", InsertionMode.IN_BODY, token)`` tuple.
    """
    back_to_body = ("reprocess", InsertionMode.IN_BODY, token)

    if isinstance(token, CharacterTokens):
        if not is_all_whitespace(token.data):
            self._parse_error("unexpected-char-after-body")
            return back_to_body
        # In-body rules place the whitespace; the mode deliberately stays put.
        self._mode_in_body(token)
        return None

    if isinstance(token, CommentToken):
        if self.fragment_context is None:
            self._append_comment_to_document(token.data)
        else:
            # Fragment parsing always keeps an html element on the stack.
            root_element = self._find_last_on_stack("html")
            root_element.append_child(SimpleDomNode("#comment", data=token.data))
        return None

    if isinstance(token, Tag):
        is_html_start = token.kind == Tag.START and token.name == "html"
        if not is_html_start:
            self._parse_error("unexpected-token-after-body")
        return back_to_body

    assert isinstance(token, EOFToken), f"Unexpected token type: {type(token)}"
    return None
1730
+
1731
def _mode_in_frameset(self, token: Any) -> Any:
    """Dispatch one token while in the "in frameset" insertion mode.

    Only whitespace text is kept; comments are appended; ``frameset``,
    ``frame`` and ``noframes`` start tags insert elements; ``</frameset>``
    pops the current node and may move the parser to AFTER_FRAMESET. Any
    token not covered by those cases records
    ``unexpected-token-in-frameset`` and is dropped.

    Returns ``None`` when the token was consumed, or a
    ``("reprocess", InsertionMode.IN_BODY, token)`` tuple for ``<html>``.
    """
    if isinstance(token, CharacterTokens):
        # Non-whitespace characters are discarded in this mode.
        kept = "".join([ch for ch in token.data if ch in "\t\n\f\r "])
        if kept:
            self._append_text(kept)
        return None

    if isinstance(token, CommentToken):
        self._append_comment(token.data)
        return None

    if isinstance(token, Tag):
        tag_name = token.name
        if token.kind == Tag.START:
            if tag_name == "html":
                return ("reprocess", InsertionMode.IN_BODY, token)
            if tag_name == "frameset":
                self._insert_element(token, push=True)
                return None
            if tag_name == "frame":
                # Void element: push it, then take it straight off the stack.
                self._insert_element(token, push=True)
                self.open_elements.pop()
                return None
            if tag_name == "noframes":
                # Remember this mode so it can be restored once TEXT mode ends.
                self._insert_element(token, push=True)
                self.original_mode = self.mode
                self.mode = InsertionMode.TEXT
                return None
        elif token.kind == Tag.END and tag_name == "frameset":
            stack = self.open_elements
            if stack and stack[-1].name == "html":
                # Nothing to close when the current node is the root element.
                self._parse_error("unexpected-end-tag", tag_name=tag_name)
                return None
            stack.pop()
            if stack and stack[-1].name != "frameset":
                self.mode = InsertionMode.AFTER_FRAMESET
            return None

    if isinstance(token, EOFToken):
        stack = self.open_elements
        if stack and stack[-1].name != "html":
            self._parse_error("expected-closing-tag-but-got-eof", tag_name=stack[-1].name)
        return None

    self._parse_error("unexpected-token-in-frameset")
    return None
1772
+
1773
def _mode_after_frameset(self, token: Any) -> Any:
    """Dispatch one token while in the "after frameset" insertion mode.

    https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-afterframeset

    Only whitespace text is kept; comments are appended; ``</html>`` moves the
    parser to AFTER_AFTER_FRAMESET; ``<noframes>`` inserts the element and
    switches to TEXT mode; ``<html>`` is reprocessed with the in-body rules.
    EOF is accepted silently. Every other token is a parse error and is
    ignored.

    Returns ``None`` when the token was consumed or ignored, or a
    ``("reprocess", InsertionMode.IN_BODY, token)`` tuple for ``<html>``.
    """
    if isinstance(token, CharacterTokens):
        # Only whitespace characters are allowed; all others are dropped.
        whitespace = "".join(ch for ch in token.data if ch in "\t\n\f\r ")
        if whitespace:
            self._append_text(whitespace)
        return None
    if isinstance(token, CommentToken):
        self._append_comment(token.data)
        return None
    if isinstance(token, EOFToken):
        return None
    if isinstance(token, Tag):
        if token.kind == Tag.START and token.name == "html":
            return ("reprocess", InsertionMode.IN_BODY, token)
        if token.kind == Tag.END and token.name == "html":
            self.mode = InsertionMode.AFTER_AFTER_FRAMESET
            return None
        if token.kind == Tag.START and token.name == "noframes":
            # Insert noframes directly and remember this mode for TEXT restoration.
            self._insert_element(token, push=True)
            self.original_mode = self.mode
            self.mode = InsertionMode.TEXT
            return None
    # Anything else: "Parse error. Ignore the token." The token must not be
    # replayed through the in-frameset rules: that would insert a stray
    # <frame>/<frameset> under <html> and leave the parser in IN_FRAMESET.
    self._parse_error("unexpected-token-after-frameset")
    return None
1801
+
1802
def _mode_after_after_frameset(self, token: Any) -> Any:
    """Dispatch one token while in the "after after frameset" insertion mode.

    https://html.spec.whatwg.org/multipage/parsing.html#the-after-after-frameset-insertion-mode

    All-whitespace text is processed with the in-body handler without leaving
    this mode; comments are appended to the document; ``<noframes>`` inserts
    the element and switches to TEXT mode; ``<html>`` is reprocessed with the
    in-body rules. EOF is accepted silently. Every other token is a parse
    error and is ignored, so the insertion mode is left unchanged.

    Returns ``None`` when the token was consumed or ignored, or a
    ``("reprocess", InsertionMode.IN_BODY, token)`` tuple for ``<html>``.
    """
    if isinstance(token, CharacterTokens):
        if is_all_whitespace(token.data):
            # Whitespace uses the in-body rules, but the mode stays as it is.
            self._mode_in_body(token)
            return None
        # A run containing non-whitespace: report it once, keep only the
        # whitespace it contains, and stay in this mode so that later
        # comments are still attached to the document.
        self._parse_error("unexpected-token-after-after-frameset")
        whitespace = "".join(ch for ch in token.data if ch in "\t\n\f\r ")
        if whitespace:
            self._append_text(whitespace)
        return None
    if isinstance(token, CommentToken):
        self._append_comment_to_document(token.data)
        return None
    if isinstance(token, EOFToken):
        return None
    if isinstance(token, Tag):
        if token.kind == Tag.START and token.name == "html":
            return ("reprocess", InsertionMode.IN_BODY, token)
        if token.kind == Tag.START and token.name == "noframes":
            # Insert noframes directly and remember this mode for TEXT restoration.
            self._insert_element(token, push=True)
            self.original_mode = self.mode
            self.mode = InsertionMode.TEXT
            return None
    # Anything else: "Parse error. Ignore the token." The token must not be
    # replayed through the in-frameset rules: that would insert a stray
    # <frame>/<frameset> under <html> and leave the parser in IN_FRAMESET.
    self._parse_error("unexpected-token-after-after-frameset")
    return None
1830
+
1831
# Dispatch tables ---------------------------------------------------------
1832
+
1833
# One handler function per insertion mode, collected in a list. A list is
# positional, so the order of the entries matters to whatever indexes it.
# NOTE(review): presumably indexed by the InsertionMode value - confirm
# against the dispatcher before reordering or inserting entries.
_MODE_HANDLERS = [
    _mode_initial,
    _mode_before_html,
    _mode_before_head,
    _mode_in_head,
    _mode_in_head_noscript,
    _mode_after_head,
    _mode_text,
    _mode_in_body,
    _mode_after_body,
    _mode_after_after_body,
    _mode_in_table,
    _mode_in_table_text,
    _mode_in_caption,
    _mode_in_column_group,
    _mode_in_table_body,
    _mode_in_row,
    _mode_in_cell,
    _mode_in_frameset,
    _mode_after_frameset,
    _mode_after_after_frameset,
    _mode_in_select,
    _mode_in_template,
]
1857
+
1858
# Maps each token class to the in-body handler for that kind of token.
# NOTE(review): presumably looked up by the token's exact type from the
# in-body mode handler - confirm at the lookup site.
_BODY_TOKEN_HANDLERS = {
    CharacterTokens: _handle_characters_in_body,
    CommentToken: _handle_comment_in_body,
    Tag: _handle_tag_in_body,
    EOFToken: _handle_eof_in_body,
}
1864
+
1865
# Start-tag name -> in-body start-tag handler. Tags that share a handler are
# listed together; tags with a dedicated handler follow at the end.
_BODY_START_HANDLERS = {
    **dict.fromkeys(
        (
            "address",
            "article",
            "aside",
            "blockquote",
            "center",
            "details",
            "dialog",
            "dir",
            "div",
            "dl",
            "fieldset",
            "figcaption",
            "figure",
            "footer",
            "header",
            "hgroup",
            "main",
            "menu",
            "nav",
            "ol",
            "search",
            "section",
            "summary",
            "ul",
        ),
        _handle_body_start_block_with_p,
    ),
    **dict.fromkeys(
        ("b", "big", "code", "em", "font", "i", "nobr", "s", "small", "strike", "strong", "tt", "u"),
        _handle_body_start_formatting,
    ),
    **dict.fromkeys(
        ("base", "basefont", "bgsound", "link", "meta", "noframes", "script", "style", "template", "title"),
        _handle_body_start_in_head,
    ),
    **dict.fromkeys(("h1", "h2", "h3", "h4", "h5", "h6"), _handle_body_start_heading),
    **dict.fromkeys(
        ("colgroup", "tbody", "td", "tfoot", "th", "thead", "tr"),
        _handle_body_start_structure_ignored,
    ),
    **dict.fromkeys(
        ("area", "embed", "img", "keygen", "wbr"),
        _handle_body_start_void_with_formatting,
    ),
    **dict.fromkeys(("applet", "marquee", "object"), _handle_body_start_applet_like),
    **dict.fromkeys(("param", "source", "track"), _handle_body_start_simple_void),
    **dict.fromkeys(("col", "frame"), _handle_body_start_col_or_frame),
    **dict.fromkeys(("dd", "dt"), _handle_body_start_dd_dt),
    **dict.fromkeys(("plaintext", "xmp"), _handle_body_start_plaintext_xmp),
    **dict.fromkeys(("listing", "pre"), _handle_body_start_pre_listing),
    **dict.fromkeys(("rb", "rtc"), _handle_body_start_rb_rtc),
    **dict.fromkeys(("rp", "rt"), _handle_body_start_rp_rt),
    # Tags with a handler of their own.
    "a": _handle_body_start_a,
    "body": _handle_body_start_body,
    "br": _handle_body_start_br,
    "button": _handle_body_start_button,
    "caption": _handle_body_start_table_parse_error,
    "form": _handle_body_start_form,
    "frameset": _handle_body_start_frameset,
    "head": _handle_body_start_head,
    "html": _handle_body_start_html,
    "image": _handle_body_start_image,
    "input": _handle_body_start_input,
    "li": _handle_body_start_li,
    "math": _handle_body_start_math,
    "optgroup": _handle_body_start_optgroup,
    "option": _handle_body_start_option,
    "p": _handle_body_start_paragraph,
    "select": _handle_body_start_select,
    "svg": _handle_body_start_svg,
    "table": _handle_body_start_table,
    "textarea": _handle_body_start_textarea,
}
1970
# End-tag name -> in-body end-tag handler. Tags that share a handler are
# listed together; tags with a dedicated handler follow at the end.
_BODY_END_HANDLERS = {
    **dict.fromkeys(
        (
            "address",
            "article",
            "aside",
            "blockquote",
            "button",
            "center",
            "details",
            "dialog",
            "dir",
            "div",
            "dl",
            "fieldset",
            "figcaption",
            "figure",
            "footer",
            "header",
            "hgroup",
            "listing",
            "main",
            "menu",
            "nav",
            "ol",
            "pre",
            "search",
            "section",
            "summary",
            "table",
            "ul",
        ),
        _handle_body_end_block,
    ),
    **dict.fromkeys(("h1", "h2", "h3", "h4", "h5", "h6"), _handle_body_end_heading),
    **dict.fromkeys(("applet", "marquee", "object"), _handle_body_end_applet_like),
    **dict.fromkeys(("dd", "dt"), _handle_body_end_dd_dt),
    # Tags with a handler of their own.
    "body": _handle_body_end_body,
    "form": _handle_body_end_form,
    "html": _handle_body_end_html,
    "li": _handle_body_end_li,
    "p": _handle_body_end_p,
    "template": _handle_body_end_template,
}