justhtml 0.24.0__py3-none-any.whl → 0.38.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of justhtml might be problematic. Click here for more details.

justhtml/selector.py CHANGED
@@ -651,7 +651,9 @@ class SelectorMatcher:
651
651
  attr_value: str | None = None
652
652
  for name, value in attrs.items():
653
653
  if name.lower() == attr_name:
654
- attr_value = value
654
+ # Attributes can be boolean (represented as None in JustHTML).
655
+ # For selector matching, presence should still count.
656
+ attr_value = "" if value is None else str(value)
655
657
  break
656
658
 
657
659
  if attr_value is None:
justhtml/serialize.py CHANGED
@@ -4,17 +4,22 @@
4
4
 
5
5
  from __future__ import annotations
6
6
 
7
+ import re
7
8
  from typing import Any
8
9
 
9
- from .constants import FOREIGN_ATTRIBUTE_ADJUSTMENTS, SPECIAL_ELEMENTS, VOID_ELEMENTS
10
- from .sanitize import DEFAULT_DOCUMENT_POLICY, DEFAULT_POLICY, SanitizationPolicy, sanitize
10
+ from .constants import FOREIGN_ATTRIBUTE_ADJUSTMENTS, SPECIAL_ELEMENTS, VOID_ELEMENTS, WHITESPACE_PRESERVING_ELEMENTS
11
+
12
+ # Matches characters that prevent an attribute value from being unquoted.
13
+ # Note: This matches the logic of the previous loop-based implementation.
14
+ # It checks for space characters, quotes, equals sign, and greater-than.
15
+ _UNQUOTED_ATTR_VALUE_INVALID = re.compile(r'[ \t\n\f\r"\'=>]')
11
16
 
12
17
 
13
18
  def _escape_text(text: str | None) -> str:
14
19
  if not text:
15
20
  return ""
16
21
  # Minimal, but matches html5lib serializer expectations in core cases.
17
- return str(text).replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
22
+ return text.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
18
23
 
19
24
 
20
25
  def _choose_attr_quote(value: str | None, forced_quote_char: str | None = None) -> str:
@@ -22,7 +27,7 @@ def _choose_attr_quote(value: str | None, forced_quote_char: str | None = None)
22
27
  return forced_quote_char
23
28
  if value is None:
24
29
  return '"'
25
- value = str(value)
30
+ # value is assumed to be a string
26
31
  if '"' in value and "'" not in value:
27
32
  return "'"
28
33
  return '"'
@@ -31,7 +36,7 @@ def _choose_attr_quote(value: str | None, forced_quote_char: str | None = None)
31
36
  def _escape_attr_value(value: str | None, quote_char: str, *, escape_lt_in_attrs: bool = False) -> str:
32
37
  if value is None:
33
38
  return ""
34
- value = str(value)
39
+ # value is assumed to be a string
35
40
  value = value.replace("&", "&amp;")
36
41
  if escape_lt_in_attrs:
37
42
  value = value.replace("<", "&lt;")
@@ -44,15 +49,8 @@ def _escape_attr_value(value: str | None, quote_char: str, *, escape_lt_in_attrs
44
49
  def _can_unquote_attr_value(value: str | None) -> bool:
45
50
  if value is None:
46
51
  return False
47
- value = str(value)
48
- for ch in value:
49
- if ch == ">":
50
- return False
51
- if ch in {'"', "'", "="}:
52
- return False
53
- if ch in {" ", "\t", "\n", "\f", "\r"}:
54
- return False
55
- return True
52
+ # Optimization: use regex instead of loop
53
+ return not _UNQUOTED_ATTR_VALUE_INVALID.search(value)
56
54
 
57
55
 
58
56
  def _serializer_minimize_attr_value(name: str, value: str | None, minimize_boolean_attributes: bool) -> bool:
@@ -60,7 +58,9 @@ def _serializer_minimize_attr_value(name: str, value: str | None, minimize_boole
60
58
  return False
61
59
  if value is None or value == "":
62
60
  return True
63
- return str(value).lower() == str(name).lower()
61
+ if value == name:
62
+ return True
63
+ return value.lower() == name
64
64
 
65
65
 
66
66
  def serialize_start_tag(
@@ -86,7 +86,8 @@ def serialize_start_tag(
86
86
  parts.extend([" ", key, '=""'])
87
87
  continue
88
88
 
89
- value_str = str(value)
89
+ # value is guaranteed to be a string here because attrs is dict[str, str | None]
90
+ value_str = value
90
91
  if value_str == "":
91
92
  parts.extend([" ", key, '=""'])
92
93
  continue
@@ -118,15 +119,8 @@ def to_html(
118
119
  indent_size: int = 2,
119
120
  *,
120
121
  pretty: bool = True,
121
- safe: bool = True,
122
- policy: SanitizationPolicy | None = None,
123
122
  ) -> str:
124
123
  """Convert node to HTML string."""
125
- if safe:
126
- if policy is None and node.name == "#document":
127
- node = sanitize(node, policy=DEFAULT_DOCUMENT_POLICY)
128
- else:
129
- node = sanitize(node, policy=policy or DEFAULT_POLICY)
130
124
  if node.name == "#document":
131
125
  # Document root - just render children
132
126
  parts: list[str] = []
@@ -136,12 +130,6 @@ def to_html(
136
130
  return _node_to_html(node, indent, indent_size, pretty, in_pre=False)
137
131
 
138
132
 
139
- _PREFORMATTED_ELEMENTS: set[str] = {"pre", "textarea", "code"}
140
-
141
- # Elements whose text content must not be normalized (e.g. scripts/styles).
142
- _RAWTEXT_ELEMENTS: set[str] = {"script", "style"}
143
-
144
-
145
133
  def _collapse_html_whitespace(text: str) -> str:
146
134
  """Collapse HTML whitespace runs to a single space and trim edges.
147
135
 
@@ -151,20 +139,26 @@ def _collapse_html_whitespace(text: str) -> str:
151
139
  if not text:
152
140
  return ""
153
141
 
154
- parts: list[str] = []
155
- in_whitespace = False
156
- for ch in text:
157
- if ch in {" ", "\t", "\n", "\f", "\r"}:
158
- if not in_whitespace:
159
- parts.append(" ")
160
- in_whitespace = True
161
- continue
162
-
163
- parts.append(ch)
142
+ # Optimization: split() handles whitespace collapsing efficiently.
143
+ # Note: split() treats \v as whitespace, which is not HTML whitespace.
144
+ # But \v is extremely rare in HTML.
145
+ if "\v" in text:
146
+ parts: list[str] = []
164
147
  in_whitespace = False
148
+ for ch in text:
149
+ if ch in {" ", "\t", "\n", "\f", "\r"}:
150
+ if not in_whitespace:
151
+ parts.append(" ")
152
+ in_whitespace = True
153
+ continue
154
+
155
+ parts.append(ch)
156
+ in_whitespace = False
165
157
 
166
- collapsed = "".join(parts)
167
- return collapsed.strip(" ")
158
+ collapsed = "".join(parts)
159
+ return collapsed.strip(" ")
160
+
161
+ return " ".join(text.split())
168
162
 
169
163
 
170
164
  def _normalize_formatting_whitespace(text: str) -> str:
@@ -226,6 +220,149 @@ def _is_whitespace_text_node(node: Any) -> bool:
226
220
  return node.name == "#text" and (node.data or "").strip() == ""
227
221
 
228
222
 
223
+ def _is_blocky_element(node: Any) -> bool:
224
+ # Treat elements as block-ish if they are block-level *or* contain any block-level
225
+ # descendants. This keeps pretty-printing readable for constructs like <a><div>...</div></a>.
226
+ try:
227
+ name = node.name
228
+ except AttributeError:
229
+ return False
230
+ if name in {"#text", "#comment", "!doctype"}:
231
+ return False
232
+ if name in SPECIAL_ELEMENTS:
233
+ return True
234
+
235
+ try:
236
+ children = node.children or []
237
+ except AttributeError:
238
+ return False
239
+ if not children:
240
+ return False
241
+
242
+ stack: list[Any] = list(children)
243
+ while stack:
244
+ child = stack.pop()
245
+ if child is None:
246
+ continue
247
+ child_name = child.name
248
+ if child_name in SPECIAL_ELEMENTS:
249
+ return True
250
+ if child_name in {"#text", "#comment", "!doctype"}:
251
+ continue
252
+ grand_children = child.children
253
+ if grand_children:
254
+ stack.extend(grand_children)
255
+
256
+ return False
257
+
258
+
259
+ _LAYOUT_BLOCK_ELEMENTS = {
260
+ "address",
261
+ "article",
262
+ "aside",
263
+ "blockquote",
264
+ "body",
265
+ "caption",
266
+ "center",
267
+ "dd",
268
+ "details",
269
+ "dialog",
270
+ "dir",
271
+ "div",
272
+ "dl",
273
+ "dt",
274
+ "fieldset",
275
+ "figcaption",
276
+ "figure",
277
+ "footer",
278
+ "form",
279
+ "h1",
280
+ "h2",
281
+ "h3",
282
+ "h4",
283
+ "h5",
284
+ "h6",
285
+ "header",
286
+ "hgroup",
287
+ "hr",
288
+ "html",
289
+ "iframe",
290
+ "li",
291
+ "listing",
292
+ "main",
293
+ "marquee",
294
+ "menu",
295
+ "nav",
296
+ "noframes",
297
+ "noscript",
298
+ "ol",
299
+ "p",
300
+ "plaintext",
301
+ "pre",
302
+ "search",
303
+ "section",
304
+ "summary",
305
+ "table",
306
+ "tbody",
307
+ "td",
308
+ "tfoot",
309
+ "th",
310
+ "thead",
311
+ "tr",
312
+ "ul",
313
+ }
314
+
315
+
316
+ _FORMAT_SEP = object()
317
+
318
+
319
+ def _is_layout_blocky_element(node: Any) -> bool:
320
+ # Similar to _is_blocky_element(), but limited to actual layout blocks.
321
+ # This avoids turning inline-ish "special" elements like <script> into
322
+ # multiline pretty-print breaks in contexts like <p>.
323
+ try:
324
+ name = node.name
325
+ except AttributeError:
326
+ return False
327
+ if name in {"#text", "#comment", "!doctype"}:
328
+ return False
329
+ if name in _LAYOUT_BLOCK_ELEMENTS:
330
+ return True
331
+
332
+ try:
333
+ children = node.children or []
334
+ except AttributeError:
335
+ return False
336
+ if not children:
337
+ return False
338
+
339
+ stack: list[Any] = list(children)
340
+ while stack:
341
+ child = stack.pop()
342
+ if child is None:
343
+ continue
344
+ child_name = child.name
345
+ if child_name in _LAYOUT_BLOCK_ELEMENTS:
346
+ return True
347
+ if child_name in {"#text", "#comment", "!doctype"}:
348
+ continue
349
+ grand_children = child.children
350
+ if grand_children:
351
+ stack.extend(grand_children)
352
+
353
+ return False
354
+
355
+
356
+ def _is_formatting_whitespace_text(data: str) -> bool:
357
+ # Formatting whitespace is something users typically don't intend to preserve
358
+ # exactly (e.g. newlines/indentation, or large runs of spaces).
359
+ if not data:
360
+ return False
361
+ if "\n" in data or "\r" in data or "\t" in data or "\f" in data:
362
+ return True
363
+ return len(data) > 2
364
+
365
+
229
366
  def _should_pretty_indent_children(children: list[Any]) -> bool:
230
367
  for child in children:
231
368
  if child is None:
@@ -243,26 +380,18 @@ def _should_pretty_indent_children(children: list[Any]) -> bool:
243
380
  return True
244
381
  if len(element_children) == 1:
245
382
  only_child = element_children[0]
246
- if only_child.name in SPECIAL_ELEMENTS:
383
+ if _is_blocky_element(only_child):
247
384
  return True
248
- if only_child.name == "a":
249
- # If an anchor wraps block-ish content (valid HTML5), treat it as block-ish
250
- # for pretty-printing so the parent can indent it on its own line.
251
- for grandchild in only_child.children or []:
252
- if grandchild is None:
253
- continue
254
- if grandchild.name in SPECIAL_ELEMENTS:
255
- return True
256
385
  return False
257
386
 
258
387
  # Safe indentation rule: only insert inter-element whitespace when we won't
259
388
  # be placing it between two adjacent inline/phrasing elements.
260
- prev_is_special = element_children[0].name in SPECIAL_ELEMENTS
389
+ prev_is_blocky = _is_blocky_element(element_children[0])
261
390
  for child in element_children[1:]:
262
- current_is_special = child.name in SPECIAL_ELEMENTS
263
- if not prev_is_special and not current_is_special:
391
+ current_is_blocky = _is_blocky_element(child)
392
+ if not prev_is_blocky and not current_is_blocky:
264
393
  return False
265
- prev_is_special = current_is_special
394
+ prev_is_blocky = current_is_blocky
266
395
  return True
267
396
 
268
397
 
@@ -270,7 +399,7 @@ def _node_to_html(node: Any, indent: int = 0, indent_size: int = 2, pretty: bool
270
399
  """Helper to convert a node to HTML."""
271
400
  prefix = " " * (indent * indent_size) if pretty and not in_pre else ""
272
401
  name: str = node.name
273
- content_pre = in_pre or name in _PREFORMATTED_ELEMENTS
402
+ content_pre = in_pre or name in WHITESPACE_PRESERVING_ELEMENTS
274
403
  newline = "\n" if pretty and not content_pre else ""
275
404
 
276
405
  # Text node
@@ -320,14 +449,19 @@ def _node_to_html(node: Any, indent: int = 0, indent_size: int = 2, pretty: bool
320
449
  return f"{prefix}{open_tag}{serialize_end_tag(name)}"
321
450
 
322
451
  # Check if all children are text-only (inline rendering)
323
- all_text = all(c.name == "#text" for c in children)
452
+ all_text = True
453
+ for child in children:
454
+ if child is None:
455
+ continue
456
+ if child.name != "#text":
457
+ all_text = False
458
+ break
324
459
 
325
460
  if all_text and pretty and not content_pre:
326
461
  # Serializer controls sanitization at the to_html() entry point; avoid
327
462
  # implicit re-sanitization during rendering.
328
- text_content = node.to_text(separator="", strip=False, safe=False)
329
- if name not in _RAWTEXT_ELEMENTS:
330
- text_content = _collapse_html_whitespace(text_content)
463
+ text_content = node.to_text(separator="", strip=False)
464
+ text_content = _collapse_html_whitespace(text_content)
331
465
  return f"{prefix}{open_tag}{_escape_text(text_content)}{serialize_end_tag(name)}"
332
466
 
333
467
  if pretty and content_pre:
@@ -338,11 +472,204 @@ def _node_to_html(node: Any, indent: int = 0, indent_size: int = 2, pretty: bool
338
472
  )
339
473
  return f"{prefix}{open_tag}{inner}{serialize_end_tag(name)}"
340
474
 
475
+ if pretty and not content_pre and name in SPECIAL_ELEMENTS:
476
+ # For block-ish containers that only have element children (and/or
477
+ # whitespace-only text nodes), prefer a multiline layout for readability
478
+ # even when children are inline elements.
479
+ can_indent = True
480
+ for child in children:
481
+ if child is None:
482
+ continue
483
+ if child.name == "#comment":
484
+ can_indent = False
485
+ break
486
+ if child.name == "#text" and (child.data or "").strip():
487
+ can_indent = False
488
+ break
489
+
490
+ if can_indent:
491
+ inner_lines: list[str] = []
492
+ for child in children:
493
+ if child is None:
494
+ continue
495
+ if _is_whitespace_text_node(child):
496
+ continue
497
+ child_html = _node_to_html(child, indent + 1, indent_size, pretty, in_pre=content_pre)
498
+ if child_html:
499
+ inner_lines.append(child_html)
500
+
501
+ if inner_lines:
502
+ parts = [f"{prefix}{open_tag}"]
503
+ parts.extend(inner_lines)
504
+ parts.append(f"{prefix}{serialize_end_tag(name)}")
505
+ return "\n".join(parts)
506
+
507
+ # Smart pretty-printing: if the author already inserted formatting whitespace
508
+ # between siblings, we can split into "inline runs" and put each run on its
509
+ # own line without introducing new inter-token whitespace.
510
+ has_comment = any(child is not None and child.name == "#comment" for child in children)
511
+ if not has_comment:
512
+ non_none_children: list[Any] = [child for child in children if child is not None]
513
+
514
+ # Only enable this mode if there is at least one formatting whitespace text node
515
+ # between non-whitespace siblings.
516
+ has_separator = False
517
+ for child in non_none_children[1:-1]:
518
+ if child.name != "#text":
519
+ continue
520
+ data = child.data or ""
521
+ if data.strip() != "":
522
+ continue
523
+ if _is_formatting_whitespace_text(data):
524
+ has_separator = True
525
+ break
526
+
527
+ if has_separator:
528
+ # Build runs by splitting on formatting whitespace text nodes.
529
+ # Keep small spacing nodes (" " or " ") inside runs.
530
+ items: list[Any] = []
531
+ last_was_sep = False
532
+ for child in non_none_children:
533
+ if child.name == "#text":
534
+ data = child.data or ""
535
+ if data.strip() == "" and _is_formatting_whitespace_text(data):
536
+ if not last_was_sep:
537
+ items.append(_FORMAT_SEP)
538
+ last_was_sep = True
539
+ continue
540
+ items.append(child)
541
+ last_was_sep = False
542
+
543
+ while items and items[0] is _FORMAT_SEP:
544
+ items.pop(0)
545
+ while items and items[-1] is _FORMAT_SEP:
546
+ items.pop()
547
+
548
+ runs: list[list[Any]] = []
549
+ current_run: list[Any] = []
550
+ for item in items:
551
+ if item is _FORMAT_SEP:
552
+ runs.append(current_run)
553
+ current_run = []
554
+ continue
555
+ current_run.append(item)
556
+ runs.append(current_run)
557
+ runs = [run for run in runs if run]
558
+
559
+ # Only apply if we can render each run either as a single blocky element
560
+ # (possibly multiline) or as a single-line inline run.
561
+ smart_lines: list[str] = []
562
+ can_apply = True
563
+ for run in runs:
564
+ blocky_elements = [c for c in run if c.name not in {"#text", "#comment"} and _is_blocky_element(c)]
565
+ if blocky_elements and len(run) != 1:
566
+ can_apply = False
567
+ break
568
+
569
+ if len(run) == 1 and run[0].name != "#text":
570
+ child_html = _node_to_html(run[0], indent + 1, indent_size, pretty=True, in_pre=content_pre)
571
+ smart_lines.append(child_html)
572
+ continue
573
+
574
+ # Inline run: render on one line.
575
+ run_parts: list[str] = []
576
+ for c in run:
577
+ if c.name == "#text":
578
+ data = c.data or ""
579
+ if not data.strip():
580
+ # Formatting whitespace never appears inside runs (it is used as a separator).
581
+ # Preserve intentional tiny spacing.
582
+ run_parts.append(data)
583
+ continue
584
+
585
+ run_parts.append(_escape_text(_normalize_formatting_whitespace(data)))
586
+ continue
587
+
588
+ # Render inline elements without their own leading indentation.
589
+ child_html = _node_to_html(c, 0, indent_size, pretty=True, in_pre=content_pre)
590
+ run_parts.append(child_html)
591
+
592
+ smart_lines.append(f"{' ' * ((indent + 1) * indent_size)}{''.join(run_parts)}")
593
+
594
+ if can_apply and smart_lines:
595
+ return f"{prefix}{open_tag}\n" + "\n".join(smart_lines) + f"\n{prefix}{serialize_end_tag(name)}"
596
+
341
597
  if pretty and not content_pre and not _should_pretty_indent_children(children):
342
598
  # For block-ish elements that contain only element children and whitespace-only
343
599
  # text nodes, we can still format each child on its own line (only when there
344
600
  # is already whitespace separating element siblings).
345
601
  if name in SPECIAL_ELEMENTS:
602
+ # Mixed content in block-ish containers: if we encounter a blocky child
603
+ # (e.g. <ul>) adjacent to inline text, printing everything on one line
604
+ # both hurts readability and can lose indentation inside the block subtree.
605
+ # In that case, put inline runs and blocky children on their own lines.
606
+ has_comment = any(child is not None and child.name == "#comment" for child in children)
607
+ if not has_comment:
608
+ has_blocky_child = any(
609
+ child is not None and child.name not in {"#text", "#comment"} and _is_layout_blocky_element(child)
610
+ for child in children
611
+ )
612
+ has_non_whitespace_text = any(
613
+ child is not None and child.name == "#text" and (child.data or "").strip() for child in children
614
+ )
615
+
616
+ if has_blocky_child and has_non_whitespace_text:
617
+ mixed_multiline_lines: list[str] = []
618
+ inline_parts: list[str] = []
619
+
620
+ mixed_first_non_none_index: int | None = None
621
+ mixed_last_non_none_index: int | None = None
622
+ for i, child in enumerate(children):
623
+ if child is None:
624
+ continue
625
+ if mixed_first_non_none_index is None:
626
+ mixed_first_non_none_index = i
627
+ mixed_last_non_none_index = i
628
+
629
+ def flush_inline() -> None:
630
+ if not inline_parts:
631
+ return
632
+ line = "".join(inline_parts).strip(" ")
633
+ inline_parts.clear()
634
+ if line:
635
+ mixed_multiline_lines.append(f"{' ' * ((indent + 1) * indent_size)}{line}")
636
+
637
+ for i, child in enumerate(children):
638
+ if child is None:
639
+ continue
640
+
641
+ if child.name == "#text":
642
+ data = child.data or ""
643
+ if not data.strip():
644
+ # Drop leading/trailing formatting whitespace.
645
+ if i == mixed_first_non_none_index or i == mixed_last_non_none_index:
646
+ continue
647
+ # Preserve intentional small spacing, but treat formatting whitespace
648
+ # as a separator between inline runs (new line).
649
+ if "\n" in data or "\r" in data or "\t" in data or len(data) > 2:
650
+ flush_inline()
651
+ else:
652
+ inline_parts.append(data)
653
+ continue
654
+
655
+ data = _normalize_formatting_whitespace(data)
656
+ inline_parts.append(_escape_text(data))
657
+ continue
658
+
659
+ if _is_layout_blocky_element(child):
660
+ flush_inline()
661
+ mixed_multiline_lines.append(
662
+ _node_to_html(child, indent + 1, indent_size, pretty=True, in_pre=content_pre)
663
+ )
664
+ continue
665
+
666
+ # Inline element: keep it in the current line without leading indentation.
667
+ inline_parts.append(_node_to_html(child, 0, indent_size, pretty=True, in_pre=content_pre))
668
+
669
+ flush_inline()
670
+ inner = "\n".join(line for line in mixed_multiline_lines if line)
671
+ return f"{prefix}{open_tag}\n{inner}\n{prefix}{serialize_end_tag(name)}"
672
+
346
673
  has_comment = False
347
674
  has_element = False
348
675
  has_whitespace_between_elements = False
@@ -388,32 +715,32 @@ def _node_to_html(node: Any, indent: int = 0, indent_size: int = 2, pretty: bool
388
715
  break
389
716
 
390
717
  if has_element and has_whitespace_between_elements and not has_comment and can_indent_non_whitespace_text:
391
- inner_lines: list[str] = []
718
+ element_multiline_lines: list[str] = []
392
719
  for child in children:
393
720
  if child is None:
394
721
  continue
395
722
  if child.name == "#text":
396
723
  text = _collapse_html_whitespace(child.data or "")
397
724
  if text:
398
- inner_lines.append(f"{' ' * ((indent + 1) * indent_size)}{_escape_text(text)}")
725
+ element_multiline_lines.append(f"{' ' * ((indent + 1) * indent_size)}{_escape_text(text)}")
399
726
  continue
400
727
  child_html = _node_to_html(child, indent + 1, indent_size, pretty=True, in_pre=content_pre)
401
728
  if child_html:
402
- inner_lines.append(child_html)
403
- if inner_lines:
404
- inner = "\n".join(inner_lines)
729
+ element_multiline_lines.append(child_html)
730
+ if element_multiline_lines:
731
+ inner = "\n".join(element_multiline_lines)
405
732
  return f"{prefix}{open_tag}\n{inner}\n{prefix}{serialize_end_tag(name)}"
406
733
 
407
734
  inner_parts: list[str] = []
408
735
 
409
- first_non_none_index: int | None = None
410
- last_non_none_index: int | None = None
736
+ compact_first_non_none_index: int | None = None
737
+ compact_last_non_none_index: int | None = None
411
738
  for i, child in enumerate(children):
412
739
  if child is None:
413
740
  continue
414
- if first_non_none_index is None:
415
- first_non_none_index = i
416
- last_non_none_index = i
741
+ if compact_first_non_none_index is None:
742
+ compact_first_non_none_index = i
743
+ compact_last_non_none_index = i
417
744
 
418
745
  for i, child in enumerate(children):
419
746
  if child is None:
@@ -423,15 +750,14 @@ def _node_to_html(node: Any, indent: int = 0, indent_size: int = 2, pretty: bool
423
750
  data = child.data or ""
424
751
  if not data.strip():
425
752
  # Drop leading/trailing formatting whitespace in compact mode.
426
- if i == first_non_none_index or i == last_non_none_index:
753
+ if i == compact_first_non_none_index or i == compact_last_non_none_index:
427
754
  continue
428
755
  # Preserve intentional small spacing, but collapse large formatting gaps.
429
756
  if "\n" in data or "\r" in data or "\t" in data or len(data) > 2:
430
757
  inner_parts.append(" ")
431
758
  continue
432
759
 
433
- if not content_pre and name not in _RAWTEXT_ELEMENTS:
434
- data = _normalize_formatting_whitespace(data)
760
+ data = _normalize_formatting_whitespace(data)
435
761
  child_html = _escape_text(data) if data else ""
436
762
  else:
437
763
  # Even when we can't safely insert whitespace *between* siblings, we can