justhtml 0.12.0__py3-none-any.whl → 0.24.0__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Potentially problematic release.


This version of justhtml might be problematic. Click here for more details.

justhtml/node.py CHANGED
@@ -1,11 +1,14 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  from typing import TYPE_CHECKING, Any
4
+ from urllib.parse import quote
4
5
 
6
+ from .sanitize import sanitize
5
7
  from .selector import query
6
8
  from .serialize import to_html
7
9
 
8
10
  if TYPE_CHECKING:
11
+ from .sanitize import SanitizationPolicy
9
12
  from .tokens import Doctype
10
13
 
11
14
 
@@ -43,6 +46,30 @@ def _markdown_code_span(s: str | None) -> str:
43
46
  return f"{fence}{s}{fence}"
44
47
 
45
48
 
49
+ def _markdown_link_destination(url: str) -> str:
50
+ """Return a Markdown-safe link destination.
51
+
52
+ We primarily care about avoiding Markdown formatting injection and broken
53
+ parsing for URLs that contain whitespace or parentheses.
54
+
55
+ CommonMark supports destinations wrapped in angle brackets:
56
+ `[text](<https://example.com/a(b)c>)`
57
+ """
58
+
59
+ u = (url or "").strip()
60
+ if not u:
61
+ return ""
62
+
63
+ # If the destination contains characters that can terminate or confuse
64
+ # the Markdown destination parser, wrap in <...> and percent-encode
65
+ # whitespace and angle brackets.
66
+ if any(ch in u for ch in (" ", "\t", "\n", "\r", "(", ")", "<", ">")):
67
+ u = quote(u, safe=":/?#[]@!$&'*+,;=%-._~()")
68
+ return f"<{u}>"
69
+
70
+ return u
71
+
72
+
46
73
  class _MarkdownBuilder:
47
74
  __slots__ = ("_buf", "_newline_count", "_pending_space")
48
75
 
@@ -133,29 +160,45 @@ NodeType = "SimpleDomNode | ElementNode | TemplateNode | TextNode"
133
160
 
134
161
 
135
162
  def _to_text_collect(node: Any, parts: list[str], strip: bool) -> None:
136
- name: str = node.name
137
-
138
- if name == "#text":
139
- data: str | None = node.data
140
- if not data:
141
- return
142
- if strip:
143
- data = data.strip()
163
+ # Iterative traversal avoids recursion overhead on large documents.
164
+ stack: list[Any] = [node]
165
+ while stack:
166
+ current = stack.pop()
167
+ name: str = current.name
168
+
169
+ if name == "#text":
170
+ data: str | None = current.data
144
171
  if not data:
145
- return
146
- parts.append(data)
147
- return
172
+ continue
173
+ if strip:
174
+ data = data.strip()
175
+ if not data:
176
+ continue
177
+ parts.append(data)
178
+ continue
148
179
 
149
- if node.children:
150
- for child in node.children:
151
- _to_text_collect(child, parts, strip=strip)
180
+ # Preserve the same traversal order as the recursive implementation:
181
+ # children first, then template content.
182
+ if type(current) is TemplateNode and current.template_content:
183
+ stack.append(current.template_content)
152
184
 
153
- if isinstance(node, ElementNode) and node.template_content:
154
- _to_text_collect(node.template_content, parts, strip=strip)
185
+ children = current.children
186
+ if children:
187
+ stack.extend(reversed(children))
155
188
 
156
189
 
157
190
  class SimpleDomNode:
158
- __slots__ = ("attrs", "children", "data", "name", "namespace", "parent")
191
+ __slots__ = (
192
+ "_origin_col",
193
+ "_origin_line",
194
+ "_origin_pos",
195
+ "attrs",
196
+ "children",
197
+ "data",
198
+ "name",
199
+ "namespace",
200
+ "parent",
201
+ )
159
202
 
160
203
  name: str
161
204
  parent: SimpleDomNode | ElementNode | TemplateNode | None
@@ -163,6 +206,9 @@ class SimpleDomNode:
163
206
  children: list[Any] | None
164
207
  data: str | Doctype | None
165
208
  namespace: str | None
209
+ _origin_pos: int | None
210
+ _origin_line: int | None
211
+ _origin_col: int | None
166
212
 
167
213
  def __init__(
168
214
  self,
@@ -174,6 +220,9 @@ class SimpleDomNode:
174
220
  self.name = name
175
221
  self.parent = None
176
222
  self.data = data
223
+ self._origin_pos = None
224
+ self._origin_line = None
225
+ self._origin_col = None
177
226
 
178
227
  if name.startswith("#") or name == "!doctype":
179
228
  self.namespace = namespace
@@ -193,14 +242,41 @@ class SimpleDomNode:
193
242
  self.children.append(node)
194
243
  node.parent = self
195
244
 
245
+ @property
246
+ def origin_offset(self) -> int | None:
247
+ """Best-effort origin offset (0-indexed) in the source HTML, if known."""
248
+ return self._origin_pos
249
+
250
+ @property
251
+ def origin_line(self) -> int | None:
252
+ return self._origin_line
253
+
254
+ @property
255
+ def origin_col(self) -> int | None:
256
+ return self._origin_col
257
+
258
+ @property
259
+ def origin_location(self) -> tuple[int, int] | None:
260
+ if self._origin_line is None or self._origin_col is None:
261
+ return None
262
+ return (self._origin_line, self._origin_col)
263
+
196
264
  def remove_child(self, node: Any) -> None:
197
265
  if self.children is not None:
198
266
  self.children.remove(node)
199
267
  node.parent = None
200
268
 
201
- def to_html(self, indent: int = 0, indent_size: int = 2, pretty: bool = True) -> str:
269
+ def to_html(
270
+ self,
271
+ indent: int = 0,
272
+ indent_size: int = 2,
273
+ pretty: bool = True,
274
+ *,
275
+ safe: bool = True,
276
+ policy: SanitizationPolicy | None = None,
277
+ ) -> str:
202
278
  """Convert node to HTML string."""
203
- return to_html(self, indent, indent_size, pretty=pretty)
279
+ return to_html(self, indent, indent_size, pretty=pretty, safe=safe, policy=policy)
204
280
 
205
281
  def query(self, selector: str) -> list[Any]:
206
282
  """
@@ -232,27 +308,43 @@ class SimpleDomNode:
232
308
  return ""
233
309
  return ""
234
310
 
235
- def to_text(self, separator: str = " ", strip: bool = True) -> str:
311
+ def to_text(
312
+ self,
313
+ separator: str = " ",
314
+ strip: bool = True,
315
+ *,
316
+ safe: bool = True,
317
+ policy: SanitizationPolicy | None = None,
318
+ ) -> str:
236
319
  """Return the concatenated text of this node's descendants.
237
320
 
238
321
  - `separator` controls how text nodes are joined (default: a single space).
239
322
  - `strip=True` strips each text node and drops empty segments.
323
+ - `safe=True` sanitizes untrusted HTML before extracting text.
324
+ - `policy` overrides the default sanitization policy.
240
325
 
241
326
  Template element contents are included via `template_content`.
242
327
  """
328
+ node: Any = sanitize(self, policy=policy) if safe else self
243
329
  parts: list[str] = []
244
- _to_text_collect(self, parts, strip=strip)
330
+ _to_text_collect(node, parts, strip=strip)
245
331
  if not parts:
246
332
  return ""
247
333
  return separator.join(parts)
248
334
 
249
- def to_markdown(self) -> str:
335
+ def to_markdown(self, *, safe: bool = True, policy: SanitizationPolicy | None = None) -> str:
250
336
  """Return a GitHub Flavored Markdown representation of this subtree.
251
337
 
252
338
  This is a pragmatic HTML->Markdown converter intended for readability.
253
339
  - Tables and images are preserved as raw HTML.
254
340
  - Unknown elements fall back to rendering their children.
255
341
  """
342
+ if safe:
343
+ node = sanitize(self, policy=policy)
344
+ builder = _MarkdownBuilder()
345
+ _to_markdown_walk(node, builder, preserve_whitespace=False, list_depth=0)
346
+ return builder.finish()
347
+
256
348
  builder = _MarkdownBuilder()
257
349
  _to_markdown_walk(self, builder, preserve_whitespace=False, list_depth=0)
258
350
  return builder.finish()
@@ -329,6 +421,9 @@ class SimpleDomNode:
329
421
  self.data,
330
422
  self.namespace,
331
423
  )
424
+ clone._origin_pos = self._origin_pos
425
+ clone._origin_line = self._origin_line
426
+ clone._origin_col = self._origin_col
332
427
  if deep and self.children:
333
428
  for child in self.children:
334
429
  clone.append_child(child.clone_node(deep=True))
@@ -350,9 +445,15 @@ class ElementNode(SimpleDomNode):
350
445
  self.children = []
351
446
  self.attrs = attrs if attrs is not None else {}
352
447
  self.template_content = None
448
+ self._origin_pos = None
449
+ self._origin_line = None
450
+ self._origin_col = None
353
451
 
354
452
  def clone_node(self, deep: bool = False) -> ElementNode:
355
453
  clone = ElementNode(self.name, self.attrs.copy() if self.attrs else {}, self.namespace)
454
+ clone._origin_pos = self._origin_pos
455
+ clone._origin_line = self._origin_line
456
+ clone._origin_col = self._origin_col
356
457
  if deep:
357
458
  for child in self.children:
358
459
  clone.append_child(child.clone_node(deep=True))
@@ -382,6 +483,9 @@ class TemplateNode(ElementNode):
382
483
  None,
383
484
  self.namespace,
384
485
  )
486
+ clone._origin_pos = self._origin_pos
487
+ clone._origin_line = self._origin_line
488
+ clone._origin_col = self._origin_col
385
489
  if deep:
386
490
  if self.template_content:
387
491
  clone.template_content = self.template_content.clone_node(deep=True)
@@ -391,26 +495,62 @@ class TemplateNode(ElementNode):
391
495
 
392
496
 
393
497
  class TextNode:
394
- __slots__ = ("data", "name", "namespace", "parent")
498
+ __slots__ = ("_origin_col", "_origin_line", "_origin_pos", "data", "name", "namespace", "parent")
395
499
 
396
500
  data: str | None
397
501
  name: str
398
502
  namespace: None
399
503
  parent: SimpleDomNode | ElementNode | TemplateNode | None
504
+ _origin_pos: int | None
505
+ _origin_line: int | None
506
+ _origin_col: int | None
400
507
 
401
508
  def __init__(self, data: str | None) -> None:
402
509
  self.data = data
403
510
  self.parent = None
404
511
  self.name = "#text"
405
512
  self.namespace = None
513
+ self._origin_pos = None
514
+ self._origin_line = None
515
+ self._origin_col = None
516
+
517
+ @property
518
+ def origin_offset(self) -> int | None:
519
+ """Best-effort origin offset (0-indexed) in the source HTML, if known."""
520
+ return self._origin_pos
521
+
522
+ @property
523
+ def origin_line(self) -> int | None:
524
+ return self._origin_line
525
+
526
+ @property
527
+ def origin_col(self) -> int | None:
528
+ return self._origin_col
529
+
530
+ @property
531
+ def origin_location(self) -> tuple[int, int] | None:
532
+ if self._origin_line is None or self._origin_col is None:
533
+ return None
534
+ return (self._origin_line, self._origin_col)
406
535
 
407
536
  @property
408
537
  def text(self) -> str:
409
538
  """Return the text content of this node."""
410
539
  return self.data or ""
411
540
 
412
- def to_text(self, separator: str = " ", strip: bool = True) -> str:
541
+ def to_text(
542
+ self,
543
+ separator: str = " ",
544
+ strip: bool = True,
545
+ *,
546
+ safe: bool = True,
547
+ policy: SanitizationPolicy | None = None,
548
+ ) -> str:
413
549
  # Parameters are accepted for API consistency; they don't affect leaf nodes.
550
+ _ = separator
551
+ _ = safe
552
+ _ = policy
553
+
414
554
  if self.data is None:
415
555
  return ""
416
556
  if strip:
@@ -432,7 +572,11 @@ class TextNode:
432
572
  return False
433
573
 
434
574
  def clone_node(self, deep: bool = False) -> TextNode:
435
- return TextNode(self.data)
575
+ clone = TextNode(self.data)
576
+ clone._origin_pos = self._origin_pos
577
+ clone._origin_line = self._origin_line
578
+ clone._origin_col = self._origin_col
579
+ return clone
436
580
 
437
581
 
438
582
  _MARKDOWN_BLOCK_ELEMENTS: frozenset[str] = frozenset(
@@ -463,7 +607,13 @@ _MARKDOWN_BLOCK_ELEMENTS: frozenset[str] = frozenset(
463
607
  )
464
608
 
465
609
 
466
- def _to_markdown_walk(node: Any, builder: _MarkdownBuilder, preserve_whitespace: bool, list_depth: int) -> None:
610
+ def _to_markdown_walk(
611
+ node: Any,
612
+ builder: _MarkdownBuilder,
613
+ preserve_whitespace: bool,
614
+ list_depth: int,
615
+ in_link: bool = False,
616
+ ) -> None:
467
617
  name: str = node.name
468
618
 
469
619
  if name == "#text":
@@ -474,7 +624,10 @@ def _to_markdown_walk(node: Any, builder: _MarkdownBuilder, preserve_whitespace:
474
624
  return
475
625
 
476
626
  if name == "br":
477
- builder.newline(1)
627
+ if in_link:
628
+ builder.text(" ", preserve_whitespace=False)
629
+ else:
630
+ builder.newline(1)
478
631
  return
479
632
 
480
633
  # Comments/doctype don't contribute.
@@ -485,52 +638,80 @@ def _to_markdown_walk(node: Any, builder: _MarkdownBuilder, preserve_whitespace:
485
638
  if name.startswith("#"):
486
639
  if node.children:
487
640
  for child in node.children:
488
- _to_markdown_walk(child, builder, preserve_whitespace, list_depth)
641
+ _to_markdown_walk(
642
+ child,
643
+ builder,
644
+ preserve_whitespace,
645
+ list_depth,
646
+ in_link=in_link,
647
+ )
489
648
  return
490
649
 
491
650
  tag = name.lower()
492
651
 
652
+ # Metadata containers don't contribute to body text.
653
+ if tag == "head" or tag == "title":
654
+ return
655
+
493
656
  # Preserve <img> and <table> as HTML.
494
657
  if tag == "img":
495
658
  builder.raw(node.to_html(indent=0, indent_size=2, pretty=False))
496
659
  return
497
660
 
498
661
  if tag == "table":
499
- builder.ensure_newlines(2 if builder._buf else 0)
662
+ if not in_link:
663
+ builder.ensure_newlines(2 if builder._buf else 0)
500
664
  builder.raw(node.to_html(indent=0, indent_size=2, pretty=False))
501
- builder.ensure_newlines(2)
665
+ if not in_link:
666
+ builder.ensure_newlines(2)
502
667
  return
503
668
 
504
669
  # Headings.
505
670
  if tag in {"h1", "h2", "h3", "h4", "h5", "h6"}:
506
- builder.ensure_newlines(2 if builder._buf else 0)
507
- level = int(tag[1])
508
- builder.raw("#" * level)
509
- builder.raw(" ")
671
+ if not in_link:
672
+ builder.ensure_newlines(2 if builder._buf else 0)
673
+ level = int(tag[1])
674
+ builder.raw("#" * level)
675
+ builder.raw(" ")
676
+
510
677
  if node.children:
511
678
  for child in node.children:
512
- _to_markdown_walk(child, builder, preserve_whitespace=False, list_depth=list_depth)
513
- builder.ensure_newlines(2)
679
+ _to_markdown_walk(
680
+ child,
681
+ builder,
682
+ preserve_whitespace=False,
683
+ list_depth=list_depth,
684
+ in_link=in_link,
685
+ )
686
+
687
+ if not in_link:
688
+ builder.ensure_newlines(2)
514
689
  return
515
690
 
516
691
  # Horizontal rule.
517
692
  if tag == "hr":
518
- builder.ensure_newlines(2 if builder._buf else 0)
519
- builder.raw("---")
520
- builder.ensure_newlines(2)
693
+ if not in_link:
694
+ builder.ensure_newlines(2 if builder._buf else 0)
695
+ builder.raw("---")
696
+ builder.ensure_newlines(2)
521
697
  return
522
698
 
523
699
  # Code blocks.
524
700
  if tag == "pre":
525
- builder.ensure_newlines(2 if builder._buf else 0)
526
- code = node.to_text(separator="", strip=False)
527
- builder.raw("```")
528
- builder.newline(1)
529
- if code:
530
- builder.raw(code.rstrip("\n"))
701
+ if not in_link:
702
+ builder.ensure_newlines(2 if builder._buf else 0)
703
+ code = node.to_text(separator="", strip=False)
704
+ builder.raw("```")
531
705
  builder.newline(1)
532
- builder.raw("```")
533
- builder.ensure_newlines(2)
706
+ if code:
707
+ builder.raw(code.rstrip("\n"))
708
+ builder.newline(1)
709
+ builder.raw("```")
710
+ builder.ensure_newlines(2)
711
+ else:
712
+ # Inside link, render as inline code or text
713
+ code = node.to_text(separator="", strip=False)
714
+ builder.raw(_markdown_code_span(code))
534
715
  return
535
716
 
536
717
  # Inline code.
@@ -541,64 +722,126 @@ def _to_markdown_walk(node: Any, builder: _MarkdownBuilder, preserve_whitespace:
541
722
 
542
723
  # Paragraph-like blocks.
543
724
  if tag == "p":
544
- builder.ensure_newlines(2 if builder._buf else 0)
725
+ if not in_link:
726
+ builder.ensure_newlines(2 if builder._buf else 0)
727
+
545
728
  if node.children:
546
729
  for child in node.children:
547
- _to_markdown_walk(child, builder, preserve_whitespace=False, list_depth=list_depth)
548
- builder.ensure_newlines(2)
730
+ _to_markdown_walk(
731
+ child,
732
+ builder,
733
+ preserve_whitespace=False,
734
+ list_depth=list_depth,
735
+ in_link=in_link,
736
+ )
737
+
738
+ if not in_link:
739
+ builder.ensure_newlines(2)
740
+ else:
741
+ builder.text(" ", preserve_whitespace=False)
549
742
  return
550
743
 
551
744
  # Blockquotes.
552
745
  if tag == "blockquote":
553
- builder.ensure_newlines(2 if builder._buf else 0)
554
- inner = _MarkdownBuilder()
555
- if node.children:
556
- for child in node.children:
557
- _to_markdown_walk(child, inner, preserve_whitespace=False, list_depth=list_depth)
558
- text = inner.finish()
559
- if text:
560
- lines = text.split("\n")
561
- for i, line in enumerate(lines):
562
- if i:
563
- builder.newline(1)
564
- builder.raw("> ")
565
- builder.raw(line)
566
- builder.ensure_newlines(2)
746
+ if not in_link:
747
+ builder.ensure_newlines(2 if builder._buf else 0)
748
+ inner = _MarkdownBuilder()
749
+ if node.children:
750
+ for child in node.children:
751
+ _to_markdown_walk(
752
+ child,
753
+ inner,
754
+ preserve_whitespace=False,
755
+ list_depth=list_depth,
756
+ in_link=in_link,
757
+ )
758
+ text = inner.finish()
759
+ if text:
760
+ lines = text.split("\n")
761
+ for i, line in enumerate(lines):
762
+ if i:
763
+ builder.newline(1)
764
+ builder.raw("> ")
765
+ builder.raw(line)
766
+ builder.ensure_newlines(2)
767
+ else:
768
+ if node.children:
769
+ for child in node.children:
770
+ _to_markdown_walk(
771
+ child,
772
+ builder,
773
+ preserve_whitespace=False,
774
+ list_depth=list_depth,
775
+ in_link=in_link,
776
+ )
567
777
  return
568
778
 
569
779
  # Lists.
570
780
  if tag in {"ul", "ol"}:
571
- builder.ensure_newlines(2 if builder._buf else 0)
572
- ordered = tag == "ol"
573
- idx = 1
574
- for child in node.children or []:
575
- if child.name.lower() != "li":
576
- continue
577
- if idx > 1:
578
- builder.newline(1)
579
- indent = " " * list_depth
580
- marker = f"{idx}. " if ordered else "- "
581
- builder.raw(indent)
582
- builder.raw(marker)
583
- # Render list item content inline-ish.
584
- for li_child in child.children or []:
585
- _to_markdown_walk(li_child, builder, preserve_whitespace=False, list_depth=list_depth + 1)
586
- idx += 1
587
- builder.ensure_newlines(2)
781
+ if not in_link:
782
+ builder.ensure_newlines(2 if builder._buf else 0)
783
+ ordered = tag == "ol"
784
+ idx = 1
785
+ for child in node.children or []:
786
+ if child.name.lower() != "li":
787
+ continue
788
+ if idx > 1:
789
+ builder.newline(1)
790
+ indent = " " * list_depth
791
+ marker = f"{idx}. " if ordered else "- "
792
+ builder.raw(indent)
793
+ builder.raw(marker)
794
+ # Render list item content inline-ish.
795
+ for li_child in child.children or []:
796
+ _to_markdown_walk(
797
+ li_child,
798
+ builder,
799
+ preserve_whitespace=False,
800
+ list_depth=list_depth + 1,
801
+ in_link=in_link,
802
+ )
803
+ idx += 1
804
+ builder.ensure_newlines(2)
805
+ else:
806
+ # Flatten list inside link
807
+ for child in node.children or []:
808
+ if child.name.lower() != "li":
809
+ continue
810
+ builder.raw(" ")
811
+ for li_child in child.children or []:
812
+ _to_markdown_walk(
813
+ li_child,
814
+ builder,
815
+ preserve_whitespace=False,
816
+ list_depth=list_depth + 1,
817
+ in_link=in_link,
818
+ )
588
819
  return
589
820
 
590
821
  # Emphasis/strong.
591
822
  if tag in {"em", "i"}:
592
823
  builder.raw("*")
593
824
  for child in node.children or []:
594
- _to_markdown_walk(child, builder, preserve_whitespace=False, list_depth=list_depth)
825
+ _to_markdown_walk(
826
+ child,
827
+ builder,
828
+ preserve_whitespace=False,
829
+ list_depth=list_depth,
830
+ in_link=in_link,
831
+ )
595
832
  builder.raw("*")
596
833
  return
597
834
 
598
835
  if tag in {"strong", "b"}:
599
836
  builder.raw("**")
600
837
  for child in node.children or []:
601
- _to_markdown_walk(child, builder, preserve_whitespace=False, list_depth=list_depth)
838
+ _to_markdown_walk(
839
+ child,
840
+ builder,
841
+ preserve_whitespace=False,
842
+ list_depth=list_depth,
843
+ in_link=in_link,
844
+ )
602
845
  builder.raw("**")
603
846
  return
604
847
 
@@ -608,13 +851,24 @@ def _to_markdown_walk(node: Any, builder: _MarkdownBuilder, preserve_whitespace:
608
851
  if node.attrs and "href" in node.attrs and node.attrs["href"] is not None:
609
852
  href = str(node.attrs["href"])
610
853
 
611
- builder.raw("[")
854
+ # Capture inner text to strip whitespace.
855
+ inner_builder = _MarkdownBuilder()
612
856
  for child in node.children or []:
613
- _to_markdown_walk(child, builder, preserve_whitespace=False, list_depth=list_depth)
857
+ _to_markdown_walk(
858
+ child,
859
+ inner_builder,
860
+ preserve_whitespace=False,
861
+ list_depth=list_depth,
862
+ in_link=True,
863
+ )
864
+ link_text = inner_builder.finish()
865
+
866
+ builder.raw("[")
867
+ builder.raw(link_text)
614
868
  builder.raw("]")
615
869
  if href:
616
870
  builder.raw("(")
617
- builder.raw(href)
871
+ builder.raw(_markdown_link_destination(href))
618
872
  builder.raw(")")
619
873
  return
620
874
 
@@ -622,11 +876,26 @@ def _to_markdown_walk(node: Any, builder: _MarkdownBuilder, preserve_whitespace:
622
876
  next_preserve = preserve_whitespace or (tag in {"textarea", "script", "style"})
623
877
  if node.children:
624
878
  for child in node.children:
625
- _to_markdown_walk(child, builder, next_preserve, list_depth)
879
+ _to_markdown_walk(
880
+ child,
881
+ builder,
882
+ next_preserve,
883
+ list_depth,
884
+ in_link=in_link,
885
+ )
626
886
 
627
887
  if isinstance(node, ElementNode) and node.template_content:
628
- _to_markdown_walk(node.template_content, builder, next_preserve, list_depth)
888
+ _to_markdown_walk(
889
+ node.template_content,
890
+ builder,
891
+ next_preserve,
892
+ list_depth,
893
+ in_link=in_link,
894
+ )
629
895
 
630
896
  # Add spacing after block containers to keep output readable.
631
897
  if tag in _MARKDOWN_BLOCK_ELEMENTS:
632
- builder.ensure_newlines(2)
898
+ if not in_link:
899
+ builder.ensure_newlines(2)
900
+ else:
901
+ builder.text(" ", preserve_whitespace=False)