justhtml-0.12.0-py3-none-any.whl → justhtml-0.38.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of justhtml might be problematic. Click here for more details.

justhtml/node.py CHANGED
@@ -1,6 +1,7 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  from typing import TYPE_CHECKING, Any
4
+ from urllib.parse import quote
4
5
 
5
6
  from .selector import query
6
7
  from .serialize import to_html
@@ -43,6 +44,30 @@ def _markdown_code_span(s: str | None) -> str:
43
44
  return f"{fence}{s}{fence}"
44
45
 
45
46
 
47
+ def _markdown_link_destination(url: str) -> str:
48
+ """Return a Markdown-safe link destination.
49
+
50
+ We primarily care about avoiding Markdown formatting injection and broken
51
+ parsing for URLs that contain whitespace or parentheses.
52
+
53
+ CommonMark supports destinations wrapped in angle brackets:
54
+ `[text](<https://example.com/a(b)c>)`
55
+ """
56
+
57
+ u = (url or "").strip()
58
+ if not u:
59
+ return ""
60
+
61
+ # If the destination contains characters that can terminate or confuse
62
+ # the Markdown destination parser, wrap in <...> and percent-encode
63
+ # whitespace and angle brackets.
64
+ if any(ch in u for ch in (" ", "\t", "\n", "\r", "(", ")", "<", ">")):
65
+ u = quote(u, safe=":/?#[]@!$&'*+,;=%-._~()")
66
+ return f"<{u}>"
67
+
68
+ return u
69
+
70
+
46
71
  class _MarkdownBuilder:
47
72
  __slots__ = ("_buf", "_newline_count", "_pending_space")
48
73
 
@@ -133,29 +158,46 @@ NodeType = "SimpleDomNode | ElementNode | TemplateNode | TextNode"
133
158
 
134
159
 
135
160
  def _to_text_collect(node: Any, parts: list[str], strip: bool) -> None:
136
- name: str = node.name
137
-
138
- if name == "#text":
139
- data: str | None = node.data
140
- if not data:
141
- return
142
- if strip:
143
- data = data.strip()
161
+ # Iterative traversal avoids recursion overhead on large documents.
162
+ stack: list[Any] = [node]
163
+ while stack:
164
+ current = stack.pop()
165
+ name: str = current.name
166
+
167
+ if name == "#text":
168
+ data: str | None = current.data
144
169
  if not data:
145
- return
146
- parts.append(data)
147
- return
170
+ continue
171
+ if strip:
172
+ data = data.strip()
173
+ if not data:
174
+ continue
175
+ parts.append(data)
176
+ continue
148
177
 
149
- if node.children:
150
- for child in node.children:
151
- _to_text_collect(child, parts, strip=strip)
178
+ # Preserve the same traversal order as the recursive implementation:
179
+ # children first, then template content.
180
+ if type(current) is TemplateNode and current.template_content:
181
+ stack.append(current.template_content)
152
182
 
153
- if isinstance(node, ElementNode) and node.template_content:
154
- _to_text_collect(node.template_content, parts, strip=strip)
183
+ children = current.children
184
+ if children:
185
+ stack.extend(reversed(children))
155
186
 
156
187
 
157
188
  class SimpleDomNode:
158
- __slots__ = ("attrs", "children", "data", "name", "namespace", "parent")
189
+ __slots__ = (
190
+ "_origin_col",
191
+ "_origin_line",
192
+ "_origin_pos",
193
+ "_source_html",
194
+ "attrs",
195
+ "children",
196
+ "data",
197
+ "name",
198
+ "namespace",
199
+ "parent",
200
+ )
159
201
 
160
202
  name: str
161
203
  parent: SimpleDomNode | ElementNode | TemplateNode | None
@@ -163,6 +205,10 @@ class SimpleDomNode:
163
205
  children: list[Any] | None
164
206
  data: str | Doctype | None
165
207
  namespace: str | None
208
+ _origin_pos: int | None
209
+ _origin_line: int | None
210
+ _origin_col: int | None
211
+ _source_html: str | None
166
212
 
167
213
  def __init__(
168
214
  self,
@@ -174,6 +220,10 @@ class SimpleDomNode:
174
220
  self.name = name
175
221
  self.parent = None
176
222
  self.data = data
223
+ self._source_html = None
224
+ self._origin_pos = None
225
+ self._origin_line = None
226
+ self._origin_col = None
177
227
 
178
228
  if name.startswith("#") or name == "!doctype":
179
229
  self.namespace = namespace
@@ -193,12 +243,36 @@ class SimpleDomNode:
193
243
  self.children.append(node)
194
244
  node.parent = self
195
245
 
246
+ @property
247
+ def origin_offset(self) -> int | None:
248
+ """Best-effort origin offset (0-indexed) in the source HTML, if known."""
249
+ return self._origin_pos
250
+
251
+ @property
252
+ def origin_line(self) -> int | None:
253
+ return self._origin_line
254
+
255
+ @property
256
+ def origin_col(self) -> int | None:
257
+ return self._origin_col
258
+
259
+ @property
260
+ def origin_location(self) -> tuple[int, int] | None:
261
+ if self._origin_line is None or self._origin_col is None:
262
+ return None
263
+ return (self._origin_line, self._origin_col)
264
+
196
265
  def remove_child(self, node: Any) -> None:
197
266
  if self.children is not None:
198
267
  self.children.remove(node)
199
268
  node.parent = None
200
269
 
201
- def to_html(self, indent: int = 0, indent_size: int = 2, pretty: bool = True) -> str:
270
+ def to_html(
271
+ self,
272
+ indent: int = 0,
273
+ indent_size: int = 2,
274
+ pretty: bool = True,
275
+ ) -> str:
202
276
  """Convert node to HTML string."""
203
277
  return to_html(self, indent, indent_size, pretty=pretty)
204
278
 
@@ -232,16 +306,20 @@ class SimpleDomNode:
232
306
  return ""
233
307
  return ""
234
308
 
235
- def to_text(self, separator: str = " ", strip: bool = True) -> str:
309
+ def to_text(
310
+ self,
311
+ separator: str = " ",
312
+ strip: bool = True,
313
+ ) -> str:
236
314
  """Return the concatenated text of this node's descendants.
237
315
 
238
316
  - `separator` controls how text nodes are joined (default: a single space).
239
317
  - `strip=True` strips each text node and drops empty segments.
240
-
241
318
  Template element contents are included via `template_content`.
242
319
  """
320
+ node: Any = self
243
321
  parts: list[str] = []
244
- _to_text_collect(self, parts, strip=strip)
322
+ _to_text_collect(node, parts, strip=strip)
245
323
  if not parts:
246
324
  return ""
247
325
  return separator.join(parts)
@@ -313,22 +391,28 @@ class SimpleDomNode:
313
391
  """Return True if this node has children."""
314
392
  return bool(self.children)
315
393
 
316
- def clone_node(self, deep: bool = False) -> SimpleDomNode:
394
+ def clone_node(self, deep: bool = False, override_attrs: dict[str, str | None] | None = None) -> SimpleDomNode:
317
395
  """
318
396
  Clone this node.
319
397
 
320
398
  Args:
321
399
  deep: If True, recursively clone children.
400
+ override_attrs: Optional dictionary to use as attributes for the clone.
322
401
 
323
402
  Returns:
324
403
  A new node that is a copy of this node.
325
404
  """
405
+ attrs = override_attrs if override_attrs is not None else (self.attrs.copy() if self.attrs else None)
326
406
  clone = SimpleDomNode(
327
407
  self.name,
328
- self.attrs.copy() if self.attrs else None,
408
+ attrs,
329
409
  self.data,
330
410
  self.namespace,
331
411
  )
412
+ clone._source_html = self._source_html
413
+ clone._origin_pos = self._origin_pos
414
+ clone._origin_line = self._origin_line
415
+ clone._origin_col = self._origin_col
332
416
  if deep and self.children:
333
417
  for child in self.children:
334
418
  clone.append_child(child.clone_node(deep=True))
@@ -336,11 +420,25 @@ class SimpleDomNode:
336
420
 
337
421
 
338
422
  class ElementNode(SimpleDomNode):
339
- __slots__ = ("template_content",)
423
+ __slots__ = (
424
+ "_end_tag_end",
425
+ "_end_tag_present",
426
+ "_end_tag_start",
427
+ "_self_closing",
428
+ "_start_tag_end",
429
+ "_start_tag_start",
430
+ "template_content",
431
+ )
340
432
 
341
433
  template_content: SimpleDomNode | None
342
434
  children: list[Any]
343
435
  attrs: dict[str, str | None]
436
+ _start_tag_start: int | None
437
+ _start_tag_end: int | None
438
+ _end_tag_start: int | None
439
+ _end_tag_end: int | None
440
+ _end_tag_present: bool
441
+ _self_closing: bool
344
442
 
345
443
  def __init__(self, name: str, attrs: dict[str, str | None] | None, namespace: str | None) -> None:
346
444
  self.name = name
@@ -350,9 +448,30 @@ class ElementNode(SimpleDomNode):
350
448
  self.children = []
351
449
  self.attrs = attrs if attrs is not None else {}
352
450
  self.template_content = None
353
-
354
- def clone_node(self, deep: bool = False) -> ElementNode:
355
- clone = ElementNode(self.name, self.attrs.copy() if self.attrs else {}, self.namespace)
451
+ self._source_html = None
452
+ self._origin_pos = None
453
+ self._origin_line = None
454
+ self._origin_col = None
455
+ self._start_tag_start = None
456
+ self._start_tag_end = None
457
+ self._end_tag_start = None
458
+ self._end_tag_end = None
459
+ self._end_tag_present = False
460
+ self._self_closing = False
461
+
462
+ def clone_node(self, deep: bool = False, override_attrs: dict[str, str | None] | None = None) -> ElementNode:
463
+ attrs = override_attrs if override_attrs is not None else (self.attrs.copy() if self.attrs else {})
464
+ clone = ElementNode(self.name, attrs, self.namespace)
465
+ clone._source_html = self._source_html
466
+ clone._origin_pos = self._origin_pos
467
+ clone._origin_line = self._origin_line
468
+ clone._origin_col = self._origin_col
469
+ clone._start_tag_start = self._start_tag_start
470
+ clone._start_tag_end = self._start_tag_end
471
+ clone._end_tag_start = self._end_tag_start
472
+ clone._end_tag_end = self._end_tag_end
473
+ clone._end_tag_present = self._end_tag_present
474
+ clone._self_closing = self._self_closing
356
475
  if deep:
357
476
  for child in self.children:
358
477
  clone.append_child(child.clone_node(deep=True))
@@ -375,13 +494,24 @@ class TemplateNode(ElementNode):
375
494
  else:
376
495
  self.template_content = None
377
496
 
378
- def clone_node(self, deep: bool = False) -> TemplateNode:
497
+ def clone_node(self, deep: bool = False, override_attrs: dict[str, str | None] | None = None) -> TemplateNode:
498
+ attrs = override_attrs if override_attrs is not None else (self.attrs.copy() if self.attrs else {})
379
499
  clone = TemplateNode(
380
500
  self.name,
381
- self.attrs.copy() if self.attrs else {},
501
+ attrs,
382
502
  None,
383
503
  self.namespace,
384
504
  )
505
+ clone._source_html = self._source_html
506
+ clone._origin_pos = self._origin_pos
507
+ clone._origin_line = self._origin_line
508
+ clone._origin_col = self._origin_col
509
+ clone._start_tag_start = self._start_tag_start
510
+ clone._start_tag_end = self._start_tag_end
511
+ clone._end_tag_start = self._end_tag_start
512
+ clone._end_tag_end = self._end_tag_end
513
+ clone._end_tag_present = self._end_tag_present
514
+ clone._self_closing = self._self_closing
385
515
  if deep:
386
516
  if self.template_content:
387
517
  clone.template_content = self.template_content.clone_node(deep=True)
@@ -391,26 +521,55 @@ class TemplateNode(ElementNode):
391
521
 
392
522
 
393
523
  class TextNode:
394
- __slots__ = ("data", "name", "namespace", "parent")
524
+ __slots__ = ("_origin_col", "_origin_line", "_origin_pos", "data", "name", "namespace", "parent")
395
525
 
396
526
  data: str | None
397
527
  name: str
398
528
  namespace: None
399
529
  parent: SimpleDomNode | ElementNode | TemplateNode | None
530
+ _origin_pos: int | None
531
+ _origin_line: int | None
532
+ _origin_col: int | None
400
533
 
401
534
  def __init__(self, data: str | None) -> None:
402
535
  self.data = data
403
536
  self.parent = None
404
537
  self.name = "#text"
405
538
  self.namespace = None
539
+ self._origin_pos = None
540
+ self._origin_line = None
541
+ self._origin_col = None
542
+
543
+ @property
544
+ def origin_offset(self) -> int | None:
545
+ """Best-effort origin offset (0-indexed) in the source HTML, if known."""
546
+ return self._origin_pos
547
+
548
+ @property
549
+ def origin_line(self) -> int | None:
550
+ return self._origin_line
551
+
552
+ @property
553
+ def origin_col(self) -> int | None:
554
+ return self._origin_col
555
+
556
+ @property
557
+ def origin_location(self) -> tuple[int, int] | None:
558
+ if self._origin_line is None or self._origin_col is None:
559
+ return None
560
+ return (self._origin_line, self._origin_col)
406
561
 
407
562
  @property
408
563
  def text(self) -> str:
409
564
  """Return the text content of this node."""
410
565
  return self.data or ""
411
566
 
412
- def to_text(self, separator: str = " ", strip: bool = True) -> str:
413
- # Parameters are accepted for API consistency; they don't affect leaf nodes.
567
+ def to_text(
568
+ self,
569
+ separator: str = " ",
570
+ strip: bool = True,
571
+ ) -> str:
572
+ _ = separator
414
573
  if self.data is None:
415
574
  return ""
416
575
  if strip:
@@ -432,7 +591,11 @@ class TextNode:
432
591
  return False
433
592
 
434
593
  def clone_node(self, deep: bool = False) -> TextNode:
435
- return TextNode(self.data)
594
+ clone = TextNode(self.data)
595
+ clone._origin_pos = self._origin_pos
596
+ clone._origin_line = self._origin_line
597
+ clone._origin_col = self._origin_col
598
+ return clone
436
599
 
437
600
 
438
601
  _MARKDOWN_BLOCK_ELEMENTS: frozenset[str] = frozenset(
@@ -463,7 +626,13 @@ _MARKDOWN_BLOCK_ELEMENTS: frozenset[str] = frozenset(
463
626
  )
464
627
 
465
628
 
466
- def _to_markdown_walk(node: Any, builder: _MarkdownBuilder, preserve_whitespace: bool, list_depth: int) -> None:
629
+ def _to_markdown_walk(
630
+ node: Any,
631
+ builder: _MarkdownBuilder,
632
+ preserve_whitespace: bool,
633
+ list_depth: int,
634
+ in_link: bool = False,
635
+ ) -> None:
467
636
  name: str = node.name
468
637
 
469
638
  if name == "#text":
@@ -474,7 +643,10 @@ def _to_markdown_walk(node: Any, builder: _MarkdownBuilder, preserve_whitespace:
474
643
  return
475
644
 
476
645
  if name == "br":
477
- builder.newline(1)
646
+ if in_link:
647
+ builder.text(" ", preserve_whitespace=False)
648
+ else:
649
+ builder.newline(1)
478
650
  return
479
651
 
480
652
  # Comments/doctype don't contribute.
@@ -485,52 +657,80 @@ def _to_markdown_walk(node: Any, builder: _MarkdownBuilder, preserve_whitespace:
485
657
  if name.startswith("#"):
486
658
  if node.children:
487
659
  for child in node.children:
488
- _to_markdown_walk(child, builder, preserve_whitespace, list_depth)
660
+ _to_markdown_walk(
661
+ child,
662
+ builder,
663
+ preserve_whitespace,
664
+ list_depth,
665
+ in_link=in_link,
666
+ )
489
667
  return
490
668
 
491
669
  tag = name.lower()
492
670
 
671
+ # Metadata containers don't contribute to body text.
672
+ if tag == "head" or tag == "title":
673
+ return
674
+
493
675
  # Preserve <img> and <table> as HTML.
494
676
  if tag == "img":
495
677
  builder.raw(node.to_html(indent=0, indent_size=2, pretty=False))
496
678
  return
497
679
 
498
680
  if tag == "table":
499
- builder.ensure_newlines(2 if builder._buf else 0)
681
+ if not in_link:
682
+ builder.ensure_newlines(2 if builder._buf else 0)
500
683
  builder.raw(node.to_html(indent=0, indent_size=2, pretty=False))
501
- builder.ensure_newlines(2)
684
+ if not in_link:
685
+ builder.ensure_newlines(2)
502
686
  return
503
687
 
504
688
  # Headings.
505
689
  if tag in {"h1", "h2", "h3", "h4", "h5", "h6"}:
506
- builder.ensure_newlines(2 if builder._buf else 0)
507
- level = int(tag[1])
508
- builder.raw("#" * level)
509
- builder.raw(" ")
690
+ if not in_link:
691
+ builder.ensure_newlines(2 if builder._buf else 0)
692
+ level = int(tag[1])
693
+ builder.raw("#" * level)
694
+ builder.raw(" ")
695
+
510
696
  if node.children:
511
697
  for child in node.children:
512
- _to_markdown_walk(child, builder, preserve_whitespace=False, list_depth=list_depth)
513
- builder.ensure_newlines(2)
698
+ _to_markdown_walk(
699
+ child,
700
+ builder,
701
+ preserve_whitespace=False,
702
+ list_depth=list_depth,
703
+ in_link=in_link,
704
+ )
705
+
706
+ if not in_link:
707
+ builder.ensure_newlines(2)
514
708
  return
515
709
 
516
710
  # Horizontal rule.
517
711
  if tag == "hr":
518
- builder.ensure_newlines(2 if builder._buf else 0)
519
- builder.raw("---")
520
- builder.ensure_newlines(2)
712
+ if not in_link:
713
+ builder.ensure_newlines(2 if builder._buf else 0)
714
+ builder.raw("---")
715
+ builder.ensure_newlines(2)
521
716
  return
522
717
 
523
718
  # Code blocks.
524
719
  if tag == "pre":
525
- builder.ensure_newlines(2 if builder._buf else 0)
526
- code = node.to_text(separator="", strip=False)
527
- builder.raw("```")
528
- builder.newline(1)
529
- if code:
530
- builder.raw(code.rstrip("\n"))
720
+ if not in_link:
721
+ builder.ensure_newlines(2 if builder._buf else 0)
722
+ code = node.to_text(separator="", strip=False)
723
+ builder.raw("```")
531
724
  builder.newline(1)
532
- builder.raw("```")
533
- builder.ensure_newlines(2)
725
+ if code:
726
+ builder.raw(code.rstrip("\n"))
727
+ builder.newline(1)
728
+ builder.raw("```")
729
+ builder.ensure_newlines(2)
730
+ else:
731
+ # Inside link, render as inline code or text
732
+ code = node.to_text(separator="", strip=False)
733
+ builder.raw(_markdown_code_span(code))
534
734
  return
535
735
 
536
736
  # Inline code.
@@ -541,64 +741,126 @@ def _to_markdown_walk(node: Any, builder: _MarkdownBuilder, preserve_whitespace:
541
741
 
542
742
  # Paragraph-like blocks.
543
743
  if tag == "p":
544
- builder.ensure_newlines(2 if builder._buf else 0)
744
+ if not in_link:
745
+ builder.ensure_newlines(2 if builder._buf else 0)
746
+
545
747
  if node.children:
546
748
  for child in node.children:
547
- _to_markdown_walk(child, builder, preserve_whitespace=False, list_depth=list_depth)
548
- builder.ensure_newlines(2)
749
+ _to_markdown_walk(
750
+ child,
751
+ builder,
752
+ preserve_whitespace=False,
753
+ list_depth=list_depth,
754
+ in_link=in_link,
755
+ )
756
+
757
+ if not in_link:
758
+ builder.ensure_newlines(2)
759
+ else:
760
+ builder.text(" ", preserve_whitespace=False)
549
761
  return
550
762
 
551
763
  # Blockquotes.
552
764
  if tag == "blockquote":
553
- builder.ensure_newlines(2 if builder._buf else 0)
554
- inner = _MarkdownBuilder()
555
- if node.children:
556
- for child in node.children:
557
- _to_markdown_walk(child, inner, preserve_whitespace=False, list_depth=list_depth)
558
- text = inner.finish()
559
- if text:
560
- lines = text.split("\n")
561
- for i, line in enumerate(lines):
562
- if i:
563
- builder.newline(1)
564
- builder.raw("> ")
565
- builder.raw(line)
566
- builder.ensure_newlines(2)
765
+ if not in_link:
766
+ builder.ensure_newlines(2 if builder._buf else 0)
767
+ inner = _MarkdownBuilder()
768
+ if node.children:
769
+ for child in node.children:
770
+ _to_markdown_walk(
771
+ child,
772
+ inner,
773
+ preserve_whitespace=False,
774
+ list_depth=list_depth,
775
+ in_link=in_link,
776
+ )
777
+ text = inner.finish()
778
+ if text:
779
+ lines = text.split("\n")
780
+ for i, line in enumerate(lines):
781
+ if i:
782
+ builder.newline(1)
783
+ builder.raw("> ")
784
+ builder.raw(line)
785
+ builder.ensure_newlines(2)
786
+ else:
787
+ if node.children:
788
+ for child in node.children:
789
+ _to_markdown_walk(
790
+ child,
791
+ builder,
792
+ preserve_whitespace=False,
793
+ list_depth=list_depth,
794
+ in_link=in_link,
795
+ )
567
796
  return
568
797
 
569
798
  # Lists.
570
799
  if tag in {"ul", "ol"}:
571
- builder.ensure_newlines(2 if builder._buf else 0)
572
- ordered = tag == "ol"
573
- idx = 1
574
- for child in node.children or []:
575
- if child.name.lower() != "li":
576
- continue
577
- if idx > 1:
578
- builder.newline(1)
579
- indent = " " * list_depth
580
- marker = f"{idx}. " if ordered else "- "
581
- builder.raw(indent)
582
- builder.raw(marker)
583
- # Render list item content inline-ish.
584
- for li_child in child.children or []:
585
- _to_markdown_walk(li_child, builder, preserve_whitespace=False, list_depth=list_depth + 1)
586
- idx += 1
587
- builder.ensure_newlines(2)
800
+ if not in_link:
801
+ builder.ensure_newlines(2 if builder._buf else 0)
802
+ ordered = tag == "ol"
803
+ idx = 1
804
+ for child in node.children or []:
805
+ if child.name.lower() != "li":
806
+ continue
807
+ if idx > 1:
808
+ builder.newline(1)
809
+ indent = " " * list_depth
810
+ marker = f"{idx}. " if ordered else "- "
811
+ builder.raw(indent)
812
+ builder.raw(marker)
813
+ # Render list item content inline-ish.
814
+ for li_child in child.children or []:
815
+ _to_markdown_walk(
816
+ li_child,
817
+ builder,
818
+ preserve_whitespace=False,
819
+ list_depth=list_depth + 1,
820
+ in_link=in_link,
821
+ )
822
+ idx += 1
823
+ builder.ensure_newlines(2)
824
+ else:
825
+ # Flatten list inside link
826
+ for child in node.children or []:
827
+ if child.name.lower() != "li":
828
+ continue
829
+ builder.raw(" ")
830
+ for li_child in child.children or []:
831
+ _to_markdown_walk(
832
+ li_child,
833
+ builder,
834
+ preserve_whitespace=False,
835
+ list_depth=list_depth + 1,
836
+ in_link=in_link,
837
+ )
588
838
  return
589
839
 
590
840
  # Emphasis/strong.
591
841
  if tag in {"em", "i"}:
592
842
  builder.raw("*")
593
843
  for child in node.children or []:
594
- _to_markdown_walk(child, builder, preserve_whitespace=False, list_depth=list_depth)
844
+ _to_markdown_walk(
845
+ child,
846
+ builder,
847
+ preserve_whitespace=False,
848
+ list_depth=list_depth,
849
+ in_link=in_link,
850
+ )
595
851
  builder.raw("*")
596
852
  return
597
853
 
598
854
  if tag in {"strong", "b"}:
599
855
  builder.raw("**")
600
856
  for child in node.children or []:
601
- _to_markdown_walk(child, builder, preserve_whitespace=False, list_depth=list_depth)
857
+ _to_markdown_walk(
858
+ child,
859
+ builder,
860
+ preserve_whitespace=False,
861
+ list_depth=list_depth,
862
+ in_link=in_link,
863
+ )
602
864
  builder.raw("**")
603
865
  return
604
866
 
@@ -608,13 +870,24 @@ def _to_markdown_walk(node: Any, builder: _MarkdownBuilder, preserve_whitespace:
608
870
  if node.attrs and "href" in node.attrs and node.attrs["href"] is not None:
609
871
  href = str(node.attrs["href"])
610
872
 
611
- builder.raw("[")
873
+ # Capture inner text to strip whitespace.
874
+ inner_builder = _MarkdownBuilder()
612
875
  for child in node.children or []:
613
- _to_markdown_walk(child, builder, preserve_whitespace=False, list_depth=list_depth)
876
+ _to_markdown_walk(
877
+ child,
878
+ inner_builder,
879
+ preserve_whitespace=False,
880
+ list_depth=list_depth,
881
+ in_link=True,
882
+ )
883
+ link_text = inner_builder.finish()
884
+
885
+ builder.raw("[")
886
+ builder.raw(link_text)
614
887
  builder.raw("]")
615
888
  if href:
616
889
  builder.raw("(")
617
- builder.raw(href)
890
+ builder.raw(_markdown_link_destination(href))
618
891
  builder.raw(")")
619
892
  return
620
893
 
@@ -622,11 +895,26 @@ def _to_markdown_walk(node: Any, builder: _MarkdownBuilder, preserve_whitespace:
622
895
  next_preserve = preserve_whitespace or (tag in {"textarea", "script", "style"})
623
896
  if node.children:
624
897
  for child in node.children:
625
- _to_markdown_walk(child, builder, next_preserve, list_depth)
898
+ _to_markdown_walk(
899
+ child,
900
+ builder,
901
+ next_preserve,
902
+ list_depth,
903
+ in_link=in_link,
904
+ )
626
905
 
627
906
  if isinstance(node, ElementNode) and node.template_content:
628
- _to_markdown_walk(node.template_content, builder, next_preserve, list_depth)
907
+ _to_markdown_walk(
908
+ node.template_content,
909
+ builder,
910
+ next_preserve,
911
+ list_depth,
912
+ in_link=in_link,
913
+ )
629
914
 
630
915
  # Add spacing after block containers to keep output readable.
631
916
  if tag in _MARKDOWN_BLOCK_ELEMENTS:
632
- builder.ensure_newlines(2)
917
+ if not in_link:
918
+ builder.ensure_newlines(2)
919
+ else:
920
+ builder.text(" ", preserve_whitespace=False)