PyPI - justhtml - Versions diffs - 0.6.0__py3-none-any.whl → 0.33.0__py3-none-any.whl - Mend

justhtml 0.6.0py3-none-any.whl → 0.33.0py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28) hide show

justhtml/__init__.py +28 -0
justhtml/__main__.py +161 -13
justhtml/constants.py +17 -1
justhtml/context.py +7 -1
justhtml/encoding.py +405 -0
justhtml/entities.py +57 -17
justhtml/errors.py +20 -4
justhtml/linkify.py +438 -0
justhtml/node.py +738 -41
justhtml/parser.py +188 -21
justhtml/py.typed +0 -0
justhtml/sanitize.py +1141 -0
justhtml/selector.py +240 -104
justhtml/serialize.py +418 -57
justhtml/stream.py +34 -10
justhtml/tokenizer.py +433 -289
justhtml/tokens.py +91 -23
justhtml/transforms.py +690 -0
justhtml/treebuilder.py +196 -111
justhtml/treebuilder_modes.py +191 -117
justhtml/treebuilder_utils.py +11 -4
justhtml-0.33.0.dist-info/METADATA +196 -0
justhtml-0.33.0.dist-info/RECORD +26 -0
justhtml-0.33.0.dist-info/entry_points.txt +2 -0
{justhtml-0.6.0.dist-info → justhtml-0.33.0.dist-info}/licenses/LICENSE +4 -1
justhtml-0.6.0.dist-info/METADATA +0 -126
justhtml-0.6.0.dist-info/RECORD +0 -20
{justhtml-0.6.0.dist-info → justhtml-0.33.0.dist-info}/WHEEL +0 -0

justhtml/node.py CHANGED Viewed

@@ -1,14 +1,228 @@
+from __future__ import annotations
+from typing import TYPE_CHECKING, Any
+from urllib.parse import quote
+from .sanitize import _sanitize
 from .selector import query
 from .serialize import to_html
+if TYPE_CHECKING:
+    from .sanitize import SanitizationPolicy
+    from .tokens import Doctype
+def _markdown_escape_text(s: str) -> str:
+    if not s:
+        return ""
+    # Pragmatic: escape the few characters that commonly change Markdown meaning.
+    # Keep this minimal to preserve readability.
+    out: list[str] = []
+    for ch in s:
+        if ch in "\\`*_[]":
+            out.append("\\")
+        out.append(ch)
+    return "".join(out)
+def _markdown_code_span(s: str | None) -> str:
+    if s is None:
+        s = ""
+    # Use a backtick fence longer than any run of backticks inside.
+    longest = 0
+    run = 0
+    for ch in s:
+        if ch == "`":
+            run += 1
+            if run > longest:
+                longest = run
+        else:
+            run = 0
+    fence = "`" * (longest + 1)
+    # CommonMark requires a space if the content starts/ends with backticks.
+    needs_space = s.startswith("`") or s.endswith("`")
+    if needs_space:
+        return f"{fence} {s} {fence}"
+    return f"{fence}{s}{fence}"
-class SimpleDomNode:
-    __slots__ = ("attrs", "children", "data", "name", "namespace", "parent")
+def _markdown_link_destination(url: str) -> str:
+    """Return a Markdown-safe link destination.
+    We primarily care about avoiding Markdown formatting injection and broken
+    parsing for URLs that contain whitespace or parentheses.
+    CommonMark supports destinations wrapped in angle brackets:
+    `[text](<https://example.com/a(b)c>)`
+    """
+    u = (url or "").strip()
+    if not u:
+        return ""
+    # If the destination contains characters that can terminate or confuse
+    # the Markdown destination parser, wrap in <...> and percent-encode
+    # whitespace and angle brackets.
+    if any(ch in u for ch in (" ", "\t", "\n", "\r", "(", ")", "<", ">")):
+        u = quote(u, safe=":/?#[]@!$&'*+,;=%-._~()")
+        return f"<{u}>"
+    return u
+class _MarkdownBuilder:
+    __slots__ = ("_buf", "_newline_count", "_pending_space")
+    _buf: list[str]
+    _newline_count: int
+    _pending_space: bool
+    def __init__(self) -> None:
+        self._buf = []
+        self._newline_count = 0
+        self._pending_space = False
+    def _rstrip_last_segment(self) -> None:
+        if not self._buf:
+            return
+        last = self._buf[-1]
+        stripped = last.rstrip(" \t")
+        if stripped != last:
+            self._buf[-1] = stripped
+    def newline(self, count: int = 1) -> None:
+        for _ in range(count):
+            self._pending_space = False
+            self._rstrip_last_segment()
+            self._buf.append("\n")
+            # Track newlines to make it easy to insert blank lines.
+            if self._newline_count < 2:
+                self._newline_count += 1
+    def ensure_newlines(self, count: int) -> None:
+        while self._newline_count < count:
+            self.newline(1)
+    def raw(self, s: str) -> None:
+        if not s:
+            return
+        # If we've collapsed whitespace and the next output is raw (e.g. "**"),
+        # we still need to emit a single separating space.
+        if self._pending_space:
+            first = s[0]
+            if first not in " \t\n\r\f" and self._buf and self._newline_count == 0:
+                self._buf.append(" ")
+            self._pending_space = False
+        self._buf.append(s)
+        if "\n" in s:
+            # Count trailing newlines (cap at 2 for blank-line semantics).
+            trailing = 0
+            i = len(s) - 1
+            while i >= 0 and s[i] == "\n":
+                trailing += 1
+                i -= 1
+            self._newline_count = min(2, trailing)
+            if trailing:
+                self._pending_space = False
+        else:
+            self._newline_count = 0
+    def text(self, s: str, preserve_whitespace: bool = False) -> None:
+        if not s:
+            return
-    def __init__(self, name, attrs=None, data=None, namespace=None):
+        if preserve_whitespace:
+            self.raw(s)
+            return
+        for ch in s:
+            if ch in " \t\n\r\f":
+                self._pending_space = True
+                continue
+            if self._pending_space:
+                if self._buf and self._newline_count == 0:
+                    self._buf.append(" ")
+                self._pending_space = False
+            self._buf.append(ch)
+            self._newline_count = 0
+    def finish(self) -> str:
+        out = "".join(self._buf)
+        return out.strip(" \t\n")
+# Type alias for any node type
+NodeType = "SimpleDomNode | ElementNode | TemplateNode | TextNode"
+def _to_text_collect(node: Any, parts: list[str], strip: bool) -> None:
+    # Iterative traversal avoids recursion overhead on large documents.
+    stack: list[Any] = [node]
+    while stack:
+        current = stack.pop()
+        name: str = current.name
+        if name == "#text":
+            data: str | None = current.data
+            if not data:
+                continue
+            if strip:
+                data = data.strip()
+                if not data:
+                    continue
+            parts.append(data)
+            continue
+        # Preserve the same traversal order as the recursive implementation:
+        # children first, then template content.
+        if type(current) is TemplateNode and current.template_content:
+            stack.append(current.template_content)
+        children = current.children
+        if children:
+            stack.extend(reversed(children))
+class SimpleDomNode:
+    __slots__ = (
+        "_origin_col",
+        "_origin_line",
+        "_origin_pos",
+        "attrs",
+        "children",
+        "data",
+        "name",
+        "namespace",
+        "parent",
+    )
+    name: str
+    parent: SimpleDomNode | ElementNode | TemplateNode | None
+    attrs: dict[str, str | None] | None
+    children: list[Any] | None
+    data: str | Doctype | None
+    namespace: str | None
+    _origin_pos: int | None
+    _origin_line: int | None
+    _origin_col: int | None
+    def __init__(
+        self,
+        name: str,
+        attrs: dict[str, str | None] | None = None,
+        data: str | Doctype | None = None,
+        namespace: str | None = None,
+    ) -> None:
         self.name = name
         self.parent = None
         self.data = data
+        self._origin_pos = None
+        self._origin_line = None
+        self._origin_col = None
         if name.startswith("#") or name == "!doctype":
             self.namespace = namespace
@@ -23,19 +237,48 @@ class SimpleDomNode:
             self.children = []
             self.attrs = attrs if attrs is not None else {}
-    def append_child(self, node):
-        self.children.append(node)
-        node.parent = self
+    def append_child(self, node: Any) -> None:
+        if self.children is not None:
+            self.children.append(node)
+            node.parent = self
+    @property
+    def origin_offset(self) -> int | None:
+        """Best-effort origin offset (0-indexed) in the source HTML, if known."""
+        return self._origin_pos
-    def remove_child(self, node):
-        self.children.remove(node)
-        node.parent = None
+    @property
+    def origin_line(self) -> int | None:
+        return self._origin_line
+    @property
+    def origin_col(self) -> int | None:
+        return self._origin_col
-    def to_html(self, indent=0, indent_size=2, pretty=True):
+    @property
+    def origin_location(self) -> tuple[int, int] | None:
+        if self._origin_line is None or self._origin_col is None:
+            return None
+        return (self._origin_line, self._origin_col)
+    def remove_child(self, node: Any) -> None:
+        if self.children is not None:
+            self.children.remove(node)
+            node.parent = None
+    def to_html(
+        self,
+        indent: int = 0,
+        indent_size: int = 2,
+        pretty: bool = True,
+        *,
+        safe: bool = True,
+        policy: SanitizationPolicy | None = None,
+    ) -> str:
         """Convert node to HTML string."""
-        return to_html(self, indent, indent_size, pretty=pretty)
+        return to_html(self, indent, indent_size, pretty=pretty, safe=safe, policy=policy)
-    def query(self, selector):
+    def query(self, selector: str) -> list[Any]:
         """
         Query this subtree using a CSS selector.
@@ -48,18 +291,65 @@ class SimpleDomNode:
         Raises:
             ValueError: If the selector is invalid
         """
-        return query(self, selector)
+        result: list[Any] = query(self, selector)
+        return result
     @property
-    def text(self):
-        """Return the text content of this node and its descendants."""
+    def text(self) -> str:
+        """Return the node's own text value.
+        For text nodes this is the node data. For other nodes this is an empty
+        string. Use `to_text()` to get textContent semantics.
+        """
         if self.name == "#text":
-            return self.data or ""
-        if not self.children:
+            data = self.data
+            if isinstance(data, str):
+                return data
             return ""
-        return "".join(child.text for child in self.children)
+        return ""
+    def to_text(
+        self,
+        separator: str = " ",
+        strip: bool = True,
+        *,
+        safe: bool = True,
+        policy: SanitizationPolicy | None = None,
+    ) -> str:
+        """Return the concatenated text of this node's descendants.
+        - `separator` controls how text nodes are joined (default: a single space).
+        - `strip=True` strips each text node and drops empty segments.
+        - `safe=True` sanitizes untrusted HTML before extracting text.
+        - `policy` overrides the default sanitization policy.
+        Template element contents are included via `template_content`.
+        """
+        node: Any = _sanitize(self, policy=policy) if safe else self
+        parts: list[str] = []
+        _to_text_collect(node, parts, strip=strip)
+        if not parts:
+            return ""
+        return separator.join(parts)
+    def to_markdown(self, *, safe: bool = True, policy: SanitizationPolicy | None = None) -> str:
+        """Return a GitHub Flavored Markdown representation of this subtree.
-    def insert_before(self, node, reference_node):
+        This is a pragmatic HTML->Markdown converter intended for readability.
+        - Tables and images are preserved as raw HTML.
+        - Unknown elements fall back to rendering their children.
+        """
+        if safe:
+            node = _sanitize(self, policy=policy)
+            builder = _MarkdownBuilder()
+            _to_markdown_walk(node, builder, preserve_whitespace=False, list_depth=0)
+            return builder.finish()
+        builder = _MarkdownBuilder()
+        _to_markdown_walk(self, builder, preserve_whitespace=False, list_depth=0)
+        return builder.finish()
+    def insert_before(self, node: Any, reference_node: Any | None) -> None:
         """
         Insert a node before a reference node.
@@ -84,7 +374,7 @@ class SimpleDomNode:
         except ValueError:
             raise ValueError("Reference node is not a child of this node") from None
-    def replace_child(self, new_node, old_node):
+    def replace_child(self, new_node: Any, old_node: Any) -> Any:
         """
         Replace a child node with a new node.
@@ -111,26 +401,31 @@ class SimpleDomNode:
         old_node.parent = None
         return old_node
-    def has_child_nodes(self):
+    def has_child_nodes(self) -> bool:
         """Return True if this node has children."""
         return bool(self.children)
-    def clone_node(self, deep=False):
+    def clone_node(self, deep: bool = False, override_attrs: dict[str, str | None] | None = None) -> SimpleDomNode:
         """
         Clone this node.
         Args:
             deep: If True, recursively clone children.
+            override_attrs: Optional dictionary to use as attributes for the clone.
         Returns:
             A new node that is a copy of this node.
         """
+        attrs = override_attrs if override_attrs is not None else (self.attrs.copy() if self.attrs else None)
         clone = SimpleDomNode(
             self.name,
-            self.attrs.copy() if self.attrs else None,
+            attrs,
             self.data,
             self.namespace,
         )
+        clone._origin_pos = self._origin_pos
+        clone._origin_line = self._origin_line
+        clone._origin_col = self._origin_col
         if deep and self.children:
             for child in self.children:
                 clone.append_child(child.clone_node(deep=True))
@@ -138,18 +433,30 @@ class SimpleDomNode:
 class ElementNode(SimpleDomNode):
-    __slots__ = ()
+    __slots__ = ("template_content",)
-    def __init__(self, name, attrs, namespace):
+    template_content: SimpleDomNode | None
+    children: list[Any]
+    attrs: dict[str, str | None]
+    def __init__(self, name: str, attrs: dict[str, str | None] | None, namespace: str | None) -> None:
         self.name = name
         self.parent = None
         self.data = None
         self.namespace = namespace
         self.children = []
-        self.attrs = attrs
-    def clone_node(self, deep=False):
-        clone = ElementNode(self.name, self.attrs.copy() if self.attrs else {}, self.namespace)
+        self.attrs = attrs if attrs is not None else {}
+        self.template_content = None
+        self._origin_pos = None
+        self._origin_line = None
+        self._origin_col = None
+    def clone_node(self, deep: bool = False, override_attrs: dict[str, str | None] | None = None) -> ElementNode:
+        attrs = override_attrs if override_attrs is not None else (self.attrs.copy() if self.attrs else {})
+        clone = ElementNode(self.name, attrs, self.namespace)
+        clone._origin_pos = self._origin_pos
+        clone._origin_line = self._origin_line
+        clone._origin_col = self._origin_col
         if deep:
             for child in self.children:
                 clone.append_child(child.clone_node(deep=True))
@@ -157,22 +464,32 @@ class ElementNode(SimpleDomNode):
 class TemplateNode(ElementNode):
-    __slots__ = ("template_content",)
+    __slots__ = ()
-    def __init__(self, name, attrs=None, data=None, namespace=None):
+    def __init__(
+        self,
+        name: str,
+        attrs: dict[str, str | None] | None = None,
+        data: str | None = None,
+        namespace: str | None = None,
+    ) -> None:
         super().__init__(name, attrs, namespace)
         if self.namespace == "html":
             self.template_content = SimpleDomNode("#document-fragment")
         else:
             self.template_content = None
-    def clone_node(self, deep=False):
+    def clone_node(self, deep: bool = False, override_attrs: dict[str, str | None] | None = None) -> TemplateNode:
+        attrs = override_attrs if override_attrs is not None else (self.attrs.copy() if self.attrs else {})
         clone = TemplateNode(
             self.name,
-            self.attrs.copy() if self.attrs else {},
-            self.data,
+            attrs,
+            None,
             self.namespace,
         )
+        clone._origin_pos = self._origin_pos
+        clone._origin_line = self._origin_line
+        clone._origin_col = self._origin_col
         if deep:
             if self.template_content:
                 clone.template_content = self.template_content.clone_node(deep=True)
@@ -182,27 +499,407 @@ class TemplateNode(ElementNode):
 class TextNode:
-    __slots__ = ("data", "name", "namespace", "parent")
+    __slots__ = ("_origin_col", "_origin_line", "_origin_pos", "data", "name", "namespace", "parent")
-    def __init__(self, data):
+    data: str | None
+    name: str
+    namespace: None
+    parent: SimpleDomNode | ElementNode | TemplateNode | None
+    _origin_pos: int | None
+    _origin_line: int | None
+    _origin_col: int | None
+    def __init__(self, data: str | None) -> None:
         self.data = data
         self.parent = None
         self.name = "#text"
         self.namespace = None
+        self._origin_pos = None
+        self._origin_line = None
+        self._origin_col = None
+    @property
+    def origin_offset(self) -> int | None:
+        """Best-effort origin offset (0-indexed) in the source HTML, if known."""
+        return self._origin_pos
+    @property
+    def origin_line(self) -> int | None:
+        return self._origin_line
     @property
-    def text(self):
+    def origin_col(self) -> int | None:
+        return self._origin_col
+    @property
+    def origin_location(self) -> tuple[int, int] | None:
+        if self._origin_line is None or self._origin_col is None:
+            return None
+        return (self._origin_line, self._origin_col)
+    @property
+    def text(self) -> str:
         """Return the text content of this node."""
         return self.data or ""
+    def to_text(
+        self,
+        separator: str = " ",
+        strip: bool = True,
+        *,
+        safe: bool = True,
+        policy: SanitizationPolicy | None = None,
+    ) -> str:
+        # Parameters are accepted for API consistency; they don't affect leaf nodes.
+        _ = separator
+        _ = safe
+        _ = policy
+        if self.data is None:
+            return ""
+        if strip:
+            return self.data.strip()
+        return self.data
+    def to_markdown(self) -> str:
+        builder = _MarkdownBuilder()
+        builder.text(_markdown_escape_text(self.data or ""), preserve_whitespace=False)
+        return builder.finish()
     @property
-    def children(self):
+    def children(self) -> list[Any]:
         """Return empty list for TextNode (leaf node)."""
         return []
-    def has_child_nodes(self):
+    def has_child_nodes(self) -> bool:
         """Return False for TextNode."""
         return False
-    def clone_node(self, deep=False):
-        return TextNode(self.data)
+    def clone_node(self, deep: bool = False) -> TextNode:
+        clone = TextNode(self.data)
+        clone._origin_pos = self._origin_pos
+        clone._origin_line = self._origin_line
+        clone._origin_col = self._origin_col
+        return clone
+_MARKDOWN_BLOCK_ELEMENTS: frozenset[str] = frozenset(
+    {
+        "p",
+        "div",
+        "section",
+        "article",
+        "header",
+        "footer",
+        "main",
+        "nav",
+        "aside",
+        "blockquote",
+        "pre",
+        "ul",
+        "ol",
+        "li",
+        "hr",
+        "h1",
+        "h2",
+        "h3",
+        "h4",
+        "h5",
+        "h6",
+        "table",
+    }
+)
+def _to_markdown_walk(
+    node: Any,
+    builder: _MarkdownBuilder,
+    preserve_whitespace: bool,
+    list_depth: int,
+    in_link: bool = False,
+) -> None:
+    name: str = node.name
+    if name == "#text":
+        if preserve_whitespace:
+            builder.raw(node.data or "")
+        else:
+            builder.text(_markdown_escape_text(node.data or ""), preserve_whitespace=False)
+        return
+    if name == "br":
+        if in_link:
+            builder.text(" ", preserve_whitespace=False)
+        else:
+            builder.newline(1)
+        return
+    # Comments/doctype don't contribute.
+    if name == "#comment" or name == "!doctype":
+        return
+    # Document containers contribute via descendants.
+    if name.startswith("#"):
+        if node.children:
+            for child in node.children:
+                _to_markdown_walk(
+                    child,
+                    builder,
+                    preserve_whitespace,
+                    list_depth,
+                    in_link=in_link,
+                )
+        return
+    tag = name.lower()
+    # Metadata containers don't contribute to body text.
+    if tag == "head" or tag == "title":
+        return
+    # Preserve <img> and <table> as HTML.
+    if tag == "img":
+        builder.raw(node.to_html(indent=0, indent_size=2, pretty=False))
+        return
+    if tag == "table":
+        if not in_link:
+            builder.ensure_newlines(2 if builder._buf else 0)
+        builder.raw(node.to_html(indent=0, indent_size=2, pretty=False))
+        if not in_link:
+            builder.ensure_newlines(2)
+        return
+    # Headings.
+    if tag in {"h1", "h2", "h3", "h4", "h5", "h6"}:
+        if not in_link:
+            builder.ensure_newlines(2 if builder._buf else 0)
+            level = int(tag[1])
+            builder.raw("#" * level)
+            builder.raw(" ")
+        if node.children:
+            for child in node.children:
+                _to_markdown_walk(
+                    child,
+                    builder,
+                    preserve_whitespace=False,
+                    list_depth=list_depth,
+                    in_link=in_link,
+                )
+        if not in_link:
+            builder.ensure_newlines(2)
+        return
+    # Horizontal rule.
+    if tag == "hr":
+        if not in_link:
+            builder.ensure_newlines(2 if builder._buf else 0)
+            builder.raw("---")
+            builder.ensure_newlines(2)
+        return
+    # Code blocks.
+    if tag == "pre":
+        if not in_link:
+            builder.ensure_newlines(2 if builder._buf else 0)
+            code = node.to_text(separator="", strip=False)
+            builder.raw("```")
+            builder.newline(1)
+            if code:
+                builder.raw(code.rstrip("\n"))
+                builder.newline(1)
+            builder.raw("```")
+            builder.ensure_newlines(2)
+        else:
+            # Inside link, render as inline code or text
+            code = node.to_text(separator="", strip=False)
+            builder.raw(_markdown_code_span(code))
+        return
+    # Inline code.
+    if tag == "code" and not preserve_whitespace:
+        code = node.to_text(separator="", strip=False)
+        builder.raw(_markdown_code_span(code))
+        return
+    # Paragraph-like blocks.
+    if tag == "p":
+        if not in_link:
+            builder.ensure_newlines(2 if builder._buf else 0)
+        if node.children:
+            for child in node.children:
+                _to_markdown_walk(
+                    child,
+                    builder,
+                    preserve_whitespace=False,
+                    list_depth=list_depth,
+                    in_link=in_link,
+                )
+        if not in_link:
+            builder.ensure_newlines(2)
+        else:
+            builder.text(" ", preserve_whitespace=False)
+        return
+    # Blockquotes.
+    if tag == "blockquote":
+        if not in_link:
+            builder.ensure_newlines(2 if builder._buf else 0)
+            inner = _MarkdownBuilder()
+            if node.children:
+                for child in node.children:
+                    _to_markdown_walk(
+                        child,
+                        inner,
+                        preserve_whitespace=False,
+                        list_depth=list_depth,
+                        in_link=in_link,
+                    )
+            text = inner.finish()
+            if text:
+                lines = text.split("\n")
+                for i, line in enumerate(lines):
+                    if i:
+                        builder.newline(1)
+                    builder.raw("> ")
+                    builder.raw(line)
+            builder.ensure_newlines(2)
+        else:
+            if node.children:
+                for child in node.children:
+                    _to_markdown_walk(
+                        child,
+                        builder,
+                        preserve_whitespace=False,
+                        list_depth=list_depth,
+                        in_link=in_link,
+                    )
+        return
+    # Lists.
+    if tag in {"ul", "ol"}:
+        if not in_link:
+            builder.ensure_newlines(2 if builder._buf else 0)
+            ordered = tag == "ol"
+            idx = 1
+            for child in node.children or []:
+                if child.name.lower() != "li":
+                    continue
+                if idx > 1:
+                    builder.newline(1)
+                indent = "  " * list_depth
+                marker = f"{idx}. " if ordered else "- "
+                builder.raw(indent)
+                builder.raw(marker)
+                # Render list item content inline-ish.
+                for li_child in child.children or []:
+                    _to_markdown_walk(
+                        li_child,
+                        builder,
+                        preserve_whitespace=False,
+                        list_depth=list_depth + 1,
+                        in_link=in_link,
+                    )
+                idx += 1
+            builder.ensure_newlines(2)
+        else:
+            # Flatten list inside link
+            for child in node.children or []:
+                if child.name.lower() != "li":
+                    continue
+                builder.raw(" ")
+                for li_child in child.children or []:
+                    _to_markdown_walk(
+                        li_child,
+                        builder,
+                        preserve_whitespace=False,
+                        list_depth=list_depth + 1,
+                        in_link=in_link,
+                    )
+        return
+    # Emphasis/strong.
+    if tag in {"em", "i"}:
+        builder.raw("*")
+        for child in node.children or []:
+            _to_markdown_walk(
+                child,
+                builder,
+                preserve_whitespace=False,
+                list_depth=list_depth,
+                in_link=in_link,
+            )
+        builder.raw("*")
+        return
+    if tag in {"strong", "b"}:
+        builder.raw("**")
+        for child in node.children or []:
+            _to_markdown_walk(
+                child,
+                builder,
+                preserve_whitespace=False,
+                list_depth=list_depth,
+                in_link=in_link,
+            )
+        builder.raw("**")
+        return
+    # Links.
+    if tag == "a":
+        href = ""
+        if node.attrs and "href" in node.attrs and node.attrs["href"] is not None:
+            href = str(node.attrs["href"])
+        # Capture inner text to strip whitespace.
+        inner_builder = _MarkdownBuilder()
+        for child in node.children or []:
+            _to_markdown_walk(
+                child,
+                inner_builder,
+                preserve_whitespace=False,
+                list_depth=list_depth,
+                in_link=True,
+            )
+        link_text = inner_builder.finish()
+        builder.raw("[")
+        builder.raw(link_text)
+        builder.raw("]")
+        if href:
+            builder.raw("(")
+            builder.raw(_markdown_link_destination(href))
+            builder.raw(")")
+        return
+    # Containers / unknown tags: recurse into children.
+    next_preserve = preserve_whitespace or (tag in {"textarea", "script", "style"})
+    if node.children:
+        for child in node.children:
+            _to_markdown_walk(
+                child,
+                builder,
+                next_preserve,
+                list_depth,
+                in_link=in_link,
+            )
+    if isinstance(node, ElementNode) and node.template_content:
+        _to_markdown_walk(
+            node.template_content,
+            builder,
+            next_preserve,
+            list_depth,
+            in_link=in_link,
+        )
+    # Add spacing after block containers to keep output readable.
+    if tag in _MARKDOWN_BLOCK_ELEMENTS:
+        if not in_link:
+            builder.ensure_newlines(2)
+        else:
+            builder.text(" ", preserve_whitespace=False)

justhtml 0.6.0__py3-none-any.whl → 0.33.0__py3-none-any.whl

justhtml 0.6.0py3-none-any.whl → 0.33.0py3-none-any.whl