PyPI - devrev-Python-SDK - Versions diffs - 2.12.2__py3-none-any.whl → 2.13.0__py3-none-any.whl - Mend

devrev-Python-SDK 2.12.2__py3-none-any.whl → 2.13.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8)

devrev/services/articles.py CHANGED Viewed

@@ -35,15 +35,16 @@ from devrev.models.artifacts import (
 )
 from devrev.models.base import SetTagWithValue
 from devrev.services.base import AsyncBaseService, BaseService
-from devrev.utils.content_converter import html_to_devrev_rt
-# Content format to file extension mapping
-_CONTENT_FORMAT_EXTENSIONS: dict[str, str] = {
-    "text/html": ".html",
-    "text/markdown": ".md",
-    "text/plain": ".txt",
-    "devrev/rt": "",  # DevRev rich text uses no extension (file name "Article")
-}
+from devrev.utils.content_converter import (
+    CONTENT_FORMAT_DEVREV_RT,
+    CONTENT_FORMAT_HTML,
+    CONTENT_FORMAT_MARKDOWN,
+    OutputFormat,
+    detect_content_format,
+    devrev_rt_to_html,
+    devrev_rt_to_markdown,
+    html_to_devrev_rt,
+)
 def _extract_content_artifact_id(resource: dict[str, object]) -> str | None:
@@ -101,6 +102,69 @@ def _extract_content_format(resource: dict[str, object]) -> str:
     return "text/plain"
+def _convert_content(
+    content: str,
+    source_format: str,
+    target_format: str,
+) -> tuple[str, str]:
+    """Convert article content between formats.
+    Args:
+        content: The raw content string.
+        source_format: The MIME type of *content* (e.g. ``"devrev/rt"``).
+        target_format: The desired output MIME type.
+    Returns:
+        A ``(converted_content, actual_format)`` tuple.  If no conversion
+        is necessary (source == target, or conversion is not possible)
+        the original content and format are returned.
+    Raises:
+        ValueError: If *target_format* is not a recognised format.
+    """
+    _VALID_FORMATS = {
+        CONTENT_FORMAT_MARKDOWN,
+        CONTENT_FORMAT_HTML,
+        CONTENT_FORMAT_DEVREV_RT,
+    }
+    if target_format not in _VALID_FORMATS:
+        raise ValueError(
+            f"Invalid output_format {target_format!r}. Accepted values: {sorted(_VALID_FORMATS)}"
+        )
+    if source_format == target_format:
+        return content, source_format
+    # Auto-detect source format when unknown / generic
+    if source_format in ("text/plain", ""):
+        source_format = detect_content_format(content)
+    if target_format == CONTENT_FORMAT_MARKDOWN:
+        if source_format == CONTENT_FORMAT_DEVREV_RT:
+            return devrev_rt_to_markdown(content), CONTENT_FORMAT_MARKDOWN
+        # HTML or unknown → convert to devrev/rt first, then to markdown
+        if source_format == CONTENT_FORMAT_HTML:
+            rt = html_to_devrev_rt(content)
+            return devrev_rt_to_markdown(rt), CONTENT_FORMAT_MARKDOWN
+        # Already markdown or plain text
+        return content, source_format
+    if target_format == CONTENT_FORMAT_HTML:
+        if source_format == CONTENT_FORMAT_DEVREV_RT:
+            return devrev_rt_to_html(content), CONTENT_FORMAT_HTML
+        if source_format == CONTENT_FORMAT_MARKDOWN:
+            rt = html_to_devrev_rt(content)
+            return devrev_rt_to_html(rt), CONTENT_FORMAT_HTML
+        return content, source_format
+    if target_format == CONTENT_FORMAT_DEVREV_RT:
+        if source_format != CONTENT_FORMAT_DEVREV_RT:
+            return html_to_devrev_rt(content), CONTENT_FORMAT_DEVREV_RT
+        return content, source_format
+    # Unknown target format – return unchanged
+    return content, source_format
 class ArticlesService(BaseService):
     """Service for managing DevRev Articles."""
@@ -313,17 +377,27 @@ class ArticlesService(BaseService):
             # Re-raise the original error
             raise DevRevError(f"Failed to create article with content: {e}") from e
-    def get_with_content(self, id: str) -> ArticleWithContent:
+    def get_with_content(
+        self,
+        id: str,
+        *,
+        output_format: OutputFormat | None = None,
+    ) -> ArticleWithContent:
         """Get an article with its content loaded.
         This is a high-level method that:
         1. Fetches article metadata
         2. Locates the content artifact
         3. Downloads artifact content
-        4. Returns combined model
+        4. Optionally converts to the requested output format
+        5. Returns combined model
         Args:
             id: Article ID
+            output_format: Desired output format for the content.  Accepted
+                values: ``"text/markdown"``, ``"text/html"``, ``"devrev/rt"``.
+                When ``None`` (the default) the raw stored content is returned
+                as-is.
         Returns:
             ArticleWithContent with metadata and content
@@ -336,6 +410,9 @@ class ArticlesService(BaseService):
             >>> article_with_content = client.articles.get_with_content("ART-123")
             >>> print(article_with_content.article.title)
             >>> print(article_with_content.content)
+            >>> # Get content as Markdown
+            >>> md = client.articles.get_with_content("ART-123", output_format="text/markdown")
+            >>> print(md.content)
         """
         if not self._parent_client:
             raise DevRevError(
@@ -364,12 +441,18 @@ class ArticlesService(BaseService):
             # Get content format from resource metadata (more reliable than artifact.get)
             content_format = _extract_content_format(article.resource)
+            # Step 4: Convert to requested output format if specified
+            if output_format is not None:
+                content, content_format = _convert_content(content, content_format, output_format)
             return ArticleWithContent(
                 article=article,
                 content=content,
                 content_format=content_format,
                 content_version=None,
             )
+        except DevRevError:
+            raise
         except Exception as e:
             raise DevRevError(f"Failed to download content for article {id}: {e}") from e
@@ -772,17 +855,27 @@ class AsyncArticlesService(AsyncBaseService):
             # Re-raise the original error
             raise DevRevError(f"Failed to create article with content: {e}") from e
-    async def get_with_content(self, id: str) -> ArticleWithContent:
+    async def get_with_content(
+        self,
+        id: str,
+        *,
+        output_format: OutputFormat | None = None,
+    ) -> ArticleWithContent:
         """Get an article with its content loaded (async).
         This is a high-level method that:
         1. Fetches article metadata
         2. Locates the content artifact
         3. Downloads artifact content
-        4. Returns combined model
+        4. Optionally converts to the requested output format
+        5. Returns combined model
         Args:
             id: Article ID
+            output_format: Desired output format for the content.  Accepted
+                values: ``"text/markdown"``, ``"text/html"``, ``"devrev/rt"``.
+                When ``None`` (the default) the raw stored content is returned
+                as-is.
         Returns:
             ArticleWithContent with metadata and content
@@ -818,12 +911,18 @@ class AsyncArticlesService(AsyncBaseService):
             # Get content format from resource metadata (more reliable than artifact.get)
             content_format = _extract_content_format(article.resource)
+            # Step 4: Convert to requested output format if specified
+            if output_format is not None:
+                content, content_format = _convert_content(content, content_format, output_format)
             return ArticleWithContent(
                 article=article,
                 content=content,
                 content_format=content_format,
                 content_version=None,
             )
+        except DevRevError:
+            raise
         except Exception as e:
             raise DevRevError(f"Failed to download content for article {id}: {e}") from e

devrev/utils/__init__.py CHANGED Viewed

@@ -3,14 +3,32 @@
 This module contains utility functions and classes used throughout the SDK.
 """
-from devrev.utils.content_converter import html_to_devrev_rt
+from devrev.utils.content_converter import (
+    CONTENT_FORMAT_DEVREV_RT,
+    CONTENT_FORMAT_HTML,
+    CONTENT_FORMAT_MARKDOWN,
+    CONTENT_FORMAT_PLAIN,
+    OutputFormat,
+    detect_content_format,
+    devrev_rt_to_html,
+    devrev_rt_to_markdown,
+    html_to_devrev_rt,
+)
 from devrev.utils.deprecation import deprecated
 from devrev.utils.logging import ColoredFormatter, configure_logging, get_logger
 __all__ = [
+    "CONTENT_FORMAT_DEVREV_RT",
+    "CONTENT_FORMAT_HTML",
+    "CONTENT_FORMAT_MARKDOWN",
+    "CONTENT_FORMAT_PLAIN",
+    "OutputFormat",
     "ColoredFormatter",
     "configure_logging",
     "deprecated",
+    "detect_content_format",
+    "devrev_rt_to_html",
+    "devrev_rt_to_markdown",
     "get_logger",
     "html_to_devrev_rt",
 ]

devrev/utils/content_converter.py CHANGED Viewed

@@ -1,27 +1,35 @@
-"""HTML / Markdown to DevRev Rich Text (ProseMirror JSON) converter.
+"""Content format detection and conversion for DevRev articles.
-Converts HTML or Markdown content to the ``devrev/rt`` format used by
-DevRev's UI for inline article rendering.  Without this conversion,
-content appears as an attachment rather than rendered inline.
+Converts between HTML, Markdown, plain text, and the ``devrev/rt``
+(ProseMirror JSON) format used by DevRev's UI for inline article rendering.
 The ``devrev/rt`` format is a ProseMirror / Tiptap JSON document
 structure wrapped in an ``{"article": ..., "artifactIds": []}`` envelope.
-Supported input formats
------------------------
+Supported formats
+-----------------
 * **HTML** – parsed with *BeautifulSoup 4* for robust DOM walking.
 * **Markdown** – first converted to HTML via the *markdown* library
-  (with ``tables``, ``fenced_code``, and ``codehilite`` extensions),
+  (with ``tables``, ``fenced_code``, and ``md_in_html`` extensions),
   then parsed identically.
 * **Plain text** – wrapped in a single ``<p>`` before conversion.
-* **Existing devrev/rt JSON** – detected and returned unchanged.
+* **devrev/rt JSON** – ProseMirror document envelope; detected and
+  returned unchanged when converting *to* devrev/rt.
+Public API
+----------
+* :func:`detect_content_format` – detect the format of a content string.
+* :func:`html_to_devrev_rt` – convert any supported format → devrev/rt.
+* :func:`devrev_rt_to_markdown` – convert devrev/rt → Markdown.
+* :func:`devrev_rt_to_html` – convert devrev/rt → HTML.
 """
 from __future__ import annotations
+import html as html_module
 import json
 import re
-from typing import Any
+from typing import Any, Literal
 from bs4 import BeautifulSoup, NavigableString, Tag  # type: ignore[attr-defined]
 from markdown import markdown as md_to_html  # type: ignore[import-untyped]
@@ -284,7 +292,15 @@ def _ensure_block_children(nodes: list[dict[str, Any]]) -> list[dict[str, Any]]:
 def _is_markdown(content: str) -> bool:
-    """Heuristic check: does *content* look like Markdown rather than HTML?"""
+    """Heuristic check: does *content* look like Markdown rather than HTML?
+    **Trade-offs:**  Content that starts with an HTML tag (e.g. ``<div>``,
+    ``<p>``) is classified as HTML even if it also contains Markdown syntax
+    inside the tags.  This means ``<p>**bold**</p>`` will be detected as
+    HTML, not Markdown.  This is intentional: mixed HTML-with-Markdown is
+    better handled by the HTML parser path, which preserves the outer
+    structure.  Pure Markdown documents rarely start with a raw HTML tag.
+    """
     # If it starts with an HTML tag it's almost certainly HTML.
     stripped = content.strip()
     if stripped.startswith("<") and not stripped.startswith("<!"):
@@ -356,3 +372,460 @@ def html_to_devrev_rt(content: str) -> str:
     doc: dict[str, Any] = {"type": "doc", "content": nodes}
     envelope: dict[str, Any] = {"article": doc, "artifactIds": []}
     return json.dumps(envelope)
+# ---------------------------------------------------------------------------
+# Content format detection
+# ---------------------------------------------------------------------------
+#: Canonical format identifiers returned by :func:`detect_content_format`.
+CONTENT_FORMAT_DEVREV_RT = "devrev/rt"
+CONTENT_FORMAT_MARKDOWN = "text/markdown"
+CONTENT_FORMAT_HTML = "text/html"
+CONTENT_FORMAT_PLAIN = "text/plain"
+#: Type alias for the output formats accepted by conversion functions.
+OutputFormat = Literal["text/markdown", "text/html", "devrev/rt"]
+def detect_content_format(content: str) -> str:
+    """Detect the format of an article content string.
+    The detection logic is:
+    1. If *content* is valid JSON with an ``"article"`` key → ``"devrev/rt"``
+    2. If *content* matches common Markdown patterns → ``"text/markdown"``
+    3. If *content* contains HTML tags → ``"text/html"``
+    4. Otherwise → ``"text/plain"``
+    Args:
+        content: The raw content string to inspect.
+    Returns:
+        One of ``"devrev/rt"``, ``"text/markdown"``, ``"text/html"``,
+        or ``"text/plain"``.
+    Example:
+        >>> detect_content_format("# Hello\\n\\nWorld")
+        'text/markdown'
+        >>> detect_content_format("<p>Hello</p>")
+        'text/html'
+        >>> detect_content_format('{"article": {"type": "doc"}}')
+        'devrev/rt'
+        >>> detect_content_format("Just plain text")
+        'text/plain'
+    """
+    stripped = content.strip()
+    # 1. devrev/rt JSON envelope
+    if stripped.startswith("{"):
+        try:
+            parsed = json.loads(stripped)
+            if "article" in parsed:
+                return CONTENT_FORMAT_DEVREV_RT
+        except (json.JSONDecodeError, KeyError):
+            pass
+    # 2. Markdown heuristics
+    if _is_markdown(content):
+        return CONTENT_FORMAT_MARKDOWN
+    # 3. HTML (contains tags)
+    if re.search(r"<[a-zA-Z][^>]*>", stripped):
+        return CONTENT_FORMAT_HTML
+    # 4. Fallback
+    return CONTENT_FORMAT_PLAIN
+# ---------------------------------------------------------------------------
+# devrev/rt → Markdown converter
+# ---------------------------------------------------------------------------
+# Mapping of ProseMirror heading levels to ATX prefix
+_HEADING_PREFIX: dict[int, str] = {1: "#", 2: "##", 3: "###", 4: "####", 5: "#####", 6: "######"}
+def _pm_nodes_to_markdown(nodes: list[dict[str, Any]], *, indent: str = "") -> str:
+    """Recursively convert a list of ProseMirror nodes to Markdown."""
+    parts: list[str] = []
+    for node in nodes:
+        ntype = node.get("type", "")
+        content: list[dict[str, Any]] = node.get("content", [])
+        attrs: dict[str, Any] = node.get("attrs") or {}
+        if ntype == "paragraph":
+            parts.append(indent + _pm_inline_to_markdown(content))
+            parts.append("")
+        elif ntype == "heading":
+            level = attrs.get("level", 1)
+            prefix = _HEADING_PREFIX.get(level, "#")
+            parts.append(f"{prefix} {_pm_inline_to_markdown(content)}")
+            parts.append("")
+        elif ntype == "codeBlock":
+            lang = attrs.get("language") or ""
+            code_text = _pm_inline_to_markdown(content)
+            parts.append(f"```{lang}")
+            parts.append(code_text)
+            parts.append("```")
+            parts.append("")
+        elif ntype == "blockquote":
+            inner = _pm_nodes_to_markdown(content, indent="> ")
+            # Prefix blank lines with "> " to preserve multi-paragraph blockquotes
+            fixed_lines: list[str] = []
+            for line in inner.split("\n"):
+                if line == "":
+                    fixed_lines.append(">")
+                else:
+                    fixed_lines.append(line)
+            parts.append("\n".join(fixed_lines))
+        elif ntype == "bulletList":
+            for item in content:
+                if item.get("type") == "listItem":
+                    item_md = _pm_nodes_to_markdown(item.get("content", []))
+                    lines = item_md.strip().split("\n")
+                    if lines:
+                        parts.append(f"- {lines[0]}")
+                        for line in lines[1:]:
+                            parts.append(f"  {line}" if line else "")
+            parts.append("")
+        elif ntype == "orderedList":
+            start = (node.get("attrs") or {}).get("start", 1) or 1
+            for idx, item in enumerate(content):
+                if item.get("type") == "listItem":
+                    item_md = _pm_nodes_to_markdown(item.get("content", []))
+                    lines = item_md.strip().split("\n")
+                    if lines:
+                        parts.append(f"{start + idx}. {lines[0]}")
+                        for line in lines[1:]:
+                            parts.append(f"   {line}" if line else "")
+            parts.append("")
+        elif ntype == "horizontalRule":
+            parts.append("---")
+            parts.append("")
+        elif ntype == "table":
+            parts.append(_pm_table_to_markdown(content))
+            parts.append("")
+        elif ntype == "image":
+            src = attrs.get("src", "")
+            alt = attrs.get("alt", "")
+            parts.append(f"![{alt}]({src})")
+            parts.append("")
+        elif ntype == "text":
+            # Top-level text shouldn't happen but handle gracefully
+            parts.append(_pm_text_node_to_markdown(node))
+        else:
+            # Unknown node – recurse into children
+            if content:
+                parts.append(_pm_nodes_to_markdown(content, indent=indent))
+    return "\n".join(parts)
+def _pm_inline_to_markdown(nodes: list[dict[str, Any]]) -> str:
+    """Convert a list of ProseMirror inline nodes to a single Markdown line."""
+    parts: list[str] = []
+    for node in nodes:
+        ntype = node.get("type", "")
+        if ntype == "text":
+            parts.append(_pm_text_node_to_markdown(node))
+        elif ntype == "hardBreak":
+            parts.append("  \n")
+        elif ntype == "image":
+            attrs = node.get("attrs") or {}
+            src = attrs.get("src", "")
+            alt = attrs.get("alt", "")
+            parts.append(f"![{alt}]({src})")
+        else:
+            # Recurse for unknown inline types
+            content = node.get("content", [])
+            if content:
+                parts.append(_pm_inline_to_markdown(content))
+    return "".join(parts)
+def _pm_text_node_to_markdown(node: dict[str, Any]) -> str:
+    """Convert a ProseMirror text node (with optional marks) to Markdown."""
+    text: str = str(node.get("text", ""))
+    marks: list[dict[str, Any]] = node.get("marks", [])
+    for mark in marks:
+        mtype = mark.get("type", "")
+        if mtype == "bold":
+            text = f"**{text}**"
+        elif mtype == "italic":
+            text = f"*{text}*"
+        elif mtype == "code":
+            text = f"`{text}`"
+        elif mtype == "strike":
+            text = f"~~{text}~~"
+        elif mtype == "link":
+            href = (mark.get("attrs") or {}).get("href", "")
+            text = f"[{text}]({href})"
+        # underline, subscript, superscript have no standard Markdown equiv
+        # – leave text unchanged for those
+    return text
+def _pm_table_to_markdown(rows: list[dict[str, Any]]) -> str:
+    """Convert ProseMirror table rows to a Markdown table."""
+    md_rows: list[list[str]] = []
+    has_header = False
+    for pm_row in rows:
+        if pm_row.get("type") != "tableRow":
+            continue
+        cells: list[str] = []
+        for cell in pm_row.get("content", []):
+            ctype = cell.get("type", "")
+            if ctype in ("tableHeader", "tableCell"):
+                if ctype == "tableHeader":
+                    has_header = True
+                cell_content = cell.get("content", [])
+                cell_text = _pm_nodes_to_markdown(cell_content).strip()
+                # Collapse newlines inside a cell for table rendering
+                cell_text = cell_text.replace("\n", " ")
+                cells.append(cell_text)
+        md_rows.append(cells)
+    if not md_rows:
+        return ""
+    # Determine column count
+    col_count = max(len(r) for r in md_rows) if md_rows else 0
+    # Pad rows to equal length
+    for md_row in md_rows:
+        while len(md_row) < col_count:
+            md_row.append("")
+    lines: list[str] = []
+    for i, md_row in enumerate(md_rows):
+        lines.append("| " + " | ".join(md_row) + " |")
+        if i == 0 and has_header:
+            lines.append("| " + " | ".join("---" for _ in md_row) + " |")
+    # If no explicit header, add separator after first row anyway
+    if not has_header and md_rows:
+        lines.insert(1, "| " + " | ".join("---" for _ in md_rows[0]) + " |")
+    return "\n".join(lines)
+def devrev_rt_to_markdown(content: str) -> str:
+    """Convert DevRev Rich Text (ProseMirror JSON) to Markdown.
+    Accepts either the full ``{"article": ..., "artifactIds": [...]}``
+    envelope or just the inner ``{"type": "doc", "content": [...]}``
+    document node.
+    If *content* is not valid devrev/rt JSON, it is returned unchanged
+    (it might already be Markdown or plain text).
+    Args:
+        content: JSON string in devrev/rt format, or arbitrary text.
+    Returns:
+        Markdown string.
+    Example:
+        >>> rt = '{"article": {"type": "doc", "content": [{"type": "heading", "attrs": {"level": 1}, "content": [{"type": "text", "text": "Hello"}]}]}}'
+        >>> devrev_rt_to_markdown(rt)
+        '# Hello\\n'
+    """
+    stripped = content.strip()
+    if not stripped.startswith("{"):
+        return content
+    try:
+        parsed = json.loads(stripped)
+    except json.JSONDecodeError:
+        return content
+    # Unwrap envelope
+    doc = parsed.get("article", parsed)
+    if not isinstance(doc, dict) or doc.get("type") != "doc":
+        return content
+    nodes = doc.get("content", [])
+    md = _pm_nodes_to_markdown(nodes)
+    # Clean up excessive blank lines
+    md = re.sub(r"\n{3,}", "\n\n", md)
+    return md.strip() + "\n" if md.strip() else ""
+# ---------------------------------------------------------------------------
+# devrev/rt → HTML converter
+# ---------------------------------------------------------------------------
+def _pm_nodes_to_html(nodes: list[dict[str, Any]]) -> str:
+    """Recursively convert ProseMirror nodes to HTML."""
+    parts: list[str] = []
+    for node in nodes:
+        ntype = node.get("type", "")
+        content: list[dict[str, Any]] = node.get("content", [])
+        attrs: dict[str, Any] = node.get("attrs") or {}
+        if ntype == "paragraph":
+            parts.append(f"<p>{_pm_inline_to_html(content)}</p>")
+        elif ntype == "heading":
+            level = attrs.get("level", 1)
+            parts.append(f"<h{level}>{_pm_inline_to_html(content)}</h{level}>")
+        elif ntype == "codeBlock":
+            lang = attrs.get("language") or ""
+            code_text = _pm_inline_to_html(content)
+            if lang:
+                parts.append(f'<pre><code class="language-{lang}">{code_text}</code></pre>')
+            else:
+                parts.append(f"<pre><code>{code_text}</code></pre>")
+        elif ntype == "blockquote":
+            inner = _pm_nodes_to_html(content)
+            parts.append(f"<blockquote>{inner}</blockquote>")
+        elif ntype == "bulletList":
+            items = _pm_nodes_to_html(content)
+            parts.append(f"<ul>{items}</ul>")
+        elif ntype == "orderedList":
+            start = attrs.get("start", 1)
+            start_attr = f' start="{start}"' if start and start != 1 else ""
+            items = _pm_nodes_to_html(content)
+            parts.append(f"<ol{start_attr}>{items}</ol>")
+        elif ntype == "listItem":
+            inner = _pm_nodes_to_html(content)
+            parts.append(f"<li>{inner}</li>")
+        elif ntype == "horizontalRule":
+            parts.append("<hr>")
+        elif ntype == "table":
+            inner = _pm_nodes_to_html(content)
+            parts.append(f"<table>{inner}</table>")
+        elif ntype == "tableRow":
+            inner = _pm_nodes_to_html(content)
+            parts.append(f"<tr>{inner}</tr>")
+        elif ntype in ("tableCell", "tableHeader"):
+            tag = "th" if ntype == "tableHeader" else "td"
+            inner = _pm_nodes_to_html(content)
+            parts.append(f"<{tag}>{inner}</{tag}>")
+        elif ntype == "image":
+            src = html_module.escape(attrs.get("src", ""), quote=True)
+            alt = html_module.escape(attrs.get("alt", ""), quote=True)
+            parts.append(f'<img src="{src}" alt="{alt}">')
+        elif ntype == "text":
+            parts.append(_pm_text_node_to_html(node))
+        else:
+            if content:
+                parts.append(_pm_nodes_to_html(content))
+    return "".join(parts)
+def _pm_inline_to_html(nodes: list[dict[str, Any]]) -> str:
+    """Convert ProseMirror inline nodes to an HTML fragment."""
+    parts: list[str] = []
+    for node in nodes:
+        ntype = node.get("type", "")
+        if ntype == "text":
+            parts.append(_pm_text_node_to_html(node))
+        elif ntype == "hardBreak":
+            parts.append("<br>")
+        elif ntype == "image":
+            attrs = node.get("attrs") or {}
+            src = html_module.escape(attrs.get("src", ""), quote=True)
+            alt = html_module.escape(attrs.get("alt", ""), quote=True)
+            parts.append(f'<img src="{src}" alt="{alt}">')
+        else:
+            content = node.get("content", [])
+            if content:
+                parts.append(_pm_inline_to_html(content))
+    return "".join(parts)
+def _pm_text_node_to_html(node: dict[str, Any]) -> str:
+    """Convert a ProseMirror text node (with marks) to HTML.
+    Text content and attribute values are escaped to prevent XSS and
+    malformed HTML output.
+    """
+    text: str = html_module.escape(str(node.get("text", "")))
+    marks: list[dict[str, Any]] = node.get("marks", [])
+    for mark in marks:
+        mtype = mark.get("type", "")
+        if mtype == "bold":
+            text = f"<strong>{text}</strong>"
+        elif mtype == "italic":
+            text = f"<em>{text}</em>"
+        elif mtype == "code":
+            text = f"<code>{text}</code>"
+        elif mtype == "strike":
+            text = f"<s>{text}</s>"
+        elif mtype == "underline":
+            text = f"<u>{text}</u>"
+        elif mtype == "link":
+            href = html_module.escape((mark.get("attrs") or {}).get("href", ""), quote=True)
+            target = html_module.escape(
+                (mark.get("attrs") or {}).get("target", "_blank"), quote=True
+            )
+            text = f'<a href="{href}" target="{target}" rel="noopener noreferrer">{text}</a>'
+        elif mtype == "subscript":
+            text = f"<sub>{text}</sub>"
+        elif mtype == "superscript":
+            text = f"<sup>{text}</sup>"
+    return text
+def devrev_rt_to_html(content: str) -> str:
+    """Convert DevRev Rich Text (ProseMirror JSON) to HTML.
+    Accepts either the full ``{"article": ..., "artifactIds": [...]}``
+    envelope or just the inner ``{"type": "doc", "content": [...]}``
+    document node.
+    If *content* is not valid devrev/rt JSON, it is returned unchanged.
+    Args:
+        content: JSON string in devrev/rt format, or arbitrary text.
+    Returns:
+        HTML string.
+    Example:
+        >>> rt = '{"article": {"type": "doc", "content": [{"type": "paragraph", "attrs": {}, "content": [{"type": "text", "text": "Hello"}]}]}}'
+        >>> devrev_rt_to_html(rt)
+        '<p>Hello</p>'
+    """
+    stripped = content.strip()
+    if not stripped.startswith("{"):
+        return content
+    try:
+        parsed = json.loads(stripped)
+    except json.JSONDecodeError:
+        return content
+    doc = parsed.get("article", parsed)
+    if not isinstance(doc, dict) or doc.get("type") != "doc":
+        return content
+    nodes = doc.get("content", [])
+    return _pm_nodes_to_html(nodes)

devrev_mcp/tools/articles.py CHANGED Viewed

@@ -20,6 +20,7 @@ from devrev.models.articles import (
     SetSharedWithMembership,
 )
 from devrev.models.base import SetTagWithValue
+from devrev.utils.content_converter import OutputFormat
 from devrev_mcp.server import _config, mcp
 from devrev_mcp.utils.errors import format_devrev_error
 from devrev_mcp.utils.formatting import serialize_model, serialize_models
@@ -65,7 +66,10 @@ async def devrev_articles_list(
 @mcp.tool()
 async def devrev_articles_get(
-    ctx: Context[Any, Any, Any], id: str, include_content: bool = False
+    ctx: Context[Any, Any, Any],
+    id: str,
+    include_content: bool = False,
+    output_format: OutputFormat | None = None,
 ) -> dict[str, Any]:
     """Get a specific article by ID.
@@ -73,6 +77,10 @@ async def devrev_articles_get(
         ctx: MCP context containing the DevRev client.
         id: The article ID.
         include_content: If True, fetch and include article body content.
+        output_format: When include_content is True, convert the content to
+            this format before returning.  Accepted values:
+            ``"text/markdown"``, ``"text/html"``, ``"devrev/rt"``.
+            If omitted the raw stored content is returned as-is.
     Returns:
         Dictionary containing the article details. When include_content=True,
@@ -84,7 +92,9 @@ async def devrev_articles_get(
     app = ctx.request_context.lifespan_context
     try:
         if include_content:
-            article_with_content = await app.get_client().articles.get_with_content(id)
+            article_with_content = await app.get_client().articles.get_with_content(
+                id, output_format=output_format
+            )
             return serialize_model(article_with_content)
         else:
             request = ArticlesGetRequest(id=id)

{devrev_python_sdk-2.12.2.dist-info → devrev_python_sdk-2.13.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: devrev-Python-SDK
-Version: 2.12.2
+Version: 2.13.0
 Summary: A modern, type-safe Python SDK for the DevRev API
 Project-URL: Homepage, https://github.com/mgmonteleone/py-dev-rev
 Project-URL: Documentation, https://github.com/mgmonteleone/py-dev-rev

{devrev_python_sdk-2.12.2.dist-info → devrev_python_sdk-2.13.0.dist-info}/RECORD RENAMED Viewed

@@ -36,7 +36,7 @@ devrev/models/widgets.py,sha256=7WWN17_ySqnu1pjYSIS5B8J5dEaqkoB8X8GWzvI0ZYc,4577
 devrev/models/works.py,sha256=0PypJjDA5T3d-mmuvo5tl6_EUoz22YREmTWdFHx5jCc,10924
 devrev/services/__init__.py,sha256=eH_sj5qTjuNhCayawlZKZRmhKhv2mSSnbi_Qh3DnMaI,3838
 devrev/services/accounts.py,sha256=X7FgcODex0XKLiV_VvXKDl2Jm8XsNpn9qp40oRjZqME,9704
-devrev/services/articles.py,sha256=cLuBxe4CJi_KCA8Eb105bcsksYmX9ltMb8LxT_wIzXw,37873
+devrev/services/articles.py,sha256=xOpJOG9f29a5W3IFROplL1a9eJdGucbUFkK-MAIIYlc,41637
 devrev/services/artifacts.py,sha256=SJzIi5M4np0ENoOTTGEAcoqoRFMVd8pe-BCo9vvhYzk,14124
 devrev/services/base.py,sha256=g55dfhd2-uAhgXuguiBacSP4yOAa03Q7hzKDG2vbZ8U,7100
 devrev/services/brands.py,sha256=W6FB9XXTtEzGOfm9IBI6dqY8WXrMeH6urwjiZKrDVgI,5679
@@ -61,8 +61,8 @@ devrev/services/track_events.py,sha256=lI4wXkWu3uUuXtuRg1MGNkTZ7B0Lc1PjM8Kw-6sUn
 devrev/services/uoms.py,sha256=AA3ymoHj24FIbsZpYC4tg2elSdQ3iINTVOz7MraZcj8,8163
 devrev/services/webhooks.py,sha256=-TSkcaya1y48WB24_vHd-bqO5xSqxRsCLiilncNzQZU,3917
 devrev/services/works.py,sha256=b-HWpPMLnda-1P6iB-_QzHu_MoB7z4g_OQGh26rqhtQ,11252
-devrev/utils/__init__.py,sha256=zN601yi01d6RlFoSgqGjos5CxhwCga0tm3stJEelFak,422
-devrev/utils/content_converter.py,sha256=sGEGHDaFV3hm2bV59QsPNHLQNn_PzwZSaN6pgbuVxEY,11366
+devrev/utils/__init__.py,sha256=NOrbpkjDVLH8n9xf-xpZJiIIa_GVI_6vqTm3E8L3Udw,857
+devrev/utils/content_converter.py,sha256=emRBLiVoOfDGpPDzrMRnqQr4-QkqN13OdWlYOyU_LCg,28141
 devrev/utils/deprecation.py,sha256=7qB2Dx531oP7mNi7q2txOYsOKC9YwdHqlKPMFHOW9Ws,1275
 devrev/utils/http.py,sha256=mvjZyPTKRO_M60EC_8b7LP2WD52a-e1sH9WFVFVbK54,34248
 devrev/utils/logging.py,sha256=0Kp0jW03_IX1wOhTyQk-BfRZkvOIU6H8hnDW9xjIdF4,8037
@@ -92,7 +92,7 @@ devrev_mcp/resources/ticket.py,sha256=D-S8Unsae8iV5dvPmCYQT4xcOEgX9E7Kcc8lg6Pxe8
 devrev_mcp/resources/user.py,sha256=0Paq2w_nbj_dCQ8R0S81zlgjUhDAUzvn1_NmshadqM8,1553
 devrev_mcp/tools/__init__.py,sha256=wiou4HHy6HeOQY0El3KYqy_S7c2IC4hjsYHjMm7aH-w,54
 devrev_mcp/tools/accounts.py,sha256=AzRkJkKaYLYRaGL71KsoMEHUPBJ4IXgUs121tMjae-k,5777
-devrev_mcp/tools/articles.py,sha256=l-vpG-dgpMU9bNzpk7kBEJDm9YtobtlotV081xTREFc,13384
+devrev_mcp/tools/articles.py,sha256=Mnr-O6ZCkTaGOXOEEP5sSFQhHezXZLJSguc7SjhTaeI,13823
 devrev_mcp/tools/conversations.py,sha256=koslrPsYf558NR_sw-FNrmBgjtqDZ23iHJ1nTgC4B0w,5615
 devrev_mcp/tools/engagements.py,sha256=0net7c_MogSJ9khOcI98c54env_5f6t6eKobovVAc8Q,8670
 devrev_mcp/tools/groups.py,sha256=HoqlSScIOeWE4Qr1UwIUrHLuJ5Hi5yrnnKTCyMxOBhQ,8289
@@ -112,7 +112,7 @@ devrev_mcp/utils/__init__.py,sha256=2_5b1KC5kjoUqFY1ZSdB2Tefd2ekjbZ-eHyFWBKI-0A,
 devrev_mcp/utils/errors.py,sha256=5mRAo76rJvvEVi6b1ZokPxDtX5JKkptaqmiYDLCkwBE,2110
 devrev_mcp/utils/formatting.py,sha256=6JssG5x1BxjdgSiQ8Ou3H-9Wo3wgWTWmejsrGez4wKc,2431
 devrev_mcp/utils/pagination.py,sha256=EOUgL-ZdSToM1Q-ydXmjhibsef5K1u1g3CaS9K8I2fY,1286
-devrev_python_sdk-2.12.2.dist-info/METADATA,sha256=LzYvwb7DvAFGOubOY8mJRE5Sgm4pz048BHj4cTCy24c,40907
-devrev_python_sdk-2.12.2.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
-devrev_python_sdk-2.12.2.dist-info/entry_points.txt,sha256=XiV4J_yy0yzVZVxg7T66YERVIlqdPNp3O-NHTHkllqQ,63
-devrev_python_sdk-2.12.2.dist-info/RECORD,,
+devrev_python_sdk-2.13.0.dist-info/METADATA,sha256=UQ-fDydUuXv4Vco7j80b6_n8_37vG3cCJZR_LjbtP88,40907
+devrev_python_sdk-2.13.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
+devrev_python_sdk-2.13.0.dist-info/entry_points.txt,sha256=XiV4J_yy0yzVZVxg7T66YERVIlqdPNp3O-NHTHkllqQ,63
+devrev_python_sdk-2.13.0.dist-info/RECORD,,

{devrev_python_sdk-2.12.2.dist-info → devrev_python_sdk-2.13.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{devrev_python_sdk-2.12.2.dist-info → devrev_python_sdk-2.13.0.dist-info}/entry_points.txt RENAMED Viewed

File without changes

devrev-Python-SDK 2.12.2__py3-none-any.whl → 2.13.0__py3-none-any.whl

devrev-Python-SDK 2.12.2__py3-none-any.whl → 2.13.0__py3-none-any.whl