devrev-Python-SDK 2.12.2__py3-none-any.whl → 2.13.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -35,15 +35,16 @@ from devrev.models.artifacts import (
35
35
  )
36
36
  from devrev.models.base import SetTagWithValue
37
37
  from devrev.services.base import AsyncBaseService, BaseService
38
- from devrev.utils.content_converter import html_to_devrev_rt
39
-
40
- # Content format to file extension mapping
41
- _CONTENT_FORMAT_EXTENSIONS: dict[str, str] = {
42
- "text/html": ".html",
43
- "text/markdown": ".md",
44
- "text/plain": ".txt",
45
- "devrev/rt": "", # DevRev rich text uses no extension (file name "Article")
46
- }
38
+ from devrev.utils.content_converter import (
39
+ CONTENT_FORMAT_DEVREV_RT,
40
+ CONTENT_FORMAT_HTML,
41
+ CONTENT_FORMAT_MARKDOWN,
42
+ OutputFormat,
43
+ detect_content_format,
44
+ devrev_rt_to_html,
45
+ devrev_rt_to_markdown,
46
+ html_to_devrev_rt,
47
+ )
47
48
 
48
49
 
49
50
  def _extract_content_artifact_id(resource: dict[str, object]) -> str | None:
@@ -101,6 +102,69 @@ def _extract_content_format(resource: dict[str, object]) -> str:
101
102
  return "text/plain"
102
103
 
103
104
 
105
def _convert_content(
    content: str,
    source_format: str,
    target_format: str,
) -> tuple[str, str]:
    """Translate article content from one MIME format into another.

    Args:
        content: The raw content string.
        source_format: MIME type currently describing *content*
            (e.g. ``"devrev/rt"``). ``"text/plain"`` and ``""`` are treated
            as "unknown" and trigger auto-detection.
        target_format: MIME type the caller wants back.

    Returns:
        A ``(converted_content, actual_format)`` pair. When nothing needs
        converting (formats already match) or no conversion path exists for
        the pair, the original content is returned together with the source
        format (after auto-detection, if that ran).

    Raises:
        ValueError: If *target_format* is not one of the accepted formats.
    """
    accepted = {
        CONTENT_FORMAT_MARKDOWN,
        CONTENT_FORMAT_HTML,
        CONTENT_FORMAT_DEVREV_RT,
    }
    if target_format not in accepted:
        raise ValueError(
            f"Invalid output_format {target_format!r}. Accepted values: {sorted(accepted)}"
        )

    if source_format == target_format:
        return content, source_format

    # A generic or missing source format tells us nothing: sniff the content.
    if source_format in ("text/plain", ""):
        source_format = detect_content_format(content)

    untouched = (content, source_format)

    if target_format == CONTENT_FORMAT_DEVREV_RT:
        if source_format == CONTENT_FORMAT_DEVREV_RT:
            return untouched
        return html_to_devrev_rt(content), CONTENT_FORMAT_DEVREV_RT

    # Remaining targets are Markdown and HTML. Both render from devrev/rt, and
    # each accepts exactly one other source format by going through devrev/rt.
    if target_format == CONTENT_FORMAT_MARKDOWN:
        render, bridged_source = devrev_rt_to_markdown, CONTENT_FORMAT_HTML
    else:
        render, bridged_source = devrev_rt_to_html, CONTENT_FORMAT_MARKDOWN

    if source_format == CONTENT_FORMAT_DEVREV_RT:
        return render(content), target_format
    if source_format == bridged_source:
        return render(html_to_devrev_rt(content)), target_format

    # Already in the target family, or plain text with no conversion path.
    return untouched
166
+
167
+
104
168
  class ArticlesService(BaseService):
105
169
  """Service for managing DevRev Articles."""
106
170
 
@@ -313,17 +377,27 @@ class ArticlesService(BaseService):
313
377
  # Re-raise the original error
314
378
  raise DevRevError(f"Failed to create article with content: {e}") from e
315
379
 
316
- def get_with_content(self, id: str) -> ArticleWithContent:
380
+ def get_with_content(
381
+ self,
382
+ id: str,
383
+ *,
384
+ output_format: OutputFormat | None = None,
385
+ ) -> ArticleWithContent:
317
386
  """Get an article with its content loaded.
318
387
 
319
388
  This is a high-level method that:
320
389
  1. Fetches article metadata
321
390
  2. Locates the content artifact
322
391
  3. Downloads artifact content
323
- 4. Returns combined model
392
+ 4. Optionally converts to the requested output format
393
+ 5. Returns combined model
324
394
 
325
395
  Args:
326
396
  id: Article ID
397
+ output_format: Desired output format for the content. Accepted
398
+ values: ``"text/markdown"``, ``"text/html"``, ``"devrev/rt"``.
399
+ When ``None`` (the default) the raw stored content is returned
400
+ as-is.
327
401
 
328
402
  Returns:
329
403
  ArticleWithContent with metadata and content
@@ -336,6 +410,9 @@ class ArticlesService(BaseService):
336
410
  >>> article_with_content = client.articles.get_with_content("ART-123")
337
411
  >>> print(article_with_content.article.title)
338
412
  >>> print(article_with_content.content)
413
+ >>> # Get content as Markdown
414
+ >>> md = client.articles.get_with_content("ART-123", output_format="text/markdown")
415
+ >>> print(md.content)
339
416
  """
340
417
  if not self._parent_client:
341
418
  raise DevRevError(
@@ -364,12 +441,18 @@ class ArticlesService(BaseService):
364
441
  # Get content format from resource metadata (more reliable than artifact.get)
365
442
  content_format = _extract_content_format(article.resource)
366
443
 
444
+ # Step 4: Convert to requested output format if specified
445
+ if output_format is not None:
446
+ content, content_format = _convert_content(content, content_format, output_format)
447
+
367
448
  return ArticleWithContent(
368
449
  article=article,
369
450
  content=content,
370
451
  content_format=content_format,
371
452
  content_version=None,
372
453
  )
454
+ except DevRevError:
455
+ raise
373
456
  except Exception as e:
374
457
  raise DevRevError(f"Failed to download content for article {id}: {e}") from e
375
458
 
@@ -772,17 +855,27 @@ class AsyncArticlesService(AsyncBaseService):
772
855
  # Re-raise the original error
773
856
  raise DevRevError(f"Failed to create article with content: {e}") from e
774
857
 
775
- async def get_with_content(self, id: str) -> ArticleWithContent:
858
+ async def get_with_content(
859
+ self,
860
+ id: str,
861
+ *,
862
+ output_format: OutputFormat | None = None,
863
+ ) -> ArticleWithContent:
776
864
  """Get an article with its content loaded (async).
777
865
 
778
866
  This is a high-level method that:
779
867
  1. Fetches article metadata
780
868
  2. Locates the content artifact
781
869
  3. Downloads artifact content
782
- 4. Returns combined model
870
+ 4. Optionally converts to the requested output format
871
+ 5. Returns combined model
783
872
 
784
873
  Args:
785
874
  id: Article ID
875
+ output_format: Desired output format for the content. Accepted
876
+ values: ``"text/markdown"``, ``"text/html"``, ``"devrev/rt"``.
877
+ When ``None`` (the default) the raw stored content is returned
878
+ as-is.
786
879
 
787
880
  Returns:
788
881
  ArticleWithContent with metadata and content
@@ -818,12 +911,18 @@ class AsyncArticlesService(AsyncBaseService):
818
911
  # Get content format from resource metadata (more reliable than artifact.get)
819
912
  content_format = _extract_content_format(article.resource)
820
913
 
914
+ # Step 4: Convert to requested output format if specified
915
+ if output_format is not None:
916
+ content, content_format = _convert_content(content, content_format, output_format)
917
+
821
918
  return ArticleWithContent(
822
919
  article=article,
823
920
  content=content,
824
921
  content_format=content_format,
825
922
  content_version=None,
826
923
  )
924
+ except DevRevError:
925
+ raise
827
926
  except Exception as e:
828
927
  raise DevRevError(f"Failed to download content for article {id}: {e}") from e
829
928
 
devrev/utils/__init__.py CHANGED
@@ -3,14 +3,32 @@
3
3
  This module contains utility functions and classes used throughout the SDK.
4
4
  """
5
5
 
6
- from devrev.utils.content_converter import html_to_devrev_rt
6
+ from devrev.utils.content_converter import (
7
+ CONTENT_FORMAT_DEVREV_RT,
8
+ CONTENT_FORMAT_HTML,
9
+ CONTENT_FORMAT_MARKDOWN,
10
+ CONTENT_FORMAT_PLAIN,
11
+ OutputFormat,
12
+ detect_content_format,
13
+ devrev_rt_to_html,
14
+ devrev_rt_to_markdown,
15
+ html_to_devrev_rt,
16
+ )
7
17
  from devrev.utils.deprecation import deprecated
8
18
  from devrev.utils.logging import ColoredFormatter, configure_logging, get_logger
9
19
 
10
20
  __all__ = [
21
+ "CONTENT_FORMAT_DEVREV_RT",
22
+ "CONTENT_FORMAT_HTML",
23
+ "CONTENT_FORMAT_MARKDOWN",
24
+ "CONTENT_FORMAT_PLAIN",
25
+ "OutputFormat",
11
26
  "ColoredFormatter",
12
27
  "configure_logging",
13
28
  "deprecated",
29
+ "detect_content_format",
30
+ "devrev_rt_to_html",
31
+ "devrev_rt_to_markdown",
14
32
  "get_logger",
15
33
  "html_to_devrev_rt",
16
34
  ]
@@ -1,27 +1,35 @@
1
- """HTML / Markdown to DevRev Rich Text (ProseMirror JSON) converter.
1
+ """Content format detection and conversion for DevRev articles.
2
2
 
3
- Converts HTML or Markdown content to the ``devrev/rt`` format used by
4
- DevRev's UI for inline article rendering. Without this conversion,
5
- content appears as an attachment rather than rendered inline.
3
+ Converts between HTML, Markdown, plain text, and the ``devrev/rt``
4
+ (ProseMirror JSON) format used by DevRev's UI for inline article rendering.
6
5
 
7
6
  The ``devrev/rt`` format is a ProseMirror / Tiptap JSON document
8
7
  structure wrapped in an ``{"article": ..., "artifactIds": []}`` envelope.
9
8
 
10
- Supported input formats
11
- -----------------------
9
+ Supported formats
10
+ -----------------
12
11
  * **HTML** – parsed with *BeautifulSoup 4* for robust DOM walking.
13
12
  * **Markdown** – first converted to HTML via the *markdown* library
14
- (with ``tables``, ``fenced_code``, and ``codehilite`` extensions),
13
+ (with ``tables``, ``fenced_code``, and ``md_in_html`` extensions),
15
14
  then parsed identically.
16
15
  * **Plain text** – wrapped in a single ``<p>`` before conversion.
17
- * **Existing devrev/rt JSON** – detected and returned unchanged.
16
+ * **devrev/rt JSON** – ProseMirror document envelope; detected and
17
+ returned unchanged when converting *to* devrev/rt.
18
+
19
+ Public API
20
+ ----------
21
+ * :func:`detect_content_format` – detect the format of a content string.
22
+ * :func:`html_to_devrev_rt` – convert any supported format → devrev/rt.
23
+ * :func:`devrev_rt_to_markdown` – convert devrev/rt → Markdown.
24
+ * :func:`devrev_rt_to_html` – convert devrev/rt → HTML.
18
25
  """
19
26
 
20
27
  from __future__ import annotations
21
28
 
29
+ import html as html_module
22
30
  import json
23
31
  import re
24
- from typing import Any
32
+ from typing import Any, Literal
25
33
 
26
34
  from bs4 import BeautifulSoup, NavigableString, Tag # type: ignore[attr-defined]
27
35
  from markdown import markdown as md_to_html # type: ignore[import-untyped]
@@ -284,7 +292,15 @@ def _ensure_block_children(nodes: list[dict[str, Any]]) -> list[dict[str, Any]]:
284
292
 
285
293
 
286
294
  def _is_markdown(content: str) -> bool:
287
- """Heuristic check: does *content* look like Markdown rather than HTML?"""
295
+ """Heuristic check: does *content* look like Markdown rather than HTML?
296
+
297
+ **Trade-offs:** Content that starts with an HTML tag (e.g. ``<div>``,
298
+ ``<p>``) is classified as HTML even if it also contains Markdown syntax
299
+ inside the tags. This means ``<p>**bold**</p>`` will be detected as
300
+ HTML, not Markdown. This is intentional: mixed HTML-with-Markdown is
301
+ better handled by the HTML parser path, which preserves the outer
302
+ structure. Pure Markdown documents rarely start with a raw HTML tag.
303
+ """
288
304
  # If it starts with an HTML tag it's almost certainly HTML.
289
305
  stripped = content.strip()
290
306
  if stripped.startswith("<") and not stripped.startswith("<!"):
@@ -356,3 +372,460 @@ def html_to_devrev_rt(content: str) -> str:
356
372
  doc: dict[str, Any] = {"type": "doc", "content": nodes}
357
373
  envelope: dict[str, Any] = {"article": doc, "artifactIds": []}
358
374
  return json.dumps(envelope)
375
+
376
+
377
+ # ---------------------------------------------------------------------------
378
+ # Content format detection
379
+ # ---------------------------------------------------------------------------
380
+
381
#: Canonical format identifiers returned by :func:`detect_content_format`.
CONTENT_FORMAT_DEVREV_RT = "devrev/rt"
CONTENT_FORMAT_MARKDOWN = "text/markdown"
CONTENT_FORMAT_HTML = "text/html"
CONTENT_FORMAT_PLAIN = "text/plain"

#: Type alias for the output formats accepted by conversion functions.
OutputFormat = Literal["text/markdown", "text/html", "devrev/rt"]


def detect_content_format(content: str) -> str:
    """Classify an article content string by format.

    Checks run in this order and the first match wins:

    1. Content that parses as JSON and has an ``"article"`` key
       → ``"devrev/rt"``
    2. Content accepted by the Markdown heuristic → ``"text/markdown"``
    3. Content containing something shaped like an HTML tag → ``"text/html"``
    4. Anything else → ``"text/plain"``

    Args:
        content: The raw content string to inspect.

    Returns:
        One of ``"devrev/rt"``, ``"text/markdown"``, ``"text/html"``,
        or ``"text/plain"``.

    Example:
        >>> detect_content_format("<p>Hello</p>")
        'text/html'
        >>> detect_content_format('{"article": {"type": "doc"}}')
        'devrev/rt'
        >>> detect_content_format("Just plain text")
        'text/plain'
    """
    text = content.strip()

    # 1. devrev/rt envelope: only attempt a JSON parse when it looks like an object.
    if text.startswith("{"):
        try:
            is_envelope = "article" in json.loads(text)
        except (json.JSONDecodeError, KeyError):
            is_envelope = False
        if is_envelope:
            return CONTENT_FORMAT_DEVREV_RT

    # 2. Markdown heuristic (receives the untrimmed content).
    if _is_markdown(content):
        return CONTENT_FORMAT_MARKDOWN

    # 3 / 4. Any tag-shaped token means HTML; otherwise fall back to plain text.
    has_tag = re.search(r"<[a-zA-Z][^>]*>", text) is not None
    return CONTENT_FORMAT_HTML if has_tag else CONTENT_FORMAT_PLAIN
439
+
440
+
441
+ # ---------------------------------------------------------------------------
442
+ # devrev/rt → Markdown converter
443
+ # ---------------------------------------------------------------------------
444
+
445
# Mapping of ProseMirror heading levels to ATX prefix
_HEADING_PREFIX: dict[int, str] = {1: "#", 2: "##", 3: "###", 4: "####", 5: "#####", 6: "######"}


def _pm_list_item_lines(item: dict[str, Any], marker: str) -> list[str]:
    """Render one ``listItem`` node as Markdown lines introduced by *marker*.

    Continuation lines are indented by the width of *marker* so that they
    stay inside the list item; blank lines are left empty.
    """
    body = _pm_nodes_to_markdown(item.get("content") or []).strip()
    first, *rest = body.split("\n")
    pad = " " * len(marker)
    return [f"{marker}{first}", *(f"{pad}{line}" if line else "" for line in rest)]


def _pm_nodes_to_markdown(nodes: list[dict[str, Any]], *, indent: str = "") -> str:
    """Recursively convert a list of ProseMirror block nodes to Markdown.

    Args:
        nodes: ProseMirror node dictionaries (``type`` plus optional
            ``content`` / ``attrs``).
        indent: Prefix placed before the first line of each paragraph. It is
            forwarded unchanged when recursing into unknown wrapper nodes.

    Returns:
        The Markdown text, with blocks joined by newlines. Blocks other than
        blockquotes and bare text nodes are followed by an empty line.

    Notes:
        * Every line of a blockquote's rendered children is prefixed with
          ``"> "`` (``">"`` for empty lines), so headings, lists, code blocks
          and nested blockquotes stay inside the quote.
        * ``content`` / ``attrs`` / image attributes that are explicitly
          ``null`` are treated as missing.
    """
    parts: list[str] = []
    for node in nodes:
        ntype = node.get("type", "")
        # ``or`` (not a .get default) so an explicit JSON null is tolerated.
        content: list[dict[str, Any]] = node.get("content") or []
        attrs: dict[str, Any] = node.get("attrs") or {}

        if ntype == "paragraph":
            parts.append(indent + _pm_inline_to_markdown(content))
            parts.append("")

        elif ntype == "heading":
            # Unrecognised levels fall back to a level-1 heading.
            prefix = _HEADING_PREFIX.get(attrs.get("level", 1), "#")
            parts.append(f"{prefix} {_pm_inline_to_markdown(content)}")
            parts.append("")

        elif ntype == "codeBlock":
            lang = attrs.get("language") or ""
            parts.append(f"```{lang}")
            parts.append(_pm_inline_to_markdown(content))
            parts.append("```")
            parts.append("")

        elif ntype == "blockquote":
            # Render the children without any prefix, then quote every line.
            # The trailing empty line of the inner rendering becomes a bare
            # ">" which closes the quote.
            inner = _pm_nodes_to_markdown(content)
            parts.append(
                "\n".join(f"> {line}" if line else ">" for line in inner.split("\n"))
            )

        elif ntype == "bulletList":
            for item in content:
                if item.get("type") == "listItem":
                    parts.extend(_pm_list_item_lines(item, "- "))
            parts.append("")

        elif ntype == "orderedList":
            start = attrs.get("start") or 1
            # The number follows the child's position, so skipped
            # non-listItem children still consume a number.
            for idx, item in enumerate(content):
                if item.get("type") == "listItem":
                    parts.extend(_pm_list_item_lines(item, f"{start + idx}. "))
            parts.append("")

        elif ntype == "horizontalRule":
            parts.append("---")
            parts.append("")

        elif ntype == "table":
            parts.append(_pm_table_to_markdown(content))
            parts.append("")

        elif ntype == "image":
            # A null alt/src must render as empty, not as the text "None".
            src = attrs.get("src") or ""
            alt = attrs.get("alt") or ""
            parts.append(f"![{alt}]({src})")
            parts.append("")

        elif ntype == "text":
            # Top-level text shouldn't happen but handle gracefully
            parts.append(_pm_text_node_to_markdown(node))

        elif content:
            # Unknown node – recurse into children
            parts.append(_pm_nodes_to_markdown(content, indent=indent))

    return "\n".join(parts)
533
+
534
+
535
def _pm_inline_to_markdown(nodes: list[dict[str, Any]]) -> str:
    """Convert a list of ProseMirror inline nodes to a Markdown fragment.

    Args:
        nodes: Inline node dictionaries (``text``, ``hardBreak``, ``image``,
            or an unknown wrapper whose ``content`` is rendered recursively).

    Returns:
        The rendered pieces concatenated without any separator.
    """
    parts: list[str] = []
    for node in nodes:
        ntype = node.get("type", "")
        if ntype == "text":
            parts.append(_pm_text_node_to_markdown(node))
        elif ntype == "hardBreak":
            # Two trailing spaces before the newline are what make Markdown
            # treat this as a hard line break rather than a soft wrap.
            parts.append("  \n")
        elif ntype == "image":
            attrs = node.get("attrs") or {}
            # A null alt/src must render as empty, not as the text "None".
            src = attrs.get("src") or ""
            alt = attrs.get("alt") or ""
            parts.append(f"![{alt}]({src})")
        else:
            # Recurse for unknown inline types; a null content is treated as empty.
            children = node.get("content") or []
            if children:
                parts.append(_pm_inline_to_markdown(children))
    return "".join(parts)
555
+
556
+
557
def _pm_text_node_to_markdown(node: dict[str, Any]) -> str:
    """Convert a ProseMirror text node (with optional marks) to Markdown.

    Marks are applied in list order, each one wrapping the result of the
    previous one. ``text``, ``marks``, link ``attrs`` and ``href`` values
    that are missing or explicitly ``null`` are treated as empty.

    Args:
        node: A text node dictionary with ``text`` and optional ``marks``.

    Returns:
        The text wrapped in the Markdown syntax for each recognised mark.
        Marks with no standard Markdown equivalent (underline, subscript,
        superscript, ...) leave the text unchanged.
    """
    raw = node.get("text")
    text = "" if raw is None else str(raw)

    # Marks whose Markdown form is the same delimiter on both sides.
    delimiters = {"bold": "**", "italic": "*", "code": "`", "strike": "~~"}

    for mark in node.get("marks") or []:
        mtype = mark.get("type", "")
        if mtype == "link":
            href = (mark.get("attrs") or {}).get("href") or ""
            text = f"[{text}]({href})"
        elif mtype in delimiters:
            fence = delimiters[mtype]
            text = f"{fence}{text}{fence}"
    return text
578
+
579
+
580
def _pm_table_to_markdown(rows: list[dict[str, Any]]) -> str:
    """Convert ProseMirror table rows to a pipe-delimited Markdown table.

    Args:
        rows: Children of a ``table`` node. Entries that are not
            ``tableRow`` nodes are skipped, as are row children that are
            neither ``tableHeader`` nor ``tableCell``.

    Returns:
        The table as Markdown, or ``""`` when there are no rows. The first
        row is always followed by a ``---`` separator row, whether or not it
        is made of header cells. Short rows are padded with empty cells to
        the widest row.
    """
    md_rows: list[list[str]] = []
    for pm_row in rows:
        if pm_row.get("type") != "tableRow":
            continue
        cells: list[str] = []
        for cell in pm_row.get("content") or []:
            if cell.get("type", "") not in ("tableHeader", "tableCell"):
                continue
            cell_text = _pm_nodes_to_markdown(cell.get("content") or []).strip()
            # A table row must stay on one line, and a literal pipe would be
            # read as a column delimiter unless it is backslash-escaped.
            cells.append(cell_text.replace("\n", " ").replace("|", "\\|"))
        md_rows.append(cells)

    if not md_rows:
        return ""

    col_count = max(len(row) for row in md_rows)

    def as_line(values: list[str]) -> str:
        return "| " + " | ".join(values) + " |"

    lines = [as_line(row + [""] * (col_count - len(row))) for row in md_rows]
    # Separator goes directly after the first row (header or not).
    lines.insert(1, as_line(["---"] * col_count))
    return "\n".join(lines)
621
+
622
+
623
def devrev_rt_to_markdown(content: str) -> str:
    """Render DevRev Rich Text (ProseMirror JSON) as Markdown.

    The input may be the full ``{"article": ..., "artifactIds": [...]}``
    envelope or only the inner ``{"type": "doc", "content": [...]}``
    document node.

    Input that does not start with ``{``, does not parse as JSON, or does
    not resolve to a ``doc`` node is handed back untouched (it might
    already be Markdown or plain text).

    Args:
        content: JSON string in devrev/rt format, or arbitrary text.

    Returns:
        Markdown string ending in a single newline, or ``""`` when the
        document renders to nothing but whitespace.

    Example:
        >>> rt = '{"article": {"type": "doc", "content": [{"type": "heading", "attrs": {"level": 1}, "content": [{"type": "text", "text": "Hello"}]}]}}'
        >>> devrev_rt_to_markdown(rt)
        '# Hello\\n'
    """
    candidate = content.strip()
    if not candidate.startswith("{"):
        return content

    try:
        payload = json.loads(candidate)
    except json.JSONDecodeError:
        return content

    # Accept either the envelope or a bare document node.
    document = payload.get("article", payload)
    is_doc = isinstance(document, dict) and document.get("type") == "doc"
    if not is_doc:
        return content

    rendered = _pm_nodes_to_markdown(document.get("content", []))
    # Squash runs of three or more newlines down to a single blank line.
    trimmed = re.sub(r"\n{3,}", "\n\n", rendered).strip()
    if not trimmed:
        return ""
    return trimmed + "\n"
663
+
664
+
665
+ # ---------------------------------------------------------------------------
666
+ # devrev/rt → HTML converter
667
+ # ---------------------------------------------------------------------------
668
+
669
+
670
def _pm_nodes_to_html(nodes: list[dict[str, Any]]) -> str:
    """Recursively convert ProseMirror block nodes to an HTML string.

    Every attribute value that ends up in markup is either escaped or
    coerced to an integer first, so document-supplied attributes cannot
    inject extra attributes or tags. ``content`` / ``attrs`` / image
    attributes that are explicitly ``null`` are treated as missing.

    Args:
        nodes: ProseMirror node dictionaries.

    Returns:
        The concatenated HTML for all nodes, with no separators between them.
    """
    # Containers that simply wrap their rendered children in one tag.
    wrappers = {
        "blockquote": "blockquote",
        "bulletList": "ul",
        "listItem": "li",
        "table": "table",
        "tableRow": "tr",
        "tableCell": "td",
        "tableHeader": "th",
    }

    parts: list[str] = []
    for node in nodes:
        ntype = node.get("type", "")
        content: list[dict[str, Any]] = node.get("content") or []
        attrs: dict[str, Any] = node.get("attrs") or {}

        if ntype == "paragraph":
            parts.append(f"<p>{_pm_inline_to_html(content)}</p>")

        elif ntype == "heading":
            level = attrs.get("level", 1)
            # The level becomes part of the tag name, so only a real 1-6
            # integer is accepted; anything else falls back to <h1>.
            if isinstance(level, bool) or not isinstance(level, int) or not 1 <= level <= 6:
                level = 1
            parts.append(f"<h{level}>{_pm_inline_to_html(content)}</h{level}>")

        elif ntype == "codeBlock":
            lang = html_module.escape(str(attrs.get("language") or ""), quote=True)
            code_text = _pm_inline_to_html(content)
            if lang:
                parts.append(f'<pre><code class="language-{lang}">{code_text}</code></pre>')
            else:
                parts.append(f"<pre><code>{code_text}</code></pre>")

        elif ntype == "orderedList":
            # Non-numeric start values are ignored rather than echoed into
            # the attribute.
            try:
                start = int(attrs.get("start") or 1)
            except (TypeError, ValueError):
                start = 1
            start_attr = f' start="{start}"' if start != 1 else ""
            parts.append(f"<ol{start_attr}>{_pm_nodes_to_html(content)}</ol>")

        elif ntype in wrappers:
            tag = wrappers[ntype]
            parts.append(f"<{tag}>{_pm_nodes_to_html(content)}</{tag}>")

        elif ntype == "horizontalRule":
            parts.append("<hr>")

        elif ntype == "image":
            # str(... or "") keeps html.escape from failing on a null value.
            src = html_module.escape(str(attrs.get("src") or ""), quote=True)
            alt = html_module.escape(str(attrs.get("alt") or ""), quote=True)
            parts.append(f'<img src="{src}" alt="{alt}">')

        elif ntype == "text":
            parts.append(_pm_text_node_to_html(node))

        elif content:
            # Unknown node: keep its children, drop the wrapper.
            parts.append(_pm_nodes_to_html(content))

    return "".join(parts)
740
+
741
+
742
def _pm_inline_to_html(nodes: list[dict[str, Any]]) -> str:
    """Convert ProseMirror inline nodes to an HTML fragment.

    Args:
        nodes: Inline node dictionaries (``text``, ``hardBreak``, ``image``,
            or an unknown wrapper whose ``content`` is rendered recursively).

    Returns:
        The rendered pieces concatenated without any separator.
    """
    parts: list[str] = []
    for node in nodes:
        ntype = node.get("type", "")
        if ntype == "text":
            parts.append(_pm_text_node_to_html(node))
        elif ntype == "hardBreak":
            parts.append("<br>")
        elif ntype == "image":
            attrs = node.get("attrs") or {}
            # str(... or "") keeps html.escape from failing on a null value
            # (an image with no alt text may carry ``"alt": null``).
            src = html_module.escape(str(attrs.get("src") or ""), quote=True)
            alt = html_module.escape(str(attrs.get("alt") or ""), quote=True)
            parts.append(f'<img src="{src}" alt="{alt}">')
        else:
            # Unknown inline type: keep its children, drop the wrapper.
            children = node.get("content") or []
            if children:
                parts.append(_pm_inline_to_html(children))
    return "".join(parts)
761
+
762
+
763
def _pm_text_node_to_html(node: dict[str, Any]) -> str:
    """Convert a ProseMirror text node (with marks) to HTML.

    The text and the link attribute values are HTML-escaped before being
    placed in markup. Marks are applied in list order, each one wrapping the
    result of the previous one. ``text``, ``marks``, link ``attrs``,
    ``href`` and ``target`` values that are missing or explicitly ``null``
    are treated as absent (``target`` then defaults to ``"_blank"``).

    Args:
        node: A text node dictionary with ``text`` and optional ``marks``.

    Returns:
        The escaped text wrapped in the tag for each recognised mark;
        unrecognised marks leave it unchanged.
    """
    raw = node.get("text")
    text = html_module.escape("" if raw is None else str(raw))

    # Marks that map one-to-one onto a plain wrapping element.
    simple_tags = {
        "bold": "strong",
        "italic": "em",
        "code": "code",
        "strike": "s",
        "underline": "u",
        "subscript": "sub",
        "superscript": "sup",
    }

    for mark in node.get("marks") or []:
        mtype = mark.get("type", "")
        if mtype == "link":
            link_attrs = mark.get("attrs") or {}
            raw_target = link_attrs.get("target")
            # NOTE(review): escaping prevents attribute break-out but does not
            # reject ``javascript:`` style URLs; filter schemes upstream if the
            # output is shown to other users.
            href = html_module.escape(str(link_attrs.get("href") or ""), quote=True)
            target = html_module.escape(
                "_blank" if raw_target is None else str(raw_target), quote=True
            )
            text = f'<a href="{href}" target="{target}" rel="noopener noreferrer">{text}</a>'
        elif mtype in simple_tags:
            tag = simple_tags[mtype]
            text = f"<{tag}>{text}</{tag}>"
    return text
795
+
796
+
797
def devrev_rt_to_html(content: str) -> str:
    """Render DevRev Rich Text (ProseMirror JSON) as HTML.

    The input may be the full ``{"article": ..., "artifactIds": [...]}``
    envelope or only the inner ``{"type": "doc", "content": [...]}``
    document node.

    Input that does not start with ``{``, does not parse as JSON, or does
    not resolve to a ``doc`` node is handed back untouched.

    Args:
        content: JSON string in devrev/rt format, or arbitrary text.

    Returns:
        HTML string.

    Example:
        >>> rt = '{"article": {"type": "doc", "content": [{"type": "paragraph", "attrs": {}, "content": [{"type": "text", "text": "Hello"}]}]}}'
        >>> devrev_rt_to_html(rt)
        '<p>Hello</p>'
    """
    candidate = content.strip()
    if not candidate.startswith("{"):
        return content

    try:
        payload = json.loads(candidate)
    except json.JSONDecodeError:
        return content

    # Accept either the envelope or a bare document node.
    document = payload.get("article", payload)
    is_doc = isinstance(document, dict) and document.get("type") == "doc"
    if not is_doc:
        return content

    return _pm_nodes_to_html(document.get("content", []))
@@ -20,6 +20,7 @@ from devrev.models.articles import (
20
20
  SetSharedWithMembership,
21
21
  )
22
22
  from devrev.models.base import SetTagWithValue
23
+ from devrev.utils.content_converter import OutputFormat
23
24
  from devrev_mcp.server import _config, mcp
24
25
  from devrev_mcp.utils.errors import format_devrev_error
25
26
  from devrev_mcp.utils.formatting import serialize_model, serialize_models
@@ -65,7 +66,10 @@ async def devrev_articles_list(
65
66
 
66
67
  @mcp.tool()
67
68
  async def devrev_articles_get(
68
- ctx: Context[Any, Any, Any], id: str, include_content: bool = False
69
+ ctx: Context[Any, Any, Any],
70
+ id: str,
71
+ include_content: bool = False,
72
+ output_format: OutputFormat | None = None,
69
73
  ) -> dict[str, Any]:
70
74
  """Get a specific article by ID.
71
75
 
@@ -73,6 +77,10 @@ async def devrev_articles_get(
73
77
  ctx: MCP context containing the DevRev client.
74
78
  id: The article ID.
75
79
  include_content: If True, fetch and include article body content.
80
+ output_format: When include_content is True, convert the content to
81
+ this format before returning. Accepted values:
82
+ ``"text/markdown"``, ``"text/html"``, ``"devrev/rt"``.
83
+ If omitted the raw stored content is returned as-is.
76
84
 
77
85
  Returns:
78
86
  Dictionary containing the article details. When include_content=True,
@@ -84,7 +92,9 @@ async def devrev_articles_get(
84
92
  app = ctx.request_context.lifespan_context
85
93
  try:
86
94
  if include_content:
87
- article_with_content = await app.get_client().articles.get_with_content(id)
95
+ article_with_content = await app.get_client().articles.get_with_content(
96
+ id, output_format=output_format
97
+ )
88
98
  return serialize_model(article_with_content)
89
99
  else:
90
100
  request = ArticlesGetRequest(id=id)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: devrev-Python-SDK
3
- Version: 2.12.2
3
+ Version: 2.13.0
4
4
  Summary: A modern, type-safe Python SDK for the DevRev API
5
5
  Project-URL: Homepage, https://github.com/mgmonteleone/py-dev-rev
6
6
  Project-URL: Documentation, https://github.com/mgmonteleone/py-dev-rev
@@ -36,7 +36,7 @@ devrev/models/widgets.py,sha256=7WWN17_ySqnu1pjYSIS5B8J5dEaqkoB8X8GWzvI0ZYc,4577
36
36
  devrev/models/works.py,sha256=0PypJjDA5T3d-mmuvo5tl6_EUoz22YREmTWdFHx5jCc,10924
37
37
  devrev/services/__init__.py,sha256=eH_sj5qTjuNhCayawlZKZRmhKhv2mSSnbi_Qh3DnMaI,3838
38
38
  devrev/services/accounts.py,sha256=X7FgcODex0XKLiV_VvXKDl2Jm8XsNpn9qp40oRjZqME,9704
39
- devrev/services/articles.py,sha256=cLuBxe4CJi_KCA8Eb105bcsksYmX9ltMb8LxT_wIzXw,37873
39
+ devrev/services/articles.py,sha256=xOpJOG9f29a5W3IFROplL1a9eJdGucbUFkK-MAIIYlc,41637
40
40
  devrev/services/artifacts.py,sha256=SJzIi5M4np0ENoOTTGEAcoqoRFMVd8pe-BCo9vvhYzk,14124
41
41
  devrev/services/base.py,sha256=g55dfhd2-uAhgXuguiBacSP4yOAa03Q7hzKDG2vbZ8U,7100
42
42
  devrev/services/brands.py,sha256=W6FB9XXTtEzGOfm9IBI6dqY8WXrMeH6urwjiZKrDVgI,5679
@@ -61,8 +61,8 @@ devrev/services/track_events.py,sha256=lI4wXkWu3uUuXtuRg1MGNkTZ7B0Lc1PjM8Kw-6sUn
61
61
  devrev/services/uoms.py,sha256=AA3ymoHj24FIbsZpYC4tg2elSdQ3iINTVOz7MraZcj8,8163
62
62
  devrev/services/webhooks.py,sha256=-TSkcaya1y48WB24_vHd-bqO5xSqxRsCLiilncNzQZU,3917
63
63
  devrev/services/works.py,sha256=b-HWpPMLnda-1P6iB-_QzHu_MoB7z4g_OQGh26rqhtQ,11252
64
- devrev/utils/__init__.py,sha256=zN601yi01d6RlFoSgqGjos5CxhwCga0tm3stJEelFak,422
65
- devrev/utils/content_converter.py,sha256=sGEGHDaFV3hm2bV59QsPNHLQNn_PzwZSaN6pgbuVxEY,11366
64
+ devrev/utils/__init__.py,sha256=NOrbpkjDVLH8n9xf-xpZJiIIa_GVI_6vqTm3E8L3Udw,857
65
+ devrev/utils/content_converter.py,sha256=emRBLiVoOfDGpPDzrMRnqQr4-QkqN13OdWlYOyU_LCg,28141
66
66
  devrev/utils/deprecation.py,sha256=7qB2Dx531oP7mNi7q2txOYsOKC9YwdHqlKPMFHOW9Ws,1275
67
67
  devrev/utils/http.py,sha256=mvjZyPTKRO_M60EC_8b7LP2WD52a-e1sH9WFVFVbK54,34248
68
68
  devrev/utils/logging.py,sha256=0Kp0jW03_IX1wOhTyQk-BfRZkvOIU6H8hnDW9xjIdF4,8037
@@ -92,7 +92,7 @@ devrev_mcp/resources/ticket.py,sha256=D-S8Unsae8iV5dvPmCYQT4xcOEgX9E7Kcc8lg6Pxe8
92
92
  devrev_mcp/resources/user.py,sha256=0Paq2w_nbj_dCQ8R0S81zlgjUhDAUzvn1_NmshadqM8,1553
93
93
  devrev_mcp/tools/__init__.py,sha256=wiou4HHy6HeOQY0El3KYqy_S7c2IC4hjsYHjMm7aH-w,54
94
94
  devrev_mcp/tools/accounts.py,sha256=AzRkJkKaYLYRaGL71KsoMEHUPBJ4IXgUs121tMjae-k,5777
95
- devrev_mcp/tools/articles.py,sha256=l-vpG-dgpMU9bNzpk7kBEJDm9YtobtlotV081xTREFc,13384
95
+ devrev_mcp/tools/articles.py,sha256=Mnr-O6ZCkTaGOXOEEP5sSFQhHezXZLJSguc7SjhTaeI,13823
96
96
  devrev_mcp/tools/conversations.py,sha256=koslrPsYf558NR_sw-FNrmBgjtqDZ23iHJ1nTgC4B0w,5615
97
97
  devrev_mcp/tools/engagements.py,sha256=0net7c_MogSJ9khOcI98c54env_5f6t6eKobovVAc8Q,8670
98
98
  devrev_mcp/tools/groups.py,sha256=HoqlSScIOeWE4Qr1UwIUrHLuJ5Hi5yrnnKTCyMxOBhQ,8289
@@ -112,7 +112,7 @@ devrev_mcp/utils/__init__.py,sha256=2_5b1KC5kjoUqFY1ZSdB2Tefd2ekjbZ-eHyFWBKI-0A,
112
112
  devrev_mcp/utils/errors.py,sha256=5mRAo76rJvvEVi6b1ZokPxDtX5JKkptaqmiYDLCkwBE,2110
113
113
  devrev_mcp/utils/formatting.py,sha256=6JssG5x1BxjdgSiQ8Ou3H-9Wo3wgWTWmejsrGez4wKc,2431
114
114
  devrev_mcp/utils/pagination.py,sha256=EOUgL-ZdSToM1Q-ydXmjhibsef5K1u1g3CaS9K8I2fY,1286
115
- devrev_python_sdk-2.12.2.dist-info/METADATA,sha256=LzYvwb7DvAFGOubOY8mJRE5Sgm4pz048BHj4cTCy24c,40907
116
- devrev_python_sdk-2.12.2.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
117
- devrev_python_sdk-2.12.2.dist-info/entry_points.txt,sha256=XiV4J_yy0yzVZVxg7T66YERVIlqdPNp3O-NHTHkllqQ,63
118
- devrev_python_sdk-2.12.2.dist-info/RECORD,,
115
+ devrev_python_sdk-2.13.0.dist-info/METADATA,sha256=UQ-fDydUuXv4Vco7j80b6_n8_37vG3cCJZR_LjbtP88,40907
116
+ devrev_python_sdk-2.13.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
117
+ devrev_python_sdk-2.13.0.dist-info/entry_points.txt,sha256=XiV4J_yy0yzVZVxg7T66YERVIlqdPNp3O-NHTHkllqQ,63
118
+ devrev_python_sdk-2.13.0.dist-info/RECORD,,