PyPI - chatgpt-md-converter - Versions diffs - 0.4.0b3__py3-none-any.whl → 0.4.0b4__py3-none-any.whl - Mend

chatgpt-md-converter 0.4.0b3__py3-none-any.whl → 0.4.0b4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

chatgpt_md_converter/telegram_markdown/code_blocks.py CHANGED Viewed

@@ -2,6 +2,8 @@
 import re
+from .html_escape import escape_code_content
 _CODE_BLOCK_RE = re.compile(
     r"(?P<fence>`{3,})(?P<lang>\w+)?\n?[\s\S]*?(?<=\n)?(?P=fence)",
     flags=re.DOTALL,
@@ -62,11 +64,7 @@ def extract_and_convert_code_blocks(text: str):
     def _replacement(match: re.Match[str]) -> tuple[str, str]:
         language = match.group("lang") or ""
         code_content = match.group("code")
-        escaped = (
-            code_content.replace("&", "&amp;")
-            .replace("<", "&lt;")
-            .replace(">", "&gt;")
-        )
+        escaped = escape_code_content(code_content)
         placeholder = f"CODEBLOCKPLACEHOLDER_{len(placeholders)}_"
         placeholders.append(placeholder)
         if language:

chatgpt_md_converter/telegram_markdown/html_escape.py ADDED Viewed

@@ -0,0 +1,40 @@
+"""HTML escaping utilities for code content.
+LLMs sometimes pre-escape HTML entities (&lt; &gt; &amp; &quot;) in
+markdown code blocks and inline code. We unescape first, then
+re-escape exactly once to avoid double-escaping like &amp;lt;.
+"""
+import re
+_HTML_ENTITY_RE = re.compile(r"&(?:lt|gt|amp|quot|apos|#\d+|#x[\da-fA-F]+);")
+def _is_pre_escaped(text: str) -> bool:
+    """Return True if the text contains any HTML character references."""
+    return bool(_HTML_ENTITY_RE.search(text))
+def _unescape_html(text: str) -> str:
+    """Unescape common HTML character references to their literal chars."""
+    text = text.replace("&amp;", "&")
+    text = text.replace("&lt;", "<")
+    text = text.replace("&gt;", ">")
+    text = text.replace("&quot;", '"')
+    text = text.replace("&apos;", "'")
+    return text
+def escape_code_content(text: str) -> str:
+    """Escape code content for Telegram HTML, handling pre-escaped input.
+    If the input already contains HTML entities (from LLM pre-escaping),
+    unescape them first so we produce a single level of escaping.
+    """
+    if _is_pre_escaped(text):
+        text = _unescape_html(text)
+    return (
+        text.replace("&", "&amp;")
+        .replace("<", "&lt;")
+        .replace(">", "&gt;")
+    )

chatgpt_md_converter/telegram_markdown/renderer.py CHANGED Viewed

@@ -5,6 +5,7 @@ from __future__ import annotations
 import re
 from .code_blocks import extract_and_convert_code_blocks, reinsert_code_blocks
+from .html_escape import escape_code_content
 from .inline import (apply_custom_italic, convert_html_chars,
                      extract_inline_code_snippets, split_by_tag)
 from .postprocess import remove_blockquote_escaping, remove_spoiler_escaping
@@ -44,11 +45,7 @@ def telegram_format(text: str) -> str:
     output = re.sub(link_pattern, r'<a href="\2">\1</a>', output)
     for placeholder, snippet in inline_snippets.items():
-        escaped = (
-            snippet.replace("&", "&amp;")
-            .replace("<", "&lt;")
-            .replace(">", "&gt;")
-        )
+        escaped = escape_code_content(snippet)
         output = output.replace(placeholder, f"<code>{escaped}</code>")
     output = reinsert_code_blocks(output, block_map)

{chatgpt_md_converter-0.4.0b3.dist-info → chatgpt_md_converter-0.4.0b4.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: chatgpt_md_converter
-Version: 0.4.0b3
+Version: 0.4.0b4
 Summary: A package for converting markdown to HTML for chat Telegram bots
 Home-page: https://github.com/botfather-dev/formatter-chatgpt-telegram
 Author: Kostiantyn Kriuchkov

{chatgpt_md_converter-0.4.0b3.dist-info → chatgpt_md_converter-0.4.0b4.dist-info}/RECORD RENAMED Viewed

@@ -17,13 +17,14 @@ chatgpt_md_converter/telegram_entities/extractors/headings.py,sha256=AzjF9jElWfw
 chatgpt_md_converter/telegram_entities/extractors/inline.py,sha256=DYSs7cJEFY3-fGtdMdOA7DO5ERtEF8r2GQns5WcPyto,8745
 chatgpt_md_converter/telegram_entities/extractors/links.py,sha256=AmCS8mx7ObY2aL5q7owULemjx-Ivuto_4PtKsL7K45Q,2898
 chatgpt_md_converter/telegram_markdown/__init__.py,sha256=C0Oexz9brpdE-TqEpiAUV78TsZdSrnnH_5yYpEJ03Us,131
-chatgpt_md_converter/telegram_markdown/code_blocks.py,sha256=VPkSisvb6DiS5KAcq0OaX4sqR1YX4VgZvJEXZeAjIWk,3067
+chatgpt_md_converter/telegram_markdown/code_blocks.py,sha256=Y3IitUs846B8V7WqczTcLGf3AhuATtRz0DwBn_8udaw,3020
+chatgpt_md_converter/telegram_markdown/html_escape.py,sha256=qf7icPXE5BcRyX58tUQ_WQpx38v5LmdDsBCyG4XagYQ,1264
 chatgpt_md_converter/telegram_markdown/inline.py,sha256=MPzj5VpDqrlvPy69CCwUIOsWgtgIFfbB4CliV5Wz-TY,2207
 chatgpt_md_converter/telegram_markdown/postprocess.py,sha256=jUf01tAIqHQ1NxNlVGsvU-Yw8SDOHtMoS7MUzaQLf_8,775
 chatgpt_md_converter/telegram_markdown/preprocess.py,sha256=k9XBtwgXkh07SlsqbdcZHwOMHhUGOjiIbOehO5wBnu0,1561
-chatgpt_md_converter/telegram_markdown/renderer.py,sha256=XQI15mkJe0wSxhuvRl_Md56wX7PsjtmyD8vUFw6RoCA,2326
-chatgpt_md_converter-0.4.0b3.dist-info/licenses/LICENSE,sha256=SDr2jeP-s2g4vf17-jdLXrrqA4_mU7L_RtSJlv4Y2mk,1077
-chatgpt_md_converter-0.4.0b3.dist-info/METADATA,sha256=fj0d7XDaFrmttdJag0-cGWv9ulLK22JRMhOx0fpmWHk,6606
-chatgpt_md_converter-0.4.0b3.dist-info/WHEEL,sha256=qELbo2s1Yzl39ZmrAibXA2jjPLUYfnVhUNTlyF1rq0Y,92
-chatgpt_md_converter-0.4.0b3.dist-info/top_level.txt,sha256=T2o7csVtZgr-Pwm83aSUkZn0humJmDFNqW38tRSsNqw,21
-chatgpt_md_converter-0.4.0b3.dist-info/RECORD,,
+chatgpt_md_converter/telegram_markdown/renderer.py,sha256=zwobwAa6nybEVLNUciEsL2VuG8_jtPh_o3PriONLmzg,2278
+chatgpt_md_converter-0.4.0b4.dist-info/licenses/LICENSE,sha256=SDr2jeP-s2g4vf17-jdLXrrqA4_mU7L_RtSJlv4Y2mk,1077
+chatgpt_md_converter-0.4.0b4.dist-info/METADATA,sha256=lB9PWcyKIasLgVgvMrVOSA5NM3poDpAfKoiQm619RtQ,6606
+chatgpt_md_converter-0.4.0b4.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+chatgpt_md_converter-0.4.0b4.dist-info/top_level.txt,sha256=T2o7csVtZgr-Pwm83aSUkZn0humJmDFNqW38tRSsNqw,21
+chatgpt_md_converter-0.4.0b4.dist-info/RECORD,,

{chatgpt_md_converter-0.4.0b3.dist-info → chatgpt_md_converter-0.4.0b4.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (80.10.1)
+Generator: setuptools (80.10.2)
 Root-Is-Purelib: true
 Tag: py3-none-any

{chatgpt_md_converter-0.4.0b3.dist-info → chatgpt_md_converter-0.4.0b4.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{chatgpt_md_converter-0.4.0b3.dist-info → chatgpt_md_converter-0.4.0b4.dist-info}/top_level.txt RENAMED Viewed

File without changes

chatgpt-md-converter 0.4.0b3__py3-none-any.whl → 0.4.0b4__py3-none-any.whl

chatgpt-md-converter 0.4.0b3__py3-none-any.whl → 0.4.0b4__py3-none-any.whl