chatgpt-md-converter 0.4.0b3__py3-none-any.whl → 0.4.0b4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,6 +2,8 @@
2
2
 
3
3
  import re
4
4
 
5
+ from .html_escape import escape_code_content
6
+
5
7
  _CODE_BLOCK_RE = re.compile(
6
8
  r"(?P<fence>`{3,})(?P<lang>\w+)?\n?[\s\S]*?(?<=\n)?(?P=fence)",
7
9
  flags=re.DOTALL,
@@ -62,11 +64,7 @@ def extract_and_convert_code_blocks(text: str):
62
64
  def _replacement(match: re.Match[str]) -> tuple[str, str]:
63
65
  language = match.group("lang") or ""
64
66
  code_content = match.group("code")
65
- escaped = (
66
- code_content.replace("&", "&amp;")
67
- .replace("<", "&lt;")
68
- .replace(">", "&gt;")
69
- )
67
+ escaped = escape_code_content(code_content)
70
68
  placeholder = f"CODEBLOCKPLACEHOLDER_{len(placeholders)}_"
71
69
  placeholders.append(placeholder)
72
70
  if language:
@@ -0,0 +1,40 @@
1
+ """HTML escaping utilities for code content.
2
+
3
+ LLMs sometimes pre-escape HTML entities (&lt; &gt; &amp; &quot;) in
4
+ markdown code blocks and inline code. We unescape first, then
5
+ re-escape exactly once to avoid double-escaping like &amp;lt;.
6
+ """
7
+
8
+ import re
9
+
10
+ _HTML_ENTITY_RE = re.compile(r"&(?:lt|gt|amp|quot|apos|#\d+|#x[\da-fA-F]+);")
11
+
12
+
13
+ def _is_pre_escaped(text: str) -> bool:
14
+ """Return True if the text contains any HTML character references."""
15
+ return bool(_HTML_ENTITY_RE.search(text))
16
+
17
+
18
+ def _unescape_html(text: str) -> str:
19
+ """Unescape common HTML character references to their literal chars."""
20
+ text = text.replace("&amp;", "&")
21
+ text = text.replace("&lt;", "<")
22
+ text = text.replace("&gt;", ">")
23
+ text = text.replace("&quot;", '"')
24
+ text = text.replace("&apos;", "'")
25
+ return text
26
+
27
+
28
+ def escape_code_content(text: str) -> str:
29
+ """Escape code content for Telegram HTML, handling pre-escaped input.
30
+
31
+ If the input already contains HTML entities (from LLM pre-escaping),
32
+ unescape them first so we produce a single level of escaping.
33
+ """
34
+ if _is_pre_escaped(text):
35
+ text = _unescape_html(text)
36
+ return (
37
+ text.replace("&", "&amp;")
38
+ .replace("<", "&lt;")
39
+ .replace(">", "&gt;")
40
+ )
@@ -5,6 +5,7 @@ from __future__ import annotations
5
5
  import re
6
6
 
7
7
  from .code_blocks import extract_and_convert_code_blocks, reinsert_code_blocks
8
+ from .html_escape import escape_code_content
8
9
  from .inline import (apply_custom_italic, convert_html_chars,
9
10
  extract_inline_code_snippets, split_by_tag)
10
11
  from .postprocess import remove_blockquote_escaping, remove_spoiler_escaping
@@ -44,11 +45,7 @@ def telegram_format(text: str) -> str:
44
45
  output = re.sub(link_pattern, r'<a href="\2">\1</a>', output)
45
46
 
46
47
  for placeholder, snippet in inline_snippets.items():
47
- escaped = (
48
- snippet.replace("&", "&amp;")
49
- .replace("<", "&lt;")
50
- .replace(">", "&gt;")
51
- )
48
+ escaped = escape_code_content(snippet)
52
49
  output = output.replace(placeholder, f"<code>{escaped}</code>")
53
50
 
54
51
  output = reinsert_code_blocks(output, block_map)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: chatgpt_md_converter
3
- Version: 0.4.0b3
3
+ Version: 0.4.0b4
4
4
  Summary: A package for converting markdown to HTML for chat Telegram bots
5
5
  Home-page: https://github.com/botfather-dev/formatter-chatgpt-telegram
6
6
  Author: Kostiantyn Kriuchkov
@@ -17,13 +17,14 @@ chatgpt_md_converter/telegram_entities/extractors/headings.py,sha256=AzjF9jElWfw
17
17
  chatgpt_md_converter/telegram_entities/extractors/inline.py,sha256=DYSs7cJEFY3-fGtdMdOA7DO5ERtEF8r2GQns5WcPyto,8745
18
18
  chatgpt_md_converter/telegram_entities/extractors/links.py,sha256=AmCS8mx7ObY2aL5q7owULemjx-Ivuto_4PtKsL7K45Q,2898
19
19
  chatgpt_md_converter/telegram_markdown/__init__.py,sha256=C0Oexz9brpdE-TqEpiAUV78TsZdSrnnH_5yYpEJ03Us,131
20
- chatgpt_md_converter/telegram_markdown/code_blocks.py,sha256=VPkSisvb6DiS5KAcq0OaX4sqR1YX4VgZvJEXZeAjIWk,3067
20
+ chatgpt_md_converter/telegram_markdown/code_blocks.py,sha256=Y3IitUs846B8V7WqczTcLGf3AhuATtRz0DwBn_8udaw,3020
21
+ chatgpt_md_converter/telegram_markdown/html_escape.py,sha256=qf7icPXE5BcRyX58tUQ_WQpx38v5LmdDsBCyG4XagYQ,1264
21
22
  chatgpt_md_converter/telegram_markdown/inline.py,sha256=MPzj5VpDqrlvPy69CCwUIOsWgtgIFfbB4CliV5Wz-TY,2207
22
23
  chatgpt_md_converter/telegram_markdown/postprocess.py,sha256=jUf01tAIqHQ1NxNlVGsvU-Yw8SDOHtMoS7MUzaQLf_8,775
23
24
  chatgpt_md_converter/telegram_markdown/preprocess.py,sha256=k9XBtwgXkh07SlsqbdcZHwOMHhUGOjiIbOehO5wBnu0,1561
24
- chatgpt_md_converter/telegram_markdown/renderer.py,sha256=XQI15mkJe0wSxhuvRl_Md56wX7PsjtmyD8vUFw6RoCA,2326
25
- chatgpt_md_converter-0.4.0b3.dist-info/licenses/LICENSE,sha256=SDr2jeP-s2g4vf17-jdLXrrqA4_mU7L_RtSJlv4Y2mk,1077
26
- chatgpt_md_converter-0.4.0b3.dist-info/METADATA,sha256=fj0d7XDaFrmttdJag0-cGWv9ulLK22JRMhOx0fpmWHk,6606
27
- chatgpt_md_converter-0.4.0b3.dist-info/WHEEL,sha256=qELbo2s1Yzl39ZmrAibXA2jjPLUYfnVhUNTlyF1rq0Y,92
28
- chatgpt_md_converter-0.4.0b3.dist-info/top_level.txt,sha256=T2o7csVtZgr-Pwm83aSUkZn0humJmDFNqW38tRSsNqw,21
29
- chatgpt_md_converter-0.4.0b3.dist-info/RECORD,,
25
+ chatgpt_md_converter/telegram_markdown/renderer.py,sha256=zwobwAa6nybEVLNUciEsL2VuG8_jtPh_o3PriONLmzg,2278
26
+ chatgpt_md_converter-0.4.0b4.dist-info/licenses/LICENSE,sha256=SDr2jeP-s2g4vf17-jdLXrrqA4_mU7L_RtSJlv4Y2mk,1077
27
+ chatgpt_md_converter-0.4.0b4.dist-info/METADATA,sha256=lB9PWcyKIasLgVgvMrVOSA5NM3poDpAfKoiQm619RtQ,6606
28
+ chatgpt_md_converter-0.4.0b4.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
29
+ chatgpt_md_converter-0.4.0b4.dist-info/top_level.txt,sha256=T2o7csVtZgr-Pwm83aSUkZn0humJmDFNqW38tRSsNqw,21
30
+ chatgpt_md_converter-0.4.0b4.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.10.1)
2
+ Generator: setuptools (80.10.2)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5