chatgpt-md-converter 0.4.0b3__py3-none-any.whl → 0.4.0b4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chatgpt_md_converter/telegram_markdown/code_blocks.py +3 -5
- chatgpt_md_converter/telegram_markdown/html_escape.py +40 -0
- chatgpt_md_converter/telegram_markdown/renderer.py +2 -5
- {chatgpt_md_converter-0.4.0b3.dist-info → chatgpt_md_converter-0.4.0b4.dist-info}/METADATA +1 -1
- {chatgpt_md_converter-0.4.0b3.dist-info → chatgpt_md_converter-0.4.0b4.dist-info}/RECORD +8 -7
- {chatgpt_md_converter-0.4.0b3.dist-info → chatgpt_md_converter-0.4.0b4.dist-info}/WHEEL +1 -1
- {chatgpt_md_converter-0.4.0b3.dist-info → chatgpt_md_converter-0.4.0b4.dist-info}/licenses/LICENSE +0 -0
- {chatgpt_md_converter-0.4.0b3.dist-info → chatgpt_md_converter-0.4.0b4.dist-info}/top_level.txt +0 -0
|
@@ -2,6 +2,8 @@
|
|
|
2
2
|
|
|
3
3
|
import re
|
|
4
4
|
|
|
5
|
+
from .html_escape import escape_code_content
|
|
6
|
+
|
|
5
7
|
_CODE_BLOCK_RE = re.compile(
|
|
6
8
|
r"(?P<fence>`{3,})(?P<lang>\w+)?\n?[\s\S]*?(?<=\n)?(?P=fence)",
|
|
7
9
|
flags=re.DOTALL,
|
|
@@ -62,11 +64,7 @@ def extract_and_convert_code_blocks(text: str):
|
|
|
62
64
|
def _replacement(match: re.Match[str]) -> tuple[str, str]:
|
|
63
65
|
language = match.group("lang") or ""
|
|
64
66
|
code_content = match.group("code")
|
|
65
|
-
escaped = (
|
|
66
|
-
code_content.replace("&", "&amp;")
|
|
67
|
-
.replace("<", "&lt;")
|
|
68
|
-
.replace(">", "&gt;")
|
|
69
|
-
)
|
|
67
|
+
escaped = escape_code_content(code_content)
|
|
70
68
|
placeholder = f"CODEBLOCKPLACEHOLDER_{len(placeholders)}_"
|
|
71
69
|
placeholders.append(placeholder)
|
|
72
70
|
if language:
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
"""HTML escaping utilities for code content.
|
|
2
|
+
|
|
3
|
+
LLMs sometimes pre-escape HTML entities (&lt; &gt; &amp; &quot;) in
|
|
4
|
+
markdown code blocks and inline code. We unescape first, then
|
|
5
|
+
re-escape exactly once to avoid double-escaping like &amp;lt;.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import re
|
|
9
|
+
|
|
10
|
+
_HTML_ENTITY_RE = re.compile(r"&(?:lt|gt|amp|quot|apos|#\d+|#x[\da-fA-F]+);")
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def _is_pre_escaped(text: str) -> bool:
|
|
14
|
+
"""Return True if the text contains any HTML character references."""
|
|
15
|
+
return bool(_HTML_ENTITY_RE.search(text))
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _unescape_html(text: str) -> str:
|
|
19
|
+
"""Unescape common HTML character references to their literal chars."""
|
|
20
|
+
text = text.replace("&amp;", "&")
|
|
21
|
+
text = text.replace("&lt;", "<")
|
|
22
|
+
text = text.replace("&gt;", ">")
|
|
23
|
+
text = text.replace("&quot;", '"')
|
|
24
|
+
text = text.replace("'", "'")
|
|
25
|
+
return text
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def escape_code_content(text: str) -> str:
|
|
29
|
+
"""Escape code content for Telegram HTML, handling pre-escaped input.
|
|
30
|
+
|
|
31
|
+
If the input already contains HTML entities (from LLM pre-escaping),
|
|
32
|
+
unescape them first so we produce a single level of escaping.
|
|
33
|
+
"""
|
|
34
|
+
if _is_pre_escaped(text):
|
|
35
|
+
text = _unescape_html(text)
|
|
36
|
+
return (
|
|
37
|
+
text.replace("&", "&amp;")
|
|
38
|
+
.replace("<", "&lt;")
|
|
39
|
+
.replace(">", "&gt;")
|
|
40
|
+
)
|
|
@@ -5,6 +5,7 @@ from __future__ import annotations
|
|
|
5
5
|
import re
|
|
6
6
|
|
|
7
7
|
from .code_blocks import extract_and_convert_code_blocks, reinsert_code_blocks
|
|
8
|
+
from .html_escape import escape_code_content
|
|
8
9
|
from .inline import (apply_custom_italic, convert_html_chars,
|
|
9
10
|
extract_inline_code_snippets, split_by_tag)
|
|
10
11
|
from .postprocess import remove_blockquote_escaping, remove_spoiler_escaping
|
|
@@ -44,11 +45,7 @@ def telegram_format(text: str) -> str:
|
|
|
44
45
|
output = re.sub(link_pattern, r'<a href="\2">\1</a>', output)
|
|
45
46
|
|
|
46
47
|
for placeholder, snippet in inline_snippets.items():
|
|
47
|
-
escaped = (
|
|
48
|
-
snippet.replace("&", "&amp;")
|
|
49
|
-
.replace("<", "&lt;")
|
|
50
|
-
.replace(">", "&gt;")
|
|
51
|
-
)
|
|
48
|
+
escaped = escape_code_content(snippet)
|
|
52
49
|
output = output.replace(placeholder, f"<code>{escaped}</code>")
|
|
53
50
|
|
|
54
51
|
output = reinsert_code_blocks(output, block_map)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: chatgpt_md_converter
|
|
3
|
-
Version: 0.4.0b3
|
|
3
|
+
Version: 0.4.0b4
|
|
4
4
|
Summary: A package for converting markdown to HTML for chat Telegram bots
|
|
5
5
|
Home-page: https://github.com/botfather-dev/formatter-chatgpt-telegram
|
|
6
6
|
Author: Kostiantyn Kriuchkov
|
|
@@ -17,13 +17,14 @@ chatgpt_md_converter/telegram_entities/extractors/headings.py,sha256=AzjF9jElWfw
|
|
|
17
17
|
chatgpt_md_converter/telegram_entities/extractors/inline.py,sha256=DYSs7cJEFY3-fGtdMdOA7DO5ERtEF8r2GQns5WcPyto,8745
|
|
18
18
|
chatgpt_md_converter/telegram_entities/extractors/links.py,sha256=AmCS8mx7ObY2aL5q7owULemjx-Ivuto_4PtKsL7K45Q,2898
|
|
19
19
|
chatgpt_md_converter/telegram_markdown/__init__.py,sha256=C0Oexz9brpdE-TqEpiAUV78TsZdSrnnH_5yYpEJ03Us,131
|
|
20
|
-
chatgpt_md_converter/telegram_markdown/code_blocks.py,sha256=
|
|
20
|
+
chatgpt_md_converter/telegram_markdown/code_blocks.py,sha256=Y3IitUs846B8V7WqczTcLGf3AhuATtRz0DwBn_8udaw,3020
|
|
21
|
+
chatgpt_md_converter/telegram_markdown/html_escape.py,sha256=qf7icPXE5BcRyX58tUQ_WQpx38v5LmdDsBCyG4XagYQ,1264
|
|
21
22
|
chatgpt_md_converter/telegram_markdown/inline.py,sha256=MPzj5VpDqrlvPy69CCwUIOsWgtgIFfbB4CliV5Wz-TY,2207
|
|
22
23
|
chatgpt_md_converter/telegram_markdown/postprocess.py,sha256=jUf01tAIqHQ1NxNlVGsvU-Yw8SDOHtMoS7MUzaQLf_8,775
|
|
23
24
|
chatgpt_md_converter/telegram_markdown/preprocess.py,sha256=k9XBtwgXkh07SlsqbdcZHwOMHhUGOjiIbOehO5wBnu0,1561
|
|
24
|
-
chatgpt_md_converter/telegram_markdown/renderer.py,sha256=
|
|
25
|
-
chatgpt_md_converter-0.4.
|
|
26
|
-
chatgpt_md_converter-0.4.
|
|
27
|
-
chatgpt_md_converter-0.4.
|
|
28
|
-
chatgpt_md_converter-0.4.
|
|
29
|
-
chatgpt_md_converter-0.4.
|
|
25
|
+
chatgpt_md_converter/telegram_markdown/renderer.py,sha256=zwobwAa6nybEVLNUciEsL2VuG8_jtPh_o3PriONLmzg,2278
|
|
26
|
+
chatgpt_md_converter-0.4.0b4.dist-info/licenses/LICENSE,sha256=SDr2jeP-s2g4vf17-jdLXrrqA4_mU7L_RtSJlv4Y2mk,1077
|
|
27
|
+
chatgpt_md_converter-0.4.0b4.dist-info/METADATA,sha256=lB9PWcyKIasLgVgvMrVOSA5NM3poDpAfKoiQm619RtQ,6606
|
|
28
|
+
chatgpt_md_converter-0.4.0b4.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
29
|
+
chatgpt_md_converter-0.4.0b4.dist-info/top_level.txt,sha256=T2o7csVtZgr-Pwm83aSUkZn0humJmDFNqW38tRSsNqw,21
|
|
30
|
+
chatgpt_md_converter-0.4.0b4.dist-info/RECORD,,
|
{chatgpt_md_converter-0.4.0b3.dist-info → chatgpt_md_converter-0.4.0b4.dist-info}/licenses/LICENSE
RENAMED
|
File without changes
|
{chatgpt_md_converter-0.4.0b3.dist-info → chatgpt_md_converter-0.4.0b4.dist-info}/top_level.txt
RENAMED
|
File without changes
|