chatgpt-md-converter 0.3.4__tar.gz → 0.3.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {chatgpt_md_converter-0.3.4 → chatgpt_md_converter-0.3.5}/PKG-INFO +1 -1
- {chatgpt_md_converter-0.3.4 → chatgpt_md_converter-0.3.5}/chatgpt_md_converter/extractors.py +8 -2
- {chatgpt_md_converter-0.3.4 → chatgpt_md_converter-0.3.5}/chatgpt_md_converter/telegram_formatter.py +9 -12
- {chatgpt_md_converter-0.3.4 → chatgpt_md_converter-0.3.5}/chatgpt_md_converter.egg-info/PKG-INFO +1 -1
- {chatgpt_md_converter-0.3.4 → chatgpt_md_converter-0.3.5}/setup.py +1 -1
- {chatgpt_md_converter-0.3.4 → chatgpt_md_converter-0.3.5}/tests/test_parser.py +10 -0
- {chatgpt_md_converter-0.3.4 → chatgpt_md_converter-0.3.5}/LICENSE +0 -0
- {chatgpt_md_converter-0.3.4 → chatgpt_md_converter-0.3.5}/chatgpt_md_converter/__init__.py +0 -0
- {chatgpt_md_converter-0.3.4 → chatgpt_md_converter-0.3.5}/chatgpt_md_converter/converters.py +0 -0
- {chatgpt_md_converter-0.3.4 → chatgpt_md_converter-0.3.5}/chatgpt_md_converter/formatters.py +0 -0
- {chatgpt_md_converter-0.3.4 → chatgpt_md_converter-0.3.5}/chatgpt_md_converter/helpers.py +0 -0
- {chatgpt_md_converter-0.3.4 → chatgpt_md_converter-0.3.5}/chatgpt_md_converter.egg-info/SOURCES.txt +0 -0
- {chatgpt_md_converter-0.3.4 → chatgpt_md_converter-0.3.5}/chatgpt_md_converter.egg-info/dependency_links.txt +0 -0
- {chatgpt_md_converter-0.3.4 → chatgpt_md_converter-0.3.5}/chatgpt_md_converter.egg-info/top_level.txt +0 -0
- {chatgpt_md_converter-0.3.4 → chatgpt_md_converter-0.3.5}/setup.cfg +0 -0
{chatgpt_md_converter-0.3.4 → chatgpt_md_converter-0.3.5}/chatgpt_md_converter/extractors.py
RENAMED
|
@@ -27,13 +27,19 @@ def extract_and_convert_code_blocks(text: str):
|
|
|
27
27
|
def replacer(match):
|
|
28
28
|
language = match.group(1) if match.group(1) else ""
|
|
29
29
|
code_content = match.group(3)
|
|
30
|
+
|
|
31
|
+
# Properly escape HTML entities in code content
|
|
32
|
+
escaped_content = (
|
|
33
|
+
code_content.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
|
|
34
|
+
)
|
|
35
|
+
|
|
30
36
|
placeholder = f"CODEBLOCKPLACEHOLDER{len(placeholders)}"
|
|
31
37
|
placeholders.append(placeholder)
|
|
32
38
|
if not language:
|
|
33
|
-
html_code_block = f"<pre><code>{code_content}</code></pre>"
|
|
39
|
+
html_code_block = f"<pre><code>{escaped_content}</code></pre>"
|
|
34
40
|
else:
|
|
35
41
|
html_code_block = (
|
|
36
|
-
f'<pre><code class="language-{language}">{code_content}</code></pre>'
|
|
42
|
+
f'<pre><code class="language-{language}">{escaped_content}</code></pre>'
|
|
37
43
|
)
|
|
38
44
|
return (placeholder, html_code_block)
|
|
39
45
|
|
{chatgpt_md_converter-0.3.4 → chatgpt_md_converter-0.3.5}/chatgpt_md_converter/telegram_formatter.py
RENAMED
|
@@ -35,18 +35,14 @@ def telegram_format(text: str) -> str:
|
|
|
35
35
|
# Step 0: Combine blockquotes
|
|
36
36
|
text = combine_blockquotes(text)
|
|
37
37
|
|
|
38
|
-
# Step 1:
|
|
39
|
-
text = convert_html_chars(text)
|
|
40
|
-
|
|
41
|
-
# Step 2: Extract and convert triple-backtick code blocks first
|
|
38
|
+
# Step 1: Extract and convert triple-backtick code blocks first
|
|
42
39
|
output, triple_code_blocks = extract_and_convert_code_blocks(text)
|
|
43
40
|
|
|
44
|
-
# Step 2
|
|
41
|
+
# Step 2: Extract inline code snippets
|
|
45
42
|
output, inline_code_snippets = extract_inline_code_snippets(output)
|
|
46
43
|
|
|
47
|
-
# Step 3:
|
|
48
|
-
|
|
49
|
-
output = output.replace("<", "&lt;").replace(">", "&gt;")
|
|
44
|
+
# Step 3: Convert HTML reserved symbols in the text (not in code blocks)
|
|
45
|
+
output = convert_html_chars(output)
|
|
50
46
|
|
|
51
47
|
# Convert headings (H1-H6)
|
|
52
48
|
output = re.sub(r"^(#{1,6})\s+(.+)$", r"<b>\2</b>", output, flags=re.MULTILINE)
|
|
@@ -80,20 +76,21 @@ def telegram_format(text: str) -> str:
|
|
|
80
76
|
link_pattern = r"(?:!?)\[((?:[^\[\]]|\[.*?\])*)\]\(([^)]+)\)"
|
|
81
77
|
output = re.sub(link_pattern, r'<a href="\2">\1</a>', output)
|
|
82
78
|
|
|
83
|
-
# Step
|
|
79
|
+
# Step 4: Reinsert inline code snippets, applying HTML escaping to the content
|
|
84
80
|
for placeholder, snippet in inline_code_snippets.items():
|
|
81
|
+
# Apply HTML escaping to the content of inline code
|
|
85
82
|
escaped_snippet = (
|
|
86
83
|
snippet.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
|
|
87
84
|
)
|
|
88
85
|
output = output.replace(placeholder, f"<code>{escaped_snippet}</code>")
|
|
89
86
|
|
|
90
|
-
# Step
|
|
87
|
+
# Step 5: Reinsert the converted triple-backtick code blocks
|
|
91
88
|
output = reinsert_code_blocks(output, triple_code_blocks)
|
|
92
89
|
|
|
93
|
-
# Step
|
|
90
|
+
# Step 6: Remove blockquote escaping
|
|
94
91
|
output = remove_blockquote_escaping(output)
|
|
95
92
|
|
|
96
|
-
# Step
|
|
93
|
+
# Step 7: Remove spoiler tag escaping
|
|
97
94
|
output = remove_spoiler_escaping(output)
|
|
98
95
|
|
|
99
96
|
# Clean up multiple consecutive newlines, but preserve intentional spacing
|
|
@@ -720,3 +720,13 @@ def test_multiple_spoilers():
|
|
|
720
720
|
expected_output = 'First <span class="tg-spoiler">spoiler</span> and then another <span class="tg-spoiler">spoiler with <i>italic</i></span>'
|
|
721
721
|
output = telegram_format(input_text)
|
|
722
722
|
assert output == expected_output, "Failed handling multiple spoilers"
|
|
723
|
+
|
|
724
|
+
|
|
725
|
+
def test_ukrainian_text_with_inline_code():
|
|
726
|
+
"""Test that Ukrainian text with inline code is properly formatted"""
|
|
727
|
+
input_text = (
|
|
728
|
+
"""звісно, майстре тестування. ой та зрозуміло `<LAUGH>` що ти тут тестуєш."""
|
|
729
|
+
)
|
|
730
|
+
expected_output = """звісно, майстре тестування. ой та зрозуміло <code>&lt;LAUGH&gt;</code> що ти тут тестуєш."""
|
|
731
|
+
output = telegram_format(input_text)
|
|
732
|
+
assert output == expected_output, f"Output was: {output}"
|
|
File without changes
|
|
File without changes
|
{chatgpt_md_converter-0.3.4 → chatgpt_md_converter-0.3.5}/chatgpt_md_converter/converters.py
RENAMED
|
File without changes
|
{chatgpt_md_converter-0.3.4 → chatgpt_md_converter-0.3.5}/chatgpt_md_converter/formatters.py
RENAMED
|
File without changes
|
|
File without changes
|
{chatgpt_md_converter-0.3.4 → chatgpt_md_converter-0.3.5}/chatgpt_md_converter.egg-info/SOURCES.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|