PyPI - chatgpt-md-converter - Versions diffs - 0.3.4__tar.gz → 0.3.5__tar.gz - Mend

chatgpt-md-converter 0.3.4.tar.gz → 0.3.5.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

{chatgpt_md_converter-0.3.4 → chatgpt_md_converter-0.3.5}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: chatgpt_md_converter
-Version: 0.3.4
+Version: 0.3.5
 Summary: A package for converting markdown to HTML for chat Telegram bots
 Home-page: https://github.com/Latand/formatter-chatgpt-telegram
 Author: Kostiantyn Kriuchkov

{chatgpt_md_converter-0.3.4 → chatgpt_md_converter-0.3.5}/chatgpt_md_converter/extractors.py RENAMED Viewed

@@ -27,13 +27,19 @@ def extract_and_convert_code_blocks(text: str):
     def replacer(match):
         language = match.group(1) if match.group(1) else ""
         code_content = match.group(3)
+        # Properly escape HTML entities in code content
+        escaped_content = (
+            code_content.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
+        )
         placeholder = f"CODEBLOCKPLACEHOLDER{len(placeholders)}"
         placeholders.append(placeholder)
         if not language:
-            html_code_block = f"<pre><code>{code_content}</code></pre>"
+            html_code_block = f"<pre><code>{escaped_content}</code></pre>"
         else:
             html_code_block = (
-                f'<pre><code class="language-{language}">{code_content}</code></pre>'
+                f'<pre><code class="language-{language}">{escaped_content}</code></pre>'
             )
         return (placeholder, html_code_block)

{chatgpt_md_converter-0.3.4 → chatgpt_md_converter-0.3.5}/chatgpt_md_converter/telegram_formatter.py RENAMED Viewed

@@ -35,18 +35,14 @@ def telegram_format(text: str) -> str:
     # Step 0: Combine blockquotes
     text = combine_blockquotes(text)
-    # Step 1: Convert HTML reserved symbols
-    text = convert_html_chars(text)
-    # Step 2: Extract and convert triple-backtick code blocks first
+    # Step 1: Extract and convert triple-backtick code blocks first
     output, triple_code_blocks = extract_and_convert_code_blocks(text)
-    # Step 2.5: Extract inline code snippets (single backticks) so they won't be parsed as italics, etc.
+    # Step 2: Extract inline code snippets
     output, inline_code_snippets = extract_inline_code_snippets(output)
-    # Step 3: Escape HTML special characters in the output text (for non-code parts)
-    # We do NOT want to escape what's inside placeholders here, only what's outside code placeholders.
-    output = output.replace("<", "&lt;").replace(">", "&gt;")
+    # Step 3: Convert HTML reserved symbols in the text (not in code blocks)
+    output = convert_html_chars(output)
     # Convert headings (H1-H6)
     output = re.sub(r"^(#{1,6})\s+(.+)$", r"<b>\2</b>", output, flags=re.MULTILINE)
@@ -80,20 +76,21 @@ def telegram_format(text: str) -> str:
     link_pattern = r"(?:!?)\[((?:[^\[\]]|\[.*?\])*)\]\(([^)]+)\)"
     output = re.sub(link_pattern, r'<a href="\2">\1</a>', output)
-    # Step 3.5: Reinsert inline code snippets, escaping special chars in code content
+    # Step 4: Reinsert inline code snippets, applying HTML escaping to the content
     for placeholder, snippet in inline_code_snippets.items():
+        # Apply HTML escaping to the content of inline code
         escaped_snippet = (
             snippet.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
         )
         output = output.replace(placeholder, f"<code>{escaped_snippet}</code>")
-    # Step 4: Reinsert the converted triple-backtick code blocks
+    # Step 5: Reinsert the converted triple-backtick code blocks
     output = reinsert_code_blocks(output, triple_code_blocks)
-    # Step 5: Remove blockquote escaping
+    # Step 6: Remove blockquote escaping
     output = remove_blockquote_escaping(output)
-    # Step 6: Remove spoiler tag escaping
+    # Step 7: Remove spoiler tag escaping
     output = remove_spoiler_escaping(output)
     # Clean up multiple consecutive newlines, but preserve intentional spacing

{chatgpt_md_converter-0.3.4 → chatgpt_md_converter-0.3.5}/chatgpt_md_converter.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: chatgpt_md_converter
-Version: 0.3.4
+Version: 0.3.5
 Summary: A package for converting markdown to HTML for chat Telegram bots
 Home-page: https://github.com/Latand/formatter-chatgpt-telegram
 Author: Kostiantyn Kriuchkov

{chatgpt_md_converter-0.3.4 → chatgpt_md_converter-0.3.5}/setup.py RENAMED Viewed

@@ -2,7 +2,7 @@ from setuptools import setup
 setup(
     name="chatgpt_md_converter",
-    version="0.3.4",
+    version="0.3.5",
     author="Kostiantyn Kriuchkov",
     author_email="latand666@gmail.com",
     description="A package for converting markdown to HTML for chat Telegram bots",

{chatgpt_md_converter-0.3.4 → chatgpt_md_converter-0.3.5}/tests/test_parser.py RENAMED Viewed

@@ -720,3 +720,13 @@ def test_multiple_spoilers():
     expected_output = 'First <span class="tg-spoiler">spoiler</span> and then another <span class="tg-spoiler">spoiler with <i>italic</i></span>'
     output = telegram_format(input_text)
     assert output == expected_output, "Failed handling multiple spoilers"
+def test_ukrainian_text_with_inline_code():
+    """Test that Ukrainian text with inline code is properly formatted"""
+    input_text = (
+        """звісно, майстре тестування. ой та зрозуміло `<LAUGH>` що ти тут тестуєш."""
+    )
+    expected_output = """звісно, майстре тестування. ой та зрозуміло <code>&lt;LAUGH&gt;</code> що ти тут тестуєш."""
+    output = telegram_format(input_text)
+    assert output == expected_output, f"Output was: {output}"