PyPI - chatgpt-md-converter - Versions diffs - 0.4.0b1__tar.gz → 0.4.0b3__tar.gz - Mend

chatgpt-md-converter 0.4.0b1__tar.gz → 0.4.0b3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (39) hide show

{chatgpt_md_converter-0.4.0b1 → chatgpt_md_converter-0.4.0b3}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: chatgpt_md_converter
-Version: 0.4.0b1
+Version: 0.4.0b3
 Summary: A package for converting markdown to HTML for chat Telegram bots
 Home-page: https://github.com/botfather-dev/formatter-chatgpt-telegram
 Author: Kostiantyn Kriuchkov

{chatgpt_md_converter-0.4.0b1 → chatgpt_md_converter-0.4.0b3}/chatgpt_md_converter/telegram_entities/extractors/links.py RENAMED Viewed

@@ -10,7 +10,10 @@ from ..entity import EntityType, TelegramEntity
 _LINK_PATTERN = re.compile(r"!?\[((?:[^\[\]]|\[.*?\])*)\]\(([^)]+)\)")
-def extract_link_entities(text: str) -> Tuple[str, List[TelegramEntity]]:
+def extract_link_entities(
+    text: str,
+    existing_entities: List[TelegramEntity] | None = None,
+) -> Tuple[str, List[TelegramEntity], List[TelegramEntity]]:
     """
     Extract Markdown links and return plain text with TEXT_LINK entities.
@@ -19,14 +22,18 @@ def extract_link_entities(text: str) -> Tuple[str, List[TelegramEntity]]:
     Args:
         text: Input text with Markdown links
+        existing_entities: Optional list of entities to adjust offsets for
     Returns:
-        Tuple of (text_with_links_replaced, list_of_entities)
+        Tuple of (text_with_links_replaced, link_entities, adjusted_existing_entities)
     """
     entities: List[TelegramEntity] = []
     result_parts: List[str] = []
     last_end = 0
+    # Track adjustments: list of (position_in_original, chars_removed)
+    adjustments: List[Tuple[int, int]] = []
     for match in _LINK_PATTERN.finditer(text):
         # Add text before this link
         result_parts.append(text[last_end : match.start()])
@@ -38,6 +45,12 @@ def extract_link_entities(text: str) -> Tuple[str, List[TelegramEntity]]:
         link_text = match.group(1)
         url = match.group(2)
+        # Calculate how many chars are removed
+        # Original: [text](url) or ![text](url)
+        # New: text
+        chars_removed = len(match.group(0)) - len(link_text)
+        adjustments.append((match.start(), chars_removed))
         # Add the link text (without the markdown syntax)
         result_parts.append(link_text)
@@ -56,4 +69,23 @@ def extract_link_entities(text: str) -> Tuple[str, List[TelegramEntity]]:
     # Add remaining text
     result_parts.append(text[last_end:])
-    return "".join(result_parts), entities
+    # Adjust existing entities
+    adjusted_existing: List[TelegramEntity] = []
+    if existing_entities:
+        for e in existing_entities:
+            new_offset = e.offset
+            # Apply all adjustments that occur before this entity
+            for adj_pos, chars_removed in adjustments:
+                if adj_pos < e.offset:
+                    new_offset -= chars_removed
+            adjusted_existing.append(
+                TelegramEntity(
+                    type=e.type,
+                    offset=new_offset,
+                    length=e.length,
+                    url=e.url,
+                    language=e.language,
+                )
+            )
+    return "".join(result_parts), entities, adjusted_existing

{chatgpt_md_converter-0.4.0b1 → chatgpt_md_converter-0.4.0b3}/chatgpt_md_converter/telegram_entities/parser.py RENAMED Viewed

@@ -167,8 +167,8 @@ def parse_entities(text: str) -> Tuple[str, List[TelegramEntity]]:
     text, inline_entities = extract_inline_formatting_entities(text)
     all_entities.extend(inline_entities)
-    # Extract links AFTER inline formatting so offsets are correct
-    text, link_entities = extract_link_entities(text)
+    # Extract links AFTER inline formatting, adjusting existing entity offsets
+    text, link_entities, all_entities = extract_link_entities(text, all_entities)
     all_entities.extend(link_entities)
     # Phase 4: Restore code placeholders and create entities

{chatgpt_md_converter-0.4.0b1 → chatgpt_md_converter-0.4.0b3}/chatgpt_md_converter/telegram_markdown/renderer.py RENAMED Viewed

@@ -34,6 +34,12 @@ def telegram_format(text: str) -> str:
     output = re.sub(r"【[^】]+】", "", output)
+    # Handle Telegram custom emoji before generic links
+    # ![emoji](tg://emoji?id=123) -> <tg-emoji emoji-id="123">emoji</tg-emoji>
+    emoji_pattern = r"!\[([^\]]*)\]\(tg://emoji\?id=(\d+)\)"
+    output = re.sub(emoji_pattern, r'<tg-emoji emoji-id="\2">\1</tg-emoji>', output)
+    # Handle all links including images (! prefix is stripped for non-emoji images)
     link_pattern = r"(?:!?)\[((?:[^\[\]]|\[.*?\])*)\]\(([^)]+)\)"
     output = re.sub(link_pattern, r'<a href="\2">\1</a>', output)

{chatgpt_md_converter-0.4.0b1 → chatgpt_md_converter-0.4.0b3}/chatgpt_md_converter.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: chatgpt_md_converter
-Version: 0.4.0b1
+Version: 0.4.0b3
 Summary: A package for converting markdown to HTML for chat Telegram bots
 Home-page: https://github.com/botfather-dev/formatter-chatgpt-telegram
 Author: Kostiantyn Kriuchkov

{chatgpt_md_converter-0.4.0b1 → chatgpt_md_converter-0.4.0b3}/setup.py RENAMED Viewed

@@ -2,7 +2,7 @@ from setuptools import setup
 setup(
     name="chatgpt_md_converter",
-    version="0.4.0b1",
+    version="0.4.0b3",
     author="Kostiantyn Kriuchkov",
     author_email="latand666@gmail.com",
     description="A package for converting markdown to HTML for chat Telegram bots",

{chatgpt_md_converter-0.4.0b1 → chatgpt_md_converter-0.4.0b3}/tests/test_parser.py RENAMED Viewed

@@ -1038,3 +1038,19 @@ def test_inline_code_with_escaped_backtick_trailing_text():
     expected_output = "Escaped \\*asterisks\\* and <code>code with \\</code> backtick`"
     output = telegram_format(input_text)
     assert output == expected_output
+def test_custom_emoji_conversion():
+    """Test that custom emoji markdown is converted to tg-emoji HTML tag."""
+    input_text = "Hello ![❤️](tg://emoji?id=5226457415154701085) world"
+    expected_output = 'Hello <tg-emoji emoji-id="5226457415154701085">❤️</tg-emoji> world'
+    output = telegram_format(input_text)
+    assert output == expected_output, "Failed converting custom emoji to <tg-emoji> tag"
+def test_custom_emoji_with_regular_link():
+    """Test that custom emoji and regular links are both handled correctly."""
+    input_text = "Emoji ![👍](tg://emoji?id=5368324170671202286) and [link](https://example.com)"
+    expected_output = 'Emoji <tg-emoji emoji-id="5368324170671202286">👍</tg-emoji> and <a href="https://example.com">link</a>'
+    output = telegram_format(input_text)
+    assert output == expected_output, "Failed handling emoji and link together"