chatgpt-md-converter 0.4.0b1__tar.gz → 0.4.0b3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. {chatgpt_md_converter-0.4.0b1 → chatgpt_md_converter-0.4.0b3}/PKG-INFO +1 -1
  2. {chatgpt_md_converter-0.4.0b1 → chatgpt_md_converter-0.4.0b3}/chatgpt_md_converter/telegram_entities/extractors/links.py +35 -3
  3. {chatgpt_md_converter-0.4.0b1 → chatgpt_md_converter-0.4.0b3}/chatgpt_md_converter/telegram_entities/parser.py +2 -2
  4. {chatgpt_md_converter-0.4.0b1 → chatgpt_md_converter-0.4.0b3}/chatgpt_md_converter/telegram_markdown/renderer.py +6 -0
  5. {chatgpt_md_converter-0.4.0b1 → chatgpt_md_converter-0.4.0b3}/chatgpt_md_converter.egg-info/PKG-INFO +1 -1
  6. {chatgpt_md_converter-0.4.0b1 → chatgpt_md_converter-0.4.0b3}/setup.py +1 -1
  7. {chatgpt_md_converter-0.4.0b1 → chatgpt_md_converter-0.4.0b3}/tests/test_parser.py +16 -0
  8. {chatgpt_md_converter-0.4.0b1 → chatgpt_md_converter-0.4.0b3}/LICENSE +0 -0
  9. {chatgpt_md_converter-0.4.0b1 → chatgpt_md_converter-0.4.0b3}/README.md +0 -0
  10. {chatgpt_md_converter-0.4.0b1 → chatgpt_md_converter-0.4.0b3}/chatgpt_md_converter/__init__.py +0 -0
  11. {chatgpt_md_converter-0.4.0b1 → chatgpt_md_converter-0.4.0b3}/chatgpt_md_converter/html_markdown/escaping.py +0 -0
  12. {chatgpt_md_converter-0.4.0b1 → chatgpt_md_converter-0.4.0b3}/chatgpt_md_converter/html_markdown/handlers.py +0 -0
  13. {chatgpt_md_converter-0.4.0b1 → chatgpt_md_converter-0.4.0b3}/chatgpt_md_converter/html_markdown/renderer.py +0 -0
  14. {chatgpt_md_converter-0.4.0b1 → chatgpt_md_converter-0.4.0b3}/chatgpt_md_converter/html_markdown/state.py +0 -0
  15. {chatgpt_md_converter-0.4.0b1 → chatgpt_md_converter-0.4.0b3}/chatgpt_md_converter/html_markdown/tree.py +0 -0
  16. {chatgpt_md_converter-0.4.0b1 → chatgpt_md_converter-0.4.0b3}/chatgpt_md_converter/html_splitter.py +0 -0
  17. {chatgpt_md_converter-0.4.0b1 → chatgpt_md_converter-0.4.0b3}/chatgpt_md_converter/html_to_markdown.py +0 -0
  18. {chatgpt_md_converter-0.4.0b1 → chatgpt_md_converter-0.4.0b3}/chatgpt_md_converter/telegram_entities/__init__.py +0 -0
  19. {chatgpt_md_converter-0.4.0b1 → chatgpt_md_converter-0.4.0b3}/chatgpt_md_converter/telegram_entities/entity.py +0 -0
  20. {chatgpt_md_converter-0.4.0b1 → chatgpt_md_converter-0.4.0b3}/chatgpt_md_converter/telegram_entities/extractors/__init__.py +0 -0
  21. {chatgpt_md_converter-0.4.0b1 → chatgpt_md_converter-0.4.0b3}/chatgpt_md_converter/telegram_entities/extractors/blockquotes.py +0 -0
  22. {chatgpt_md_converter-0.4.0b1 → chatgpt_md_converter-0.4.0b3}/chatgpt_md_converter/telegram_entities/extractors/headings.py +0 -0
  23. {chatgpt_md_converter-0.4.0b1 → chatgpt_md_converter-0.4.0b3}/chatgpt_md_converter/telegram_entities/extractors/inline.py +0 -0
  24. {chatgpt_md_converter-0.4.0b1 → chatgpt_md_converter-0.4.0b3}/chatgpt_md_converter/telegram_entities/utf16.py +0 -0
  25. {chatgpt_md_converter-0.4.0b1 → chatgpt_md_converter-0.4.0b3}/chatgpt_md_converter/telegram_formatter.py +0 -0
  26. {chatgpt_md_converter-0.4.0b1 → chatgpt_md_converter-0.4.0b3}/chatgpt_md_converter/telegram_markdown/__init__.py +0 -0
  27. {chatgpt_md_converter-0.4.0b1 → chatgpt_md_converter-0.4.0b3}/chatgpt_md_converter/telegram_markdown/code_blocks.py +0 -0
  28. {chatgpt_md_converter-0.4.0b1 → chatgpt_md_converter-0.4.0b3}/chatgpt_md_converter/telegram_markdown/inline.py +0 -0
  29. {chatgpt_md_converter-0.4.0b1 → chatgpt_md_converter-0.4.0b3}/chatgpt_md_converter/telegram_markdown/postprocess.py +0 -0
  30. {chatgpt_md_converter-0.4.0b1 → chatgpt_md_converter-0.4.0b3}/chatgpt_md_converter/telegram_markdown/preprocess.py +0 -0
  31. {chatgpt_md_converter-0.4.0b1 → chatgpt_md_converter-0.4.0b3}/chatgpt_md_converter.egg-info/SOURCES.txt +0 -0
  32. {chatgpt_md_converter-0.4.0b1 → chatgpt_md_converter-0.4.0b3}/chatgpt_md_converter.egg-info/dependency_links.txt +0 -0
  33. {chatgpt_md_converter-0.4.0b1 → chatgpt_md_converter-0.4.0b3}/chatgpt_md_converter.egg-info/top_level.txt +0 -0
  34. {chatgpt_md_converter-0.4.0b1 → chatgpt_md_converter-0.4.0b3}/setup.cfg +0 -0
  35. {chatgpt_md_converter-0.4.0b1 → chatgpt_md_converter-0.4.0b3}/tests/test_entities.py +0 -0
  36. {chatgpt_md_converter-0.4.0b1 → chatgpt_md_converter-0.4.0b3}/tests/test_html_to_markdown_inline_spacing.py +0 -0
  37. {chatgpt_md_converter-0.4.0b1 → chatgpt_md_converter-0.4.0b3}/tests/test_roundtrip_markdown.py +0 -0
  38. {chatgpt_md_converter-0.4.0b1 → chatgpt_md_converter-0.4.0b3}/tests/test_splitter.py +0 -0
  39. {chatgpt_md_converter-0.4.0b1 → chatgpt_md_converter-0.4.0b3}/tests/test_telegram_api.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: chatgpt_md_converter
3
- Version: 0.4.0b1
3
+ Version: 0.4.0b3
4
4
  Summary: A package for converting markdown to HTML for chat Telegram bots
5
5
  Home-page: https://github.com/botfather-dev/formatter-chatgpt-telegram
6
6
  Author: Kostiantyn Kriuchkov
@@ -10,7 +10,10 @@ from ..entity import EntityType, TelegramEntity
10
10
  _LINK_PATTERN = re.compile(r"!?\[((?:[^\[\]]|\[.*?\])*)\]\(([^)]+)\)")
11
11
 
12
12
 
13
- def extract_link_entities(text: str) -> Tuple[str, List[TelegramEntity]]:
13
+ def extract_link_entities(
14
+ text: str,
15
+ existing_entities: List[TelegramEntity] | None = None,
16
+ ) -> Tuple[str, List[TelegramEntity], List[TelegramEntity]]:
14
17
  """
15
18
  Extract Markdown links and return plain text with TEXT_LINK entities.
16
19
 
@@ -19,14 +22,18 @@ def extract_link_entities(text: str) -> Tuple[str, List[TelegramEntity]]:
19
22
 
20
23
  Args:
21
24
  text: Input text with Markdown links
25
+ existing_entities: Optional list of entities to adjust offsets for
22
26
 
23
27
  Returns:
24
- Tuple of (text_with_links_replaced, list_of_entities)
28
+ Tuple of (text_with_links_replaced, link_entities, adjusted_existing_entities)
25
29
  """
26
30
  entities: List[TelegramEntity] = []
27
31
  result_parts: List[str] = []
28
32
  last_end = 0
29
33
 
34
+ # Track adjustments: list of (position_in_original, chars_removed)
35
+ adjustments: List[Tuple[int, int]] = []
36
+
30
37
  for match in _LINK_PATTERN.finditer(text):
31
38
  # Add text before this link
32
39
  result_parts.append(text[last_end : match.start()])
@@ -38,6 +45,12 @@ def extract_link_entities(text: str) -> Tuple[str, List[TelegramEntity]]:
38
45
  link_text = match.group(1)
39
46
  url = match.group(2)
40
47
 
48
+ # Calculate how many chars are removed
49
+ # Original: [text](url) or ![text](url)
50
+ # New: text
51
+ chars_removed = len(match.group(0)) - len(link_text)
52
+ adjustments.append((match.start(), chars_removed))
53
+
41
54
  # Add the link text (without the markdown syntax)
42
55
  result_parts.append(link_text)
43
56
 
@@ -56,4 +69,23 @@ def extract_link_entities(text: str) -> Tuple[str, List[TelegramEntity]]:
56
69
  # Add remaining text
57
70
  result_parts.append(text[last_end:])
58
71
 
59
- return "".join(result_parts), entities
72
+ # Adjust existing entities
73
+ adjusted_existing: List[TelegramEntity] = []
74
+ if existing_entities:
75
+ for e in existing_entities:
76
+ new_offset = e.offset
77
+ # Apply all adjustments that occur before this entity
78
+ for adj_pos, chars_removed in adjustments:
79
+ if adj_pos < e.offset:
80
+ new_offset -= chars_removed
81
+ adjusted_existing.append(
82
+ TelegramEntity(
83
+ type=e.type,
84
+ offset=new_offset,
85
+ length=e.length,
86
+ url=e.url,
87
+ language=e.language,
88
+ )
89
+ )
90
+
91
+ return "".join(result_parts), entities, adjusted_existing
@@ -167,8 +167,8 @@ def parse_entities(text: str) -> Tuple[str, List[TelegramEntity]]:
167
167
  text, inline_entities = extract_inline_formatting_entities(text)
168
168
  all_entities.extend(inline_entities)
169
169
 
170
- # Extract links AFTER inline formatting so offsets are correct
171
- text, link_entities = extract_link_entities(text)
170
+ # Extract links AFTER inline formatting, adjusting existing entity offsets
171
+ text, link_entities, all_entities = extract_link_entities(text, all_entities)
172
172
  all_entities.extend(link_entities)
173
173
 
174
174
  # Phase 4: Restore code placeholders and create entities
@@ -34,6 +34,12 @@ def telegram_format(text: str) -> str:
34
34
 
35
35
  output = re.sub(r"【[^】]+】", "", output)
36
36
 
37
+ # Handle Telegram custom emoji before generic links
38
+ # ![emoji](tg://emoji?id=123) -> <tg-emoji emoji-id="123">emoji</tg-emoji>
39
+ emoji_pattern = r"!\[([^\]]*)\]\(tg://emoji\?id=(\d+)\)"
40
+ output = re.sub(emoji_pattern, r'<tg-emoji emoji-id="\2">\1</tg-emoji>', output)
41
+
42
+ # Handle all links including images (! prefix is stripped for non-emoji images)
37
43
  link_pattern = r"(?:!?)\[((?:[^\[\]]|\[.*?\])*)\]\(([^)]+)\)"
38
44
  output = re.sub(link_pattern, r'<a href="\2">\1</a>', output)
39
45
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: chatgpt_md_converter
3
- Version: 0.4.0b1
3
+ Version: 0.4.0b3
4
4
  Summary: A package for converting markdown to HTML for chat Telegram bots
5
5
  Home-page: https://github.com/botfather-dev/formatter-chatgpt-telegram
6
6
  Author: Kostiantyn Kriuchkov
@@ -2,7 +2,7 @@ from setuptools import setup
2
2
 
3
3
  setup(
4
4
  name="chatgpt_md_converter",
5
- version="0.4.0b1",
5
+ version="0.4.0b3",
6
6
  author="Kostiantyn Kriuchkov",
7
7
  author_email="latand666@gmail.com",
8
8
  description="A package for converting markdown to HTML for chat Telegram bots",
@@ -1038,3 +1038,19 @@ def test_inline_code_with_escaped_backtick_trailing_text():
1038
1038
  expected_output = "Escaped \\*asterisks\\* and <code>code with \\</code> backtick`"
1039
1039
  output = telegram_format(input_text)
1040
1040
  assert output == expected_output
1041
+
1042
+
1043
+ def test_custom_emoji_conversion():
1044
+ """Test that custom emoji markdown is converted to tg-emoji HTML tag."""
1045
+ input_text = "Hello ![❤️](tg://emoji?id=5226457415154701085) world"
1046
+ expected_output = 'Hello <tg-emoji emoji-id="5226457415154701085">❤️</tg-emoji> world'
1047
+ output = telegram_format(input_text)
1048
+ assert output == expected_output, "Failed converting custom emoji to <tg-emoji> tag"
1049
+
1050
+
1051
+ def test_custom_emoji_with_regular_link():
1052
+ """Test that custom emoji and regular links are both handled correctly."""
1053
+ input_text = "Emoji ![👍](tg://emoji?id=5368324170671202286) and [link](https://example.com)"
1054
+ expected_output = 'Emoji <tg-emoji emoji-id="5368324170671202286">👍</tg-emoji> and <a href="https://example.com">link</a>'
1055
+ output = telegram_format(input_text)
1056
+ assert output == expected_output, "Failed handling emoji and link together"