chatgpt-md-converter 0.4.0b1__py3-none-any.whl → 0.4.0b3__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
@@ -10,7 +10,10 @@ from ..entity import EntityType, TelegramEntity
10
10
  _LINK_PATTERN = re.compile(r"!?\[((?:[^\[\]]|\[.*?\])*)\]\(([^)]+)\)")
11
11
 
12
12
 
13
- def extract_link_entities(text: str) -> Tuple[str, List[TelegramEntity]]:
13
+ def extract_link_entities(
14
+ text: str,
15
+ existing_entities: List[TelegramEntity] | None = None,
16
+ ) -> Tuple[str, List[TelegramEntity], List[TelegramEntity]]:
14
17
  """
15
18
  Extract Markdown links and return plain text with TEXT_LINK entities.
16
19
 
@@ -19,14 +22,18 @@ def extract_link_entities(text: str) -> Tuple[str, List[TelegramEntity]]:
19
22
 
20
23
  Args:
21
24
  text: Input text with Markdown links
25
+ existing_entities: Optional list of entities to adjust offsets for
22
26
 
23
27
  Returns:
24
- Tuple of (text_with_links_replaced, list_of_entities)
28
+ Tuple of (text_with_links_replaced, link_entities, adjusted_existing_entities)
25
29
  """
26
30
  entities: List[TelegramEntity] = []
27
31
  result_parts: List[str] = []
28
32
  last_end = 0
29
33
 
34
+ # Track adjustments: list of (position_in_original, chars_removed)
35
+ adjustments: List[Tuple[int, int]] = []
36
+
30
37
  for match in _LINK_PATTERN.finditer(text):
31
38
  # Add text before this link
32
39
  result_parts.append(text[last_end : match.start()])
@@ -38,6 +45,12 @@ def extract_link_entities(text: str) -> Tuple[str, List[TelegramEntity]]:
38
45
  link_text = match.group(1)
39
46
  url = match.group(2)
40
47
 
48
+ # Calculate how many chars are removed
49
+ # Original: [text](url) or ![text](url)
50
+ # New: text
51
+ chars_removed = len(match.group(0)) - len(link_text)
52
+ adjustments.append((match.start(), chars_removed))
53
+
41
54
  # Add the link text (without the markdown syntax)
42
55
  result_parts.append(link_text)
43
56
 
@@ -56,4 +69,23 @@ def extract_link_entities(text: str) -> Tuple[str, List[TelegramEntity]]:
56
69
  # Add remaining text
57
70
  result_parts.append(text[last_end:])
58
71
 
59
- return "".join(result_parts), entities
72
+ # Adjust existing entities
73
+ adjusted_existing: List[TelegramEntity] = []
74
+ if existing_entities:
75
+ for e in existing_entities:
76
+ new_offset = e.offset
77
+ # Apply all adjustments that occur before this entity
78
+ for adj_pos, chars_removed in adjustments:
79
+ if adj_pos < e.offset:
80
+ new_offset -= chars_removed
81
+ adjusted_existing.append(
82
+ TelegramEntity(
83
+ type=e.type,
84
+ offset=new_offset,
85
+ length=e.length,
86
+ url=e.url,
87
+ language=e.language,
88
+ )
89
+ )
90
+
91
+ return "".join(result_parts), entities, adjusted_existing
@@ -167,8 +167,8 @@ def parse_entities(text: str) -> Tuple[str, List[TelegramEntity]]:
167
167
  text, inline_entities = extract_inline_formatting_entities(text)
168
168
  all_entities.extend(inline_entities)
169
169
 
170
- # Extract links AFTER inline formatting so offsets are correct
171
- text, link_entities = extract_link_entities(text)
170
+ # Extract links AFTER inline formatting, adjusting existing entity offsets
171
+ text, link_entities, all_entities = extract_link_entities(text, all_entities)
172
172
  all_entities.extend(link_entities)
173
173
 
174
174
  # Phase 4: Restore code placeholders and create entities
@@ -34,6 +34,12 @@ def telegram_format(text: str) -> str:
34
34
 
35
35
  output = re.sub(r"【[^】]+】", "", output)
36
36
 
37
+ # Handle Telegram custom emoji before generic links
38
+ # ![emoji](tg://emoji?id=123) -> <tg-emoji emoji-id="123">emoji</tg-emoji>
39
+ emoji_pattern = r"!\[([^\]]*)\]\(tg://emoji\?id=(\d+)\)"
40
+ output = re.sub(emoji_pattern, r'<tg-emoji emoji-id="\2">\1</tg-emoji>', output)
41
+
42
+ # Handle all links including images (! prefix is stripped for non-emoji images)
37
43
  link_pattern = r"(?:!?)\[((?:[^\[\]]|\[.*?\])*)\]\(([^)]+)\)"
38
44
  output = re.sub(link_pattern, r'<a href="\2">\1</a>', output)
39
45
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: chatgpt_md_converter
3
- Version: 0.4.0b1
3
+ Version: 0.4.0b3
4
4
  Summary: A package for converting markdown to HTML for chat Telegram bots
5
5
  Home-page: https://github.com/botfather-dev/formatter-chatgpt-telegram
6
6
  Author: Kostiantyn Kriuchkov
@@ -9,21 +9,21 @@ chatgpt_md_converter/html_markdown/state.py,sha256=sxbz0ucCakI0KgR86EMZx0nvfU1oi
9
9
  chatgpt_md_converter/html_markdown/tree.py,sha256=ryohrhO2X5QepZev3087qPoGmMznqHDwH00TNGoW6a4,2154
10
10
  chatgpt_md_converter/telegram_entities/__init__.py,sha256=dopG-8_gWX8xPeD-9dyHdurs5VPrz-wAFFRvHNKiUNg,1855
11
11
  chatgpt_md_converter/telegram_entities/entity.py,sha256=oygQxwBsE7AGm2etq6HFZIeo7tBCwsUGniLP17-_Oz0,1705
12
- chatgpt_md_converter/telegram_entities/parser.py,sha256=P7uQeGaNLLuFa5QLEkkEhdSqaB9xIlUwuZjbXZ8hkGQ,9885
12
+ chatgpt_md_converter/telegram_entities/parser.py,sha256=rNYtWwZuet5_HObrupehnOiNaBoheDusGwOTaX5mQBs,9925
13
13
  chatgpt_md_converter/telegram_entities/utf16.py,sha256=eH-yX7d1wZwb3nRdk3kq1LFd-NQMqYHutPbkvX5_DC0,1283
14
14
  chatgpt_md_converter/telegram_entities/extractors/__init__.py,sha256=FinTAoRNjuHza0LcEBtpNnBvSR8PFo6cVVDkLg0cV6w,407
15
15
  chatgpt_md_converter/telegram_entities/extractors/blockquotes.py,sha256=Di8nG5Oej0hLbBB-WJ3GtlZCvCaa_BNmoUdpFGo9mnY,3596
16
16
  chatgpt_md_converter/telegram_entities/extractors/headings.py,sha256=AzjF9jElWfw3d4Qx-81fku7gyTkvb0pKlmow0zUXSk4,1602
17
17
  chatgpt_md_converter/telegram_entities/extractors/inline.py,sha256=DYSs7cJEFY3-fGtdMdOA7DO5ERtEF8r2GQns5WcPyto,8745
18
- chatgpt_md_converter/telegram_entities/extractors/links.py,sha256=fe35PDGKzbF0cRac3HgQ9mVFvYvrtt9LDmS_pL9GPlk,1671
18
+ chatgpt_md_converter/telegram_entities/extractors/links.py,sha256=AmCS8mx7ObY2aL5q7owULemjx-Ivuto_4PtKsL7K45Q,2898
19
19
  chatgpt_md_converter/telegram_markdown/__init__.py,sha256=C0Oexz9brpdE-TqEpiAUV78TsZdSrnnH_5yYpEJ03Us,131
20
20
  chatgpt_md_converter/telegram_markdown/code_blocks.py,sha256=VPkSisvb6DiS5KAcq0OaX4sqR1YX4VgZvJEXZeAjIWk,3067
21
21
  chatgpt_md_converter/telegram_markdown/inline.py,sha256=MPzj5VpDqrlvPy69CCwUIOsWgtgIFfbB4CliV5Wz-TY,2207
22
22
  chatgpt_md_converter/telegram_markdown/postprocess.py,sha256=jUf01tAIqHQ1NxNlVGsvU-Yw8SDOHtMoS7MUzaQLf_8,775
23
23
  chatgpt_md_converter/telegram_markdown/preprocess.py,sha256=k9XBtwgXkh07SlsqbdcZHwOMHhUGOjiIbOehO5wBnu0,1561
24
- chatgpt_md_converter/telegram_markdown/renderer.py,sha256=39ZehJq6PVWm-sigeBz7vCycwzEmV4Mwiw36jkGIgXI,1960
25
- chatgpt_md_converter-0.4.0b1.dist-info/licenses/LICENSE,sha256=SDr2jeP-s2g4vf17-jdLXrrqA4_mU7L_RtSJlv4Y2mk,1077
26
- chatgpt_md_converter-0.4.0b1.dist-info/METADATA,sha256=9wvQrKaXzPu-_VKWRW_cK4vmbaZkwgtzMrbENVHLZb4,6606
27
- chatgpt_md_converter-0.4.0b1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
28
- chatgpt_md_converter-0.4.0b1.dist-info/top_level.txt,sha256=T2o7csVtZgr-Pwm83aSUkZn0humJmDFNqW38tRSsNqw,21
29
- chatgpt_md_converter-0.4.0b1.dist-info/RECORD,,
24
+ chatgpt_md_converter/telegram_markdown/renderer.py,sha256=XQI15mkJe0wSxhuvRl_Md56wX7PsjtmyD8vUFw6RoCA,2326
25
+ chatgpt_md_converter-0.4.0b3.dist-info/licenses/LICENSE,sha256=SDr2jeP-s2g4vf17-jdLXrrqA4_mU7L_RtSJlv4Y2mk,1077
26
+ chatgpt_md_converter-0.4.0b3.dist-info/METADATA,sha256=fj0d7XDaFrmttdJag0-cGWv9ulLK22JRMhOx0fpmWHk,6606
27
+ chatgpt_md_converter-0.4.0b3.dist-info/WHEEL,sha256=qELbo2s1Yzl39ZmrAibXA2jjPLUYfnVhUNTlyF1rq0Y,92
28
+ chatgpt_md_converter-0.4.0b3.dist-info/top_level.txt,sha256=T2o7csVtZgr-Pwm83aSUkZn0humJmDFNqW38tRSsNqw,21
29
+ chatgpt_md_converter-0.4.0b3.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.9.0)
2
+ Generator: setuptools (80.10.1)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5