chatgpt-md-converter 0.4.0b1__py3-none-any.whl → 0.4.0b2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chatgpt_md_converter/telegram_entities/extractors/links.py +35 -3
- chatgpt_md_converter/telegram_entities/parser.py +2 -2
- {chatgpt_md_converter-0.4.0b1.dist-info → chatgpt_md_converter-0.4.0b2.dist-info}/METADATA +1 -1
- {chatgpt_md_converter-0.4.0b1.dist-info → chatgpt_md_converter-0.4.0b2.dist-info}/RECORD +7 -7
- {chatgpt_md_converter-0.4.0b1.dist-info → chatgpt_md_converter-0.4.0b2.dist-info}/WHEEL +0 -0
- {chatgpt_md_converter-0.4.0b1.dist-info → chatgpt_md_converter-0.4.0b2.dist-info}/licenses/LICENSE +0 -0
- {chatgpt_md_converter-0.4.0b1.dist-info → chatgpt_md_converter-0.4.0b2.dist-info}/top_level.txt +0 -0
|
@@ -10,7 +10,10 @@ from ..entity import EntityType, TelegramEntity
|
|
|
10
10
|
_LINK_PATTERN = re.compile(r"!?\[((?:[^\[\]]|\[.*?\])*)\]\(([^)]+)\)")
|
|
11
11
|
|
|
12
12
|
|
|
13
|
-
def extract_link_entities(text: str) -> Tuple[str, List[TelegramEntity]]:
|
|
13
|
+
def extract_link_entities(
|
|
14
|
+
text: str,
|
|
15
|
+
existing_entities: List[TelegramEntity] | None = None,
|
|
16
|
+
) -> Tuple[str, List[TelegramEntity], List[TelegramEntity]]:
|
|
14
17
|
"""
|
|
15
18
|
Extract Markdown links and return plain text with TEXT_LINK entities.
|
|
16
19
|
|
|
@@ -19,14 +22,18 @@ def extract_link_entities(text: str) -> Tuple[str, List[TelegramEntity]]:
|
|
|
19
22
|
|
|
20
23
|
Args:
|
|
21
24
|
text: Input text with Markdown links
|
|
25
|
+
existing_entities: Optional list of entities to adjust offsets for
|
|
22
26
|
|
|
23
27
|
Returns:
|
|
24
|
-
Tuple of (text_with_links_replaced,
|
|
28
|
+
Tuple of (text_with_links_replaced, link_entities, adjusted_existing_entities)
|
|
25
29
|
"""
|
|
26
30
|
entities: List[TelegramEntity] = []
|
|
27
31
|
result_parts: List[str] = []
|
|
28
32
|
last_end = 0
|
|
29
33
|
|
|
34
|
+
# Track adjustments: list of (position_in_original, chars_removed)
|
|
35
|
+
adjustments: List[Tuple[int, int]] = []
|
|
36
|
+
|
|
30
37
|
for match in _LINK_PATTERN.finditer(text):
|
|
31
38
|
# Add text before this link
|
|
32
39
|
result_parts.append(text[last_end : match.start()])
|
|
@@ -38,6 +45,12 @@ def extract_link_entities(text: str) -> Tuple[str, List[TelegramEntity]]:
|
|
|
38
45
|
link_text = match.group(1)
|
|
39
46
|
url = match.group(2)
|
|
40
47
|
|
|
48
|
+
# Calculate how many chars are removed
|
|
49
|
+
# Original: [text](url) or ![text](url)
|
|
50
|
+
# New: text
|
|
51
|
+
chars_removed = len(match.group(0)) - len(link_text)
|
|
52
|
+
adjustments.append((match.start(), chars_removed))
|
|
53
|
+
|
|
41
54
|
# Add the link text (without the markdown syntax)
|
|
42
55
|
result_parts.append(link_text)
|
|
43
56
|
|
|
@@ -56,4 +69,23 @@ def extract_link_entities(text: str) -> Tuple[str, List[TelegramEntity]]:
|
|
|
56
69
|
# Add remaining text
|
|
57
70
|
result_parts.append(text[last_end:])
|
|
58
71
|
|
|
59
|
-
|
|
72
|
+
# Adjust existing entities
|
|
73
|
+
adjusted_existing: List[TelegramEntity] = []
|
|
74
|
+
if existing_entities:
|
|
75
|
+
for e in existing_entities:
|
|
76
|
+
new_offset = e.offset
|
|
77
|
+
# Apply all adjustments that occur before this entity
|
|
78
|
+
for adj_pos, chars_removed in adjustments:
|
|
79
|
+
if adj_pos < e.offset:
|
|
80
|
+
new_offset -= chars_removed
|
|
81
|
+
adjusted_existing.append(
|
|
82
|
+
TelegramEntity(
|
|
83
|
+
type=e.type,
|
|
84
|
+
offset=new_offset,
|
|
85
|
+
length=e.length,
|
|
86
|
+
url=e.url,
|
|
87
|
+
language=e.language,
|
|
88
|
+
)
|
|
89
|
+
)
|
|
90
|
+
|
|
91
|
+
return "".join(result_parts), entities, adjusted_existing
|
|
@@ -167,8 +167,8 @@ def parse_entities(text: str) -> Tuple[str, List[TelegramEntity]]:
|
|
|
167
167
|
text, inline_entities = extract_inline_formatting_entities(text)
|
|
168
168
|
all_entities.extend(inline_entities)
|
|
169
169
|
|
|
170
|
-
# Extract links AFTER inline formatting
|
|
171
|
-
text, link_entities = extract_link_entities(text)
|
|
170
|
+
# Extract links AFTER inline formatting, adjusting existing entity offsets
|
|
171
|
+
text, link_entities, all_entities = extract_link_entities(text, all_entities)
|
|
172
172
|
all_entities.extend(link_entities)
|
|
173
173
|
|
|
174
174
|
# Phase 4: Restore code placeholders and create entities
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: chatgpt_md_converter
|
|
3
|
-
Version: 0.4.0b1
|
|
3
|
+
Version: 0.4.0b2
|
|
4
4
|
Summary: A package for converting markdown to HTML for chat Telegram bots
|
|
5
5
|
Home-page: https://github.com/botfather-dev/formatter-chatgpt-telegram
|
|
6
6
|
Author: Kostiantyn Kriuchkov
|
|
@@ -9,21 +9,21 @@ chatgpt_md_converter/html_markdown/state.py,sha256=sxbz0ucCakI0KgR86EMZx0nvfU1oi
|
|
|
9
9
|
chatgpt_md_converter/html_markdown/tree.py,sha256=ryohrhO2X5QepZev3087qPoGmMznqHDwH00TNGoW6a4,2154
|
|
10
10
|
chatgpt_md_converter/telegram_entities/__init__.py,sha256=dopG-8_gWX8xPeD-9dyHdurs5VPrz-wAFFRvHNKiUNg,1855
|
|
11
11
|
chatgpt_md_converter/telegram_entities/entity.py,sha256=oygQxwBsE7AGm2etq6HFZIeo7tBCwsUGniLP17-_Oz0,1705
|
|
12
|
-
chatgpt_md_converter/telegram_entities/parser.py,sha256=
|
|
12
|
+
chatgpt_md_converter/telegram_entities/parser.py,sha256=rNYtWwZuet5_HObrupehnOiNaBoheDusGwOTaX5mQBs,9925
|
|
13
13
|
chatgpt_md_converter/telegram_entities/utf16.py,sha256=eH-yX7d1wZwb3nRdk3kq1LFd-NQMqYHutPbkvX5_DC0,1283
|
|
14
14
|
chatgpt_md_converter/telegram_entities/extractors/__init__.py,sha256=FinTAoRNjuHza0LcEBtpNnBvSR8PFo6cVVDkLg0cV6w,407
|
|
15
15
|
chatgpt_md_converter/telegram_entities/extractors/blockquotes.py,sha256=Di8nG5Oej0hLbBB-WJ3GtlZCvCaa_BNmoUdpFGo9mnY,3596
|
|
16
16
|
chatgpt_md_converter/telegram_entities/extractors/headings.py,sha256=AzjF9jElWfw3d4Qx-81fku7gyTkvb0pKlmow0zUXSk4,1602
|
|
17
17
|
chatgpt_md_converter/telegram_entities/extractors/inline.py,sha256=DYSs7cJEFY3-fGtdMdOA7DO5ERtEF8r2GQns5WcPyto,8745
|
|
18
|
-
chatgpt_md_converter/telegram_entities/extractors/links.py,sha256=
|
|
18
|
+
chatgpt_md_converter/telegram_entities/extractors/links.py,sha256=AmCS8mx7ObY2aL5q7owULemjx-Ivuto_4PtKsL7K45Q,2898
|
|
19
19
|
chatgpt_md_converter/telegram_markdown/__init__.py,sha256=C0Oexz9brpdE-TqEpiAUV78TsZdSrnnH_5yYpEJ03Us,131
|
|
20
20
|
chatgpt_md_converter/telegram_markdown/code_blocks.py,sha256=VPkSisvb6DiS5KAcq0OaX4sqR1YX4VgZvJEXZeAjIWk,3067
|
|
21
21
|
chatgpt_md_converter/telegram_markdown/inline.py,sha256=MPzj5VpDqrlvPy69CCwUIOsWgtgIFfbB4CliV5Wz-TY,2207
|
|
22
22
|
chatgpt_md_converter/telegram_markdown/postprocess.py,sha256=jUf01tAIqHQ1NxNlVGsvU-Yw8SDOHtMoS7MUzaQLf_8,775
|
|
23
23
|
chatgpt_md_converter/telegram_markdown/preprocess.py,sha256=k9XBtwgXkh07SlsqbdcZHwOMHhUGOjiIbOehO5wBnu0,1561
|
|
24
24
|
chatgpt_md_converter/telegram_markdown/renderer.py,sha256=39ZehJq6PVWm-sigeBz7vCycwzEmV4Mwiw36jkGIgXI,1960
|
|
25
|
-
chatgpt_md_converter-0.4.0b1.dist-info/licenses/LICENSE,sha256=SDr2jeP-s2g4vf17-jdLXrrqA4_mU7L_RtSJlv4Y2mk,1077
|
|
26
|
-
chatgpt_md_converter-0.4.
|
|
27
|
-
chatgpt_md_converter-0.4.0b1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
28
|
-
chatgpt_md_converter-0.4.0b1.dist-info/top_level.txt,sha256=T2o7csVtZgr-Pwm83aSUkZn0humJmDFNqW38tRSsNqw,21
|
|
29
|
-
chatgpt_md_converter-0.4.0b1.dist-info/RECORD,,
|
|
25
|
+
chatgpt_md_converter-0.4.0b2.dist-info/licenses/LICENSE,sha256=SDr2jeP-s2g4vf17-jdLXrrqA4_mU7L_RtSJlv4Y2mk,1077
|
|
26
|
+
chatgpt_md_converter-0.4.0b2.dist-info/METADATA,sha256=pxqmox4G4H1wVWmbDAWa0WB4Rh8BdT4AmXuFHfuD2cc,6606
|
|
27
|
+
chatgpt_md_converter-0.4.0b2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
28
|
+
chatgpt_md_converter-0.4.0b2.dist-info/top_level.txt,sha256=T2o7csVtZgr-Pwm83aSUkZn0humJmDFNqW38tRSsNqw,21
|
|
29
|
+
chatgpt_md_converter-0.4.0b2.dist-info/RECORD,,
|
|
File without changes
|
{chatgpt_md_converter-0.4.0b1.dist-info → chatgpt_md_converter-0.4.0b2.dist-info}/licenses/LICENSE
RENAMED
|
File without changes
|
{chatgpt_md_converter-0.4.0b1.dist-info → chatgpt_md_converter-0.4.0b2.dist-info}/top_level.txt
RENAMED
|
File without changes
|