chatgpt-md-converter 0.4.0b1__tar.gz → 0.4.0b3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {chatgpt_md_converter-0.4.0b1 → chatgpt_md_converter-0.4.0b3}/PKG-INFO +1 -1
- {chatgpt_md_converter-0.4.0b1 → chatgpt_md_converter-0.4.0b3}/chatgpt_md_converter/telegram_entities/extractors/links.py +35 -3
- {chatgpt_md_converter-0.4.0b1 → chatgpt_md_converter-0.4.0b3}/chatgpt_md_converter/telegram_entities/parser.py +2 -2
- {chatgpt_md_converter-0.4.0b1 → chatgpt_md_converter-0.4.0b3}/chatgpt_md_converter/telegram_markdown/renderer.py +6 -0
- {chatgpt_md_converter-0.4.0b1 → chatgpt_md_converter-0.4.0b3}/chatgpt_md_converter.egg-info/PKG-INFO +1 -1
- {chatgpt_md_converter-0.4.0b1 → chatgpt_md_converter-0.4.0b3}/setup.py +1 -1
- {chatgpt_md_converter-0.4.0b1 → chatgpt_md_converter-0.4.0b3}/tests/test_parser.py +16 -0
- {chatgpt_md_converter-0.4.0b1 → chatgpt_md_converter-0.4.0b3}/LICENSE +0 -0
- {chatgpt_md_converter-0.4.0b1 → chatgpt_md_converter-0.4.0b3}/README.md +0 -0
- {chatgpt_md_converter-0.4.0b1 → chatgpt_md_converter-0.4.0b3}/chatgpt_md_converter/__init__.py +0 -0
- {chatgpt_md_converter-0.4.0b1 → chatgpt_md_converter-0.4.0b3}/chatgpt_md_converter/html_markdown/escaping.py +0 -0
- {chatgpt_md_converter-0.4.0b1 → chatgpt_md_converter-0.4.0b3}/chatgpt_md_converter/html_markdown/handlers.py +0 -0
- {chatgpt_md_converter-0.4.0b1 → chatgpt_md_converter-0.4.0b3}/chatgpt_md_converter/html_markdown/renderer.py +0 -0
- {chatgpt_md_converter-0.4.0b1 → chatgpt_md_converter-0.4.0b3}/chatgpt_md_converter/html_markdown/state.py +0 -0
- {chatgpt_md_converter-0.4.0b1 → chatgpt_md_converter-0.4.0b3}/chatgpt_md_converter/html_markdown/tree.py +0 -0
- {chatgpt_md_converter-0.4.0b1 → chatgpt_md_converter-0.4.0b3}/chatgpt_md_converter/html_splitter.py +0 -0
- {chatgpt_md_converter-0.4.0b1 → chatgpt_md_converter-0.4.0b3}/chatgpt_md_converter/html_to_markdown.py +0 -0
- {chatgpt_md_converter-0.4.0b1 → chatgpt_md_converter-0.4.0b3}/chatgpt_md_converter/telegram_entities/__init__.py +0 -0
- {chatgpt_md_converter-0.4.0b1 → chatgpt_md_converter-0.4.0b3}/chatgpt_md_converter/telegram_entities/entity.py +0 -0
- {chatgpt_md_converter-0.4.0b1 → chatgpt_md_converter-0.4.0b3}/chatgpt_md_converter/telegram_entities/extractors/__init__.py +0 -0
- {chatgpt_md_converter-0.4.0b1 → chatgpt_md_converter-0.4.0b3}/chatgpt_md_converter/telegram_entities/extractors/blockquotes.py +0 -0
- {chatgpt_md_converter-0.4.0b1 → chatgpt_md_converter-0.4.0b3}/chatgpt_md_converter/telegram_entities/extractors/headings.py +0 -0
- {chatgpt_md_converter-0.4.0b1 → chatgpt_md_converter-0.4.0b3}/chatgpt_md_converter/telegram_entities/extractors/inline.py +0 -0
- {chatgpt_md_converter-0.4.0b1 → chatgpt_md_converter-0.4.0b3}/chatgpt_md_converter/telegram_entities/utf16.py +0 -0
- {chatgpt_md_converter-0.4.0b1 → chatgpt_md_converter-0.4.0b3}/chatgpt_md_converter/telegram_formatter.py +0 -0
- {chatgpt_md_converter-0.4.0b1 → chatgpt_md_converter-0.4.0b3}/chatgpt_md_converter/telegram_markdown/__init__.py +0 -0
- {chatgpt_md_converter-0.4.0b1 → chatgpt_md_converter-0.4.0b3}/chatgpt_md_converter/telegram_markdown/code_blocks.py +0 -0
- {chatgpt_md_converter-0.4.0b1 → chatgpt_md_converter-0.4.0b3}/chatgpt_md_converter/telegram_markdown/inline.py +0 -0
- {chatgpt_md_converter-0.4.0b1 → chatgpt_md_converter-0.4.0b3}/chatgpt_md_converter/telegram_markdown/postprocess.py +0 -0
- {chatgpt_md_converter-0.4.0b1 → chatgpt_md_converter-0.4.0b3}/chatgpt_md_converter/telegram_markdown/preprocess.py +0 -0
- {chatgpt_md_converter-0.4.0b1 → chatgpt_md_converter-0.4.0b3}/chatgpt_md_converter.egg-info/SOURCES.txt +0 -0
- {chatgpt_md_converter-0.4.0b1 → chatgpt_md_converter-0.4.0b3}/chatgpt_md_converter.egg-info/dependency_links.txt +0 -0
- {chatgpt_md_converter-0.4.0b1 → chatgpt_md_converter-0.4.0b3}/chatgpt_md_converter.egg-info/top_level.txt +0 -0
- {chatgpt_md_converter-0.4.0b1 → chatgpt_md_converter-0.4.0b3}/setup.cfg +0 -0
- {chatgpt_md_converter-0.4.0b1 → chatgpt_md_converter-0.4.0b3}/tests/test_entities.py +0 -0
- {chatgpt_md_converter-0.4.0b1 → chatgpt_md_converter-0.4.0b3}/tests/test_html_to_markdown_inline_spacing.py +0 -0
- {chatgpt_md_converter-0.4.0b1 → chatgpt_md_converter-0.4.0b3}/tests/test_roundtrip_markdown.py +0 -0
- {chatgpt_md_converter-0.4.0b1 → chatgpt_md_converter-0.4.0b3}/tests/test_splitter.py +0 -0
- {chatgpt_md_converter-0.4.0b1 → chatgpt_md_converter-0.4.0b3}/tests/test_telegram_api.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: chatgpt_md_converter
|
|
3
|
-
Version: 0.4.0b1
|
|
3
|
+
Version: 0.4.0b3
|
|
4
4
|
Summary: A package for converting markdown to HTML for chat Telegram bots
|
|
5
5
|
Home-page: https://github.com/botfather-dev/formatter-chatgpt-telegram
|
|
6
6
|
Author: Kostiantyn Kriuchkov
|
|
@@ -10,7 +10,10 @@ from ..entity import EntityType, TelegramEntity
|
|
|
10
10
|
_LINK_PATTERN = re.compile(r"!?\[((?:[^\[\]]|\[.*?\])*)\]\(([^)]+)\)")
|
|
11
11
|
|
|
12
12
|
|
|
13
|
-
def extract_link_entities(text: str) -> Tuple[str, List[TelegramEntity]]:
|
|
13
|
+
def extract_link_entities(
|
|
14
|
+
text: str,
|
|
15
|
+
existing_entities: List[TelegramEntity] | None = None,
|
|
16
|
+
) -> Tuple[str, List[TelegramEntity], List[TelegramEntity]]:
|
|
14
17
|
"""
|
|
15
18
|
Extract Markdown links and return plain text with TEXT_LINK entities.
|
|
16
19
|
|
|
@@ -19,14 +22,18 @@ def extract_link_entities(text: str) -> Tuple[str, List[TelegramEntity]]:
|
|
|
19
22
|
|
|
20
23
|
Args:
|
|
21
24
|
text: Input text with Markdown links
|
|
25
|
+
existing_entities: Optional list of entities to adjust offsets for
|
|
22
26
|
|
|
23
27
|
Returns:
|
|
24
|
-
Tuple of (text_with_links_replaced, entities)
|
|
28
|
+
Tuple of (text_with_links_replaced, link_entities, adjusted_existing_entities)
|
|
25
29
|
"""
|
|
26
30
|
entities: List[TelegramEntity] = []
|
|
27
31
|
result_parts: List[str] = []
|
|
28
32
|
last_end = 0
|
|
29
33
|
|
|
34
|
+
# Track adjustments: list of (position_in_original, chars_removed)
|
|
35
|
+
adjustments: List[Tuple[int, int]] = []
|
|
36
|
+
|
|
30
37
|
for match in _LINK_PATTERN.finditer(text):
|
|
31
38
|
# Add text before this link
|
|
32
39
|
result_parts.append(text[last_end : match.start()])
|
|
@@ -38,6 +45,12 @@ def extract_link_entities(text: str) -> Tuple[str, List[TelegramEntity]]:
|
|
|
38
45
|
link_text = match.group(1)
|
|
39
46
|
url = match.group(2)
|
|
40
47
|
|
|
48
|
+
# Calculate how many chars are removed
|
|
49
|
+
# Original: [text](url) or ![text](url)
|
|
50
|
+
# New: text
|
|
51
|
+
chars_removed = len(match.group(0)) - len(link_text)
|
|
52
|
+
adjustments.append((match.start(), chars_removed))
|
|
53
|
+
|
|
41
54
|
# Add the link text (without the markdown syntax)
|
|
42
55
|
result_parts.append(link_text)
|
|
43
56
|
|
|
@@ -56,4 +69,23 @@ def extract_link_entities(text: str) -> Tuple[str, List[TelegramEntity]]:
|
|
|
56
69
|
# Add remaining text
|
|
57
70
|
result_parts.append(text[last_end:])
|
|
58
71
|
|
|
59
|
-
|
|
72
|
+
# Adjust existing entities
|
|
73
|
+
adjusted_existing: List[TelegramEntity] = []
|
|
74
|
+
if existing_entities:
|
|
75
|
+
for e in existing_entities:
|
|
76
|
+
new_offset = e.offset
|
|
77
|
+
# Apply all adjustments that occur before this entity
|
|
78
|
+
for adj_pos, chars_removed in adjustments:
|
|
79
|
+
if adj_pos < e.offset:
|
|
80
|
+
new_offset -= chars_removed
|
|
81
|
+
adjusted_existing.append(
|
|
82
|
+
TelegramEntity(
|
|
83
|
+
type=e.type,
|
|
84
|
+
offset=new_offset,
|
|
85
|
+
length=e.length,
|
|
86
|
+
url=e.url,
|
|
87
|
+
language=e.language,
|
|
88
|
+
)
|
|
89
|
+
)
|
|
90
|
+
|
|
91
|
+
return "".join(result_parts), entities, adjusted_existing
|
|
@@ -167,8 +167,8 @@ def parse_entities(text: str) -> Tuple[str, List[TelegramEntity]]:
|
|
|
167
167
|
text, inline_entities = extract_inline_formatting_entities(text)
|
|
168
168
|
all_entities.extend(inline_entities)
|
|
169
169
|
|
|
170
|
-
# Extract links AFTER inline formatting
|
|
171
|
-
text, link_entities = extract_link_entities(text)
|
|
170
|
+
# Extract links AFTER inline formatting, adjusting existing entity offsets
|
|
171
|
+
text, link_entities, all_entities = extract_link_entities(text, all_entities)
|
|
172
172
|
all_entities.extend(link_entities)
|
|
173
173
|
|
|
174
174
|
# Phase 4: Restore code placeholders and create entities
|
|
@@ -34,6 +34,12 @@ def telegram_format(text: str) -> str:
|
|
|
34
34
|
|
|
35
35
|
output = re.sub(r"【[^】]+】", "", output)
|
|
36
36
|
|
|
37
|
+
# Handle Telegram custom emoji before generic links
|
|
38
|
+
# ![emoji](tg://emoji?id=123) -> <tg-emoji emoji-id="123">emoji</tg-emoji>
|
|
39
|
+
emoji_pattern = r"!\[([^\]]*)\]\(tg://emoji\?id=(\d+)\)"
|
|
40
|
+
output = re.sub(emoji_pattern, r'<tg-emoji emoji-id="\2">\1</tg-emoji>', output)
|
|
41
|
+
|
|
42
|
+
# Handle all links including images (! prefix is stripped for non-emoji images)
|
|
37
43
|
link_pattern = r"(?:!?)\[((?:[^\[\]]|\[.*?\])*)\]\(([^)]+)\)"
|
|
38
44
|
output = re.sub(link_pattern, r'<a href="\2">\1</a>', output)
|
|
39
45
|
|
{chatgpt_md_converter-0.4.0b1 → chatgpt_md_converter-0.4.0b3}/chatgpt_md_converter.egg-info/PKG-INFO
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: chatgpt_md_converter
|
|
3
|
-
Version: 0.4.0b1
|
|
3
|
+
Version: 0.4.0b3
|
|
4
4
|
Summary: A package for converting markdown to HTML for chat Telegram bots
|
|
5
5
|
Home-page: https://github.com/botfather-dev/formatter-chatgpt-telegram
|
|
6
6
|
Author: Kostiantyn Kriuchkov
|
|
@@ -1038,3 +1038,19 @@ def test_inline_code_with_escaped_backtick_trailing_text():
|
|
|
1038
1038
|
expected_output = "Escaped \\*asterisks\\* and <code>code with \\</code> backtick`"
|
|
1039
1039
|
output = telegram_format(input_text)
|
|
1040
1040
|
assert output == expected_output
|
|
1041
|
+
|
|
1042
|
+
|
|
1043
|
+
def test_custom_emoji_conversion():
|
|
1044
|
+
"""Test that custom emoji markdown is converted to tg-emoji HTML tag."""
|
|
1045
|
+
input_text = "Hello ![❤️](tg://emoji?id=5226457415154701085) world"
|
|
1046
|
+
expected_output = 'Hello <tg-emoji emoji-id="5226457415154701085">❤️</tg-emoji> world'
|
|
1047
|
+
output = telegram_format(input_text)
|
|
1048
|
+
assert output == expected_output, "Failed converting custom emoji to <tg-emoji> tag"
|
|
1049
|
+
|
|
1050
|
+
|
|
1051
|
+
def test_custom_emoji_with_regular_link():
|
|
1052
|
+
"""Test that custom emoji and regular links are both handled correctly."""
|
|
1053
|
+
input_text = "Emoji ![👍](tg://emoji?id=5368324170671202286) and [link](https://example.com)"
|
|
1054
|
+
expected_output = 'Emoji <tg-emoji emoji-id="5368324170671202286">👍</tg-emoji> and <a href="https://example.com">link</a>'
|
|
1055
|
+
output = telegram_format(input_text)
|
|
1056
|
+
assert output == expected_output, "Failed handling emoji and link together"
|
|
File without changes
|
|
File without changes
|
{chatgpt_md_converter-0.4.0b1 → chatgpt_md_converter-0.4.0b3}/chatgpt_md_converter/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{chatgpt_md_converter-0.4.0b1 → chatgpt_md_converter-0.4.0b3}/chatgpt_md_converter/html_splitter.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{chatgpt_md_converter-0.4.0b1 → chatgpt_md_converter-0.4.0b3}/tests/test_roundtrip_markdown.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|