chatgpt-md-converter 0.2.0__py3-none-any.whl → 0.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chatgpt_md_converter/converters.py +5 -0
- chatgpt_md_converter/formatters.py +38 -9
- chatgpt_md_converter/helpers.py +20 -1
- chatgpt_md_converter/telegram_formatter.py +10 -8
- {chatgpt_md_converter-0.2.0.dist-info → chatgpt_md_converter-0.3.1.dist-info}/METADATA +45 -5
- chatgpt_md_converter-0.3.1.dist-info/RECORD +11 -0
- {chatgpt_md_converter-0.2.0.dist-info → chatgpt_md_converter-0.3.1.dist-info}/WHEEL +1 -1
- chatgpt_md_converter-0.2.0.dist-info/RECORD +0 -11
- {chatgpt_md_converter-0.2.0.dist-info → chatgpt_md_converter-0.3.1.dist-info/licenses}/LICENSE +0 -0
- {chatgpt_md_converter-0.2.0.dist-info → chatgpt_md_converter-0.3.1.dist-info}/top_level.txt +0 -0
|
@@ -19,4 +19,9 @@ def split_by_tag(out_text: str, md_tag: str, html_tag: str) -> str:
|
|
|
19
19
|
r"(?<!\w){}(.*?){}(?!\w)".format(re.escape(md_tag), re.escape(md_tag)),
|
|
20
20
|
re.DOTALL,
|
|
21
21
|
)
|
|
22
|
+
|
|
23
|
+
# Special handling for the tg-spoiler tag
|
|
24
|
+
if html_tag == 'span class="tg-spoiler"':
|
|
25
|
+
return tag_pattern.sub(r'<span class="tg-spoiler">\1</span>', out_text)
|
|
26
|
+
|
|
22
27
|
return tag_pattern.sub(r"<{}>\1</{}>".format(html_tag, html_tag), out_text)
|
|
@@ -1,29 +1,57 @@
|
|
|
1
1
|
def combine_blockquotes(text: str) -> str:
|
|
2
2
|
"""
|
|
3
3
|
Combines multiline blockquotes into a single blockquote while keeping the \n characters.
|
|
4
|
+
Supports both regular blockquotes (>) and expandable blockquotes (**>).
|
|
4
5
|
"""
|
|
5
6
|
lines = text.split("\n")
|
|
6
7
|
combined_lines = []
|
|
7
8
|
blockquote_lines = []
|
|
8
9
|
in_blockquote = False
|
|
10
|
+
is_expandable = False
|
|
9
11
|
|
|
10
12
|
for line in lines:
|
|
11
|
-
if line.startswith(">"):
|
|
13
|
+
if line.startswith("**>"):
|
|
14
|
+
# Expandable blockquote
|
|
12
15
|
in_blockquote = True
|
|
16
|
+
is_expandable = True
|
|
17
|
+
blockquote_lines.append(line[3:].strip())
|
|
18
|
+
elif line.startswith(">"):
|
|
19
|
+
# Regular blockquote
|
|
20
|
+
if not in_blockquote:
|
|
21
|
+
# This is a new blockquote
|
|
22
|
+
in_blockquote = True
|
|
23
|
+
is_expandable = False
|
|
13
24
|
blockquote_lines.append(line[1:].strip())
|
|
14
25
|
else:
|
|
15
26
|
if in_blockquote:
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
27
|
+
# End of blockquote, combine the lines
|
|
28
|
+
if is_expandable:
|
|
29
|
+
combined_lines.append(
|
|
30
|
+
"<blockquote expandable>"
|
|
31
|
+
+ "\n".join(blockquote_lines)
|
|
32
|
+
+ "</blockquote>"
|
|
33
|
+
)
|
|
34
|
+
else:
|
|
35
|
+
combined_lines.append(
|
|
36
|
+
"<blockquote>" + "\n".join(blockquote_lines) + "</blockquote>"
|
|
37
|
+
)
|
|
19
38
|
blockquote_lines = []
|
|
20
39
|
in_blockquote = False
|
|
40
|
+
is_expandable = False
|
|
21
41
|
combined_lines.append(line)
|
|
22
42
|
|
|
23
43
|
if in_blockquote:
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
44
|
+
# Handle the case where the file ends with a blockquote
|
|
45
|
+
if is_expandable:
|
|
46
|
+
combined_lines.append(
|
|
47
|
+
"<blockquote expandable>"
|
|
48
|
+
+ "\n".join(blockquote_lines)
|
|
49
|
+
+ "</blockquote>"
|
|
50
|
+
)
|
|
51
|
+
else:
|
|
52
|
+
combined_lines.append(
|
|
53
|
+
"<blockquote>" + "\n".join(blockquote_lines) + "</blockquote>"
|
|
54
|
+
)
|
|
27
55
|
|
|
28
56
|
return "\n".join(combined_lines)
|
|
29
57
|
|
|
@@ -35,5 +63,6 @@ def fix_asterisk_equations(text: str) -> str:
|
|
|
35
63
|
e.g. '6*8' -> '6×8', '6 * 8' -> '6×8'
|
|
36
64
|
"""
|
|
37
65
|
import re
|
|
38
|
-
|
|
39
|
-
|
|
66
|
+
|
|
67
|
+
eq_pattern = re.compile(r"(\d+)\s*\*\s*(\d+)")
|
|
68
|
+
return eq_pattern.sub(r"\1×\2", text)
|
chatgpt_md_converter/helpers.py
CHANGED
|
@@ -1,8 +1,27 @@
|
|
|
1
1
|
def remove_blockquote_escaping(output: str) -> str:
|
|
2
2
|
"""
|
|
3
|
-
Removes the escaping from blockquote tags.
|
|
3
|
+
Removes the escaping from blockquote tags, including expandable blockquotes.
|
|
4
4
|
"""
|
|
5
|
+
# Regular blockquotes
|
|
5
6
|
output = output.replace("&lt;blockquote&gt;", "<blockquote>").replace(
|
|
5
6
|
"&lt;/blockquote&gt;", "</blockquote>"
|
|
7
8
|
)
|
|
9
|
+
|
|
10
|
+
# Expandable blockquotes
|
|
11
|
+
output = output.replace(
|
|
12
|
+
"&lt;blockquote expandable&gt;", "<blockquote expandable>"
|
|
13
|
+
).replace("&lt;/blockquote&gt;", "</blockquote>")
|
|
14
|
+
|
|
15
|
+
return output
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def remove_spoiler_escaping(output: str) -> str:
|
|
19
|
+
"""
|
|
20
|
+
Ensures spoiler tags are correctly formatted (rather than being escaped).
|
|
21
|
+
"""
|
|
22
|
+
# Fix any incorrectly escaped spoiler tags
|
|
23
|
+
output = output.replace(
|
|
24
|
+
'&lt;span class="tg-spoiler"&gt;', '<span class="tg-spoiler">'
|
|
25
|
+
)
|
|
26
|
+
output = output.replace("&lt;/span&gt;", "</span>")
|
|
8
27
|
return output
|
|
@@ -3,7 +3,7 @@ import re
|
|
|
3
3
|
from .converters import convert_html_chars, split_by_tag
|
|
4
4
|
from .extractors import extract_and_convert_code_blocks, reinsert_code_blocks
|
|
5
5
|
from .formatters import combine_blockquotes
|
|
6
|
-
from .helpers import remove_blockquote_escaping
|
|
6
|
+
from .helpers import remove_blockquote_escaping, remove_spoiler_escaping
|
|
7
7
|
|
|
8
8
|
|
|
9
9
|
def extract_inline_code_snippets(text: str):
|
|
@@ -54,22 +54,19 @@ def telegram_format(text: str) -> str:
|
|
|
54
54
|
# Convert unordered lists (do this before italic detection so that leading '*' is recognized as bullet)
|
|
55
55
|
output = re.sub(r"^(\s*)[\-\*]\s+(.+)$", r"\1• \2", output, flags=re.MULTILINE)
|
|
56
56
|
|
|
57
|
-
# Remove this old inline code replacement — now handled by extract_inline_code_snippets()
|
|
58
|
-
# output = re.sub(r"`(.*?)`", r"<code>\1</code>", output)
|
|
59
|
-
|
|
60
57
|
# Nested Bold and Italic
|
|
61
58
|
output = re.sub(r"\*\*\*(.*?)\*\*\*", r"<b><i>\1</i></b>", output)
|
|
62
59
|
output = re.sub(r"\_\_\_(.*?)\_\_\_", r"<u><i>\1</i></u>", output)
|
|
63
60
|
|
|
64
|
-
# Process markdown for bold (**), underline (__), strikethrough (~~)
|
|
61
|
+
# Process markdown for bold (**), underline (__), strikethrough (~~), and spoiler (||)
|
|
65
62
|
output = split_by_tag(output, "**", "b")
|
|
66
63
|
output = split_by_tag(output, "__", "u")
|
|
67
64
|
output = split_by_tag(output, "~~", "s")
|
|
65
|
+
output = split_by_tag(output, "||", 'span class="tg-spoiler"')
|
|
68
66
|
|
|
69
67
|
# Custom approach for single-asterisk italic
|
|
70
68
|
italic_pattern = re.compile(
|
|
71
|
-
r"(?<![A-Za-z0-9])\*(?=[^\s])(.*?)(?<!\s)\*(?![A-Za-z0-9])",
|
|
72
|
-
re.DOTALL
|
|
69
|
+
r"(?<![A-Za-z0-9])\*(?=[^\s])(.*?)(?<!\s)\*(?![A-Za-z0-9])", re.DOTALL
|
|
73
70
|
)
|
|
74
71
|
output = italic_pattern.sub(r"<i>\1</i>", output)
|
|
75
72
|
|
|
@@ -85,7 +82,9 @@ def telegram_format(text: str) -> str:
|
|
|
85
82
|
|
|
86
83
|
# Step 3.5: Reinsert inline code snippets, escaping special chars in code content
|
|
87
84
|
for placeholder, snippet in inline_code_snippets.items():
|
|
88
|
-
escaped_snippet = snippet.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
|
|
85
|
+
escaped_snippet = (
|
|
86
|
+
snippet.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
|
|
87
|
+
)
|
|
89
88
|
output = output.replace(placeholder, f"<code>{escaped_snippet}</code>")
|
|
90
89
|
|
|
91
90
|
# Step 4: Reinsert the converted triple-backtick code blocks
|
|
@@ -94,6 +93,9 @@ def telegram_format(text: str) -> str:
|
|
|
94
93
|
# Step 5: Remove blockquote escaping
|
|
95
94
|
output = remove_blockquote_escaping(output)
|
|
96
95
|
|
|
96
|
+
# Step 6: Remove spoiler tag escaping
|
|
97
|
+
output = remove_spoiler_escaping(output)
|
|
98
|
+
|
|
97
99
|
# Clean up multiple consecutive newlines, but preserve intentional spacing
|
|
98
100
|
output = re.sub(r"\n{3,}", "\n\n", output)
|
|
99
101
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: chatgpt_md_converter
|
|
3
|
-
Version: 0.2.0
|
|
3
|
+
Version: 0.3.1
|
|
4
4
|
Summary: A package for converting markdown to HTML for chat Telegram bots
|
|
5
5
|
Home-page: https://github.com/Latand/formatter-chatgpt-telegram
|
|
6
6
|
Author: Kostiantyn Kriuchkov
|
|
@@ -11,6 +11,15 @@ Classifier: Operating System :: OS Independent
|
|
|
11
11
|
Requires-Python: >=3.8
|
|
12
12
|
Description-Content-Type: text/markdown
|
|
13
13
|
License-File: LICENSE
|
|
14
|
+
Dynamic: author
|
|
15
|
+
Dynamic: author-email
|
|
16
|
+
Dynamic: classifier
|
|
17
|
+
Dynamic: description
|
|
18
|
+
Dynamic: description-content-type
|
|
19
|
+
Dynamic: home-page
|
|
20
|
+
Dynamic: license-file
|
|
21
|
+
Dynamic: requires-python
|
|
22
|
+
Dynamic: summary
|
|
14
23
|
|
|
15
24
|
# ChatGPT Markdown to Telegram HTML Parser
|
|
16
25
|
|
|
@@ -22,9 +31,11 @@ This project provides a solution for converting Markdown formatted text into HTM
|
|
|
22
31
|
|
|
23
32
|
- Converts Markdown syntax to Telegram-compatible HTML.
|
|
24
33
|
- Supports inline code, bold, italic, underline, and strikethrough formatting.
|
|
34
|
+
- Supports spoiler tags with `||text||` syntax.
|
|
25
35
|
- Handles nested bold and italic formatting.
|
|
26
36
|
- Converts Markdown links and lists to their HTML equivalents.
|
|
27
37
|
- Processes code blocks with optional language specification, preserving formatting within `<pre><code>` tags.
|
|
38
|
+
- Supports regular blockquotes (`>`) and expandable blockquotes (`**>`) for Telegram.
|
|
28
39
|
- Automatically appends missing closing delimiters for code blocks.
|
|
29
40
|
- Escapes HTML special characters to prevent unwanted HTML rendering outside code blocks.
|
|
30
41
|
|
|
@@ -32,7 +43,7 @@ This project provides a solution for converting Markdown formatted text into HTM
|
|
|
32
43
|
|
|
33
44
|
To use the Markdown to Telegram HTML Parser in your ChatGPT bot, integrate the provided Python functions into your bot's processing pipeline. Here is a brief overview of how to incorporate the parser:
|
|
34
45
|
|
|
35
|
-
1. **Ensure Closing Delimiters**: Automatically appends missing closing delimiters for `` ` `` and
|
|
46
|
+
1. **Ensure Closing Delimiters**: Automatically appends missing closing delimiters for `` ` `` and ` ` ``` to ensure proper parsing.
|
|
36
47
|
|
|
37
48
|
2. **Extract and Convert Code Blocks**: Extracts Markdown code blocks, converts them to HTML `<pre><code>` format, and replaces them with placeholders to prevent formatting within code blocks.
|
|
38
49
|
|
|
@@ -52,11 +63,39 @@ pip install chatgpt-md-converter
|
|
|
52
63
|
|
|
53
64
|
## Example
|
|
54
65
|
|
|
55
|
-
|
|
66
|
+
````python
|
|
56
67
|
from chatgpt_md_converter import telegram_format
|
|
57
68
|
|
|
58
|
-
|
|
69
|
+
# Basic formatting
|
|
70
|
+
text = """
|
|
71
|
+
Here is some **bold**, __underline__, and `inline code`.
|
|
72
|
+
This is a ||spoiler text||.
|
|
73
|
+
|
|
74
|
+
```python
|
|
75
|
+
print('Hello, world!')
|
|
76
|
+
````
|
|
77
|
+
|
|
78
|
+
"""
|
|
79
|
+
|
|
80
|
+
# Blockquotes
|
|
81
|
+
|
|
82
|
+
blockquote_text = """
|
|
83
|
+
|
|
84
|
+
> Regular blockquote
|
|
85
|
+
> Multiple lines
|
|
86
|
+
|
|
87
|
+
\*\*>Expandable blockquote
|
|
88
|
+
|
|
89
|
+
> Hidden by default
|
|
90
|
+
> Multiple lines
|
|
91
|
+
> """
|
|
92
|
+
|
|
93
|
+
formatted_text = telegram_format(text)
|
|
94
|
+
formatted_blockquote = telegram_format(blockquote_text)
|
|
95
|
+
|
|
59
96
|
print(formatted_text)
|
|
97
|
+
print(formatted_blockquote)
|
|
98
|
+
|
|
60
99
|
```
|
|
61
100
|
|
|
62
101
|
## Requirements
|
|
@@ -67,3 +106,4 @@ print(formatted_text)
|
|
|
67
106
|
## Contribution
|
|
68
107
|
|
|
69
108
|
Feel free to contribute to this project by submitting pull requests or opening issues for bugs, feature requests, or improvements.
|
|
109
|
+
```
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
chatgpt_md_converter/__init__.py,sha256=AfkikySkXsJ8HKQcSlU7B1KBHz54QCGJ7MO5Ka9oWRM,79
|
|
2
|
+
chatgpt_md_converter/converters.py,sha256=fgebhbhMcIOqnr0xuV04v81RD91FfaGfA0kO417cDqc,831
|
|
3
|
+
chatgpt_md_converter/extractors.py,sha256=RNwo57_6jCe-HoX5eCvvZcjSTc2uPax-6QEtXqXA5QQ,1880
|
|
4
|
+
chatgpt_md_converter/formatters.py,sha256=UbjRG7bLETIGDaFDbFybwW8dKYBMDmgLmIasJiw_j60,2304
|
|
5
|
+
chatgpt_md_converter/helpers.py,sha256=2Nc9_s0HcLq79mBt7Hje19LzbO6z9mUNgayoMyWkIhI,874
|
|
6
|
+
chatgpt_md_converter/telegram_formatter.py,sha256=L0ESIY1AOuRXdIto2lWR38zuYuIwlLBScGINMrm8VVk,4091
|
|
7
|
+
chatgpt_md_converter-0.3.1.dist-info/licenses/LICENSE,sha256=SDr2jeP-s2g4vf17-jdLXrrqA4_mU7L_RtSJlv4Y2mk,1077
|
|
8
|
+
chatgpt_md_converter-0.3.1.dist-info/METADATA,sha256=Oz1UdVaAe77WGG5AtJQQybJh5c2TjFD92fn9do7-N88,3718
|
|
9
|
+
chatgpt_md_converter-0.3.1.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
|
10
|
+
chatgpt_md_converter-0.3.1.dist-info/top_level.txt,sha256=T2o7csVtZgr-Pwm83aSUkZn0humJmDFNqW38tRSsNqw,21
|
|
11
|
+
chatgpt_md_converter-0.3.1.dist-info/RECORD,,
|
|
@@ -1,11 +0,0 @@
|
|
|
1
|
-
chatgpt_md_converter/__init__.py,sha256=AfkikySkXsJ8HKQcSlU7B1KBHz54QCGJ7MO5Ka9oWRM,79
|
|
2
|
-
chatgpt_md_converter/converters.py,sha256=nfbKCcYCAYBk_0RQntCVQFQgAlEUWrGtLWULE1wETmU,657
|
|
3
|
-
chatgpt_md_converter/extractors.py,sha256=RNwo57_6jCe-HoX5eCvvZcjSTc2uPax-6QEtXqXA5QQ,1880
|
|
4
|
-
chatgpt_md_converter/formatters.py,sha256=daekV8M-42E3_N1uXx6M4EbZpSToHo8Vt8fl8AP_yyA,1197
|
|
5
|
-
chatgpt_md_converter/helpers.py,sha256=9CtBeMzKYrymECNPl0MXsW0Vscp4A02a64a5z0sVWqE,261
|
|
6
|
-
chatgpt_md_converter/telegram_formatter.py,sha256=MDyC_gkjN7J-LoMxQaJ1awcEQZzcaYFosOdCgDeDkRU,4036
|
|
7
|
-
chatgpt_md_converter-0.2.0.dist-info/LICENSE,sha256=SDr2jeP-s2g4vf17-jdLXrrqA4_mU7L_RtSJlv4Y2mk,1077
|
|
8
|
-
chatgpt_md_converter-0.2.0.dist-info/METADATA,sha256=zm80EZ56yxE7Z3AZkZXIm9CKxPDamouKuqFjS4y1xgU,3086
|
|
9
|
-
chatgpt_md_converter-0.2.0.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
|
|
10
|
-
chatgpt_md_converter-0.2.0.dist-info/top_level.txt,sha256=T2o7csVtZgr-Pwm83aSUkZn0humJmDFNqW38tRSsNqw,21
|
|
11
|
-
chatgpt_md_converter-0.2.0.dist-info/RECORD,,
|
{chatgpt_md_converter-0.2.0.dist-info → chatgpt_md_converter-0.3.1.dist-info/licenses}/LICENSE
RENAMED
|
File without changes
|
|
File without changes
|