chatgpt-md-converter 0.1.2__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -19,4 +19,9 @@ def split_by_tag(out_text: str, md_tag: str, html_tag: str) -> str:
19
19
  r"(?<!\w){}(.*?){}(?!\w)".format(re.escape(md_tag), re.escape(md_tag)),
20
20
  re.DOTALL,
21
21
  )
22
+
23
+ # Special handling for the tg-spoiler tag
24
+ if html_tag == 'span class="tg-spoiler"':
25
+ return tag_pattern.sub(r'<span class="tg-spoiler">\1</span>', out_text)
26
+
22
27
  return tag_pattern.sub(r"<{}>\1</{}>".format(html_tag, html_tag), out_text)
@@ -1,28 +1,68 @@
1
1
  def combine_blockquotes(text: str) -> str:
2
2
  """
3
3
  Combines multiline blockquotes into a single blockquote while keeping the \n characters.
4
+ Supports both regular blockquotes (>) and expandable blockquotes (**>).
4
5
  """
5
6
  lines = text.split("\n")
6
7
  combined_lines = []
7
8
  blockquote_lines = []
8
9
  in_blockquote = False
10
+ is_expandable = False
9
11
 
10
12
  for line in lines:
11
- if line.startswith(">"):
13
+ if line.startswith("**>"):
14
+ # Expandable blockquote
12
15
  in_blockquote = True
16
+ is_expandable = True
17
+ blockquote_lines.append(line[3:].strip())
18
+ elif line.startswith(">"):
19
+ # Regular blockquote
20
+ if not in_blockquote:
21
+ # This is a new blockquote
22
+ in_blockquote = True
23
+ is_expandable = False
13
24
  blockquote_lines.append(line[1:].strip())
14
25
  else:
15
26
  if in_blockquote:
16
- combined_lines.append(
17
- "<blockquote>" + "\n".join(blockquote_lines) + "</blockquote>"
18
- )
27
+ # End of blockquote, combine the lines
28
+ if is_expandable:
29
+ combined_lines.append(
30
+ "<blockquote expandable>"
31
+ + "\n".join(blockquote_lines)
32
+ + "</blockquote>"
33
+ )
34
+ else:
35
+ combined_lines.append(
36
+ "<blockquote>" + "\n".join(blockquote_lines) + "</blockquote>"
37
+ )
19
38
  blockquote_lines = []
20
39
  in_blockquote = False
40
+ is_expandable = False
21
41
  combined_lines.append(line)
22
42
 
23
43
  if in_blockquote:
24
- combined_lines.append(
25
- "<blockquote>" + "\n".join(blockquote_lines) + "</blockquote>"
26
- )
44
+ # Handle the case where the file ends with a blockquote
45
+ if is_expandable:
46
+ combined_lines.append(
47
+ "<blockquote expandable>"
48
+ + "\n".join(blockquote_lines)
49
+ + "</blockquote>"
50
+ )
51
+ else:
52
+ combined_lines.append(
53
+ "<blockquote>" + "\n".join(blockquote_lines) + "</blockquote>"
54
+ )
27
55
 
28
56
  return "\n".join(combined_lines)
57
+
58
+
59
+ def fix_asterisk_equations(text: str) -> str:
60
+ """
61
+ Replaces numeric expressions with '*' in them with '×'
62
+ to avoid accidental italic formatting.
63
+ e.g. '6*8' -> '6×8', '6 * 8' -> '6×8'
64
+ """
65
+ import re
66
+
67
+ eq_pattern = re.compile(r"(\d+)\s*\*\s*(\d+)")
68
+ return eq_pattern.sub(r"\1×\2", text)
@@ -1,8 +1,27 @@
1
1
  def remove_blockquote_escaping(output: str) -> str:
2
2
  """
3
- Removes the escaping from blockquote tags.
3
+ Removes the escaping from blockquote tags, including expandable blockquotes.
4
4
  """
5
+ # Regular blockquotes
5
6
  output = output.replace("&lt;blockquote&gt;", "<blockquote>").replace(
6
7
  "&lt;/blockquote&gt;", "</blockquote>"
7
8
  )
9
+
10
+ # Expandable blockquotes
11
+ output = output.replace(
12
+ "&lt;blockquote expandable&gt;", "<blockquote expandable>"
13
+ ).replace("&lt;/blockquote&gt;", "</blockquote>")
14
+
15
+ return output
16
+
17
+
18
+ def remove_spoiler_escaping(output: str) -> str:
19
+ """
20
+ Ensures spoiler tags are correctly formatted (rather than being escaped).
21
+ """
22
+ # Fix any incorrectly escaped spoiler tags
23
+ output = output.replace(
24
+ '&lt;span class="tg-spoiler"&gt;', '<span class="tg-spoiler">'
25
+ )
26
+ output = output.replace("&lt;/span&gt;", "</span>")
8
27
  return output
@@ -1,57 +1,102 @@
1
1
  import re
2
+
2
3
  from .converters import convert_html_chars, split_by_tag
3
4
  from .extractors import extract_and_convert_code_blocks, reinsert_code_blocks
4
5
  from .formatters import combine_blockquotes
5
- from .helpers import remove_blockquote_escaping
6
+ from .helpers import remove_blockquote_escaping, remove_spoiler_escaping
7
+
8
+
9
+ def extract_inline_code_snippets(text: str):
10
+ """
11
+ Extracts inline code (single-backtick content) from the text,
12
+ replacing it with placeholders, returning modified text and a dict of placeholders -> code text.
13
+ This ensures characters like '*' or '_' inside inline code won't be interpreted as Markdown.
14
+ """
15
+ placeholders = []
16
+ code_snippets = {}
17
+ inline_code_pattern = re.compile(r"`([^`]+)`")
18
+
19
+ def replacer(match):
20
+ snippet = match.group(1)
21
+ placeholder = f"INLINECODEPLACEHOLDER{len(placeholders)}"
22
+ placeholders.append(placeholder)
23
+ code_snippets[placeholder] = snippet
24
+ return placeholder
25
+
26
+ new_text = inline_code_pattern.sub(replacer, text)
27
+ return new_text, code_snippets
6
28
 
7
29
 
8
30
  def telegram_format(text: str) -> str:
9
31
  """
10
32
  Converts markdown in the provided text to HTML supported by Telegram.
11
33
  """
34
+
12
35
  # Step 0: Combine blockquotes
13
36
  text = combine_blockquotes(text)
14
37
 
15
38
  # Step 1: Convert HTML reserved symbols
16
39
  text = convert_html_chars(text)
17
40
 
18
- # Step 2: Extract and convert code blocks first
19
- output, code_blocks = extract_and_convert_code_blocks(text)
41
+ # Step 2: Extract and convert triple-backtick code blocks first
42
+ output, triple_code_blocks = extract_and_convert_code_blocks(text)
43
+
44
+ # Step 2.5: Extract inline code snippets (single backticks) so they won't be parsed as italics, etc.
45
+ output, inline_code_snippets = extract_inline_code_snippets(output)
20
46
 
21
- # Step 3: Escape HTML special characters in the output text
47
+ # Step 3: Escape HTML special characters in the output text (for non-code parts)
48
+ # We do NOT want to escape what's inside placeholders here, only what's outside code placeholders.
22
49
  output = output.replace("<", "&lt;").replace(">", "&gt;")
23
50
 
24
- # Inline code
25
- output = re.sub(r"`(.*?)`", r"<code>\1</code>", output)
51
+ # Convert headings (H1-H6)
52
+ output = re.sub(r"^(#{1,6})\s+(.+)$", r"<b>\2</b>", output, flags=re.MULTILINE)
53
+
54
+ # Convert unordered lists (do this before italic detection so that leading '*' is recognized as bullet)
55
+ output = re.sub(r"^(\s*)[\-\*]\s+(.+)$", r"\1• \2", output, flags=re.MULTILINE)
26
56
 
27
57
  # Nested Bold and Italic
28
58
  output = re.sub(r"\*\*\*(.*?)\*\*\*", r"<b><i>\1</i></b>", output)
29
59
  output = re.sub(r"\_\_\_(.*?)\_\_\_", r"<u><i>\1</i></u>", output)
30
60
 
31
- # Process markdown formatting tags (bold, underline, italic, strikethrough)
32
- # and convert them to their respective HTML tags
61
+ # Process markdown for bold (**), underline (__), strikethrough (~~), and spoiler (||)
33
62
  output = split_by_tag(output, "**", "b")
34
63
  output = split_by_tag(output, "__", "u")
35
- output = split_by_tag(output, "_", "i")
36
- output = split_by_tag(output, "*", "i")
37
64
  output = split_by_tag(output, "~~", "s")
65
+ output = split_by_tag(output, "||", 'span class="tg-spoiler"')
38
66
 
39
- # Remove storage links
40
- output = re.sub(r"【[^】]+】", "", output)
67
+ # Custom approach for single-asterisk italic
68
+ italic_pattern = re.compile(
69
+ r"(?<![A-Za-z0-9])\*(?=[^\s])(.*?)(?<!\s)\*(?![A-Za-z0-9])", re.DOTALL
70
+ )
71
+ output = italic_pattern.sub(r"<i>\1</i>", output)
72
+
73
+ # Process single underscore-based italic
74
+ output = split_by_tag(output, "_", "i")
41
75
 
42
- # Convert links
43
- output = re.sub(r"!?\[(.*?)\]\((.*?)\)", r'<a href="\2">\1</a>', output)
76
+ # Remove storage links (Vector storage placeholders like 【4:0†source】)
77
+ output = re.sub(r"[^】]+】", "", output)
44
78
 
45
- # Convert headings
46
- output = re.sub(r"^\s*#+ (.+)", r"<b>\1</b>", output, flags=re.MULTILINE)
79
+ # Convert Markdown links/images to <a href="">…</a>
80
+ link_pattern = r"(?:!?)\[((?:[^\[\]]|\[.*?\])*)\]\(([^)]+)\)"
81
+ output = re.sub(link_pattern, r'<a href="\2">\1</a>', output)
47
82
 
48
- # Convert unordered lists, preserving indentation
49
- output = re.sub(r"^(\s*)[\-\*] (.+)", r"\1• \2", output, flags=re.MULTILINE)
83
+ # Step 3.5: Reinsert inline code snippets, escaping special chars in code content
84
+ for placeholder, snippet in inline_code_snippets.items():
85
+ escaped_snippet = (
86
+ snippet.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
87
+ )
88
+ output = output.replace(placeholder, f"<code>{escaped_snippet}</code>")
50
89
 
51
- # Step 4: Reinsert the converted HTML code blocks
52
- output = reinsert_code_blocks(output, code_blocks)
90
+ # Step 4: Reinsert the converted triple-backtick code blocks
91
+ output = reinsert_code_blocks(output, triple_code_blocks)
53
92
 
54
93
  # Step 5: Remove blockquote escaping
55
94
  output = remove_blockquote_escaping(output)
56
95
 
57
- return output
96
+ # Step 6: Remove spoiler tag escaping
97
+ output = remove_spoiler_escaping(output)
98
+
99
+ # Clean up multiple consecutive newlines, but preserve intentional spacing
100
+ output = re.sub(r"\n{3,}", "\n\n", output)
101
+
102
+ return output.strip()
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.4
2
2
  Name: chatgpt_md_converter
3
- Version: 0.1.2
3
+ Version: 0.3.0
4
4
  Summary: A package for converting markdown to HTML for chat Telegram bots
5
5
  Home-page: https://github.com/Latand/formatter-chatgpt-telegram
6
6
  Author: Kostiantyn Kriuchkov
@@ -11,6 +11,15 @@ Classifier: Operating System :: OS Independent
11
11
  Requires-Python: >=3.8
12
12
  Description-Content-Type: text/markdown
13
13
  License-File: LICENSE
14
+ Dynamic: author
15
+ Dynamic: author-email
16
+ Dynamic: classifier
17
+ Dynamic: description
18
+ Dynamic: description-content-type
19
+ Dynamic: home-page
20
+ Dynamic: license-file
21
+ Dynamic: requires-python
22
+ Dynamic: summary
14
23
 
15
24
  # ChatGPT Markdown to Telegram HTML Parser
16
25
 
@@ -0,0 +1,11 @@
1
+ chatgpt_md_converter/__init__.py,sha256=AfkikySkXsJ8HKQcSlU7B1KBHz54QCGJ7MO5Ka9oWRM,79
2
+ chatgpt_md_converter/converters.py,sha256=fgebhbhMcIOqnr0xuV04v81RD91FfaGfA0kO417cDqc,831
3
+ chatgpt_md_converter/extractors.py,sha256=RNwo57_6jCe-HoX5eCvvZcjSTc2uPax-6QEtXqXA5QQ,1880
4
+ chatgpt_md_converter/formatters.py,sha256=UbjRG7bLETIGDaFDbFybwW8dKYBMDmgLmIasJiw_j60,2304
5
+ chatgpt_md_converter/helpers.py,sha256=2Nc9_s0HcLq79mBt7Hje19LzbO6z9mUNgayoMyWkIhI,874
6
+ chatgpt_md_converter/telegram_formatter.py,sha256=L0ESIY1AOuRXdIto2lWR38zuYuIwlLBScGINMrm8VVk,4091
7
+ chatgpt_md_converter-0.3.0.dist-info/licenses/LICENSE,sha256=SDr2jeP-s2g4vf17-jdLXrrqA4_mU7L_RtSJlv4Y2mk,1077
8
+ chatgpt_md_converter-0.3.0.dist-info/METADATA,sha256=IjGkCXRdnzaDtSFgwBs1njGXultCqQ4t-9lqPf0vjKc,3282
9
+ chatgpt_md_converter-0.3.0.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
10
+ chatgpt_md_converter-0.3.0.dist-info/top_level.txt,sha256=T2o7csVtZgr-Pwm83aSUkZn0humJmDFNqW38tRSsNqw,21
11
+ chatgpt_md_converter-0.3.0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: bdist_wheel (0.43.0)
2
+ Generator: setuptools (78.1.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,11 +0,0 @@
1
- chatgpt_md_converter/__init__.py,sha256=AfkikySkXsJ8HKQcSlU7B1KBHz54QCGJ7MO5Ka9oWRM,79
2
- chatgpt_md_converter/converters.py,sha256=nfbKCcYCAYBk_0RQntCVQFQgAlEUWrGtLWULE1wETmU,657
3
- chatgpt_md_converter/extractors.py,sha256=RNwo57_6jCe-HoX5eCvvZcjSTc2uPax-6QEtXqXA5QQ,1880
4
- chatgpt_md_converter/formatters.py,sha256=gG_SavtZI0BVl7SqkwGZ_usCB89ZPpAQWofpDUd9DzU,878
5
- chatgpt_md_converter/helpers.py,sha256=9CtBeMzKYrymECNPl0MXsW0Vscp4A02a64a5z0sVWqE,261
6
- chatgpt_md_converter/telegram_formatter.py,sha256=3XSNWda_5LKRShjZlkO-D7c1Uq77pfvUGlhqliEO0eU,2007
7
- chatgpt_md_converter-0.1.2.dist-info/LICENSE,sha256=SDr2jeP-s2g4vf17-jdLXrrqA4_mU7L_RtSJlv4Y2mk,1077
8
- chatgpt_md_converter-0.1.2.dist-info/METADATA,sha256=roSPyHowfr_bCIlyWkja5ozrq3j8zjAQI1cI_0Iqodo,3086
9
- chatgpt_md_converter-0.1.2.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
10
- chatgpt_md_converter-0.1.2.dist-info/top_level.txt,sha256=T2o7csVtZgr-Pwm83aSUkZn0humJmDFNqW38tRSsNqw,21
11
- chatgpt_md_converter-0.1.2.dist-info/RECORD,,