chatgpt-md-converter 0.2.0__tar.gz → 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (17)
  1. {chatgpt_md_converter-0.2.0 → chatgpt_md_converter-0.3.0}/PKG-INFO +11 -2
  2. {chatgpt_md_converter-0.2.0 → chatgpt_md_converter-0.3.0}/chatgpt_md_converter/converters.py +5 -0
  3. chatgpt_md_converter-0.3.0/chatgpt_md_converter/formatters.py +68 -0
  4. chatgpt_md_converter-0.3.0/chatgpt_md_converter/helpers.py +27 -0
  5. {chatgpt_md_converter-0.2.0 → chatgpt_md_converter-0.3.0}/chatgpt_md_converter/telegram_formatter.py +10 -8
  6. {chatgpt_md_converter-0.2.0 → chatgpt_md_converter-0.3.0}/chatgpt_md_converter.egg-info/PKG-INFO +11 -2
  7. {chatgpt_md_converter-0.2.0 → chatgpt_md_converter-0.3.0}/setup.py +1 -1
  8. {chatgpt_md_converter-0.2.0 → chatgpt_md_converter-0.3.0}/tests/test_parser.py +64 -0
  9. chatgpt_md_converter-0.2.0/chatgpt_md_converter/formatters.py +0 -39
  10. chatgpt_md_converter-0.2.0/chatgpt_md_converter/helpers.py +0 -8
  11. {chatgpt_md_converter-0.2.0 → chatgpt_md_converter-0.3.0}/LICENSE +0 -0
  12. {chatgpt_md_converter-0.2.0 → chatgpt_md_converter-0.3.0}/chatgpt_md_converter/__init__.py +0 -0
  13. {chatgpt_md_converter-0.2.0 → chatgpt_md_converter-0.3.0}/chatgpt_md_converter/extractors.py +0 -0
  14. {chatgpt_md_converter-0.2.0 → chatgpt_md_converter-0.3.0}/chatgpt_md_converter.egg-info/SOURCES.txt +0 -0
  15. {chatgpt_md_converter-0.2.0 → chatgpt_md_converter-0.3.0}/chatgpt_md_converter.egg-info/dependency_links.txt +0 -0
  16. {chatgpt_md_converter-0.2.0 → chatgpt_md_converter-0.3.0}/chatgpt_md_converter.egg-info/top_level.txt +0 -0
  17. {chatgpt_md_converter-0.2.0 → chatgpt_md_converter-0.3.0}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.4
2
2
  Name: chatgpt_md_converter
3
- Version: 0.2.0
3
+ Version: 0.3.0
4
4
  Summary: A package for converting markdown to HTML for chat Telegram bots
5
5
  Home-page: https://github.com/Latand/formatter-chatgpt-telegram
6
6
  Author: Kostiantyn Kriuchkov
@@ -11,6 +11,15 @@ Classifier: Operating System :: OS Independent
11
11
  Requires-Python: >=3.8
12
12
  Description-Content-Type: text/markdown
13
13
  License-File: LICENSE
14
+ Dynamic: author
15
+ Dynamic: author-email
16
+ Dynamic: classifier
17
+ Dynamic: description
18
+ Dynamic: description-content-type
19
+ Dynamic: home-page
20
+ Dynamic: license-file
21
+ Dynamic: requires-python
22
+ Dynamic: summary
14
23
 
15
24
  # ChatGPT Markdown to Telegram HTML Parser
16
25
 
@@ -19,4 +19,9 @@ def split_by_tag(out_text: str, md_tag: str, html_tag: str) -> str:
19
19
  r"(?<!\w){}(.*?){}(?!\w)".format(re.escape(md_tag), re.escape(md_tag)),
20
20
  re.DOTALL,
21
21
  )
22
+
23
+ # Special handling for the tg-spoiler tag
24
+ if html_tag == 'span class="tg-spoiler"':
25
+ return tag_pattern.sub(r'<span class="tg-spoiler">\1</span>', out_text)
26
+
22
27
  return tag_pattern.sub(r"<{}>\1</{}>".format(html_tag, html_tag), out_text)
@@ -0,0 +1,68 @@
1
+ def combine_blockquotes(text: str) -> str:
2
+ """
3
+ Combines multiline blockquotes into a single blockquote while keeping the \n characters.
4
+ Supports both regular blockquotes (>) and expandable blockquotes (**>).
5
+ """
6
+ lines = text.split("\n")
7
+ combined_lines = []
8
+ blockquote_lines = []
9
+ in_blockquote = False
10
+ is_expandable = False
11
+
12
+ for line in lines:
13
+ if line.startswith("**>"):
14
+ # Expandable blockquote
15
+ in_blockquote = True
16
+ is_expandable = True
17
+ blockquote_lines.append(line[3:].strip())
18
+ elif line.startswith(">"):
19
+ # Regular blockquote
20
+ if not in_blockquote:
21
+ # This is a new blockquote
22
+ in_blockquote = True
23
+ is_expandable = False
24
+ blockquote_lines.append(line[1:].strip())
25
+ else:
26
+ if in_blockquote:
27
+ # End of blockquote, combine the lines
28
+ if is_expandable:
29
+ combined_lines.append(
30
+ "<blockquote expandable>"
31
+ + "\n".join(blockquote_lines)
32
+ + "</blockquote>"
33
+ )
34
+ else:
35
+ combined_lines.append(
36
+ "<blockquote>" + "\n".join(blockquote_lines) + "</blockquote>"
37
+ )
38
+ blockquote_lines = []
39
+ in_blockquote = False
40
+ is_expandable = False
41
+ combined_lines.append(line)
42
+
43
+ if in_blockquote:
44
+ # Handle the case where the file ends with a blockquote
45
+ if is_expandable:
46
+ combined_lines.append(
47
+ "<blockquote expandable>"
48
+ + "\n".join(blockquote_lines)
49
+ + "</blockquote>"
50
+ )
51
+ else:
52
+ combined_lines.append(
53
+ "<blockquote>" + "\n".join(blockquote_lines) + "</blockquote>"
54
+ )
55
+
56
+ return "\n".join(combined_lines)
57
+
58
+
59
+ def fix_asterisk_equations(text: str) -> str:
60
+ """
61
+ Replaces numeric expressions with '*' in them with '×'
62
+ to avoid accidental italic formatting.
63
+ e.g. '6*8' -> '6×8', '6 * 8' -> '6×8'
64
+ """
65
+ import re
66
+
67
+ eq_pattern = re.compile(r"(\d+)\s*\*\s*(\d+)")
68
+ return eq_pattern.sub(r"\1×\2", text)
@@ -0,0 +1,27 @@
1
+ def remove_blockquote_escaping(output: str) -> str:
2
+ """
3
+ Removes the escaping from blockquote tags, including expandable blockquotes.
4
+ """
5
+ # Regular blockquotes
6
+ output = output.replace("&lt;blockquote&gt;", "<blockquote>").replace(
7
+ "&lt;/blockquote&gt;", "</blockquote>"
8
+ )
9
+
10
+ # Expandable blockquotes
11
+ output = output.replace(
12
+ "&lt;blockquote expandable&gt;", "<blockquote expandable>"
13
+ ).replace("&lt;/blockquote&gt;", "</blockquote>")
14
+
15
+ return output
16
+
17
+
18
+ def remove_spoiler_escaping(output: str) -> str:
19
+ """
20
+ Ensures spoiler tags are correctly formatted (rather than being escaped).
21
+ """
22
+ # Fix any incorrectly escaped spoiler tags
23
+ output = output.replace(
24
+ '&lt;span class="tg-spoiler"&gt;', '<span class="tg-spoiler">'
25
+ )
26
+ output = output.replace("&lt;/span&gt;", "</span>")
27
+ return output
@@ -3,7 +3,7 @@ import re
3
3
  from .converters import convert_html_chars, split_by_tag
4
4
  from .extractors import extract_and_convert_code_blocks, reinsert_code_blocks
5
5
  from .formatters import combine_blockquotes
6
- from .helpers import remove_blockquote_escaping
6
+ from .helpers import remove_blockquote_escaping, remove_spoiler_escaping
7
7
 
8
8
 
9
9
  def extract_inline_code_snippets(text: str):
@@ -54,22 +54,19 @@ def telegram_format(text: str) -> str:
54
54
  # Convert unordered lists (do this before italic detection so that leading '*' is recognized as bullet)
55
55
  output = re.sub(r"^(\s*)[\-\*]\s+(.+)$", r"\1• \2", output, flags=re.MULTILINE)
56
56
 
57
- # Remove this old inline code replacement — now handled by extract_inline_code_snippets()
58
- # output = re.sub(r"`(.*?)`", r"<code>\1</code>", output)
59
-
60
57
  # Nested Bold and Italic
61
58
  output = re.sub(r"\*\*\*(.*?)\*\*\*", r"<b><i>\1</i></b>", output)
62
59
  output = re.sub(r"\_\_\_(.*?)\_\_\_", r"<u><i>\1</i></u>", output)
63
60
 
64
- # Process markdown for bold (**), underline (__), strikethrough (~~)
61
+ # Process markdown for bold (**), underline (__), strikethrough (~~), and spoiler (||)
65
62
  output = split_by_tag(output, "**", "b")
66
63
  output = split_by_tag(output, "__", "u")
67
64
  output = split_by_tag(output, "~~", "s")
65
+ output = split_by_tag(output, "||", 'span class="tg-spoiler"')
68
66
 
69
67
  # Custom approach for single-asterisk italic
70
68
  italic_pattern = re.compile(
71
- r"(?<![A-Za-z0-9])\*(?=[^\s])(.*?)(?<!\s)\*(?![A-Za-z0-9])",
72
- re.DOTALL
69
+ r"(?<![A-Za-z0-9])\*(?=[^\s])(.*?)(?<!\s)\*(?![A-Za-z0-9])", re.DOTALL
73
70
  )
74
71
  output = italic_pattern.sub(r"<i>\1</i>", output)
75
72
 
@@ -85,7 +82,9 @@ def telegram_format(text: str) -> str:
85
82
 
86
83
  # Step 3.5: Reinsert inline code snippets, escaping special chars in code content
87
84
  for placeholder, snippet in inline_code_snippets.items():
88
- escaped_snippet = snippet.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
85
+ escaped_snippet = (
86
+ snippet.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
87
+ )
89
88
  output = output.replace(placeholder, f"<code>{escaped_snippet}</code>")
90
89
 
91
90
  # Step 4: Reinsert the converted triple-backtick code blocks
@@ -94,6 +93,9 @@ def telegram_format(text: str) -> str:
94
93
  # Step 5: Remove blockquote escaping
95
94
  output = remove_blockquote_escaping(output)
96
95
 
96
+ # Step 6: Remove spoiler tag escaping
97
+ output = remove_spoiler_escaping(output)
98
+
97
99
  # Clean up multiple consecutive newlines, but preserve intentional spacing
98
100
  output = re.sub(r"\n{3,}", "\n\n", output)
99
101
 
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.4
2
2
  Name: chatgpt_md_converter
3
- Version: 0.2.0
3
+ Version: 0.3.0
4
4
  Summary: A package for converting markdown to HTML for chat Telegram bots
5
5
  Home-page: https://github.com/Latand/formatter-chatgpt-telegram
6
6
  Author: Kostiantyn Kriuchkov
@@ -11,6 +11,15 @@ Classifier: Operating System :: OS Independent
11
11
  Requires-Python: >=3.8
12
12
  Description-Content-Type: text/markdown
13
13
  License-File: LICENSE
14
+ Dynamic: author
15
+ Dynamic: author-email
16
+ Dynamic: classifier
17
+ Dynamic: description
18
+ Dynamic: description-content-type
19
+ Dynamic: home-page
20
+ Dynamic: license-file
21
+ Dynamic: requires-python
22
+ Dynamic: summary
14
23
 
15
24
  # ChatGPT Markdown to Telegram HTML Parser
16
25
 
@@ -2,7 +2,7 @@ from setuptools import setup
2
2
 
3
3
  setup(
4
4
  name="chatgpt_md_converter",
5
- version="0.2.0",
5
+ version="0.3.0",
6
6
  author="Kostiantyn Kriuchkov",
7
7
  author_email="latand666@gmail.com",
8
8
  description="A package for converting markdown to HTML for chat Telegram bots",
@@ -656,3 +656,67 @@ def test_heading_followed_by_equation():
656
656
  2*x + y = 4"""
657
657
  output = telegram_format(input_text)
658
658
  assert output.strip() == expected_output.strip(), f"Got: {output}"
659
+
660
+
661
+ def test_spoiler_conversion():
662
+ input_text = "This contains a ||spoiler|| text"
663
+ expected_output = 'This contains a <span class="tg-spoiler">spoiler</span> text'
664
+ output = telegram_format(input_text)
665
+ assert (
666
+ output == expected_output
667
+ ), 'Failed converting || to <span class="tg-spoiler"> tags'
668
+
669
+
670
+ def test_spoiler_with_formatting():
671
+ input_text = "This contains a ||*italic spoiler*|| text"
672
+ expected_output = (
673
+ 'This contains a <span class="tg-spoiler"><i>italic spoiler</i></span> text'
674
+ )
675
+ output = telegram_format(input_text)
676
+ assert (
677
+ output == expected_output
678
+ ), "Failed converting nested formatting within spoiler tags"
679
+
680
+
681
+ def test_expandable_blockquote_conversion():
682
+ input_text = """**>The expandable block quotation started
683
+ >Expandable block quotation continued
684
+ >The last line of the expandable block quotation"""
685
+ expected_output = """<blockquote expandable>The expandable block quotation started
686
+ Expandable block quotation continued
687
+ The last line of the expandable block quotation</blockquote>"""
688
+ output = telegram_format(input_text)
689
+ assert output == expected_output, "Failed converting expandable blockquote"
690
+
691
+
692
+ def test_regular_and_expandable_blockquotes():
693
+ input_text = """>Regular blockquote
694
+ >Regular blockquote continued
695
+
696
+ **>Expandable blockquote
697
+ >Expandable blockquote continued"""
698
+ expected_output = """<blockquote>Regular blockquote
699
+ Regular blockquote continued</blockquote>
700
+
701
+ <blockquote expandable>Expandable blockquote
702
+ Expandable blockquote continued</blockquote>"""
703
+ output = telegram_format(input_text)
704
+ assert (
705
+ output.strip() == expected_output.strip()
706
+ ), "Failed handling mixed blockquote types"
707
+
708
+
709
+ def test_blockquote_with_spoiler():
710
+ input_text = """>Regular blockquote with ||spoiler|| text
711
+ >Continued"""
712
+ expected_output = """<blockquote>Regular blockquote with <span class="tg-spoiler">spoiler</span> text
713
+ Continued</blockquote>"""
714
+ output = telegram_format(input_text)
715
+ assert output == expected_output, "Failed handling spoiler inside blockquote"
716
+
717
+
718
+ def test_multiple_spoilers():
719
+ input_text = "First ||spoiler|| and then another ||spoiler with *italic*||"
720
+ expected_output = 'First <span class="tg-spoiler">spoiler</span> and then another <span class="tg-spoiler">spoiler with <i>italic</i></span>'
721
+ output = telegram_format(input_text)
722
+ assert output == expected_output, "Failed handling multiple spoilers"
@@ -1,39 +0,0 @@
1
- def combine_blockquotes(text: str) -> str:
2
- """
3
- Combines multiline blockquotes into a single blockquote while keeping the \n characters.
4
- """
5
- lines = text.split("\n")
6
- combined_lines = []
7
- blockquote_lines = []
8
- in_blockquote = False
9
-
10
- for line in lines:
11
- if line.startswith(">"):
12
- in_blockquote = True
13
- blockquote_lines.append(line[1:].strip())
14
- else:
15
- if in_blockquote:
16
- combined_lines.append(
17
- "<blockquote>" + "\n".join(blockquote_lines) + "</blockquote>"
18
- )
19
- blockquote_lines = []
20
- in_blockquote = False
21
- combined_lines.append(line)
22
-
23
- if in_blockquote:
24
- combined_lines.append(
25
- "<blockquote>" + "\n".join(blockquote_lines) + "</blockquote>"
26
- )
27
-
28
- return "\n".join(combined_lines)
29
-
30
-
31
- def fix_asterisk_equations(text: str) -> str:
32
- """
33
- Replaces numeric expressions with '*' in them with '×'
34
- to avoid accidental italic formatting.
35
- e.g. '6*8' -> '6×8', '6 * 8' -> '6×8'
36
- """
37
- import re
38
- eq_pattern = re.compile(r'(\d+)\s*\*\s*(\d+)')
39
- return eq_pattern.sub(r'\1×\2', text)
@@ -1,8 +0,0 @@
1
- def remove_blockquote_escaping(output: str) -> str:
2
- """
3
- Removes the escaping from blockquote tags.
4
- """
5
- output = output.replace("&lt;blockquote&gt;", "<blockquote>").replace(
6
- "&lt;/blockquote&gt;", "</blockquote>"
7
- )
8
- return output