chatgpt-md-converter 0.3.3__tar.gz → 0.3.5__tar.gz

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: chatgpt_md_converter
3
- Version: 0.3.3
3
+ Version: 0.3.5
4
4
  Summary: A package for converting markdown to HTML for chat Telegram bots
5
5
  Home-page: https://github.com/Latand/formatter-chatgpt-telegram
6
6
  Author: Kostiantyn Kriuchkov
@@ -128,7 +128,7 @@ Feel free to contribute to this project by submitting pull requests or opening i
128
128
  > **Note**:
129
129
  > Since standard Markdown doesn't include Telegram-specific features like spoilers (`||text||`) and expandable blockquotes (`**> text`), you'll need to explicitly instruct LLMs to use these formats. Here's a suggested prompt addition to include in your system message or initial instructions:
130
130
 
131
- ```
131
+ ````
132
132
  When formatting your responses for Telegram, please use these special formatting conventions:
133
133
 
134
134
  1. For content that should be hidden as a spoiler (revealed only when users click):
@@ -144,11 +144,11 @@ When formatting your responses for Telegram, please use these special formatting
144
144
 
145
145
  3. Continue using standard markdown for other formatting:
146
146
  - **bold text**
147
- - _italic text_ or _italic text_
148
- - **underlined text**
147
+ - *italic text*
148
+ - __underlined text__
149
149
  - ~~strikethrough~~
150
150
  - `inline code`
151
- - `code blocks`
151
+ - ```code blocks```
152
152
  - [link text](URL)
153
153
 
154
154
  Apply spoilers for:
@@ -165,6 +165,6 @@ Use expandable blockquotes for:
165
165
  - Optional reading
166
166
  - Technical details
167
167
  - Additional context not needed by all users
168
- ```
168
+ ````
169
169
 
170
170
  You can add this prompt to your system message when initializing your ChatGPT interactions to ensure the model properly formats content for optimal display in Telegram.
@@ -27,13 +27,19 @@ def extract_and_convert_code_blocks(text: str):
27
27
  def replacer(match):
28
28
  language = match.group(1) if match.group(1) else ""
29
29
  code_content = match.group(3)
30
+
31
+ # Properly escape HTML entities in code content
32
+ escaped_content = (
33
+ code_content.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
34
+ )
35
+
30
36
  placeholder = f"CODEBLOCKPLACEHOLDER{len(placeholders)}"
31
37
  placeholders.append(placeholder)
32
38
  if not language:
33
- html_code_block = f"<pre><code>{code_content}</code></pre>"
39
+ html_code_block = f"<pre><code>{escaped_content}</code></pre>"
34
40
  else:
35
41
  html_code_block = (
36
- f'<pre><code class="language-{language}">{code_content}</code></pre>'
42
+ f'<pre><code class="language-{language}">{escaped_content}</code></pre>'
37
43
  )
38
44
  return (placeholder, html_code_block)
39
45
 
@@ -35,18 +35,14 @@ def telegram_format(text: str) -> str:
35
35
  # Step 0: Combine blockquotes
36
36
  text = combine_blockquotes(text)
37
37
 
38
- # Step 1: Convert HTML reserved symbols
39
- text = convert_html_chars(text)
40
-
41
- # Step 2: Extract and convert triple-backtick code blocks first
38
+ # Step 1: Extract and convert triple-backtick code blocks first
42
39
  output, triple_code_blocks = extract_and_convert_code_blocks(text)
43
40
 
44
- # Step 2.5: Extract inline code snippets (single backticks) so they won't be parsed as italics, etc.
41
+ # Step 2: Extract inline code snippets
45
42
  output, inline_code_snippets = extract_inline_code_snippets(output)
46
43
 
47
- # Step 3: Escape HTML special characters in the output text (for non-code parts)
48
- # We do NOT want to escape what's inside placeholders here, only what's outside code placeholders.
49
- output = output.replace("<", "&lt;").replace(">", "&gt;")
44
+ # Step 3: Convert HTML reserved symbols in the text (not in code blocks)
45
+ output = convert_html_chars(output)
50
46
 
51
47
  # Convert headings (H1-H6)
52
48
  output = re.sub(r"^(#{1,6})\s+(.+)$", r"<b>\2</b>", output, flags=re.MULTILINE)
@@ -80,20 +76,21 @@ def telegram_format(text: str) -> str:
80
76
  link_pattern = r"(?:!?)\[((?:[^\[\]]|\[.*?\])*)\]\(([^)]+)\)"
81
77
  output = re.sub(link_pattern, r'<a href="\2">\1</a>', output)
82
78
 
83
- # Step 3.5: Reinsert inline code snippets, escaping special chars in code content
79
+ # Step 4: Reinsert inline code snippets, applying HTML escaping to the content
84
80
  for placeholder, snippet in inline_code_snippets.items():
81
+ # Apply HTML escaping to the content of inline code
85
82
  escaped_snippet = (
86
83
  snippet.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
87
84
  )
88
85
  output = output.replace(placeholder, f"<code>{escaped_snippet}</code>")
89
86
 
90
- # Step 4: Reinsert the converted triple-backtick code blocks
87
+ # Step 5: Reinsert the converted triple-backtick code blocks
91
88
  output = reinsert_code_blocks(output, triple_code_blocks)
92
89
 
93
- # Step 5: Remove blockquote escaping
90
+ # Step 6: Remove blockquote escaping
94
91
  output = remove_blockquote_escaping(output)
95
92
 
96
- # Step 6: Remove spoiler tag escaping
93
+ # Step 7: Remove spoiler tag escaping
97
94
  output = remove_spoiler_escaping(output)
98
95
 
99
96
  # Clean up multiple consecutive newlines, but preserve intentional spacing
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: chatgpt_md_converter
3
- Version: 0.3.3
3
+ Version: 0.3.5
4
4
  Summary: A package for converting markdown to HTML for chat Telegram bots
5
5
  Home-page: https://github.com/Latand/formatter-chatgpt-telegram
6
6
  Author: Kostiantyn Kriuchkov
@@ -128,7 +128,7 @@ Feel free to contribute to this project by submitting pull requests or opening i
128
128
  > **Note**:
129
129
  > Since standard Markdown doesn't include Telegram-specific features like spoilers (`||text||`) and expandable blockquotes (`**> text`), you'll need to explicitly instruct LLMs to use these formats. Here's a suggested prompt addition to include in your system message or initial instructions:
130
130
 
131
- ```
131
+ ````
132
132
  When formatting your responses for Telegram, please use these special formatting conventions:
133
133
 
134
134
  1. For content that should be hidden as a spoiler (revealed only when users click):
@@ -144,11 +144,11 @@ When formatting your responses for Telegram, please use these special formatting
144
144
 
145
145
  3. Continue using standard markdown for other formatting:
146
146
  - **bold text**
147
- - _italic text_ or _italic text_
148
- - **underlined text**
147
+ - *italic text*
148
+ - __underlined text__
149
149
  - ~~strikethrough~~
150
150
  - `inline code`
151
- - `code blocks`
151
+ - ```code blocks```
152
152
  - [link text](URL)
153
153
 
154
154
  Apply spoilers for:
@@ -165,6 +165,6 @@ Use expandable blockquotes for:
165
165
  - Optional reading
166
166
  - Technical details
167
167
  - Additional context not needed by all users
168
- ```
168
+ ````
169
169
 
170
170
  You can add this prompt to your system message when initializing your ChatGPT interactions to ensure the model properly formats content for optimal display in Telegram.
@@ -2,7 +2,7 @@ from setuptools import setup
2
2
 
3
3
  setup(
4
4
  name="chatgpt_md_converter",
5
- version="0.3.3",
5
+ version="0.3.5",
6
6
  author="Kostiantyn Kriuchkov",
7
7
  author_email="latand666@gmail.com",
8
8
  description="A package for converting markdown to HTML for chat Telegram bots",
@@ -720,3 +720,13 @@ def test_multiple_spoilers():
720
720
  expected_output = 'First <span class="tg-spoiler">spoiler</span> and then another <span class="tg-spoiler">spoiler with <i>italic</i></span>'
721
721
  output = telegram_format(input_text)
722
722
  assert output == expected_output, "Failed handling multiple spoilers"
723
+
724
+
725
+ def test_ukrainian_text_with_inline_code():
726
+ """Test that Ukrainian text with inline code is properly formatted"""
727
+ input_text = (
728
+ """звісно, майстре тестування. ой та зрозуміло `<LAUGH>` що ти тут тестуєш."""
729
+ )
730
+ expected_output = """звісно, майстре тестування. ой та зрозуміло <code>&lt;LAUGH&gt;</code> що ти тут тестуєш."""
731
+ output = telegram_format(input_text)
732
+ assert output == expected_output, f"Output was: {output}"