PyPI - chatgpt-md-converter - Versions diffs - 0.1.2__py3-none-any.whl → 0.3.0__py3-none-any.whl - Mend

chatgpt-md-converter 0.1.2__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

chatgpt_md_converter/converters.py CHANGED Viewed

@@ -19,4 +19,9 @@ def split_by_tag(out_text: str, md_tag: str, html_tag: str) -> str:
         r"(?<!\w){}(.*?){}(?!\w)".format(re.escape(md_tag), re.escape(md_tag)),
         re.DOTALL,
     )
+    # Special handling for the tg-spoiler tag
+    if html_tag == 'span class="tg-spoiler"':
+        return tag_pattern.sub(r'<span class="tg-spoiler">\1</span>', out_text)
     return tag_pattern.sub(r"<{}>\1</{}>".format(html_tag, html_tag), out_text)

chatgpt_md_converter/formatters.py CHANGED Viewed

@@ -1,28 +1,68 @@
 def combine_blockquotes(text: str) -> str:
     """
     Combines multiline blockquotes into a single blockquote while keeping the \n characters.
+    Supports both regular blockquotes (>) and expandable blockquotes (**>).
     """
     lines = text.split("\n")
     combined_lines = []
     blockquote_lines = []
     in_blockquote = False
+    is_expandable = False
     for line in lines:
-        if line.startswith(">"):
+        if line.startswith("**>"):
+            # Expandable blockquote
             in_blockquote = True
+            is_expandable = True
+            blockquote_lines.append(line[3:].strip())
+        elif line.startswith(">"):
+            # Regular blockquote
+            if not in_blockquote:
+                # This is a new blockquote
+                in_blockquote = True
+                is_expandable = False
             blockquote_lines.append(line[1:].strip())
         else:
             if in_blockquote:
-                combined_lines.append(
-                    "<blockquote>" + "\n".join(blockquote_lines) + "</blockquote>"
-                )
+                # End of blockquote, combine the lines
+                if is_expandable:
+                    combined_lines.append(
+                        "<blockquote expandable>"
+                        + "\n".join(blockquote_lines)
+                        + "</blockquote>"
+                    )
+                else:
+                    combined_lines.append(
+                        "<blockquote>" + "\n".join(blockquote_lines) + "</blockquote>"
+                    )
                 blockquote_lines = []
                 in_blockquote = False
+                is_expandable = False
             combined_lines.append(line)
     if in_blockquote:
-        combined_lines.append(
-            "<blockquote>" + "\n".join(blockquote_lines) + "</blockquote>"
-        )
+        # Handle the case where the file ends with a blockquote
+        if is_expandable:
+            combined_lines.append(
+                "<blockquote expandable>"
+                + "\n".join(blockquote_lines)
+                + "</blockquote>"
+            )
+        else:
+            combined_lines.append(
+                "<blockquote>" + "\n".join(blockquote_lines) + "</blockquote>"
+            )
     return "\n".join(combined_lines)
+def fix_asterisk_equations(text: str) -> str:
+    """
+    Replaces numeric expressions with '*' in them with '×'
+    to avoid accidental italic formatting.
+    e.g. '6*8' -> '6×8', '6 * 8' -> '6×8'
+    """
+    import re
+    eq_pattern = re.compile(r"(\d+)\s*\*\s*(\d+)")
+    return eq_pattern.sub(r"\1×\2", text)

chatgpt_md_converter/helpers.py CHANGED Viewed

@@ -1,8 +1,27 @@
 def remove_blockquote_escaping(output: str) -> str:
     """
-    Removes the escaping from blockquote tags.
+    Removes the escaping from blockquote tags, including expandable blockquotes.
     """
+    # Regular blockquotes
     output = output.replace("&lt;blockquote&gt;", "<blockquote>").replace(
         "&lt;/blockquote&gt;", "</blockquote>"
     )
+    # Expandable blockquotes
+    output = output.replace(
+        "&lt;blockquote expandable&gt;", "<blockquote expandable>"
+    ).replace("&lt;/blockquote&gt;", "</blockquote>")
+    return output
+def remove_spoiler_escaping(output: str) -> str:
+    """
+    Ensures spoiler tags are correctly formatted (rather than being escaped).
+    """
+    # Fix any incorrectly escaped spoiler tags
+    output = output.replace(
+        '&lt;span class="tg-spoiler"&gt;', '<span class="tg-spoiler">'
+    )
+    output = output.replace("&lt;/span&gt;", "</span>")
     return output

chatgpt_md_converter/telegram_formatter.py CHANGED Viewed

@@ -1,57 +1,102 @@
 import re
 from .converters import convert_html_chars, split_by_tag
 from .extractors import extract_and_convert_code_blocks, reinsert_code_blocks
 from .formatters import combine_blockquotes
-from .helpers import remove_blockquote_escaping
+from .helpers import remove_blockquote_escaping, remove_spoiler_escaping
+def extract_inline_code_snippets(text: str):
+    """
+    Extracts inline code (single-backtick content) from the text,
+    replacing it with placeholders, returning modified text and a dict of placeholders -> code text.
+    This ensures characters like '*' or '_' inside inline code won't be interpreted as Markdown.
+    """
+    placeholders = []
+    code_snippets = {}
+    inline_code_pattern = re.compile(r"`([^`]+)`")
+    def replacer(match):
+        snippet = match.group(1)
+        placeholder = f"INLINECODEPLACEHOLDER{len(placeholders)}"
+        placeholders.append(placeholder)
+        code_snippets[placeholder] = snippet
+        return placeholder
+    new_text = inline_code_pattern.sub(replacer, text)
+    return new_text, code_snippets
 def telegram_format(text: str) -> str:
     """
     Converts markdown in the provided text to HTML supported by Telegram.
     """
     # Step 0: Combine blockquotes
     text = combine_blockquotes(text)
     # Step 1: Convert HTML reserved symbols
     text = convert_html_chars(text)
-    # Step 2: Extract and convert code blocks first
-    output, code_blocks = extract_and_convert_code_blocks(text)
+    # Step 2: Extract and convert triple-backtick code blocks first
+    output, triple_code_blocks = extract_and_convert_code_blocks(text)
+    # Step 2.5: Extract inline code snippets (single backticks) so they won't be parsed as italics, etc.
+    output, inline_code_snippets = extract_inline_code_snippets(output)
-    # Step 3: Escape HTML special characters in the output text
+    # Step 3: Escape HTML special characters in the output text (for non-code parts)
+    # We do NOT want to escape what's inside placeholders here, only what's outside code placeholders.
     output = output.replace("<", "&lt;").replace(">", "&gt;")
-    # Inline code
-    output = re.sub(r"`(.*?)`", r"<code>\1</code>", output)
+    # Convert headings (H1-H6)
+    output = re.sub(r"^(#{1,6})\s+(.+)$", r"<b>\2</b>", output, flags=re.MULTILINE)
+    # Convert unordered lists (do this before italic detection so that leading '*' is recognized as bullet)
+    output = re.sub(r"^(\s*)[\-\*]\s+(.+)$", r"\1• \2", output, flags=re.MULTILINE)
     # Nested Bold and Italic
     output = re.sub(r"\*\*\*(.*?)\*\*\*", r"<b><i>\1</i></b>", output)
     output = re.sub(r"\_\_\_(.*?)\_\_\_", r"<u><i>\1</i></u>", output)
-    # Process markdown formatting tags (bold, underline, italic, strikethrough)
-    # and convert them to their respective HTML tags
+    # Process markdown for bold (**), underline (__), strikethrough (~~), and spoiler (||)
     output = split_by_tag(output, "**", "b")
     output = split_by_tag(output, "__", "u")
-    output = split_by_tag(output, "_", "i")
-    output = split_by_tag(output, "*", "i")
     output = split_by_tag(output, "~~", "s")
+    output = split_by_tag(output, "||", 'span class="tg-spoiler"')
-    # Remove storage links
-    output = re.sub(r"【[^】]+】", "", output)
+    # Custom approach for single-asterisk italic
+    italic_pattern = re.compile(
+        r"(?<![A-Za-z0-9])\*(?=[^\s])(.*?)(?<!\s)\*(?![A-Za-z0-9])", re.DOTALL
+    )
+    output = italic_pattern.sub(r"<i>\1</i>", output)
+    # Process single underscore-based italic
+    output = split_by_tag(output, "_", "i")
-    # Convert links
-    output = re.sub(r"!?\[(.*?)\]\((.*?)\)", r'<a href="\2">\1</a>', output)
+    # Remove storage links (Vector storage placeholders like 【4:0†source】)
+    output = re.sub(r"【[^】]+】", "", output)
-    # Convert headings
-    output = re.sub(r"^\s*#+ (.+)", r"<b>\1</b>", output, flags=re.MULTILINE)
+    # Convert Markdown links/images to <a href="">…</a>
+    link_pattern = r"(?:!?)\[((?:[^\[\]]|\[.*?\])*)\]\(([^)]+)\)"
+    output = re.sub(link_pattern, r'<a href="\2">\1</a>', output)
-    # Convert unordered lists, preserving indentation
-    output = re.sub(r"^(\s*)[\-\*] (.+)", r"\1• \2", output, flags=re.MULTILINE)
+    # Step 3.5: Reinsert inline code snippets, escaping special chars in code content
+    for placeholder, snippet in inline_code_snippets.items():
+        escaped_snippet = (
+            snippet.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
+        )
+        output = output.replace(placeholder, f"<code>{escaped_snippet}</code>")
-    # Step 4: Reinsert the converted HTML code blocks
-    output = reinsert_code_blocks(output, code_blocks)
+    # Step 4: Reinsert the converted triple-backtick code blocks
+    output = reinsert_code_blocks(output, triple_code_blocks)
     # Step 5: Remove blockquote escaping
     output = remove_blockquote_escaping(output)
-    return output
+    # Step 6: Remove spoiler tag escaping
+    output = remove_spoiler_escaping(output)
+    # Clean up multiple consecutive newlines, but preserve intentional spacing
+    output = re.sub(r"\n{3,}", "\n\n", output)
+    return output.strip()

{chatgpt_md_converter-0.1.2.dist-info → chatgpt_md_converter-0.3.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
-Metadata-Version: 2.1
+Metadata-Version: 2.4
 Name: chatgpt_md_converter
-Version: 0.1.2
+Version: 0.3.0
 Summary: A package for converting markdown to HTML for chat Telegram bots
 Home-page: https://github.com/Latand/formatter-chatgpt-telegram
 Author: Kostiantyn Kriuchkov
@@ -11,6 +11,15 @@ Classifier: Operating System :: OS Independent
 Requires-Python: >=3.8
 Description-Content-Type: text/markdown
 License-File: LICENSE
+Dynamic: author
+Dynamic: author-email
+Dynamic: classifier
+Dynamic: description
+Dynamic: description-content-type
+Dynamic: home-page
+Dynamic: license-file
+Dynamic: requires-python
+Dynamic: summary
 # ChatGPT Markdown to Telegram HTML Parser

chatgpt_md_converter-0.3.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,11 @@
+chatgpt_md_converter/__init__.py,sha256=AfkikySkXsJ8HKQcSlU7B1KBHz54QCGJ7MO5Ka9oWRM,79
+chatgpt_md_converter/converters.py,sha256=fgebhbhMcIOqnr0xuV04v81RD91FfaGfA0kO417cDqc,831
+chatgpt_md_converter/extractors.py,sha256=RNwo57_6jCe-HoX5eCvvZcjSTc2uPax-6QEtXqXA5QQ,1880
+chatgpt_md_converter/formatters.py,sha256=UbjRG7bLETIGDaFDbFybwW8dKYBMDmgLmIasJiw_j60,2304
+chatgpt_md_converter/helpers.py,sha256=2Nc9_s0HcLq79mBt7Hje19LzbO6z9mUNgayoMyWkIhI,874
+chatgpt_md_converter/telegram_formatter.py,sha256=L0ESIY1AOuRXdIto2lWR38zuYuIwlLBScGINMrm8VVk,4091
+chatgpt_md_converter-0.3.0.dist-info/licenses/LICENSE,sha256=SDr2jeP-s2g4vf17-jdLXrrqA4_mU7L_RtSJlv4Y2mk,1077
+chatgpt_md_converter-0.3.0.dist-info/METADATA,sha256=IjGkCXRdnzaDtSFgwBs1njGXultCqQ4t-9lqPf0vjKc,3282
+chatgpt_md_converter-0.3.0.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
+chatgpt_md_converter-0.3.0.dist-info/top_level.txt,sha256=T2o7csVtZgr-Pwm83aSUkZn0humJmDFNqW38tRSsNqw,21
+chatgpt_md_converter-0.3.0.dist-info/RECORD,,

{chatgpt_md_converter-0.1.2.dist-info → chatgpt_md_converter-0.3.0.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: bdist_wheel (0.43.0)
+Generator: setuptools (78.1.0)
 Root-Is-Purelib: true
 Tag: py3-none-any

chatgpt_md_converter-0.1.2.dist-info/RECORD DELETED Viewed

@@ -1,11 +0,0 @@
-chatgpt_md_converter/__init__.py,sha256=AfkikySkXsJ8HKQcSlU7B1KBHz54QCGJ7MO5Ka9oWRM,79
-chatgpt_md_converter/converters.py,sha256=nfbKCcYCAYBk_0RQntCVQFQgAlEUWrGtLWULE1wETmU,657
-chatgpt_md_converter/extractors.py,sha256=RNwo57_6jCe-HoX5eCvvZcjSTc2uPax-6QEtXqXA5QQ,1880
-chatgpt_md_converter/formatters.py,sha256=gG_SavtZI0BVl7SqkwGZ_usCB89ZPpAQWofpDUd9DzU,878
-chatgpt_md_converter/helpers.py,sha256=9CtBeMzKYrymECNPl0MXsW0Vscp4A02a64a5z0sVWqE,261
-chatgpt_md_converter/telegram_formatter.py,sha256=3XSNWda_5LKRShjZlkO-D7c1Uq77pfvUGlhqliEO0eU,2007
-chatgpt_md_converter-0.1.2.dist-info/LICENSE,sha256=SDr2jeP-s2g4vf17-jdLXrrqA4_mU7L_RtSJlv4Y2mk,1077
-chatgpt_md_converter-0.1.2.dist-info/METADATA,sha256=roSPyHowfr_bCIlyWkja5ozrq3j8zjAQI1cI_0Iqodo,3086
-chatgpt_md_converter-0.1.2.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
-chatgpt_md_converter-0.1.2.dist-info/top_level.txt,sha256=T2o7csVtZgr-Pwm83aSUkZn0humJmDFNqW38tRSsNqw,21
-chatgpt_md_converter-0.1.2.dist-info/RECORD,,

{chatgpt_md_converter-0.1.2.dist-info → chatgpt_md_converter-0.3.0.dist-info/licenses}/LICENSE RENAMED Viewed

File without changes

{chatgpt_md_converter-0.1.2.dist-info → chatgpt_md_converter-0.3.0.dist-info}/top_level.txt RENAMED Viewed

File without changes

chatgpt-md-converter 0.1.2__py3-none-any.whl → 0.3.0__py3-none-any.whl

chatgpt-md-converter 0.1.2__py3-none-any.whl → 0.3.0__py3-none-any.whl