chatgpt-md-converter 0.1.2__tar.gz → 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (16) hide show
  1. {chatgpt_md_converter-0.1.2 → chatgpt_md_converter-0.2.0}/PKG-INFO +1 -1
  2. {chatgpt_md_converter-0.1.2 → chatgpt_md_converter-0.2.0}/chatgpt_md_converter/formatters.py +11 -0
  3. chatgpt_md_converter-0.2.0/chatgpt_md_converter/telegram_formatter.py +100 -0
  4. {chatgpt_md_converter-0.1.2 → chatgpt_md_converter-0.2.0}/chatgpt_md_converter.egg-info/PKG-INFO +1 -1
  5. {chatgpt_md_converter-0.1.2 → chatgpt_md_converter-0.2.0}/setup.py +1 -1
  6. {chatgpt_md_converter-0.1.2 → chatgpt_md_converter-0.2.0}/tests/test_parser.py +265 -5
  7. chatgpt_md_converter-0.1.2/chatgpt_md_converter/telegram_formatter.py +0 -57
  8. {chatgpt_md_converter-0.1.2 → chatgpt_md_converter-0.2.0}/LICENSE +0 -0
  9. {chatgpt_md_converter-0.1.2 → chatgpt_md_converter-0.2.0}/chatgpt_md_converter/__init__.py +0 -0
  10. {chatgpt_md_converter-0.1.2 → chatgpt_md_converter-0.2.0}/chatgpt_md_converter/converters.py +0 -0
  11. {chatgpt_md_converter-0.1.2 → chatgpt_md_converter-0.2.0}/chatgpt_md_converter/extractors.py +0 -0
  12. {chatgpt_md_converter-0.1.2 → chatgpt_md_converter-0.2.0}/chatgpt_md_converter/helpers.py +0 -0
  13. {chatgpt_md_converter-0.1.2 → chatgpt_md_converter-0.2.0}/chatgpt_md_converter.egg-info/SOURCES.txt +0 -0
  14. {chatgpt_md_converter-0.1.2 → chatgpt_md_converter-0.2.0}/chatgpt_md_converter.egg-info/dependency_links.txt +0 -0
  15. {chatgpt_md_converter-0.1.2 → chatgpt_md_converter-0.2.0}/chatgpt_md_converter.egg-info/top_level.txt +0 -0
  16. {chatgpt_md_converter-0.1.2 → chatgpt_md_converter-0.2.0}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: chatgpt_md_converter
3
- Version: 0.1.2
3
+ Version: 0.2.0
4
4
  Summary: A package for converting markdown to HTML for chat Telegram bots
5
5
  Home-page: https://github.com/Latand/formatter-chatgpt-telegram
6
6
  Author: Kostiantyn Kriuchkov
@@ -26,3 +26,14 @@ def combine_blockquotes(text: str) -> str:
26
26
  )
27
27
 
28
28
  return "\n".join(combined_lines)
29
+
30
+
31
def fix_asterisk_equations(text: str) -> str:
    """
    Replace '*' between two numbers with '×' so that numeric products are
    not mistaken for Markdown italic markers.

    Whitespace around the asterisk is dropped and chained products are
    converted in full:
    '6*8' -> '6×8', '6 * 8' -> '6×8', '2*3*4' -> '2×3×4'

    Only an asterisk with a digit on both sides (ignoring whitespace) is
    touched; '*italic*', 'x*y' and '2**3' are returned unchanged.

    :param text: Text that may contain numeric expressions using '*'.
    :return: The text with numeric '*' operators replaced by '×'.
    """
    import re

    # The right-hand digit is matched with a lookahead instead of being
    # consumed, so it can also act as the left operand of the next '*'.
    # Consuming it would leave every second operator of a chain untouched
    # ('2*3*4' -> '2×3*4').
    eq_pattern = re.compile(r'(\d)\s*\*\s*(?=\d)')
    return eq_pattern.sub(r'\1×', text)
@@ -0,0 +1,100 @@
1
+ import re
2
+
3
+ from .converters import convert_html_chars, split_by_tag
4
+ from .extractors import extract_and_convert_code_blocks, reinsert_code_blocks
5
+ from .formatters import combine_blockquotes
6
+ from .helpers import remove_blockquote_escaping
7
+
8
+
9
+ def extract_inline_code_snippets(text: str):
10
+ """
11
+ Extracts inline code (single-backtick content) from the text,
12
+ replacing it with placeholders, returning modified text and a dict of placeholders -> code text.
13
+ This ensures characters like '*' or '_' inside inline code won't be interpreted as Markdown.
14
+ """
15
+ placeholders = []
16
+ code_snippets = {}
17
+ inline_code_pattern = re.compile(r"`([^`]+)`")
18
+
19
+ def replacer(match):
20
+ snippet = match.group(1)
21
+ placeholder = f"INLINECODEPLACEHOLDER{len(placeholders)}"
22
+ placeholders.append(placeholder)
23
+ code_snippets[placeholder] = snippet
24
+ return placeholder
25
+
26
+ new_text = inline_code_pattern.sub(replacer, text)
27
+ return new_text, code_snippets
28
+
29
+
30
+ def telegram_format(text: str) -> str:
31
+ """
32
+ Converts markdown in the provided text to HTML supported by Telegram.
33
+ """
34
+
35
+ # Step 0: Combine blockquotes
36
+ text = combine_blockquotes(text)
37
+
38
+ # Step 1: Convert HTML reserved symbols
39
+ text = convert_html_chars(text)
40
+
41
+ # Step 2: Extract and convert triple-backtick code blocks first
42
+ output, triple_code_blocks = extract_and_convert_code_blocks(text)
43
+
44
+ # Step 2.5: Extract inline code snippets (single backticks) so they won't be parsed as italics, etc.
45
+ output, inline_code_snippets = extract_inline_code_snippets(output)
46
+
47
+ # Step 3: Escape HTML special characters in the output text (for non-code parts)
48
+ # We do NOT want to escape what's inside placeholders here, only what's outside code placeholders.
49
+ output = output.replace("<", "&lt;").replace(">", "&gt;")
50
+
51
+ # Convert headings (H1-H6)
52
+ output = re.sub(r"^(#{1,6})\s+(.+)$", r"<b>\2</b>", output, flags=re.MULTILINE)
53
+
54
+ # Convert unordered lists (do this before italic detection so that leading '*' is recognized as bullet)
55
+ output = re.sub(r"^(\s*)[\-\*]\s+(.+)$", r"\1• \2", output, flags=re.MULTILINE)
56
+
57
+ # Remove this old inline code replacement — now handled by extract_inline_code_snippets()
58
+ # output = re.sub(r"`(.*?)`", r"<code>\1</code>", output)
59
+
60
+ # Nested Bold and Italic
61
+ output = re.sub(r"\*\*\*(.*?)\*\*\*", r"<b><i>\1</i></b>", output)
62
+ output = re.sub(r"\_\_\_(.*?)\_\_\_", r"<u><i>\1</i></u>", output)
63
+
64
+ # Process markdown for bold (**), underline (__), strikethrough (~~)
65
+ output = split_by_tag(output, "**", "b")
66
+ output = split_by_tag(output, "__", "u")
67
+ output = split_by_tag(output, "~~", "s")
68
+
69
+ # Custom approach for single-asterisk italic
70
+ italic_pattern = re.compile(
71
+ r"(?<![A-Za-z0-9])\*(?=[^\s])(.*?)(?<!\s)\*(?![A-Za-z0-9])",
72
+ re.DOTALL
73
+ )
74
+ output = italic_pattern.sub(r"<i>\1</i>", output)
75
+
76
+ # Process single underscore-based italic
77
+ output = split_by_tag(output, "_", "i")
78
+
79
+ # Remove storage links (Vector storage placeholders like 【4:0†source】)
80
+ output = re.sub(r"【[^】]+】", "", output)
81
+
82
+ # Convert Markdown links/images to <a href="">…</a>
83
+ link_pattern = r"(?:!?)\[((?:[^\[\]]|\[.*?\])*)\]\(([^)]+)\)"
84
+ output = re.sub(link_pattern, r'<a href="\2">\1</a>', output)
85
+
86
+ # Step 3.5: Reinsert inline code snippets, escaping special chars in code content
87
+ for placeholder, snippet in inline_code_snippets.items():
88
+ escaped_snippet = snippet.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
89
+ output = output.replace(placeholder, f"<code>{escaped_snippet}</code>")
90
+
91
+ # Step 4: Reinsert the converted triple-backtick code blocks
92
+ output = reinsert_code_blocks(output, triple_code_blocks)
93
+
94
+ # Step 5: Remove blockquote escaping
95
+ output = remove_blockquote_escaping(output)
96
+
97
+ # Clean up multiple consecutive newlines, but preserve intentional spacing
98
+ output = re.sub(r"\n{3,}", "\n\n", output)
99
+
100
+ return output.strip()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: chatgpt_md_converter
3
- Version: 0.1.2
3
+ Version: 0.2.0
4
4
  Summary: A package for converting markdown to HTML for chat Telegram bots
5
5
  Home-page: https://github.com/Latand/formatter-chatgpt-telegram
6
6
  Author: Kostiantyn Kriuchkov
@@ -2,7 +2,7 @@ from setuptools import setup
2
2
 
3
3
  setup(
4
4
  name="chatgpt_md_converter",
5
- version="0.1.2",
5
+ version="0.2.0",
6
6
  author="Kostiantyn Kriuchkov",
7
7
  author_email="latand666@gmail.com",
8
8
  description="A package for converting markdown to HTML for chat Telegram bots",
@@ -1,3 +1,4 @@
1
+ from chatgpt_md_converter.extractors import ensure_closing_delimiters
1
2
  from chatgpt_md_converter.telegram_formatter import telegram_format
2
3
 
3
4
 
@@ -64,7 +65,18 @@ for i in range(3):
64
65
 
65
66
  [Link](http://example.com)
66
67
  """
67
- expected_output = """<b>Heading</b>\nThis is a test of <b>bold</b>, <u>underline</u>, and <code>inline code</code>.\n• Item 1\n• Item 2\n\n<pre><code class="language-python">for i in range(3):\n print(i)\n</code></pre>\n\n<a href="http://example.com">Link</a>\n"""
68
+ expected_output = """
69
+ <b>Heading</b>
70
+ This is a test of <b>bold</b>, <u>underline</u>, and <code>inline code</code>.
71
+ • Item 1
72
+ • Item 2
73
+
74
+ <pre><code class="language-python">for i in range(3):
75
+ print(i)
76
+ </code></pre>
77
+
78
+ <a href="http://example.com">Link</a>
79
+ """
68
80
  output = telegram_format(input_text)
69
81
  assert (
70
82
  output.strip() == expected_output.strip()
@@ -129,7 +141,6 @@ def test_code_block_within_bold_text():
129
141
 
130
142
  def test_triple_backticks_with_nested_markdown():
131
143
  input_text = "```python\n**bold text** and __underline__ in code block```"
132
- # Expecting the markdown syntax to be ignored within the code block
133
144
  expected_output = '<pre><code class="language-python">**bold text** and __underline__ in code block</code></pre>'
134
145
  output = telegram_format(input_text)
135
146
  assert (
@@ -139,7 +150,6 @@ def test_triple_backticks_with_nested_markdown():
139
150
 
140
151
  def test_unmatched_code_delimiters():
141
152
  input_text = "This has an `unmatched code delimiter."
142
- # Expecting original input as output due to the unmatched delimiter
143
153
  expected_output = "This has an <code>unmatched code delimiter.</code>"
144
154
  output = telegram_format(input_text)
145
155
  assert output == expected_output, "Failed handling unmatched code delimiters"
@@ -281,7 +291,7 @@ def test_md_large_example():
281
291
  - Item 2
282
292
  - Subitem 1
283
293
  - Subitem 2
284
-
294
+
285
295
  - **Ordered List:**
286
296
 
287
297
  1. First item
@@ -349,7 +359,7 @@ def example_function():
349
359
  • Item 2
350
360
  • Subitem 1
351
361
  • Subitem 2
352
-
362
+
353
363
  • <b>Ordered List:</b>
354
364
 
355
365
  1. First item
@@ -396,3 +406,253 @@ Here is some <code>inline code</code>.
396
406
  assert (
397
407
  output.strip() == expected_output.strip()
398
408
  ), "Failed handling large markdown example"
409
+
410
+
411
+ def test_unclosed_single_backtick():
412
+ """Test that a single unclosed backtick is properly handled"""
413
+ text = "Here is some `code without closing"
414
+ result = ensure_closing_delimiters(text)
415
+ assert result == "Here is some `code without closing`"
416
+
417
+
418
+ def test_unclosed_triple_backtick():
419
+ """Test that unclosed triple backticks are properly handled"""
420
+ text = "Here is some ```code without closing"
421
+ result = ensure_closing_delimiters(text)
422
+ assert result == "Here is some ```code without closing```"
423
+
424
+
425
+ def test_bracket_link_with_additional_text():
426
+ """
427
+ Ensures that text like '[OtherText] [Title](Link)' doesn't
428
+ merge 'OtherText' and 'Title' into the <a> tag text.
429
+ """
430
+ input_text = "[OtherText] [Title](https://example.com)"
431
+ output = telegram_format(input_text)
432
+ expected_output = '[OtherText] <a href="https://example.com">Title</a>'
433
+ assert output == expected_output, f"Output was: {output}"
434
+
435
+
436
+ def test_heading_formatting_with_newlines():
437
+ """
438
+ Checks that headings #, ##, etc. are properly wrapped in <b> tags.
439
+ """
440
+ input_text = """# Heading1
441
+ Some text
442
+ ## Heading2
443
+ More text
444
+ """
445
+ output = telegram_format(input_text)
446
+ lines = output.splitlines()
447
+
448
+ assert "<b>Heading1</b>" in output
449
+ assert "<b>Heading2</b>" in output
450
+ assert lines[0] == "<b>Heading1</b>"
451
+ assert lines[1] == "Some text"
452
+ assert lines[2] == "<b>Heading2</b>"
453
+ assert lines[3] == "More text"
454
+
455
+
456
+ def test_list_formatting_with_newlines():
457
+ """
458
+ Checks that list items (starting with '-' or '*') become bullet points,
459
+ each on its own line with proper spacing.
460
+ """
461
+ input_text = """- Item one
462
+ - Item two
463
+ * Item three
464
+ Some text
465
+ - Item four"""
466
+ output = telegram_format(input_text)
467
+ lines = [line.strip() for line in output.splitlines() if line.strip()]
468
+
469
+ assert "• Item one" in lines
470
+ assert "• Item two" in lines
471
+ assert "• Item three" in lines
472
+ assert "• Item four" in lines
473
+ assert "Some text" in lines
474
+
475
+ bullet_lines = [line for line in lines if line.startswith("•")]
476
+ assert len(bullet_lines) == 4
477
+ assert bullet_lines[0] == "• Item one"
478
+ assert bullet_lines[1] == "• Item two"
479
+ assert bullet_lines[2] == "• Item three"
480
+ assert bullet_lines[3] == "• Item four"
481
+
482
+
483
+ def test_preserve_other_brackets():
484
+ """
485
+ Ensures that other bracketed text not forming a valid link is preserved literally.
486
+ """
487
+ input_text = "Look at [this], but [not a link] something else."
488
+ output = telegram_format(input_text)
489
+ assert "[this]" in output
490
+ assert "[not a link]" in output
491
+ assert "<a href=" not in output
492
+
493
+
494
+ def test_link_with_nested_brackets():
495
+ """Test that links with nested brackets in the text are handled correctly"""
496
+ input_text = "[Link [with brackets]](https://example.com)"
497
+ output = telegram_format(input_text)
498
+ expected_output = '<a href="https://example.com">Link [with brackets]</a>'
499
+ assert output == expected_output, f"Output was: {output}"
500
+
501
+
502
+ def test_link_with_spaces():
503
+ """Test that links with spaces are handled correctly"""
504
+ input_text = "[OtherText] [Title](Link)"
505
+ output = telegram_format(input_text)
506
+ expected_output = '[OtherText] <a href="Link">Title</a>'
507
+ assert output == expected_output, f"Output was: {output}"
508
+
509
+
510
+ def test_ukrainian_bullet_points():
511
+ input_text = """Звісно, ось список цікавих речей у форматі Markdown:
512
+
513
+ * **Парадокс кота Шредінгера:** Чи може кіт бути одночасно живим і мертвим? 🤔
514
+ * **Ефект метелика:** Маленька зміна може мати великі наслідки. 🦋
515
+ * **Теорія струн:** Чи є наш всесвіт просто вібрацією струн? 🎶
516
+ * **Темна матерія та темна енергія:** Що складає 95% всесвіту? 🌌
517
+ * **Квантова заплутаність:** Чи можуть два об'єкти бути зв'язані на відстані? 🔗
518
+ * **Соліпсизм:** Чи існує щось, крім моєї свідомості? 🤨
519
+ * **Парадокс Фермі:** Де всі інші інопланетяни? 👽
520
+ * **Симуляційна гіпотеза:** Чи живемо ми в симуляції? 💻
521
+ * **Ефект Даннінга-Крюгера:** Чому некомпетентні люди переоцінюють себе? 🤓
522
+ * **Когнітивні спотворення:** Як наш мозок обманює нас? 🤯
523
+ """
524
+
525
+ expected_output = """Звісно, ось список цікавих речей у форматі Markdown:
526
+
527
+ • <b>Парадокс кота Шредінгера:</b> Чи може кіт бути одночасно живим і мертвим? 🤔
528
+ • <b>Ефект метелика:</b> Маленька зміна може мати великі наслідки. 🦋
529
+ • <b>Теорія струн:</b> Чи є наш всесвіт просто вібрацією струн? 🎶
530
+ • <b>Темна матерія та темна енергія:</b> Що складає 95% всесвіту? 🌌
531
+ • <b>Квантова заплутаність:</b> Чи можуть два об'єкти бути зв'язані на відстані? 🔗
532
+ • <b>Соліпсизм:</b> Чи існує щось, крім моєї свідомості? 🤨
533
+ • <b>Парадокс Фермі:</b> Де всі інші інопланетяни? 👽
534
+ • <b>Симуляційна гіпотеза:</b> Чи живемо ми в симуляції? 💻
535
+ • <b>Ефект Даннінга-Крюгера:</b> Чому некомпетентні люди переоцінюють себе? 🤓
536
+ • <b>Когнітивні спотворення:</b> Як наш мозок обманює нас? 🤯
537
+ """
538
+
539
+ output = telegram_format(input_text)
540
+ print(output)
541
+ assert output.strip() == expected_output.strip()
542
+
543
+
544
+ def test_asterisk_in_equations():
545
+ """Test that asterisks in mathematical equations are not converted to italic"""
546
+ test_cases = [
547
+ ("2 * 2 = 4", "2 * 2 = 4"),
548
+ ("x*y + z = 10", "x*y + z = 10"),
549
+ ("a * b * c", "a * b * c"),
550
+ ("2*x + 3*y = z", "2*x + 3*y = z"),
551
+ ("This is *italic* but 2 * 2 is not", "This is <i>italic</i> but 2 * 2 is not"),
552
+ ("5 * x + *emphasized* text", "5 * x + <i>emphasized</i> text"),
553
+ ]
554
+
555
+ for input_text, expected_output in test_cases:
556
+ output = telegram_format(input_text)
557
+ assert (
558
+ output == expected_output
559
+ ), f"Failed on input: {input_text}, got: {output}"
560
+
561
+
562
+ def test_complex_equations_with_asterisk():
563
+ """Test more complex mathematical expressions with asterisks"""
564
+ input_text = """The formula is:
565
+ f(x) = 2*x + 3*y
566
+ g(x) = x * (y + z)
567
+ This is *italic* text with equation 2 * 2 = 4
568
+ """
569
+ expected_output = """The formula is:
570
+ f(x) = 2*x + 3*y
571
+ g(x) = x * (y + z)
572
+ This is <i>italic</i> text with equation 2 * 2 = 4"""
573
+
574
+ output = telegram_format(input_text)
575
+ assert output.strip() == expected_output.strip(), f"Output was: {output}"
576
+
577
+
578
+ # ----------------------------------------------------------------------------------------
579
+ # New, more comprehensive and edge-case test methods begin here
580
+ # ----------------------------------------------------------------------------------------
581
+
582
+
583
+ def test_empty_string():
584
+ """Check behavior with an empty string."""
585
+ input_text = ""
586
+ output = telegram_format(input_text)
587
+ assert output == ""
588
+
589
+
590
+ def test_spaces_only():
591
+ """Check behavior with a string that has only spaces."""
592
+ input_text = " "
593
+ output = telegram_format(input_text)
594
+ # Should either remain blank or just be those spaces (strip() might remove them)
595
+ assert output.strip() == ""
596
+
597
+
598
+ def test_asterisk_in_parentheses():
599
+ """Edge case with asterisk in parentheses."""
600
+ input_text = "(2*3) is an equation, but *italic* text is separate."
601
+ expected_output = "(2*3) is an equation, but <i>italic</i> text is separate."
602
+ output = telegram_format(input_text)
603
+ assert output == expected_output
604
+
605
+
606
+ def test_underscore_in_non_italic_context():
607
+ """Edge case with underscores that should not convert to italic."""
608
+ input_text = "This_variable should remain, but _italic_ should convert."
609
+ expected_output = "This_variable should remain, but <i>italic</i> should convert."
610
+ output = telegram_format(input_text)
611
+ assert output == expected_output
612
+
613
+
614
+ def test_code_block_mixed_with_unescaped_html():
615
+ """Ensure code block remains escaped but outside text is processed normally."""
616
+ input_text = """
617
+ Some <div>stuff</div> here.
618
+ ```
619
+ <html><body>Unescaped?</body></html>
620
+ ```
621
+ More text with *italic*.
622
+ """
623
+ expected_output = """
624
+ Some &lt;div&gt;stuff&lt;/div&gt; here.
625
+ <pre><code>&lt;html&gt;&lt;body&gt;Unescaped?&lt;/body&gt;&lt;/html&gt;
626
+ </code></pre>
627
+ More text with <i>italic</i>.
628
+ """
629
+ output = telegram_format(input_text)
630
+ assert output.strip() == expected_output.strip()
631
+
632
+
633
+ def test_equation_with_asterisks_and_italics_combined():
634
+ """More advanced check: combine equations and true italics side by side."""
635
+ input_text = "2*x + 3*y = 10, and *italic* is separate."
636
+ expected_output = "2*x + 3*y = 10, and <i>italic</i> is separate."
637
+ output = telegram_format(input_text)
638
+ assert output == expected_output
639
+
640
+
641
+ def test_inline_code_with_asterisk_and_underscore():
642
+ """Ensure that `*` and `_` inside inline code are not interpreted as markdown."""
643
+ input_text = "Here is `code_with_*_asterisk` outside of `code_with__underscore__`"
644
+ expected_output = "Here is <code>code_with_*_asterisk</code> outside of <code>code_with__underscore__</code>"
645
+ output = telegram_format(input_text)
646
+ assert output == expected_output
647
+
648
+
649
+ def test_heading_followed_by_equation():
650
+ """Check heading usage right before an equation line."""
651
+ input_text = """# MyHeading
652
+ 2*x + y = 4
653
+ """
654
+ # Heading should become <b>MyHeading</b>, equation line remains as is
655
+ expected_output = """<b>MyHeading</b>
656
+ 2*x + y = 4"""
657
+ output = telegram_format(input_text)
658
+ assert output.strip() == expected_output.strip(), f"Got: {output}"
@@ -1,57 +0,0 @@
1
- import re
2
- from .converters import convert_html_chars, split_by_tag
3
- from .extractors import extract_and_convert_code_blocks, reinsert_code_blocks
4
- from .formatters import combine_blockquotes
5
- from .helpers import remove_blockquote_escaping
6
-
7
-
8
- def telegram_format(text: str) -> str:
9
- """
10
- Converts markdown in the provided text to HTML supported by Telegram.
11
- """
12
- # Step 0: Combine blockquotes
13
- text = combine_blockquotes(text)
14
-
15
- # Step 1: Convert HTML reserved symbols
16
- text = convert_html_chars(text)
17
-
18
- # Step 2: Extract and convert code blocks first
19
- output, code_blocks = extract_and_convert_code_blocks(text)
20
-
21
- # Step 3: Escape HTML special characters in the output text
22
- output = output.replace("<", "&lt;").replace(">", "&gt;")
23
-
24
- # Inline code
25
- output = re.sub(r"`(.*?)`", r"<code>\1</code>", output)
26
-
27
- # Nested Bold and Italic
28
- output = re.sub(r"\*\*\*(.*?)\*\*\*", r"<b><i>\1</i></b>", output)
29
- output = re.sub(r"\_\_\_(.*?)\_\_\_", r"<u><i>\1</i></u>", output)
30
-
31
- # Process markdown formatting tags (bold, underline, italic, strikethrough)
32
- # and convert them to their respective HTML tags
33
- output = split_by_tag(output, "**", "b")
34
- output = split_by_tag(output, "__", "u")
35
- output = split_by_tag(output, "_", "i")
36
- output = split_by_tag(output, "*", "i")
37
- output = split_by_tag(output, "~~", "s")
38
-
39
- # Remove storage links
40
- output = re.sub(r"【[^】]+】", "", output)
41
-
42
- # Convert links
43
- output = re.sub(r"!?\[(.*?)\]\((.*?)\)", r'<a href="\2">\1</a>', output)
44
-
45
- # Convert headings
46
- output = re.sub(r"^\s*#+ (.+)", r"<b>\1</b>", output, flags=re.MULTILINE)
47
-
48
- # Convert unordered lists, preserving indentation
49
- output = re.sub(r"^(\s*)[\-\*] (.+)", r"\1• \2", output, flags=re.MULTILINE)
50
-
51
- # Step 4: Reinsert the converted HTML code blocks
52
- output = reinsert_code_blocks(output, code_blocks)
53
-
54
- # Step 5: Remove blockquote escaping
55
- output = remove_blockquote_escaping(output)
56
-
57
- return output