chatgpt-md-converter 0.3.9__tar.gz → 0.3.11__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28)
  1. {chatgpt_md_converter-0.3.9 → chatgpt_md_converter-0.3.11}/PKG-INFO +1 -1
  2. {chatgpt_md_converter-0.3.9 → chatgpt_md_converter-0.3.11}/chatgpt_md_converter/html_markdown/handlers.py +67 -6
  3. {chatgpt_md_converter-0.3.9 → chatgpt_md_converter-0.3.11}/chatgpt_md_converter/telegram_markdown/code_blocks.py +1 -1
  4. {chatgpt_md_converter-0.3.9 → chatgpt_md_converter-0.3.11}/chatgpt_md_converter/telegram_markdown/inline.py +1 -1
  5. {chatgpt_md_converter-0.3.9 → chatgpt_md_converter-0.3.11}/chatgpt_md_converter.egg-info/PKG-INFO +1 -1
  6. {chatgpt_md_converter-0.3.9 → chatgpt_md_converter-0.3.11}/chatgpt_md_converter.egg-info/SOURCES.txt +1 -0
  7. {chatgpt_md_converter-0.3.9 → chatgpt_md_converter-0.3.11}/setup.py +1 -1
  8. chatgpt_md_converter-0.3.11/tests/test_html_to_markdown_inline_spacing.py +25 -0
  9. {chatgpt_md_converter-0.3.9 → chatgpt_md_converter-0.3.11}/tests/test_parser.py +60 -0
  10. {chatgpt_md_converter-0.3.9 → chatgpt_md_converter-0.3.11}/LICENSE +0 -0
  11. {chatgpt_md_converter-0.3.9 → chatgpt_md_converter-0.3.11}/README.md +0 -0
  12. {chatgpt_md_converter-0.3.9 → chatgpt_md_converter-0.3.11}/chatgpt_md_converter/__init__.py +0 -0
  13. {chatgpt_md_converter-0.3.9 → chatgpt_md_converter-0.3.11}/chatgpt_md_converter/html_markdown/escaping.py +0 -0
  14. {chatgpt_md_converter-0.3.9 → chatgpt_md_converter-0.3.11}/chatgpt_md_converter/html_markdown/renderer.py +0 -0
  15. {chatgpt_md_converter-0.3.9 → chatgpt_md_converter-0.3.11}/chatgpt_md_converter/html_markdown/state.py +0 -0
  16. {chatgpt_md_converter-0.3.9 → chatgpt_md_converter-0.3.11}/chatgpt_md_converter/html_markdown/tree.py +0 -0
  17. {chatgpt_md_converter-0.3.9 → chatgpt_md_converter-0.3.11}/chatgpt_md_converter/html_splitter.py +0 -0
  18. {chatgpt_md_converter-0.3.9 → chatgpt_md_converter-0.3.11}/chatgpt_md_converter/html_to_markdown.py +0 -0
  19. {chatgpt_md_converter-0.3.9 → chatgpt_md_converter-0.3.11}/chatgpt_md_converter/telegram_formatter.py +0 -0
  20. {chatgpt_md_converter-0.3.9 → chatgpt_md_converter-0.3.11}/chatgpt_md_converter/telegram_markdown/__init__.py +0 -0
  21. {chatgpt_md_converter-0.3.9 → chatgpt_md_converter-0.3.11}/chatgpt_md_converter/telegram_markdown/postprocess.py +0 -0
  22. {chatgpt_md_converter-0.3.9 → chatgpt_md_converter-0.3.11}/chatgpt_md_converter/telegram_markdown/preprocess.py +0 -0
  23. {chatgpt_md_converter-0.3.9 → chatgpt_md_converter-0.3.11}/chatgpt_md_converter/telegram_markdown/renderer.py +0 -0
  24. {chatgpt_md_converter-0.3.9 → chatgpt_md_converter-0.3.11}/chatgpt_md_converter.egg-info/dependency_links.txt +0 -0
  25. {chatgpt_md_converter-0.3.9 → chatgpt_md_converter-0.3.11}/chatgpt_md_converter.egg-info/top_level.txt +0 -0
  26. {chatgpt_md_converter-0.3.9 → chatgpt_md_converter-0.3.11}/setup.cfg +0 -0
  27. {chatgpt_md_converter-0.3.9 → chatgpt_md_converter-0.3.11}/tests/test_roundtrip_markdown.py +0 -0
  28. {chatgpt_md_converter-0.3.9 → chatgpt_md_converter-0.3.11}/tests/test_splitter.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: chatgpt_md_converter
3
- Version: 0.3.9
3
+ Version: 0.3.11
4
4
  Summary: A package for converting markdown to HTML for chat Telegram bots
5
5
  Home-page: https://github.com/botfather-dev/formatter-chatgpt-telegram
6
6
  Author: Kostiantyn Kriuchkov
@@ -35,30 +35,91 @@ def render_node(node: Node, state: RenderState) -> str:
35
35
  return render_nodes(node.children, state)
36
36
 
37
37
 
38
+ def _split_surrounding_whitespace(text: str) -> tuple[str, str, str]:
39
+ """Return leading whitespace, core text, and trailing whitespace."""
40
+
41
+ start = 0
42
+ end = len(text)
43
+
44
+ while start < end and text[start].isspace():
45
+ start += 1
46
+
47
+ while end > start and text[end - 1].isspace():
48
+ end -= 1
49
+
50
+ return text[:start], text[start:end], text[end:]
51
+
52
+
53
+ def _italic_boundary_conflict(marker: str, core: str) -> bool:
54
+ if marker == "*":
55
+ return core.startswith("*") or core.endswith("*")
56
+
57
+ if marker == "_":
58
+ starts = core.startswith("_")
59
+ if starts and len(core) > 1 and core[1] == "_":
60
+ starts = False
61
+
62
+ ends = core.endswith("_")
63
+ if ends and len(core) > 1 and core[-2] == "_":
64
+ ends = False
65
+
66
+ return starts or ends
67
+
68
+ return False
69
+
70
+
71
+ def _choose_italic_marker(state: RenderState, core: str) -> str:
72
+ depth = state.italic_depth
73
+
74
+ if state.bold_depth > 0 and depth == 0:
75
+ candidates = ["_", "*"]
76
+ elif depth % 2 == 0:
77
+ candidates = ["*", "_"]
78
+ else:
79
+ candidates = ["_", "*"]
80
+
81
+ for marker in candidates:
82
+ if not _italic_boundary_conflict(marker, core):
83
+ return marker
84
+
85
+ return candidates[0]
86
+
87
+
38
88
  def _handle_bold(node: Node, state: RenderState) -> str:
39
89
  inner_state = state.child(bold_depth=state.bold_depth + 1)
40
90
  inner = render_nodes(node.children, inner_state)
41
- return f"**{inner}**"
91
+ leading, core, trailing = _split_surrounding_whitespace(inner)
92
+ if not core:
93
+ return leading + trailing
94
+ return f"{leading}**{core}**{trailing}"
42
95
 
43
96
 
44
97
  def _handle_italic(node: Node, state: RenderState) -> str:
45
98
  depth = state.italic_depth
46
- in_bold = state.bold_depth > 0 and depth == 0
47
- marker = "_" if in_bold else ("*" if depth % 2 == 0 else "_")
48
99
  inner_state = state.child(italic_depth=depth + 1)
49
100
  inner = render_nodes(node.children, inner_state)
50
- return f"{marker}{inner}{marker}"
101
+ leading, core, trailing = _split_surrounding_whitespace(inner)
102
+ if not core:
103
+ return leading + trailing
104
+ marker = _choose_italic_marker(state, core)
105
+ return f"{leading}{marker}{core}{marker}{trailing}"
51
106
 
52
107
 
53
108
  def _handle_inline_marker(node: Node, state: RenderState) -> str:
54
109
  marker_open, marker_close = _INLINE_MARKERS[node.tag.lower()]
55
110
  inner = render_nodes(node.children, state)
56
- return f"{marker_open}{inner}{marker_close}"
111
+ leading, core, trailing = _split_surrounding_whitespace(inner)
112
+ if not core:
113
+ return leading + trailing
114
+ return f"{leading}{marker_open}{core}{marker_close}{trailing}"
57
115
 
58
116
 
59
117
  def _handle_spoiler(node: Node, state: RenderState) -> str:
60
118
  inner = render_nodes(node.children, state)
61
- return f"||{inner}||"
119
+ leading, core, trailing = _split_surrounding_whitespace(inner)
120
+ if not core:
121
+ return leading + trailing
122
+ return f"{leading}||{core}||{trailing}"
62
123
 
63
124
 
64
125
  def _handle_code(node: Node, state: RenderState) -> str:
@@ -67,7 +67,7 @@ def extract_and_convert_code_blocks(text: str):
67
67
  .replace("<", "&lt;")
68
68
  .replace(">", "&gt;")
69
69
  )
70
- placeholder = f"CODEBLOCKPLACEHOLDER{len(placeholders)}"
70
+ placeholder = f"CODEBLOCKPLACEHOLDER_{len(placeholders)}_"
71
71
  placeholders.append(placeholder)
72
72
  if language:
73
73
  html_block = f'<pre><code class="language-{language}">{escaped}</code></pre>'
@@ -60,7 +60,7 @@ def extract_inline_code_snippets(text: str):
60
60
 
61
61
  def replacer(match: re.Match[str]) -> str:
62
62
  snippet = match.group(1)
63
- placeholder = f"INLINECODEPLACEHOLDER{len(placeholders)}"
63
+ placeholder = f"INLINECODEPLACEHOLDER_{len(placeholders)}_"
64
64
  placeholders.append(placeholder)
65
65
  snippets[placeholder] = snippet
66
66
  return placeholder
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: chatgpt_md_converter
3
- Version: 0.3.9
3
+ Version: 0.3.11
4
4
  Summary: A package for converting markdown to HTML for chat Telegram bots
5
5
  Home-page: https://github.com/botfather-dev/formatter-chatgpt-telegram
6
6
  Author: Kostiantyn Kriuchkov
@@ -20,6 +20,7 @@ chatgpt_md_converter/telegram_markdown/inline.py
20
20
  chatgpt_md_converter/telegram_markdown/postprocess.py
21
21
  chatgpt_md_converter/telegram_markdown/preprocess.py
22
22
  chatgpt_md_converter/telegram_markdown/renderer.py
23
+ tests/test_html_to_markdown_inline_spacing.py
23
24
  tests/test_parser.py
24
25
  tests/test_roundtrip_markdown.py
25
26
  tests/test_splitter.py
@@ -2,7 +2,7 @@ from setuptools import setup
2
2
 
3
3
  setup(
4
4
  name="chatgpt_md_converter",
5
- version="0.3.9",
5
+ version="0.3.11",
6
6
  author="Kostiantyn Kriuchkov",
7
7
  author_email="latand666@gmail.com",
8
8
  description="A package for converting markdown to HTML for chat Telegram bots",
@@ -0,0 +1,25 @@
1
+ import pytest
2
+
3
+ from chatgpt_md_converter import html_to_telegram_markdown
4
+
5
+
6
+ @pytest.mark.parametrize(
7
+ ("html", "expected"),
8
+ [
9
+ ("Start <b>bold </b>finish", "Start **bold** finish"),
10
+ ("Start <b> bold</b> finish", "Start **bold** finish"),
11
+ ("Start <i> italics </i>finish", "Start _italics_ finish"),
12
+ ("Start <i>value_</i>end", "Start *value_*end"),
13
+ ("Start <u> underline </u>finish", "Start __underline__ finish"),
14
+ (
15
+ "Start <span class=\"tg-spoiler\"> secret </span>end",
16
+ "Start ||secret|| end",
17
+ ),
18
+ (
19
+ "Intro <b>bold <i> inner </i> block</b> outro",
20
+ "Intro **bold _inner_ block** outro",
21
+ ),
22
+ ],
23
+ )
24
+ def test_html_to_markdown_strips_inline_whitespace(html: str, expected: str) -> None:
25
+ assert html_to_telegram_markdown(html) == expected
@@ -800,6 +800,66 @@ print("hello world ```")
800
800
  assert output == expected_output, show_output()
801
801
 
802
802
 
803
+ def test_inline_code_placeholders_do_not_overlap():
804
+ input_text = """Службова нотатка для тесту.
805
+
806
+ Коли ви запускаєте `alpha.run()`, система піднімає локальний клієнт.
807
+
808
+ У модулі використовується `hook.set()` для реєстрації синхронізації.
809
+
810
+ ```python
811
+ from framework import hook
812
+
813
+ async def configure(base_url: str):
814
+ await hook.set(f"{base_url}/sync")
815
+ ```
816
+
817
+ **Покроковий план**
818
+
819
+ 1. Викликаємо `hook.set()` через менеджер потоків.
820
+ 2. `hook.set()` повертає попередження при повторній реєстрації.
821
+ 3. Якщо потрібно, `hook.clear()` знімає прив'язку.
822
+ 4. Використовуємо `core.loop()` для довготривалих з'єднань.
823
+ 5. `hook.set()` запускає фонову синхронізацію.
824
+
825
+ Поточне середовище потребує **TLS**. Для локального доступу підходить `debug.tunnel`.
826
+
827
+ Чи є питання щодо `hook.set()` чи `hook.clear()`?"""
828
+
829
+ expected_output = """Службова нотатка для тесту.
830
+
831
+ Коли ви запускаєте <code>alpha.run()</code>, система піднімає локальний клієнт.
832
+
833
+ У модулі використовується <code>hook.set()</code> для реєстрації синхронізації.
834
+
835
+ <pre><code class="language-python">from framework import hook
836
+
837
+ async def configure(base_url: str):
838
+ await hook.set(f"{base_url}/sync")
839
+ </code></pre>
840
+
841
+ <b>Покроковий план</b>
842
+
843
+ 1. Викликаємо <code>hook.set()</code> через менеджер потоків.
844
+ 2. <code>hook.set()</code> повертає попередження при повторній реєстрації.
845
+ 3. Якщо потрібно, <code>hook.clear()</code> знімає прив'язку.
846
+ 4. Використовуємо <code>core.loop()</code> для довготривалих з'єднань.
847
+ 5. <code>hook.set()</code> запускає фонову синхронізацію.
848
+
849
+ Поточне середовище потребує <b>TLS</b>. Для локального доступу підходить <code>debug.tunnel</code>.
850
+
851
+ Чи є питання щодо <code>hook.set()</code> чи <code>hook.clear()</code>?"""
852
+
853
+ output = telegram_format(input_text)
854
+
855
+ assert output == expected_output
856
+ assert "<code>hook.set()</code>0" not in output
857
+ assert "<code>hook.set()</code>1" not in output
858
+ assert "<code>hook.set()</code>2" not in output
859
+ assert "<code>hook.set()</code>3" not in output
860
+ assert "<code>hook.set()</code>4" not in output
861
+
862
+
803
863
  def test_nested_code_fence_six_backticks():
804
864
  input_text = """``````markdown
805
865
  `````python