django-tgcms 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. django_tgcms-0.1.0/.gitignore +19 -0
  2. django_tgcms-0.1.0/LICENSE +21 -0
  3. django_tgcms-0.1.0/PKG-INFO +203 -0
  4. django_tgcms-0.1.0/README.md +179 -0
  5. django_tgcms-0.1.0/pyproject.toml +39 -0
  6. django_tgcms-0.1.0/tests/__init__.py +0 -0
  7. django_tgcms-0.1.0/tests/settings.py +9 -0
  8. django_tgcms-0.1.0/tests/test_formatting.py +101 -0
  9. django_tgcms-0.1.0/tests/test_models.py +122 -0
  10. django_tgcms-0.1.0/tgcms/__init__.py +6 -0
  11. django_tgcms-0.1.0/tgcms/admin.py +60 -0
  12. django_tgcms-0.1.0/tgcms/apps.py +7 -0
  13. django_tgcms-0.1.0/tgcms/conf.py +21 -0
  14. django_tgcms-0.1.0/tgcms/formatting/__init__.py +16 -0
  15. django_tgcms-0.1.0/tgcms/formatting/entities.py +31 -0
  16. django_tgcms-0.1.0/tgcms/formatting/html.py +181 -0
  17. django_tgcms-0.1.0/tgcms/forms.py +60 -0
  18. django_tgcms-0.1.0/tgcms/management/__init__.py +0 -0
  19. django_tgcms-0.1.0/tgcms/management/commands/__init__.py +0 -0
  20. django_tgcms-0.1.0/tgcms/management/commands/send_post.py +127 -0
  21. django_tgcms-0.1.0/tgcms/migrations/0001_initial.py +30 -0
  22. django_tgcms-0.1.0/tgcms/migrations/0002_remove_post_entities_remove_post_text_and_more.py +44 -0
  23. django_tgcms-0.1.0/tgcms/migrations/0003_block_file_url_block_telegram_file_id.py +23 -0
  24. django_tgcms-0.1.0/tgcms/migrations/0004_mediaasset_remove_block_file_remove_block_file_url_and_more.py +45 -0
  25. django_tgcms-0.1.0/tgcms/migrations/__init__.py +0 -0
  26. django_tgcms-0.1.0/tgcms/models.py +123 -0
  27. django_tgcms-0.1.0/tgcms/static/tgcms/block_inline.js +214 -0
  28. django_tgcms-0.1.0/tgcms/static/tgcms/editor.css +132 -0
  29. django_tgcms-0.1.0/tgcms/static/tgcms/editor.js +81 -0
  30. django_tgcms-0.1.0/tgcms/templates/tgcms/base.html +17 -0
  31. django_tgcms-0.1.0/tgcms/templates/tgcms/post_form.html +52 -0
  32. django_tgcms-0.1.0/tgcms/templates/tgcms/post_list.html +18 -0
  33. django_tgcms-0.1.0/tgcms/templates/tgcms/post_published.html +12 -0
  34. django_tgcms-0.1.0/tgcms/urls.py +13 -0
  35. django_tgcms-0.1.0/tgcms/views.py +45 -0
  36. django_tgcms-0.1.0/tgcms/widgets.py +46 -0
@@ -0,0 +1,19 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *.egg-info/
5
+ .eggs/
6
+ build/
7
+ dist/
8
+
9
+ # uv
10
+ .venv/
11
+
12
+ # Django
13
+ *.sqlite3
14
+ local_settings.py
15
+
16
+ # Tooling
17
+ .idea/
18
+ .vscode/
19
+ .DS_Store
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 ddnsupp
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,203 @@
1
+ Metadata-Version: 2.4
2
+ Name: django-tgcms
3
+ Version: 0.1.0
4
+ Summary: Django reusable app: a Telegram post constructor that outputs Bot API-ready {text, entities} payloads.
5
+ Project-URL: Homepage, https://gitlab.com/ddnsupp/django-tgcms
6
+ Author: ddnsupp
7
+ License: MIT
8
+ License-File: LICENSE
9
+ Keywords: bot-api,cms,django,entities,telegram
10
+ Classifier: Development Status :: 3 - Alpha
11
+ Classifier: Framework :: Django
12
+ Classifier: Framework :: Django :: 4.2
13
+ Classifier: Framework :: Django :: 5.0
14
+ Classifier: Framework :: Django :: 5.1
15
+ Classifier: Framework :: Django :: 5.2
16
+ Classifier: Framework :: Django :: 6.0
17
+ Classifier: License :: OSI Approved :: MIT License
18
+ Classifier: Programming Language :: Python :: 3.11
19
+ Classifier: Programming Language :: Python :: 3.12
20
+ Classifier: Programming Language :: Python :: 3.13
21
+ Requires-Python: >=3.11
22
+ Requires-Dist: django>=4.2
23
+ Description-Content-Type: text/markdown
24
+
25
+ # django-tgcms
26
+
27
+ A reusable Django app for building Telegram posts. Compose posts from blocks
28
+ (heading, formatted text, photo, video) using a WYSIWYG editor embedded in
29
+ Django admin. `Post.render()` returns a Bot API-ready payload — sending is
30
+ entirely up to you.
31
+
32
+ **No dependencies beyond Django. The core API contains no bot logic and makes no HTTP calls — only the optional `send_post` test command talks to Telegram.**
33
+
34
+ ---
35
+
36
+ ## Installation
37
+
38
+ ```bash
39
+ pip install django-tgcms
40
+ # or
41
+ uv add django-tgcms
42
+ ```
43
+
44
+ `settings.py`:
45
+
46
+ ```python
47
+ INSTALLED_APPS = [
48
+ ...
49
+ "tgcms",
50
+ ]
51
+
52
+ # Optional — only needed for the send_post test command
53
+ TGCMS = {
54
+ "BOT_TOKEN": env("YOUR_BOT_TOKEN"), # map whatever name your project uses
55
+ }
56
+ ```
57
+
58
+ `urls.py` (only if you need the built-in views):
59
+
60
+ ```python
61
+ urlpatterns += [
62
+ path("tg/", include("tgcms.urls")),
63
+ ]
64
+ ```
65
+
66
+ Run migrations:
67
+
68
+ ```bash
69
+ python manage.py migrate
70
+ ```
71
+
72
+ ---
73
+
74
+ ## Content — Django admin
75
+
76
+ Posts are edited in the standard Django admin at `/admin/tgcms/post/`.
77
+
78
+ **Block types** (drag-and-drop reordering inside each post):
79
+
80
+ | Type | Stores |
81
+ |---|---|
82
+ | `heading` | Plain text title |
83
+ | `text` | Formatted text — bold, italic, underline, strikethrough, spoiler, code, pre, blockquote, links |
84
+ | `photo` | Media asset + caption |
85
+ | `video` | Media asset + caption |
86
+
87
+ **MediaAsset** (`/admin/tgcms/mediaasset/`) is a shared media registry. One
88
+ asset can be referenced by any number of blocks across any number of posts.
89
+ After the first Telegram send the `telegram_file_id` is cached on the asset —
90
+ subsequent sends reuse it without re-uploading.
91
+
92
+ ---
93
+
94
+ ## Bot integration
95
+
96
+ ```python
97
+ from tgcms.models import Post
98
+
99
+ post = Post.objects.prefetch_related("blocks__media").get(pk=post_id)
100
+ payload = post.render()
101
+ # {
102
+ # "blocks": [
103
+ # {"type": "heading", "text": "Title"},
104
+ # {"type": "text", "text": "Hello!", "entities": [{"type": "bold", "offset": 0, "length": 5}]},
105
+ # {"type": "photo", "media_asset_id": 3, "file": "AgACAgI...", "caption": "..."},
106
+ # ]
107
+ # }
108
+ ```
109
+
110
+ **aiogram broadcast pattern:**
111
+
112
+ ```python
113
+ from asgiref.sync import sync_to_async
114
+
115
+ async def send_post(bot, chat_id: int, post_id: int):
116
+ post = await sync_to_async(
117
+ Post.objects.prefetch_related("blocks__media").get
118
+ )(pk=post_id)
119
+
120
+ for block in post.blocks.all():
121
+ data = block.render()
122
+
123
+ if data["type"] == "heading":
124
+ await bot.send_message(chat_id, f"<b>{data['text']}</b>", parse_mode="HTML")
125
+
126
+ elif data["type"] == "text":
127
+ await bot.send_message(chat_id, data["text"], entities=data.get("entities"))
128
+
129
+ elif data["type"] == "photo":
130
+ msg = await bot.send_photo(
131
+ chat_id,
132
+ photo=data["file"], # telegram_file_id, S3 URL, or local path
133
+ caption=data.get("caption"),
134
+ )
135
+ # Cache file_id after first upload — all future renders return it
136
+ if block.media and not block.media.telegram_file_id:
137
+ await sync_to_async(block.media.cache_file_id)(msg.photo[-1].file_id)
138
+
139
+ elif data["type"] == "video":
140
+ msg = await bot.send_video(chat_id, video=data["file"], caption=data.get("caption"))
141
+ if block.media and not block.media.telegram_file_id:
142
+ await sync_to_async(block.media.cache_file_id)(msg.video.file_id)
143
+ ```
144
+
145
+ Once `cache_file_id()` is called, `block.media.source` returns the cached
146
+ `telegram_file_id` for every subsequent post that references the same asset.
147
+
148
+ ---
149
+
150
+ ## Testing — management command
151
+
152
+ ```bash
153
+ # Token is read from settings.TGCMS["BOT_TOKEN"] automatically
154
+ python manage.py send_post <post_id> <chat_id>
155
+
156
+ # Or pass it explicitly
157
+ python manage.py send_post 1 @mychannel --token 123456:ABC...
158
+
159
+ # Or via env var
160
+ TELEGRAM_BOT_TOKEN=123456:ABC... python manage.py send_post 1 123456789
161
+ ```
162
+
163
+ Token lookup order: `--token` → `settings.TGCMS["BOT_TOKEN"]` → `TELEGRAM_BOT_TOKEN` env var.
164
+
165
+ ---
166
+
167
+ ## Models
168
+
169
+ ```
170
+ MediaAsset
171
+ file FileField — upload from disk
172
+ file_url URLField — S3 / CDN link
173
+ telegram_file_id Cached after first send (read-only in admin)
174
+ .source Property: returns the best available file reference
175
+ .cache_file_id() Persists telegram_file_id; call once after the first send
176
+
177
+ Post
178
+ title, status draft / published
179
+ .render() Returns {"blocks": [...]}
180
+ .mark_published() Sets status and published_at
181
+
182
+ Block FK → Post, FK → MediaAsset (nullable)
183
+ type heading / text / photo / video
184
+ order Managed by drag-and-drop in admin
185
+ text, entities heading and text blocks
186
+ media FK → MediaAsset, photo and video blocks
187
+ caption, caption_entities
188
+ .render() Returns one block in Bot API format
189
+ ```
190
+
191
+ ---
192
+
193
+ ## UTF-16 offsets
194
+
195
+ `MessageEntity.offset` and `length` are counted in UTF-16 code units, not
196
+ Python characters. Non-BMP characters (e.g. 😀 U+1F600) occupy 2 units, not 1.
197
+ All offset arithmetic in `tgcms.formatting` goes through `utf16_len()`.
198
+
199
+ ---
200
+
201
+ ## License
202
+
203
+ MIT
@@ -0,0 +1,179 @@
1
+ # django-tgcms
2
+
3
+ A reusable Django app for building Telegram posts. Compose posts from blocks
4
+ (heading, formatted text, photo, video) using a WYSIWYG editor embedded in
5
+ Django admin. `Post.render()` returns a Bot API-ready payload — sending is
6
+ entirely up to you.
7
+
8
+ **No dependencies beyond Django. The core API contains no bot logic and makes no HTTP calls — only the optional `send_post` test command talks to Telegram.**
9
+
10
+ ---
11
+
12
+ ## Installation
13
+
14
+ ```bash
15
+ pip install django-tgcms
16
+ # or
17
+ uv add django-tgcms
18
+ ```
19
+
20
+ `settings.py`:
21
+
22
+ ```python
23
+ INSTALLED_APPS = [
24
+ ...
25
+ "tgcms",
26
+ ]
27
+
28
+ # Optional — only needed for the send_post test command
29
+ TGCMS = {
30
+ "BOT_TOKEN": env("YOUR_BOT_TOKEN"), # map whatever name your project uses
31
+ }
32
+ ```
33
+
34
+ `urls.py` (only if you need the built-in views):
35
+
36
+ ```python
37
+ urlpatterns += [
38
+ path("tg/", include("tgcms.urls")),
39
+ ]
40
+ ```
41
+
42
+ Run migrations:
43
+
44
+ ```bash
45
+ python manage.py migrate
46
+ ```
47
+
48
+ ---
49
+
50
+ ## Content — Django admin
51
+
52
+ Posts are edited in the standard Django admin at `/admin/tgcms/post/`.
53
+
54
+ **Block types** (drag-and-drop reordering inside each post):
55
+
56
+ | Type | Stores |
57
+ |---|---|
58
+ | `heading` | Plain text title |
59
+ | `text` | Formatted text — bold, italic, underline, strikethrough, spoiler, code, pre, blockquote, links |
60
+ | `photo` | Media asset + caption |
61
+ | `video` | Media asset + caption |
62
+
63
+ **MediaAsset** (`/admin/tgcms/mediaasset/`) is a shared media registry. One
64
+ asset can be referenced by any number of blocks across any number of posts.
65
+ After the first Telegram send the `telegram_file_id` is cached on the asset —
66
+ subsequent sends reuse it without re-uploading.
67
+
68
+ ---
69
+
70
+ ## Bot integration
71
+
72
+ ```python
73
+ from tgcms.models import Post
74
+
75
+ post = Post.objects.prefetch_related("blocks__media").get(pk=post_id)
76
+ payload = post.render()
77
+ # {
78
+ # "blocks": [
79
+ # {"type": "heading", "text": "Title"},
80
+ # {"type": "text", "text": "Hello!", "entities": [{"type": "bold", "offset": 0, "length": 5}]},
81
+ # {"type": "photo", "media_asset_id": 3, "file": "AgACAgI...", "caption": "..."},
82
+ # ]
83
+ # }
84
+ ```
85
+
86
+ **aiogram broadcast pattern:**
87
+
88
+ ```python
89
+ from asgiref.sync import sync_to_async
90
+
91
+ async def send_post(bot, chat_id: int, post_id: int):
92
+ post = await sync_to_async(
93
+ Post.objects.prefetch_related("blocks__media").get
94
+ )(pk=post_id)
95
+
96
+ for block in post.blocks.all():
97
+ data = block.render()
98
+
99
+ if data["type"] == "heading":
100
+ await bot.send_message(chat_id, f"<b>{data['text']}</b>", parse_mode="HTML")
101
+
102
+ elif data["type"] == "text":
103
+ await bot.send_message(chat_id, data["text"], entities=data.get("entities"))
104
+
105
+ elif data["type"] == "photo":
106
+ msg = await bot.send_photo(
107
+ chat_id,
108
+ photo=data["file"], # telegram_file_id, S3 URL, or local path
109
+ caption=data.get("caption"),
110
+ )
111
+ # Cache file_id after first upload — all future renders return it
112
+ if block.media and not block.media.telegram_file_id:
113
+ await sync_to_async(block.media.cache_file_id)(msg.photo[-1].file_id)
114
+
115
+ elif data["type"] == "video":
116
+ msg = await bot.send_video(chat_id, video=data["file"], caption=data.get("caption"))
117
+ if block.media and not block.media.telegram_file_id:
118
+ await sync_to_async(block.media.cache_file_id)(msg.video.file_id)
119
+ ```
120
+
121
+ Once `cache_file_id()` is called, `block.media.source` returns the cached
122
+ `telegram_file_id` for every subsequent post that references the same asset.
123
+
124
+ ---
125
+
126
+ ## Testing — management command
127
+
128
+ ```bash
129
+ # Token is read from settings.TGCMS["BOT_TOKEN"] automatically
130
+ python manage.py send_post <post_id> <chat_id>
131
+
132
+ # Or pass it explicitly
133
+ python manage.py send_post 1 @mychannel --token 123456:ABC...
134
+
135
+ # Or via env var
136
+ TELEGRAM_BOT_TOKEN=123456:ABC... python manage.py send_post 1 123456789
137
+ ```
138
+
139
+ Token lookup order: `--token` → `settings.TGCMS["BOT_TOKEN"]` → `TELEGRAM_BOT_TOKEN` env var.
140
+
141
+ ---
142
+
143
+ ## Models
144
+
145
+ ```
146
+ MediaAsset
147
+ file FileField — upload from disk
148
+ file_url URLField — S3 / CDN link
149
+ telegram_file_id Cached after first send (read-only in admin)
150
+ .source Property: returns the best available file reference
151
+ .cache_file_id() Persists telegram_file_id; call once after the first send
152
+
153
+ Post
154
+ title, status draft / published
155
+ .render() Returns {"blocks": [...]}
156
+ .mark_published() Sets status and published_at
157
+
158
+ Block FK → Post, FK → MediaAsset (nullable)
159
+ type heading / text / photo / video
160
+ order Managed by drag-and-drop in admin
161
+ text, entities heading and text blocks
162
+ media FK → MediaAsset, photo and video blocks
163
+ caption, caption_entities
164
+ .render() Returns one block in Bot API format
165
+ ```
166
+
167
+ ---
168
+
169
+ ## UTF-16 offsets
170
+
171
+ `MessageEntity.offset` and `length` are counted in UTF-16 code units, not
172
+ Python characters. Non-BMP characters (e.g. 😀 U+1F600) occupy 2 units, not 1.
173
+ All offset arithmetic in `tgcms.formatting` goes through `utf16_len()`.
174
+
175
+ ---
176
+
177
+ ## License
178
+
179
+ MIT
@@ -0,0 +1,39 @@
1
+ [project]
2
+ name = "django-tgcms"
3
+ version = "0.1.0"
4
+ description = "Django reusable app: a Telegram post constructor that outputs Bot API-ready {text, entities} payloads."
5
+ readme = "README.md"
6
+ requires-python = ">=3.11"
7
+ license = { text = "MIT" }
8
+ authors = [{ name = "ddnsupp" }]
9
+ keywords = ["django", "telegram", "bot-api", "cms", "entities"]
10
+ classifiers = [
11
+ "Development Status :: 3 - Alpha",
12
+ "Framework :: Django",
13
+ "Framework :: Django :: 4.2",
14
+ "Framework :: Django :: 5.0",
15
+ "Framework :: Django :: 5.1",
16
+ "Framework :: Django :: 5.2",
17
+ "Framework :: Django :: 6.0",
18
+ "License :: OSI Approved :: MIT License",
19
+ "Programming Language :: Python :: 3.11",
20
+ "Programming Language :: Python :: 3.12",
21
+ "Programming Language :: Python :: 3.13",
22
+ ]
23
+ dependencies = [
24
+ "Django>=4.2",
25
+ ]
26
+
27
+ [project.urls]
28
+ Homepage = "https://gitlab.com/ddnsupp/django-tgcms"
29
+
30
+ [build-system]
31
+ requires = ["hatchling"]
32
+ build-backend = "hatchling.build"
33
+
34
+ [tool.hatch.build.targets.wheel]
35
+ packages = ["tgcms"]
36
+
37
+ [tool.hatch.build.targets.sdist]
38
+ include = ["tgcms", "tests", "README.md", "LICENSE"]
39
+
File without changes
@@ -0,0 +1,9 @@
1
+ SECRET_KEY = "test-secret-key"
2
+ DATABASES = {"default": {"ENGINE": "django.db.backends.sqlite3", "NAME": ":memory:"}}
3
+ INSTALLED_APPS = [
4
+ "django.contrib.contenttypes",
5
+ "django.contrib.auth",
6
+ "tgcms",
7
+ ]
8
+ DEFAULT_AUTO_FIELD = "django.db.models.BigAutoField"
9
+ USE_TZ = True
@@ -0,0 +1,101 @@
1
+ import unittest
2
+
3
+ from tgcms.formatting import html_to_text_entities, text_entities_to_html, utf16_len
4
+
5
+
6
class UTF16LenTests(unittest.TestCase):
    """Check that utf16_len counts UTF-16 code units, not Python characters."""

    def test_ascii(self):
        # Five ASCII characters are expected to measure five units.
        measured = utf16_len("hello")
        self.assertEqual(measured, 5)

    def test_bmp_non_ascii(self):
        # Cyrillic letters are in the BMP, so each one is a single code unit.
        measured = utf16_len("привет")
        self.assertEqual(measured, 6)

    def test_non_bmp_emoji_is_two_units(self):
        # U+1F600 is encoded as a surrogate pair, i.e. two UTF-16 code units.
        emoji = "\U0001f600"
        self.assertEqual(utf16_len(emoji), 2)
        self.assertEqual(utf16_len("a" + emoji + "b"), 4)
18
+
19
+
20
class HtmlToEntitiesTests(unittest.TestCase):
    """Check the (text, entities) pairs produced by html_to_text_entities.

    Every test that unpacks the plain text also asserts it, so a parser that
    yields correct entities over corrupted text cannot pass unnoticed.
    """

    def test_basic_bold(self):
        text, entities = html_to_text_entities("<b>hi</b> there")
        self.assertEqual(text, "hi there")
        self.assertEqual(entities, [{"type": "bold", "offset": 0, "length": 2}])

    def test_offsets_after_emoji_are_utf16(self):
        # The emoji occupies 2 UTF-16 units, so "bold" must start at offset 3.
        text, entities = html_to_text_entities("a\U0001f600<b>bold</b>")
        self.assertEqual(text, "a\U0001f600bold")
        self.assertEqual(entities, [{"type": "bold", "offset": 3, "length": 4}])

    def test_emoji_inside_entity_length_is_utf16(self):
        text, entities = html_to_text_entities("<b>x\U0001f600y</b>")
        # The tags are stripped while the emoji stays intact in the text.
        self.assertEqual(text, "x\U0001f600y")
        # x (1 unit) + emoji (2 units) + y (1 unit) = 4 UTF-16 units.
        self.assertEqual(entities, [{"type": "bold", "offset": 0, "length": 4}])

    def test_text_link(self):
        text, entities = html_to_text_entities('<a href="https://t.me">t.me</a>')
        self.assertEqual(text, "t.me")
        self.assertEqual(
            entities,
            [{"type": "text_link", "offset": 0, "length": 4, "url": "https://t.me"}],
        )

    def test_nested_entities(self):
        text, entities = html_to_text_entities("<b><i>x</i></b>")
        self.assertEqual(text, "x")
        # Entity order is not pinned here, only the set of types and the span.
        types = sorted(e["type"] for e in entities)
        self.assertEqual(types, ["bold", "italic"])
        self.assertTrue(all(e["offset"] == 0 and e["length"] == 1 for e in entities))

    def test_spoiler_span(self):
        text, entities = html_to_text_entities('<span class="tg-spoiler">s</span>')
        # The wrapping span must not leak into the plain text.
        self.assertEqual(text, "s")
        self.assertEqual(entities, [{"type": "spoiler", "offset": 0, "length": 1}])

    def test_pre_with_language(self):
        text, entities = html_to_text_entities(
            '<pre><code class="language-python">x=1</code></pre>'
        )
        self.assertEqual(text, "x=1")
        self.assertEqual(
            entities,
            [{"type": "pre", "offset": 0, "length": 3, "language": "python"}],
        )

    def test_entities_are_sorted(self):
        _, entities = html_to_text_entities("<b>a</b>b<i>c</i>")
        offsets = [e["offset"] for e in entities]
        self.assertEqual(offsets, sorted(offsets))
69
+
70
+
71
class RoundTripTests(unittest.TestCase):
    """Check that parse -> rebuild -> parse yields a stable (text, entities)."""

    def assert_roundtrip(self, html):
        """Parse html, rebuild HTML from the result, and re-parse it.

        The second parse must produce the same text and the same entities as
        the first one.
        """
        first_text, first_entities = html_to_text_entities(html)
        regenerated = text_entities_to_html(first_text, first_entities)
        second_text, second_entities = html_to_text_entities(regenerated)
        self.assertEqual(first_text, second_text)
        self.assertEqual(first_entities, second_entities)

    def test_roundtrip_plain(self):
        markup = "hello world"
        self.assert_roundtrip(markup)

    def test_roundtrip_bold_with_emoji(self):
        markup = "a\U0001f600<b>bold \U0001f4a1 end</b>"
        self.assert_roundtrip(markup)

    def test_roundtrip_link_and_code(self):
        markup = 'see <a href="https://t.me">t.me</a> and <code>x</code>'
        self.assert_roundtrip(markup)

    def test_roundtrip_nested(self):
        markup = "<b>bold <i>both</i></b> plain"
        self.assert_roundtrip(markup)

    def test_html_special_chars_escaped(self):
        # Character references are decoded when parsing ...
        text, entities = html_to_text_entities("a &lt; b &amp; c")
        self.assertEqual(text, "a < b & c")
        # ... and must be escaped again when HTML is rebuilt.
        rebuilt = text_entities_to_html(text, entities)
        for escaped in ("&lt;", "&amp;"):
            self.assertIn(escaped, rebuilt)
98
+
99
+
100
+ if __name__ == "__main__":
101
+ unittest.main()