chatgpt-md-converter 0.1.1__tar.gz → 0.1.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {chatgpt_md_converter-0.1.1 → chatgpt_md_converter-0.1.2}/PKG-INFO +1 -1
- {chatgpt_md_converter-0.1.1 → chatgpt_md_converter-0.1.2}/chatgpt_md_converter/converters.py +2 -1
- {chatgpt_md_converter-0.1.1 → chatgpt_md_converter-0.1.2}/chatgpt_md_converter/formatters.py +3 -6
- {chatgpt_md_converter-0.1.1 → chatgpt_md_converter-0.1.2}/chatgpt_md_converter/telegram_formatter.py +5 -4
- {chatgpt_md_converter-0.1.1 → chatgpt_md_converter-0.1.2}/chatgpt_md_converter.egg-info/PKG-INFO +1 -1
- {chatgpt_md_converter-0.1.1 → chatgpt_md_converter-0.1.2}/setup.py +1 -1
- {chatgpt_md_converter-0.1.1 → chatgpt_md_converter-0.1.2}/tests/test_parser.py +142 -0
- {chatgpt_md_converter-0.1.1 → chatgpt_md_converter-0.1.2}/LICENSE +0 -0
- {chatgpt_md_converter-0.1.1 → chatgpt_md_converter-0.1.2}/chatgpt_md_converter/__init__.py +0 -0
- {chatgpt_md_converter-0.1.1 → chatgpt_md_converter-0.1.2}/chatgpt_md_converter/extractors.py +0 -0
- {chatgpt_md_converter-0.1.1 → chatgpt_md_converter-0.1.2}/chatgpt_md_converter/helpers.py +0 -0
- {chatgpt_md_converter-0.1.1 → chatgpt_md_converter-0.1.2}/chatgpt_md_converter.egg-info/SOURCES.txt +0 -0
- {chatgpt_md_converter-0.1.1 → chatgpt_md_converter-0.1.2}/chatgpt_md_converter.egg-info/dependency_links.txt +0 -0
- {chatgpt_md_converter-0.1.1 → chatgpt_md_converter-0.1.2}/chatgpt_md_converter.egg-info/top_level.txt +0 -0
- {chatgpt_md_converter-0.1.1 → chatgpt_md_converter-0.1.2}/setup.cfg +0 -0
{chatgpt_md_converter-0.1.1 → chatgpt_md_converter-0.1.2}/chatgpt_md_converter/converters.py
RENAMED
|
@@ -16,6 +16,7 @@ def split_by_tag(out_text: str, md_tag: str, html_tag: str) -> str:
|
|
|
16
16
|
Splits the text by markdown tag and replaces it with the specified HTML tag.
|
|
17
17
|
"""
|
|
18
18
|
tag_pattern = re.compile(
|
|
19
|
-
r"{}(.*?){}".format(re.escape(md_tag), re.escape(md_tag)),
|
|
19
|
+
r"(?<!\w){}(.*?){}(?!\w)".format(re.escape(md_tag), re.escape(md_tag)),
|
|
20
|
+
re.DOTALL,
|
|
20
21
|
)
|
|
21
22
|
return tag_pattern.sub(r"<{}>\1</{}>".format(html_tag, html_tag), out_text)
|
{chatgpt_md_converter-0.1.1 → chatgpt_md_converter-0.1.2}/chatgpt_md_converter/formatters.py
RENAMED
|
@@ -1,9 +1,6 @@
|
|
|
1
|
-
import re
|
|
2
|
-
|
|
3
|
-
|
|
4
1
|
def combine_blockquotes(text: str) -> str:
|
|
5
2
|
"""
|
|
6
|
-
Combines multiline blockquotes into a single blockquote.
|
|
3
|
+
Combines multiline blockquotes into a single blockquote while keeping the \n characters.
|
|
7
4
|
"""
|
|
8
5
|
lines = text.split("\n")
|
|
9
6
|
combined_lines = []
|
|
@@ -17,7 +14,7 @@ def combine_blockquotes(text: str) -> str:
|
|
|
17
14
|
else:
|
|
18
15
|
if in_blockquote:
|
|
19
16
|
combined_lines.append(
|
|
20
|
-
"<blockquote>" + "
|
|
17
|
+
"<blockquote>" + "\n".join(blockquote_lines) + "</blockquote>"
|
|
21
18
|
)
|
|
22
19
|
blockquote_lines = []
|
|
23
20
|
in_blockquote = False
|
|
@@ -25,7 +22,7 @@ def combine_blockquotes(text: str) -> str:
|
|
|
25
22
|
|
|
26
23
|
if in_blockquote:
|
|
27
24
|
combined_lines.append(
|
|
28
|
-
"<blockquote>" + "
|
|
25
|
+
"<blockquote>" + "\n".join(blockquote_lines) + "</blockquote>"
|
|
29
26
|
)
|
|
30
27
|
|
|
31
28
|
return "\n".join(combined_lines)
|
{chatgpt_md_converter-0.1.1 → chatgpt_md_converter-0.1.2}/chatgpt_md_converter/telegram_formatter.py
RENAMED
|
@@ -26,6 +26,7 @@ def telegram_format(text: str) -> str:
|
|
|
26
26
|
|
|
27
27
|
# Nested Bold and Italic
|
|
28
28
|
output = re.sub(r"\*\*\*(.*?)\*\*\*", r"<b><i>\1</i></b>", output)
|
|
29
|
+
output = re.sub(r"\_\_\_(.*?)\_\_\_", r"<u><i>\1</i></u>", output)
|
|
29
30
|
|
|
30
31
|
# Process markdown formatting tags (bold, underline, italic, strikethrough)
|
|
31
32
|
# and convert them to their respective HTML tags
|
|
@@ -39,14 +40,14 @@ def telegram_format(text: str) -> str:
|
|
|
39
40
|
output = re.sub(r"【[^】]+】", "", output)
|
|
40
41
|
|
|
41
42
|
# Convert links
|
|
42
|
-
output = re.sub(r"
|
|
43
|
-
|
|
44
|
-
# Convert lists
|
|
45
|
-
output = re.sub(r"^\s*[\-\*] (.+)", r"• \1", output, flags=re.MULTILINE)
|
|
43
|
+
output = re.sub(r"!?\[(.*?)\]\((.*?)\)", r'<a href="\2">\1</a>', output)
|
|
46
44
|
|
|
47
45
|
# Convert headings
|
|
48
46
|
output = re.sub(r"^\s*#+ (.+)", r"<b>\1</b>", output, flags=re.MULTILINE)
|
|
49
47
|
|
|
48
|
+
# Convert unordered lists, preserving indentation
|
|
49
|
+
output = re.sub(r"^(\s*)[\-\*] (.+)", r"\1• \2", output, flags=re.MULTILINE)
|
|
50
|
+
|
|
50
51
|
# Step 4: Reinsert the converted HTML code blocks
|
|
51
52
|
output = reinsert_code_blocks(output, code_blocks)
|
|
52
53
|
|
|
@@ -254,3 +254,145 @@ def test_combined_formatting_with_lists():
|
|
|
254
254
|
assert (
|
|
255
255
|
output.strip() == expected_output.strip()
|
|
256
256
|
), "Failed handling combined formatting with lists"
|
|
257
|
+
|
|
258
|
+
|
|
259
|
+
def test_md_large_example():
|
|
260
|
+
input_text = """
|
|
261
|
+
1. **Headings:**
|
|
262
|
+
# H1 Heading
|
|
263
|
+
## H2 Heading
|
|
264
|
+
### H3 Heading
|
|
265
|
+
#### H4 Heading
|
|
266
|
+
##### H5 Heading
|
|
267
|
+
###### H6 Heading
|
|
268
|
+
|
|
269
|
+
2. **Emphasis:**
|
|
270
|
+
|
|
271
|
+
*Italic text* or _Italic text_
|
|
272
|
+
|
|
273
|
+
**Bold text** or __Underline text__
|
|
274
|
+
|
|
275
|
+
***Bold and italic text*** or ___Underline and italic text___
|
|
276
|
+
|
|
277
|
+
3. **Lists:**
|
|
278
|
+
- **Unordered List:**
|
|
279
|
+
|
|
280
|
+
- Item 1
|
|
281
|
+
- Item 2
|
|
282
|
+
- Subitem 1
|
|
283
|
+
- Subitem 2
|
|
284
|
+
|
|
285
|
+
- **Ordered List:**
|
|
286
|
+
|
|
287
|
+
1. First item
|
|
288
|
+
2. Second item
|
|
289
|
+
1. Subitem 1
|
|
290
|
+
2. Subitem 2
|
|
291
|
+
|
|
292
|
+
4. **Links:**
|
|
293
|
+
|
|
294
|
+
[OpenAI](https://www.openai.com)
|
|
295
|
+
|
|
296
|
+
5. **Images:**
|
|
297
|
+
|
|
298
|
+
![Alt text for image](URL_to_image)
|
|
299
|
+
![Alt text for image](URL_to_імедж)
|
|
300
|
+
|
|
301
|
+
6. **Blockquotes:**
|
|
302
|
+
|
|
303
|
+
> This is a blockquote.
|
|
304
|
+
> It can span multiple lines.
|
|
305
|
+
|
|
306
|
+
7. **Inline Code:**
|
|
307
|
+
|
|
308
|
+
Here is some `inline code`.
|
|
309
|
+
|
|
310
|
+
8. **Code Blocks:**
|
|
311
|
+
|
|
312
|
+
```python
|
|
313
|
+
def example_function():
|
|
314
|
+
print("Hello World")
|
|
315
|
+
```
|
|
316
|
+
|
|
317
|
+
9. **Tables:**
|
|
318
|
+
|
|
319
|
+
| Header 1 | Header 2 |
|
|
320
|
+
|----------|----------|
|
|
321
|
+
| Row 1 Col 1 | Row 1 Col 2 |
|
|
322
|
+
| Row 2 Col 1 | Row 2 Col 2 |
|
|
323
|
+
|
|
324
|
+
10. **Horizontal Rule:**
|
|
325
|
+
|
|
326
|
+
---
|
|
327
|
+
"""
|
|
328
|
+
expected_output = """
|
|
329
|
+
1. <b>Headings:</b>
|
|
330
|
+
<b>H1 Heading</b>
|
|
331
|
+
<b>H2 Heading</b>
|
|
332
|
+
<b>H3 Heading</b>
|
|
333
|
+
<b>H4 Heading</b>
|
|
334
|
+
<b>H5 Heading</b>
|
|
335
|
+
<b>H6 Heading</b>
|
|
336
|
+
|
|
337
|
+
2. <b>Emphasis:</b>
|
|
338
|
+
|
|
339
|
+
<i>Italic text</i> or <i>Italic text</i>
|
|
340
|
+
|
|
341
|
+
<b>Bold text</b> or <u>Underline text</u>
|
|
342
|
+
|
|
343
|
+
<b><i>Bold and italic text</i></b> or <u><i>Underline and italic text</i></u>
|
|
344
|
+
|
|
345
|
+
3. <b>Lists:</b>
|
|
346
|
+
• <b>Unordered List:</b>
|
|
347
|
+
|
|
348
|
+
• Item 1
|
|
349
|
+
• Item 2
|
|
350
|
+
• Subitem 1
|
|
351
|
+
• Subitem 2
|
|
352
|
+
|
|
353
|
+
• <b>Ordered List:</b>
|
|
354
|
+
|
|
355
|
+
1. First item
|
|
356
|
+
2. Second item
|
|
357
|
+
1. Subitem 1
|
|
358
|
+
2. Subitem 2
|
|
359
|
+
|
|
360
|
+
4. <b>Links:</b>
|
|
361
|
+
|
|
362
|
+
<a href="https://www.openai.com">OpenAI</a>
|
|
363
|
+
|
|
364
|
+
5. <b>Images:</b>
|
|
365
|
+
|
|
366
|
+
<a href="URL_to_image">Alt text for image</a>
|
|
367
|
+
<a href="URL_to_імедж">Alt text for image</a>
|
|
368
|
+
|
|
369
|
+
6. <b>Blockquotes:</b>
|
|
370
|
+
|
|
371
|
+
<blockquote>This is a blockquote.
|
|
372
|
+
It can span multiple lines.</blockquote>
|
|
373
|
+
|
|
374
|
+
7. <b>Inline Code:</b>
|
|
375
|
+
|
|
376
|
+
Here is some <code>inline code</code>.
|
|
377
|
+
|
|
378
|
+
8. <b>Code Blocks:</b>
|
|
379
|
+
|
|
380
|
+
<pre><code class="language-python">def example_function():
|
|
381
|
+
print("Hello World")
|
|
382
|
+
</code></pre>
|
|
383
|
+
|
|
384
|
+
9. <b>Tables:</b>
|
|
385
|
+
|
|
386
|
+
| Header 1 | Header 2 |
|
|
387
|
+
|----------|----------|
|
|
388
|
+
| Row 1 Col 1 | Row 1 Col 2 |
|
|
389
|
+
| Row 2 Col 1 | Row 2 Col 2 |
|
|
390
|
+
|
|
391
|
+
10. <b>Horizontal Rule:</b>
|
|
392
|
+
|
|
393
|
+
---
|
|
394
|
+
"""
|
|
395
|
+
output = telegram_format(input_text)
|
|
396
|
+
assert (
|
|
397
|
+
output.strip() == expected_output.strip()
|
|
398
|
+
), "Failed handling large markdown example"
|
|
File without changes
|
|
File without changes
|
{chatgpt_md_converter-0.1.1 → chatgpt_md_converter-0.1.2}/chatgpt_md_converter/extractors.py
RENAMED
|
File without changes
|
|
File without changes
|
{chatgpt_md_converter-0.1.1 → chatgpt_md_converter-0.1.2}/chatgpt_md_converter.egg-info/SOURCES.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|