markdown_convert 1.2.23__py3-none-any.whl → 1.2.26__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- markdown_convert/default.css +6 -0
- markdown_convert/modules/constants.py +1 -0
- markdown_convert/modules/convert.py +52 -28
- markdown_convert/modules/transform.py +69 -0
- {markdown_convert-1.2.23.dist-info → markdown_convert-1.2.26.dist-info}/METADATA +2 -1
- {markdown_convert-1.2.23.dist-info → markdown_convert-1.2.26.dist-info}/RECORD +9 -8
- {markdown_convert-1.2.23.dist-info → markdown_convert-1.2.26.dist-info}/WHEEL +0 -0
- {markdown_convert-1.2.23.dist-info → markdown_convert-1.2.26.dist-info}/entry_points.txt +0 -0
- {markdown_convert-1.2.23.dist-info → markdown_convert-1.2.26.dist-info}/licenses/LICENSE +0 -0
markdown_convert/default.css
CHANGED
|
@@ -4,7 +4,7 @@ Author: @julynx
|
|
|
4
4
|
"""
|
|
5
5
|
|
|
6
6
|
import os
|
|
7
|
-
import re
|
|
7
|
+
import secrets
|
|
8
8
|
import time
|
|
9
9
|
from datetime import datetime
|
|
10
10
|
from pathlib import Path
|
|
@@ -14,6 +14,7 @@ from playwright.sync_api import sync_playwright
|
|
|
14
14
|
|
|
15
15
|
from .constants import MARKDOWN_EXTENSIONS
|
|
16
16
|
from .resources import get_code_css_path, get_css_path, get_output_path
|
|
17
|
+
from .transform import create_sections, render_mermaid_diagrams, create_html_document
|
|
17
18
|
from .utils import drop_duplicates
|
|
18
19
|
|
|
19
20
|
|
|
@@ -24,6 +25,7 @@ def _generate_pdf_with_playwright(
|
|
|
24
25
|
css_content=None,
|
|
25
26
|
base_dir=None,
|
|
26
27
|
dump_html=False,
|
|
28
|
+
nonce=None,
|
|
27
29
|
):
|
|
28
30
|
"""
|
|
29
31
|
Generate a PDF from HTML content using Playwright.
|
|
@@ -35,22 +37,55 @@ def _generate_pdf_with_playwright(
|
|
|
35
37
|
base_dir (Path, optional): Base directory for resolving relative paths in HTML.
|
|
36
38
|
dump_html (bool, optional): Whether to dump the HTML content to a file.
|
|
37
39
|
"""
|
|
40
|
+
# Generate a cryptographic nonce for the Mermaid script
|
|
41
|
+
|
|
42
|
+
# Content Security Policy using nonce to whitelist only the Mermaid initialization script
|
|
43
|
+
# This prevents arbitrary JavaScript injection while allowing Mermaid to work
|
|
44
|
+
csp = (
|
|
45
|
+
"default-src 'none'; "
|
|
46
|
+
f"script-src 'nonce-{nonce}' https://cdn.jsdelivr.net; "
|
|
47
|
+
f"script-src-elem 'nonce-{nonce}' https://cdn.jsdelivr.net; "
|
|
48
|
+
"style-src 'unsafe-inline'; "
|
|
49
|
+
"img-src data: https: file:; "
|
|
50
|
+
"font-src data: https:; "
|
|
51
|
+
"connect-src https://cdn.jsdelivr.net;"
|
|
52
|
+
)
|
|
53
|
+
|
|
54
|
+
# Wrap HTML content with CSP and CSS
|
|
55
|
+
if css_content:
|
|
56
|
+
full_html = create_html_document(html_content, css_content, csp)
|
|
57
|
+
else:
|
|
58
|
+
full_html = html_content
|
|
59
|
+
|
|
38
60
|
with sync_playwright() as playwright:
|
|
39
|
-
browser = playwright.chromium.launch(
|
|
40
|
-
|
|
61
|
+
browser = playwright.chromium.launch(
|
|
62
|
+
headless=True,
|
|
63
|
+
args=[
|
|
64
|
+
"--disable-dev-shm-usage",
|
|
65
|
+
"--disable-extensions",
|
|
66
|
+
"--disable-plugins",
|
|
67
|
+
"--disable-gpu",
|
|
68
|
+
"--no-first-run",
|
|
69
|
+
"--no-default-browser-check",
|
|
70
|
+
],
|
|
71
|
+
)
|
|
72
|
+
context = browser.new_context(
|
|
73
|
+
java_script_enabled=True,
|
|
74
|
+
permissions=[],
|
|
75
|
+
geolocation=None,
|
|
76
|
+
accept_downloads=False,
|
|
77
|
+
)
|
|
78
|
+
page = context.new_page()
|
|
41
79
|
|
|
42
80
|
# Handle loading based on presence of base_dir
|
|
43
81
|
temp_html = None
|
|
44
82
|
try:
|
|
45
83
|
if base_dir:
|
|
46
84
|
temp_html = base_dir / f".temp_{os.getpid()}.html"
|
|
47
|
-
temp_html.write_text(
|
|
48
|
-
page.goto(temp_html.as_uri(), wait_until="networkidle")
|
|
85
|
+
temp_html.write_text(full_html, encoding="utf-8")
|
|
86
|
+
page.goto(temp_html.as_uri(), wait_until="networkidle", timeout=30000)
|
|
49
87
|
else:
|
|
50
|
-
page.set_content(
|
|
51
|
-
|
|
52
|
-
if css_content:
|
|
53
|
-
page.add_style_tag(content=css_content)
|
|
88
|
+
page.set_content(full_html, wait_until="networkidle", timeout=30000)
|
|
54
89
|
|
|
55
90
|
pdf_params = {
|
|
56
91
|
"format": "A4",
|
|
@@ -88,23 +123,6 @@ def _get_css_content(css_sources):
|
|
|
88
123
|
return css_buffer
|
|
89
124
|
|
|
90
125
|
|
|
91
|
-
def _create_sections(html):
|
|
92
|
-
"""
|
|
93
|
-
Creates h2 sections, from the first h2 to the next h2, wrapping them in <section> tags
|
|
94
|
-
using regular expressions.
|
|
95
|
-
Args:
|
|
96
|
-
html (str): HTML content.
|
|
97
|
-
Returns:
|
|
98
|
-
HTML content with sections wrapped in <section> tags.
|
|
99
|
-
"""
|
|
100
|
-
pattern = re.compile(r"(<h2.*?>.*?</h2>)(.*?)(?=(<h2.*?>|$))", re.DOTALL)
|
|
101
|
-
|
|
102
|
-
def wrap_section(match):
|
|
103
|
-
return f"<section>\n{match.group(1)}\n{match.group(2)}\n</section>\n"
|
|
104
|
-
|
|
105
|
-
return pattern.sub(wrap_section, html)
|
|
106
|
-
|
|
107
|
-
|
|
108
126
|
def convert(
|
|
109
127
|
markdown_path,
|
|
110
128
|
css_path=None,
|
|
@@ -137,8 +155,10 @@ def convert(
|
|
|
137
155
|
css_sources = drop_duplicates(css_sources)
|
|
138
156
|
|
|
139
157
|
try:
|
|
158
|
+
nonce = secrets.token_urlsafe(16)
|
|
140
159
|
html = markdown2.markdown_path(markdown_path, extras=MARKDOWN_EXTENSIONS)
|
|
141
|
-
html = _create_sections(html)
|
|
160
|
+
html = create_sections(html)
|
|
161
|
+
html = render_mermaid_diagrams(html, nonce=nonce)
|
|
142
162
|
|
|
143
163
|
_generate_pdf_with_playwright(
|
|
144
164
|
html,
|
|
@@ -146,6 +166,7 @@ def convert(
|
|
|
146
166
|
css_content=_get_css_content(css_sources),
|
|
147
167
|
base_dir=Path(markdown_path).resolve().parent,
|
|
148
168
|
dump_html=dump_html,
|
|
169
|
+
nonce=nonce,
|
|
149
170
|
)
|
|
150
171
|
|
|
151
172
|
except Exception as exc:
|
|
@@ -204,13 +225,16 @@ def convert_text(markdown_text, css_text=None, *, extend_default_css=True):
|
|
|
204
225
|
css_sources = [code_css, css_text]
|
|
205
226
|
|
|
206
227
|
try:
|
|
228
|
+
nonce = secrets.token_urlsafe(16)
|
|
207
229
|
html = markdown2.markdown(markdown_text, extras=MARKDOWN_EXTENSIONS)
|
|
208
|
-
html = _create_sections(html)
|
|
230
|
+
html = create_sections(html)
|
|
231
|
+
html = render_mermaid_diagrams(html, nonce=nonce)
|
|
209
232
|
|
|
210
233
|
return _generate_pdf_with_playwright(
|
|
211
234
|
html,
|
|
212
235
|
None,
|
|
213
236
|
css_content=_get_css_content(css_sources),
|
|
237
|
+
nonce=nonce,
|
|
214
238
|
)
|
|
215
239
|
|
|
216
240
|
except Exception as exc:
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Module for transforming HTML content.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import re
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def create_html_document(html_content, css_content, csp):
    """
    Creates a complete HTML document with the given content, CSS, and Content Security Policy.

    The policy is written into the ``content`` attribute of a
    ``<meta http-equiv="Content-Security-Policy">`` tag and the CSS into a
    ``<style>`` element, so both are neutralised for the context they land in.
    The body HTML is inserted verbatim.

    Args:
        html_content (str): The HTML content to include in the body.
        css_content (str): The CSS styles to include in the head. ``None`` or an
            empty string produces an empty ``<style>`` element.
        csp (str): The Content Security Policy string.
    Returns:
        str: A complete HTML document as a string.
    """
    # Inside a double-quoted attribute only '&' and '"' are significant. Single
    # quotes (used by CSP keywords such as 'none') are deliberately left alone
    # so ordinary policies are emitted unchanged.
    safe_csp = str(csp).replace("&", "&amp;").replace('"', "&quot;")

    # A literal "</style" ends the element regardless of CSS syntax, which would
    # let a stylesheet inject markup into the page. "<\/style" is the same text
    # to a CSS parser (backslash-escaped '/') but is not an end tag to HTML.
    safe_css = re.sub(
        r"</(?=style)",
        lambda _match: "<\\/",
        css_content or "",
        flags=re.IGNORECASE,
    )

    document_lines = [
        "<!DOCTYPE html>",
        "<html>",
        "<head>",
        '<meta charset="UTF-8">',
        f'<meta http-equiv="Content-Security-Policy" content="{safe_csp}">',
        "<style>",
        safe_css,
        "</style>",
        "</head>",
        "<body>",
        f"{html_content}",
        "</body>",
        "</html>",
    ]
    return "\n".join(document_lines)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def create_sections(html):
    """
    Creates h2 sections, from the first h2 to the next h2, wrapping them in <section> tags
    using regular expressions.

    Each section starts at an ``<h2>`` element and runs up to (not including)
    the next ``<h2>`` opening tag, or to the end of the input. Content before
    the first ``<h2>`` is left untouched, as is input without any ``<h2>``.

    Args:
        html (str): HTML content.
    Returns:
        HTML content with sections wrapped in <section> tags.
    """
    # An opening h2 tag. The lookahead after "h2" requires the tag name to end
    # there (whitespace, "/" or ">"), so tags that merely start with "h2", such
    # as a custom <h2-note> element, are not mistaken for headings.
    h2_open = r"<h2(?=[\s/>])[^>]*>"

    # group 1: the heading element itself; group 2: everything after it up to
    # the next heading or the end of the document (DOTALL lets "." span lines).
    pattern = re.compile(
        rf"({h2_open}.*?</h2>)(.*?)(?={h2_open}|$)",
        re.DOTALL,
    )

    def wrap_section(match):
        return f"<section>\n{match.group(1)}\n{match.group(2)}\n</section>\n"

    return pattern.sub(wrap_section, html)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def render_mermaid_diagrams(html, *, nonce):
    """
    Adds the Mermaid loader script to HTML that contains Mermaid diagram containers.

    Args:
        html (str): HTML content.
        nonce (str): Value written into the ``nonce`` attribute of the inserted
            ``<script>`` tag.
    Returns:
        str: The input unchanged when it contains no ``<div class="mermaid">``;
            otherwise the input prefixed with a module script that imports
            Mermaid from the jsDelivr CDN and initialises it with
            ``startOnLoad: true``.
    """
    # Nothing to render: leave the document untouched.
    if '<div class="mermaid">' not in html:
        return html

    # The empty first and last entries reproduce the leading and trailing
    # newline around the script block.
    loader_lines = [
        "",
        f'<script type="module" nonce="{nonce}">',
        "import mermaid from 'https://cdn.jsdelivr.net/npm/mermaid@10/dist/mermaid.esm.min.mjs';",
        "mermaid.initialize({ startOnLoad: true });",
        "</script>",
        "",
    ]
    return "\n".join(loader_lines) + html
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: markdown_convert
|
|
3
|
-
Version: 1.2.23
|
|
3
|
+
Version: 1.2.26
|
|
4
4
|
Summary: Convert Markdown files to PDF from your command line.
|
|
5
5
|
Project-URL: homepage, https://github.com/Julynx/markdown_convert
|
|
6
6
|
Author-email: Julio Cabria <juliocabria@tutanota.com>
|
|
@@ -50,6 +50,7 @@ Unlike other similar tools, `markdown-convert`:
|
|
|
50
50
|
- Comes with a sensible default CSS stylesheet out of the box.
|
|
51
51
|
- Supports:
|
|
52
52
|
- **LaTeX math equations:** `$...$` for inline and `$$...$$` for block equations.
|
|
53
|
+
- **Mermaid diagrams:** ` ```mermaid ...``` ` code blocks get rendered as diagrams.
|
|
53
54
|
- **Syntax highlighting for code blocks:** Applied automatically based on the specified language.
|
|
54
55
|
- **Live conversion:** `markdown-convert file.md --mode=live` updates the PDF every time the Markdown file changes.
|
|
55
56
|
- **Custom CSS** `markdown-convert file.md --css=style.css` extends the default CSS with your own stylesheet.
|
|
@@ -1,15 +1,16 @@
|
|
|
1
1
|
markdown_convert/__init__.py,sha256=ysW3pXsDGGK4PzZHcIBTpfVW58IkDUwHffDkf_GM6UU,303
|
|
2
2
|
markdown_convert/__main__.py,sha256=hO7AO0GnzPMPNqls8r5aF2C-7l9aFHDf1m8mXSy1GBE,2809
|
|
3
3
|
markdown_convert/code.css,sha256=Wt4FqFqJcpT-jwY3GN-o4ZRCCXU8DQj-9lqKdGiuoyw,4935
|
|
4
|
-
markdown_convert/default.css,sha256=
|
|
4
|
+
markdown_convert/default.css,sha256=ycMjgZXTjuD8fsQGHDtSRUBdWk_bMpehz_-bvGhyKhA,2965
|
|
5
5
|
markdown_convert/modules/__init__.py,sha256=PFPgiQhMXgyfjD8BkfLC_X8AR1jz-dCxfif2qmNofJs,65
|
|
6
|
-
markdown_convert/modules/constants.py,sha256=
|
|
7
|
-
markdown_convert/modules/convert.py,sha256=
|
|
6
|
+
markdown_convert/modules/constants.py,sha256=3pHREdede6XhR-Jlp_SSm1hg0CtJNApdL-T7Rs-G_Bk,475
|
|
7
|
+
markdown_convert/modules/convert.py,sha256=a3WfnWxATHqqLsMG3WUYmfdGIF3-ZFifUqDCM044WV4,9725
|
|
8
8
|
markdown_convert/modules/resources.py,sha256=tnW8JmCrJNBRbzOcaOVG6GX5jPC8Kzj3dA7gX0B935A,2488
|
|
9
|
+
markdown_convert/modules/transform.py,sha256=Z_htEa4D9xoFJwoXl-ik8UXumfkc14wQrTcIC6QQ2dw,1756
|
|
9
10
|
markdown_convert/modules/utils.py,sha256=NX0WegM8e8MPKNNmweTujAWO8ZghdB8LSGDx20K2E44,655
|
|
10
11
|
markdown_convert/modules/validate.py,sha256=XV_k7cHeifEKDaltF26tCmabs2-Me5msP3enI_eVwfA,1517
|
|
11
|
-
markdown_convert-1.2.
|
|
12
|
-
markdown_convert-1.2.
|
|
13
|
-
markdown_convert-1.2.
|
|
14
|
-
markdown_convert-1.2.
|
|
15
|
-
markdown_convert-1.2.
|
|
12
|
+
markdown_convert-1.2.26.dist-info/METADATA,sha256=lC5zl2KUuQavkQjL2Q6VmKjN3yTKsYgfbTSFI8G9xYE,3883
|
|
13
|
+
markdown_convert-1.2.26.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
14
|
+
markdown_convert-1.2.26.dist-info/entry_points.txt,sha256=RCmzC7C0sX-SpzIP2Cr34rhg3lMd7BRx-exaZPfK8bU,68
|
|
15
|
+
markdown_convert-1.2.26.dist-info/licenses/LICENSE,sha256=gXf5dRMhNSbfLPYYTY_5hsZ1r7UU1OaKQEAQUhuIBkM,18092
|
|
16
|
+
markdown_convert-1.2.26.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|