markdown_convert 1.2.23__py3-none-any.whl → 1.2.26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -221,3 +221,9 @@ math {
221
221
  text-rendering: optimizeLegibility;
222
222
  font-family: "Latin Modern Math", "Cambria Math", serif;
223
223
  }
224
+
225
+ /* Mermaid diagrams */
226
+ div.mermaid {
227
+ display: flex;
228
+ justify-content: center;
229
+ }
@@ -20,4 +20,5 @@ MARKDOWN_EXTENSIONS = {
20
20
  "breaks": {"on_newline": True},
21
21
  "tables": True,
22
22
  "latex": True,
23
+ "mermaid": None,
23
24
  }
@@ -4,7 +4,7 @@ Author: @julynx
4
4
  """
5
5
 
6
6
  import os
7
- import re
7
+ import secrets
8
8
  import time
9
9
  from datetime import datetime
10
10
  from pathlib import Path
@@ -14,6 +14,7 @@ from playwright.sync_api import sync_playwright
14
14
 
15
15
  from .constants import MARKDOWN_EXTENSIONS
16
16
  from .resources import get_code_css_path, get_css_path, get_output_path
17
+ from .transform import create_sections, render_mermaid_diagrams, create_html_document
17
18
  from .utils import drop_duplicates
18
19
 
19
20
 
@@ -24,6 +25,7 @@ def _generate_pdf_with_playwright(
24
25
  css_content=None,
25
26
  base_dir=None,
26
27
  dump_html=False,
28
+ nonce=None,
27
29
  ):
28
30
  """
29
31
  Generate a PDF from HTML content using Playwright.
@@ -35,22 +37,55 @@ def _generate_pdf_with_playwright(
35
37
  base_dir (Path, optional): Base directory for resolving relative paths in HTML.
36
38
  dump_html (bool, optional): Whether to dump the HTML content to a file.
37
39
  """
40
+ # The nonce is supplied by the caller (generated once per conversion) so the same value is used here and on the Mermaid script tag
41
+
42
+ # Content Security Policy using nonce to whitelist only the Mermaid initialization script
43
+ # This prevents arbitrary JavaScript injection while allowing Mermaid to work
44
+ csp = (
45
+ "default-src 'none'; "
46
+ f"script-src 'nonce-{nonce}' https://cdn.jsdelivr.net; "
47
+ f"script-src-elem 'nonce-{nonce}' https://cdn.jsdelivr.net; "
48
+ "style-src 'unsafe-inline'; "
49
+ "img-src data: https: file:; "
50
+ "font-src data: https:; "
51
+ "connect-src https://cdn.jsdelivr.net;"
52
+ )
53
+
54
+ # Wrap HTML content with CSP and CSS
55
+ if css_content:
56
+ full_html = create_html_document(html_content, css_content, csp)
57
+ else:
58
+ full_html = html_content
59
+
38
60
  with sync_playwright() as playwright:
39
- browser = playwright.chromium.launch(headless=True)
40
- page = browser.new_page()
61
+ browser = playwright.chromium.launch(
62
+ headless=True,
63
+ args=[
64
+ "--disable-dev-shm-usage",
65
+ "--disable-extensions",
66
+ "--disable-plugins",
67
+ "--disable-gpu",
68
+ "--no-first-run",
69
+ "--no-default-browser-check",
70
+ ],
71
+ )
72
+ context = browser.new_context(
73
+ java_script_enabled=True,
74
+ permissions=[],
75
+ geolocation=None,
76
+ accept_downloads=False,
77
+ )
78
+ page = context.new_page()
41
79
 
42
80
  # Handle loading based on presence of base_dir
43
81
  temp_html = None
44
82
  try:
45
83
  if base_dir:
46
84
  temp_html = base_dir / f".temp_{os.getpid()}.html"
47
- temp_html.write_text(html_content, encoding="utf-8")
48
- page.goto(temp_html.as_uri(), wait_until="networkidle")
85
+ temp_html.write_text(full_html, encoding="utf-8")
86
+ page.goto(temp_html.as_uri(), wait_until="networkidle", timeout=30000)
49
87
  else:
50
- page.set_content(html_content, wait_until="networkidle")
51
-
52
- if css_content:
53
- page.add_style_tag(content=css_content)
88
+ page.set_content(full_html, wait_until="networkidle", timeout=30000)
54
89
 
55
90
  pdf_params = {
56
91
  "format": "A4",
@@ -88,23 +123,6 @@ def _get_css_content(css_sources):
88
123
  return css_buffer
89
124
 
90
125
 
91
- def _create_sections(html):
92
- """
93
- Creates h2 sections, from the first h2 to the next h2, wrapping them in <section> tags
94
- using regular expressions.
95
- Args:
96
- html (str): HTML content.
97
- Returns:
98
- HTML content with sections wrapped in <section> tags.
99
- """
100
- pattern = re.compile(r"(<h2.*?>.*?</h2>)(.*?)(?=(<h2.*?>|$))", re.DOTALL)
101
-
102
- def wrap_section(match):
103
- return f"<section>\n{match.group(1)}\n{match.group(2)}\n</section>\n"
104
-
105
- return pattern.sub(wrap_section, html)
106
-
107
-
108
126
  def convert(
109
127
  markdown_path,
110
128
  css_path=None,
@@ -137,8 +155,10 @@ def convert(
137
155
  css_sources = drop_duplicates(css_sources)
138
156
 
139
157
  try:
158
+ nonce = secrets.token_urlsafe(16)
140
159
  html = markdown2.markdown_path(markdown_path, extras=MARKDOWN_EXTENSIONS)
141
- html = _create_sections(html)
160
+ html = create_sections(html)
161
+ html = render_mermaid_diagrams(html, nonce=nonce)
142
162
 
143
163
  _generate_pdf_with_playwright(
144
164
  html,
@@ -146,6 +166,7 @@ def convert(
146
166
  css_content=_get_css_content(css_sources),
147
167
  base_dir=Path(markdown_path).resolve().parent,
148
168
  dump_html=dump_html,
169
+ nonce=nonce,
149
170
  )
150
171
 
151
172
  except Exception as exc:
@@ -204,13 +225,16 @@ def convert_text(markdown_text, css_text=None, *, extend_default_css=True):
204
225
  css_sources = [code_css, css_text]
205
226
 
206
227
  try:
228
+ nonce = secrets.token_urlsafe(16)
207
229
  html = markdown2.markdown(markdown_text, extras=MARKDOWN_EXTENSIONS)
208
- html = _create_sections(html)
230
+ html = create_sections(html)
231
+ html = render_mermaid_diagrams(html, nonce=nonce)
209
232
 
210
233
  return _generate_pdf_with_playwright(
211
234
  html,
212
235
  None,
213
236
  css_content=_get_css_content(css_sources),
237
+ nonce=nonce,
214
238
  )
215
239
 
216
240
  except Exception as exc:
@@ -0,0 +1,69 @@
1
+ """
2
+ Module for transforming HTML content.
3
+ """
4
+
5
+ import re
6
+
7
+
8
def create_html_document(html_content, css_content, csp):
    """
    Creates a complete HTML document with the given content, CSS, and Content Security Policy.

    The policy is emitted through a ``<meta http-equiv>`` tag, i.e. inside a double-quoted
    attribute. It is therefore escaped so that a stray ``"``, ``<``, ``>`` or ``&`` in the
    policy cannot terminate the attribute and inject markup. Single quotes are left untouched:
    CSP keywords such as ``'none'`` need them and they cannot close a double-quoted attribute.

    Args:
        html_content (str): The HTML content to include in the body (inserted verbatim).
        css_content (str): The CSS styles to include in the head (inserted verbatim).
        csp (str): The Content Security Policy string.
    Returns:
        str: A complete HTML document as a string.
    """
    # Function-scope import so the block does not depend on the module's import list.
    from html import escape

    # quote=False handles &, <, >; the double quote is escaped separately so that
    # single quotes in the policy stay readable in dumped HTML.
    safe_csp = escape(csp, quote=False).replace('"', "&quot;")

    return f"""<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<meta http-equiv="Content-Security-Policy" content="{safe_csp}">
<style>
{css_content}
</style>
</head>
<body>
{html_content}
</body>
</html>"""
31
+
32
+
33
def create_sections(html):
    """
    Wraps every h2 heading and the content that follows it in a <section> element.

    A section starts at an <h2> tag and runs up to (but not including) the next <h2>
    tag or the end of the input. Anything before the first <h2> is left as it is.

    Args:
        html (str): HTML content.
    Returns:
        HTML content with sections wrapped in <section> tags.
    """
    heading_and_body = re.compile(
        r"(<h2.*?>.*?</h2>)(.*?)(?=(<h2.*?>|$))",
        re.DOTALL,
    )
    # Group 1 is the heading, group 2 the (possibly empty) content after it.
    section_template = "<section>\n\\g<1>\n\\g<2>\n</section>\n"
    return heading_and_body.sub(section_template, html)
48
+
49
+
50
def render_mermaid_diagrams(html, *, nonce):
    """
    Prepends the Mermaid loader script to the HTML when it contains Mermaid diagrams.

    The function does not draw anything itself: it only adds a ``<script type="module">``
    that imports Mermaid and calls ``mermaid.initialize`` with ``startOnLoad`` enabled.
    The script is added only if the literal marker ``<div class="mermaid">`` is present;
    otherwise the input is returned unchanged.

    Args:
        html (str): HTML content.
        nonce (str): Value written to the script tag's ``nonce`` attribute (keyword-only).
            It is HTML-escaped before insertion so it cannot break out of the attribute.
    Returns:
        str: The HTML content, prefixed with the Mermaid script when diagrams are present.
    """
    if '<div class="mermaid">' not in html:
        return html

    # Imported under an alias at function scope: the parameter is itself called ``html``,
    # so a plain ``import html`` would be shadowed.
    from html import escape as escape_attribute

    safe_nonce = escape_attribute(str(nonce), quote=True)
    mermaid_script = f"""
<script type="module" nonce="{safe_nonce}">
import mermaid from 'https://cdn.jsdelivr.net/npm/mermaid@10/dist/mermaid.esm.min.mjs';
mermaid.initialize({{ startOnLoad: true }});
</script>
"""
    return mermaid_script + html
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: markdown_convert
3
- Version: 1.2.23
3
+ Version: 1.2.26
4
4
  Summary: Convert Markdown files to PDF from your command line.
5
5
  Project-URL: homepage, https://github.com/Julynx/markdown_convert
6
6
  Author-email: Julio Cabria <juliocabria@tutanota.com>
@@ -50,6 +50,7 @@ Unlike other similar tools, `markdown-convert`:
50
50
  - Comes with a sensible default CSS stylesheet out of the box.
51
51
  - Supports:
52
52
  - **LaTeX math equations:** `$...$` for inline and `$$...$$` for block equations.
53
+ - **Mermaid diagrams:** ` ```mermaid ...``` ` code blocks get rendered as diagrams.
53
54
  - **Syntax highlighting for code blocks:** Applied automatically based on the specified language.
54
55
  - **Live conversion:** `markdown-convert file.md --mode=live` updates the PDF every time the Markdown file changes.
55
56
  - **Custom CSS** `markdown-convert file.md --css=style.css` extends the default CSS with your own stylesheet.
@@ -1,15 +1,16 @@
1
1
  markdown_convert/__init__.py,sha256=ysW3pXsDGGK4PzZHcIBTpfVW58IkDUwHffDkf_GM6UU,303
2
2
  markdown_convert/__main__.py,sha256=hO7AO0GnzPMPNqls8r5aF2C-7l9aFHDf1m8mXSy1GBE,2809
3
3
  markdown_convert/code.css,sha256=Wt4FqFqJcpT-jwY3GN-o4ZRCCXU8DQj-9lqKdGiuoyw,4935
4
- markdown_convert/default.css,sha256=sTIJmfRWOe8SVwlhzw9CSXD44TUtgfjKCHJjf6u2O8U,2877
4
+ markdown_convert/default.css,sha256=ycMjgZXTjuD8fsQGHDtSRUBdWk_bMpehz_-bvGhyKhA,2965
5
5
  markdown_convert/modules/__init__.py,sha256=PFPgiQhMXgyfjD8BkfLC_X8AR1jz-dCxfif2qmNofJs,65
6
- markdown_convert/modules/constants.py,sha256=Pdm-yoTuvUQbqWiufiydTeuA5ysSF_ZuAafRBrnXPt8,454
7
- markdown_convert/modules/convert.py,sha256=MPr6CM-KNtfHEnPv0sVegOd26mlmG_v49C55MYXjW_M,8718
6
+ markdown_convert/modules/constants.py,sha256=3pHREdede6XhR-Jlp_SSm1hg0CtJNApdL-T7Rs-G_Bk,475
7
+ markdown_convert/modules/convert.py,sha256=a3WfnWxATHqqLsMG3WUYmfdGIF3-ZFifUqDCM044WV4,9725
8
8
  markdown_convert/modules/resources.py,sha256=tnW8JmCrJNBRbzOcaOVG6GX5jPC8Kzj3dA7gX0B935A,2488
9
+ markdown_convert/modules/transform.py,sha256=Z_htEa4D9xoFJwoXl-ik8UXumfkc14wQrTcIC6QQ2dw,1756
9
10
  markdown_convert/modules/utils.py,sha256=NX0WegM8e8MPKNNmweTujAWO8ZghdB8LSGDx20K2E44,655
10
11
  markdown_convert/modules/validate.py,sha256=XV_k7cHeifEKDaltF26tCmabs2-Me5msP3enI_eVwfA,1517
11
- markdown_convert-1.2.23.dist-info/METADATA,sha256=llxbzt0ME2aa4n_mucnYoLgFyxkWytJlrnhMIRjLOwg,3797
12
- markdown_convert-1.2.23.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
13
- markdown_convert-1.2.23.dist-info/entry_points.txt,sha256=RCmzC7C0sX-SpzIP2Cr34rhg3lMd7BRx-exaZPfK8bU,68
14
- markdown_convert-1.2.23.dist-info/licenses/LICENSE,sha256=gXf5dRMhNSbfLPYYTY_5hsZ1r7UU1OaKQEAQUhuIBkM,18092
15
- markdown_convert-1.2.23.dist-info/RECORD,,
12
+ markdown_convert-1.2.26.dist-info/METADATA,sha256=lC5zl2KUuQavkQjL2Q6VmKjN3yTKsYgfbTSFI8G9xYE,3883
13
+ markdown_convert-1.2.26.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
14
+ markdown_convert-1.2.26.dist-info/entry_points.txt,sha256=RCmzC7C0sX-SpzIP2Cr34rhg3lMd7BRx-exaZPfK8bU,68
15
+ markdown_convert-1.2.26.dist-info/licenses/LICENSE,sha256=gXf5dRMhNSbfLPYYTY_5hsZ1r7UU1OaKQEAQUhuIBkM,18092
16
+ markdown_convert-1.2.26.dist-info/RECORD,,