markdown_convert 1.2.22__tar.gz → 1.2.25__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,6 +3,8 @@ clean
3
3
  test
4
4
  test*
5
5
 
6
+ desktop.ini
7
+
6
8
  # Byte-compiled / optimized / DLL files
7
9
  __pycache__/
8
10
  *.py[cod]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: markdown_convert
3
- Version: 1.2.22
3
+ Version: 1.2.25
4
4
  Summary: Convert Markdown files to PDF from your command line.
5
5
  Project-URL: homepage, https://github.com/Julynx/markdown_convert
6
6
  Author-email: Julio Cabria <juliocabria@tutanota.com>
@@ -32,8 +32,8 @@ def main():
32
32
 
33
33
  # Get the markdown path
34
34
  try:
35
- md_path = arg["markdown_file_path"]
36
- validate_markdown_path(md_path)
35
+ markdown_path = arg["markdown_file_path"]
36
+ validate_markdown_path(markdown_path)
37
37
  except KeyError as key_err:
38
38
  raise IndexError("Missing 'markdown_file_path' argument.") from key_err
39
39
  except Exception as exc:
@@ -61,18 +61,18 @@ def main():
61
61
  try:
62
62
  output_path = arg["--out"]
63
63
  validate_output_path(output_path)
64
- output_path = get_output_path(md_path, output_path)
64
+ output_path = get_output_path(markdown_path, output_path)
65
65
  except KeyError:
66
- output_path = get_output_path(md_path, None)
66
+ output_path = get_output_path(markdown_path, None)
67
67
  except Exception as exc:
68
68
  raise IndexError(f"Invalid 'output_path' argument: {exc}") from exc
69
69
 
70
70
  # Compile the markdown file
71
- print(f"\nGenerating PDF file from '{md_path}'...\n")
71
+ print(f"\nGenerating PDF file from '{markdown_path}'...\n")
72
72
  if mode in ("once", "debug"):
73
- convert(md_path, css_path, output_path, dump_html=mode == "debug")
73
+ convert(markdown_path, css_path, output_path, dump_html=mode == "debug")
74
74
  else:
75
- live_convert(md_path, css_path, output_path)
75
+ live_convert(markdown_path, css_path, output_path)
76
76
 
77
77
  sys_exit(0)
78
78
 
@@ -14,10 +14,11 @@ OPTIONS = ("markdown_file_path", "--mode", "--css", "--out", "-h", "--help")
14
14
 
15
15
  OPTIONS_MODES = ("once", "live", "debug")
16
16
 
17
- MD_EXTENSIONS = {
17
+ MARKDOWN_EXTENSIONS = {
18
18
  "fenced-code-blocks": None,
19
19
  "header-ids": True,
20
20
  "breaks": {"on_newline": True},
21
21
  "tables": True,
22
22
  "latex": True,
23
+ "mermaid": None,
23
24
  }
@@ -4,7 +4,7 @@ Author: @julynx
4
4
  """
5
5
 
6
6
  import os
7
- import re
7
+ import secrets
8
8
  import time
9
9
  from datetime import datetime
10
10
  from pathlib import Path
@@ -12,8 +12,9 @@ from pathlib import Path
12
12
  import markdown2
13
13
  from playwright.sync_api import sync_playwright
14
14
 
15
- from .constants import MD_EXTENSIONS
15
+ from .constants import MARKDOWN_EXTENSIONS
16
16
  from .resources import get_code_css_path, get_css_path, get_output_path
17
+ from .transform import create_sections, render_mermaid_diagrams, create_html_document
17
18
  from .utils import drop_duplicates
18
19
 
19
20
 
@@ -24,6 +25,7 @@ def _generate_pdf_with_playwright(
24
25
  css_content=None,
25
26
  base_dir=None,
26
27
  dump_html=False,
28
+ nonce=None,
27
29
  ):
28
30
  """
29
31
  Generate a PDF from HTML content using Playwright.
@@ -35,27 +37,65 @@ def _generate_pdf_with_playwright(
35
37
  base_dir (Path, optional): Base directory for resolving relative paths in HTML.
36
38
  dump_html (bool, optional): Whether to dump the HTML content to a file.
37
39
  """
38
- with sync_playwright() as p:
39
- browser = p.chromium.launch(headless=True)
40
- page = browser.new_page()
40
+ # The nonce is generated by the caller and passed in; it must match the nonce on the Mermaid <script> tag
41
+
42
+ # Content Security Policy using nonce to whitelist only the Mermaid initialization script
43
+ # This prevents arbitrary JavaScript injection while allowing Mermaid to work
44
+ csp = (
45
+ "default-src 'none'; "
46
+ f"script-src 'nonce-{nonce}' https://cdn.jsdelivr.net; "
47
+ f"script-src-elem 'nonce-{nonce}' https://cdn.jsdelivr.net; "
48
+ "style-src 'unsafe-inline'; "
49
+ "img-src data: https: file:; "
50
+ "font-src data: https:; "
51
+ "connect-src https://cdn.jsdelivr.net;"
52
+ )
53
+
54
+ # Wrap HTML content with CSP and CSS
55
+ if css_content:
56
+ full_html = create_html_document(html_content, css_content, csp)
57
+ else:
58
+ full_html = html_content
59
+
60
+ with sync_playwright() as playwright:
61
+ browser = playwright.chromium.launch(
62
+ headless=True,
63
+ args=[
64
+ "--disable-dev-shm-usage",
65
+ "--disable-extensions",
66
+ "--disable-plugins",
67
+ "--disable-gpu",
68
+ "--no-first-run",
69
+ "--no-default-browser-check",
70
+ ],
71
+ )
72
+ context = browser.new_context(
73
+ java_script_enabled=True,
74
+ permissions=[],
75
+ geolocation=None,
76
+ accept_downloads=False,
77
+ )
78
+ page = context.new_page()
41
79
 
42
80
  # Handle loading based on presence of base_dir
43
81
  temp_html = None
44
82
  try:
45
83
  if base_dir:
46
84
  temp_html = base_dir / f".temp_{os.getpid()}.html"
47
- temp_html.write_text(html_content, encoding="utf-8")
48
- page.goto(temp_html.as_uri(), wait_until="networkidle")
85
+ temp_html.write_text(full_html, encoding="utf-8")
86
+ page.goto(temp_html.as_uri(), wait_until="networkidle", timeout=30000)
49
87
  else:
50
- page.set_content(html_content, wait_until="networkidle")
51
-
52
- if css_content:
53
- page.add_style_tag(content=css_content)
88
+ page.set_content(full_html, wait_until="networkidle", timeout=30000)
54
89
 
55
90
  pdf_params = {
56
91
  "format": "A4",
57
92
  "print_background": True,
58
- "margin": {"top": "20mm", "bottom": "20mm", "left": "20mm", "right": "20mm"},
93
+ "margin": {
94
+ "top": "20mm",
95
+ "bottom": "20mm",
96
+ "left": "20mm",
97
+ "right": "20mm",
98
+ },
59
99
  "path": output_path,
60
100
  } # Playwright ignores None paths
61
101
 
@@ -83,25 +123,8 @@ def _get_css_content(css_sources):
83
123
  return css_buffer
84
124
 
85
125
 
86
- def _create_sections(html):
87
- """
88
- Creates h2 sections, from the first h2 to the next h2, wrapping them in <section> tags
89
- using regular expressions.
90
- Args:
91
- html (str): HTML content.
92
- Returns:
93
- HTML content with sections wrapped in <section> tags.
94
- """
95
- pattern = re.compile(r"(<h2.*?>.*?</h2>)(.*?)(?=(<h2.*?>|$))", re.DOTALL)
96
-
97
- def wrap_section(match):
98
- return f"<section>\n{match.group(1)}\n{match.group(2)}\n</section>\n"
99
-
100
- return pattern.sub(wrap_section, html)
101
-
102
-
103
126
  def convert(
104
- md_path,
127
+ markdown_path,
105
128
  css_path=None,
106
129
  output_path=None,
107
130
  *,
@@ -112,7 +135,7 @@ def convert(
112
135
  Convert a markdown file to a pdf file.
113
136
 
114
137
  Args:
115
- md_path (str): Path to the markdown file.
138
+ markdown_path (str): Path to the markdown file.
116
139
  css_path (str=None): Path to the CSS file.
117
140
  output_path (str=None): Path to the output file.
118
141
  extend_default_css (bool=True): Extend the default CSS file.
@@ -122,7 +145,7 @@ def convert(
122
145
  css_path = get_css_path()
123
146
 
124
147
  if output_path is None:
125
- output_path = get_output_path(md_path, None)
148
+ output_path = get_output_path(markdown_path, None)
126
149
 
127
150
  if extend_default_css:
128
151
  css_sources = [get_code_css_path(), get_css_path(), css_path]
@@ -132,27 +155,32 @@ def convert(
132
155
  css_sources = drop_duplicates(css_sources)
133
156
 
134
157
  try:
135
- html = markdown2.markdown_path(md_path, extras=MD_EXTENSIONS)
136
- html = _create_sections(html)
158
+ nonce = secrets.token_urlsafe(16)
159
+ html = markdown2.markdown_path(markdown_path, extras=MARKDOWN_EXTENSIONS)
160
+ html = create_sections(html)
161
+ html = render_mermaid_diagrams(html, nonce=nonce)
137
162
 
138
163
  _generate_pdf_with_playwright(
139
164
  html,
140
165
  output_path,
141
166
  css_content=_get_css_content(css_sources),
142
- base_dir=Path(md_path).resolve().parent,
167
+ base_dir=Path(markdown_path).resolve().parent,
143
168
  dump_html=dump_html,
169
+ nonce=nonce,
144
170
  )
145
171
 
146
172
  except Exception as exc:
147
173
  raise RuntimeError(exc) from exc
148
174
 
149
175
 
150
- def live_convert(md_path, css_path=None, output_path=None, *, extend_default_css=True):
176
+ def live_convert(
177
+ markdown_path, css_path=None, output_path=None, *, extend_default_css=True
178
+ ):
151
179
  """
152
180
  Convert a markdown file to a pdf file and watch for changes.
153
181
 
154
182
  Args:
155
- md_path (str): Path to the markdown file.
183
+ markdown_path (str): Path to the markdown file.
156
184
  css_path (str=None): Path to the CSS file.
157
185
  output_path (str=None): Path to the output file.
158
186
  extend_default_css (bool=True): Extend the default CSS file.
@@ -161,10 +189,10 @@ def live_convert(md_path, css_path=None, output_path=None, *, extend_default_css
161
189
  css_path = get_css_path()
162
190
 
163
191
  if output_path is None:
164
- output_path = get_output_path(md_path, None)
192
+ output_path = get_output_path(markdown_path, None)
165
193
 
166
194
  live_converter = LiveConverter(
167
- md_path,
195
+ markdown_path,
168
196
  css_path,
169
197
  output_path,
170
198
  extend_default_css=extend_default_css,
@@ -173,12 +201,12 @@ def live_convert(md_path, css_path=None, output_path=None, *, extend_default_css
173
201
  live_converter.observe()
174
202
 
175
203
 
176
- def convert_text(md_text, css_text=None, *, extend_default_css=True):
204
+ def convert_text(markdown_text, css_text=None, *, extend_default_css=True):
177
205
  """
178
206
  Convert markdown text to a pdf file.
179
207
 
180
208
  Args:
181
- md_text (str): Markdown text.
209
+ markdown_text (str): Markdown text.
182
210
  css_text (str=None): CSS text.
183
211
  extend_default_css (bool=True): Extend the default CSS file.
184
212
 
@@ -197,13 +225,16 @@ def convert_text(md_text, css_text=None, *, extend_default_css=True):
197
225
  css_sources = [code_css, css_text]
198
226
 
199
227
  try:
200
- html = markdown2.markdown(md_text, extras=MD_EXTENSIONS)
201
- html = _create_sections(html)
228
+ nonce = secrets.token_urlsafe(16)
229
+ html = markdown2.markdown(markdown_text, extras=MARKDOWN_EXTENSIONS)
230
+ html = create_sections(html)
231
+ html = render_mermaid_diagrams(html, nonce=nonce)
202
232
 
203
233
  return _generate_pdf_with_playwright(
204
234
  html,
205
235
  None,
206
236
  css_content=_get_css_content(css_sources),
237
+ nonce=nonce,
207
238
  )
208
239
 
209
240
  except Exception as exc:
@@ -215,17 +246,25 @@ class LiveConverter:
215
246
  Class to convert a markdown file to a pdf file and watch for changes.
216
247
  """
217
248
 
218
- def __init__(self, md_path, css_path, output_path, *, extend_default_css=True, loud=False):
249
+ def __init__(
250
+ self,
251
+ markdown_path,
252
+ css_path,
253
+ output_path,
254
+ *,
255
+ extend_default_css=True,
256
+ loud=False,
257
+ ):
219
258
  """
220
259
  Initialize the LiveConverter class.
221
260
 
222
261
  Args:
223
- md_path (str): Path to the markdown file.
262
+ markdown_path (str): Path to the markdown file.
224
263
  css_path (str): Path to the CSS file.
225
264
  output_path (str): Path to the output file.
226
265
  extend_default_css (bool): Extend the default CSS file.
227
266
  """
228
- self.md_path = Path(md_path).absolute()
267
+ self.md_path = Path(markdown_path).absolute()
229
268
  self.css_path = Path(css_path).absolute()
230
269
  self.output_path = output_path
231
270
  self.extend_default_css = extend_default_css
@@ -272,14 +311,17 @@ class LiveConverter:
272
311
  try:
273
312
  while True:
274
313
 
275
- md_modified = self.get_last_modified_date(self.md_path)
314
+ markdown_modified = self.get_last_modified_date(self.md_path)
276
315
  css_modified = self.get_last_modified_date(self.css_path)
277
316
 
278
- if md_modified != self.md_last_modified or css_modified != self.css_last_modified:
317
+ if (
318
+ markdown_modified != self.md_last_modified
319
+ or css_modified != self.css_last_modified
320
+ ):
279
321
 
280
322
  self.write_pdf()
281
323
 
282
- self.md_last_modified = md_modified
324
+ self.md_last_modified = markdown_modified
283
325
  self.css_last_modified = css_modified
284
326
 
285
327
  time.sleep(poll_interval)
@@ -17,28 +17,28 @@ from .constants import BLUE, CYAN, GREEN, YELLOW, OPTIONS, OPTIONS_MODES
17
17
  from .utils import color
18
18
 
19
19
 
20
- def get_output_path(md_path, output_dir=None):
20
+ def get_output_path(markdown_path, output_dir=None):
21
21
  """
22
22
  Get the output path for the pdf file.
23
23
 
24
24
  Args:
25
- md_path (str): The path to the markdown file.
25
+ markdown_path (str): The path to the markdown file.
26
26
  output_dir (str): The output directory.
27
27
 
28
28
  Returns:
29
29
  str: The output path.
30
30
  """
31
- md_path = Path(md_path)
31
+ markdown_path = Path(markdown_path)
32
32
 
33
33
  if output_dir is None:
34
- return md_path.parent / f"{md_path.stem}.pdf"
34
+ return markdown_path.parent / f"{markdown_path.stem}.pdf"
35
35
 
36
36
  output_dir = Path(output_dir)
37
37
 
38
38
  if output_dir.suffix == ".pdf":
39
39
  return output_dir
40
40
 
41
- return output_dir.parent / f"{Path(md_path).stem}.pdf"
41
+ return output_dir.parent / f"{Path(markdown_path).stem}.pdf"
42
42
 
43
43
 
44
44
  def get_css_path():
@@ -76,11 +76,14 @@ def get_usage():
76
76
  f"{color(GREEN, 'markdown-convert')} "
77
77
  f"[{color(YELLOW, OPTIONS[0])}] [{color(BLUE, 'options')}]"
78
78
  )
79
- opt_1 = f"{color(BLUE, OPTIONS[1])}{color(CYAN, '=')}{color(CYAN, '|'.join(OPTIONS_MODES))}"
80
- opt_2 = (
79
+ option_one = (
80
+ f"{color(BLUE, OPTIONS[1])}{color(CYAN, '=')}"
81
+ f"{color(CYAN, '|'.join(OPTIONS_MODES))}"
82
+ )
83
+ option_two = (
81
84
  f"{color(BLUE, OPTIONS[2])}{color(CYAN, '=')}[{color(CYAN, 'css_file_path')}]"
82
85
  )
83
- opt_3 = f"{color(BLUE, OPTIONS[3])}{color(CYAN, '=')}[{color(CYAN, 'output_file_path')}]"
86
+ option_three = f"{color(BLUE, OPTIONS[3])}{color(CYAN, '=')}[{color(CYAN, 'output_file_path')}]"
84
87
 
85
88
  usage = (
86
89
  "\n"
@@ -88,11 +91,11 @@ def get_usage():
88
91
  f" {commd}\n"
89
92
  "\n"
90
93
  "Options:\n"
91
- f" {opt_1}\n"
94
+ f" {option_one}\n"
92
95
  " Convert the markdown file once (default) or live.\n"
93
- f" {opt_2}\n"
96
+ f" {option_two}\n"
94
97
  " Use a custom CSS file.\n"
95
- f" {opt_3}\n"
98
+ f" {option_three}\n"
96
99
  " Specify the output file path.\n"
97
100
  )
98
101
  return usage
@@ -0,0 +1,69 @@
1
+ """
2
+ Module for transforming HTML content.
3
+ """
4
+
5
+ import re
6
+
7
+
8
def create_html_document(html_content, css_content, csp):
    """
    Creates a complete HTML document with the given content, CSS, and Content Security Policy.

    Args:
        html_content (str): The HTML content to include in the body (inserted verbatim).
        css_content (str): The CSS styles to include in the head (inserted verbatim).
        csp (str): The Content Security Policy string. It is attribute-escaped
            before being placed in the <meta> tag.
    Returns:
        str: A complete HTML document as a string.
    """
    from html import escape

    # The policy is placed inside a double-quoted attribute. Escaping it keeps a
    # stray quote or angle bracket from terminating the attribute early; the HTML
    # parser decodes the entities again, so the policy value itself is unchanged.
    safe_csp = escape(str(csp), quote=True)

    return f"""<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<meta http-equiv="Content-Security-Policy" content="{safe_csp}">
<style>
{css_content}
</style>
</head>
<body>
{html_content}
</body>
</html>"""
31
+
32
+
33
def create_sections(html):
    """
    Creates h2 sections, from each h2 to the next h2 (or the end of the document),
    wrapping them in <section> tags using regular expressions.

    Content that precedes the first h2 is left untouched.

    Args:
        html (str): HTML content.
    Returns:
        str: HTML content with sections wrapped in <section> tags.
    """
    # An opening h2 tag: the name must be followed by whitespace, "/" or ">",
    # so tags that merely start with "h2" (e.g. <h2-card>) are not headings.
    h2_open = r"<h2(?=[\s/>])[^>]*>"

    # group 1: the heading itself; group 2: everything up to (not including)
    # the next h2 or the end of the input. The lookahead does not capture.
    pattern = re.compile(
        rf"({h2_open}.*?</h2>)(.*?)(?={h2_open}|$)",
        re.DOTALL,
    )

    def wrap_section(match):
        return f"<section>\n{match.group(1)}\n{match.group(2)}\n</section>\n"

    return pattern.sub(wrap_section, html)
48
+
49
+
50
def render_mermaid_diagrams(html, *, nonce):
    """
    Renders Mermaid diagrams in the HTML content.

    If the HTML contains at least one '<div class="mermaid">' element, a module
    script that loads Mermaid from the jsDelivr CDN and initializes it is
    prepended to the content. Otherwise the HTML is returned unchanged.

    Args:
        html (str): HTML content.
        nonce (str): Value placed in the script tag's nonce attribute. It is
            attribute-escaped before insertion.
    Returns:
        str: HTML content with the Mermaid loader script prepended when needed.
    """
    from html import escape

    # Nothing to render: return early without building the script.
    if '<div class="mermaid">' not in html:
        return html

    # The nonce is placed inside a double-quoted attribute, so escape it to keep
    # an unexpected quote or angle bracket from breaking out of the attribute.
    safe_nonce = escape(str(nonce), quote=True)

    mermaid_script = f"""
<script type="module" nonce="{safe_nonce}">
import mermaid from 'https://cdn.jsdelivr.net/npm/mermaid@10/dist/mermaid.esm.min.mjs';
mermaid.initialize({{ startOnLoad: true }});
</script>
"""

    return mermaid_script + html
@@ -6,21 +6,21 @@ Author: @julynx
6
6
  from pathlib import Path
7
7
 
8
8
 
9
- def validate_markdown_path(md_path):
9
+ def validate_markdown_path(markdown_path):
10
10
  """
11
11
  Validate the markdown file path.
12
12
 
13
13
  Args:
14
- md_path (str): The path to the markdown file.
14
+ markdown_path (str): The path to the markdown file.
15
15
 
16
16
  Raises:
17
17
  FileNotFoundError: If the file is not found.
18
18
  ValueError: If the file is not a Markdown file.
19
19
  """
20
- if not Path(md_path).is_file():
21
- raise FileNotFoundError(f"File not found: '{md_path}'")
20
+ if not Path(markdown_path).is_file():
21
+ raise FileNotFoundError(f"File not found: '{markdown_path}'")
22
22
 
23
- if not md_path.endswith(".md"):
23
+ if not markdown_path.endswith(".md"):
24
24
  raise ValueError("File must be a Markdown file.")
25
25
 
26
26
 
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "markdown_convert"
7
- version = "1.2.22"
7
+ version = "1.2.25"
8
8
  description = "Convert Markdown files to PDF from your command line."
9
9
  authors = [
10
10
  { name = "Julio Cabria", email = "juliocabria@tutanota.com" },
@@ -40,6 +40,7 @@ include = [
40
40
  "markdown_convert/modules/__init__.py",
41
41
  "markdown_convert/modules/constants.py",
42
42
  "markdown_convert/modules/convert.py",
43
+ "markdown_convert/modules/transform.py",
43
44
  "markdown_convert/modules/resources.py",
44
45
  "markdown_convert/modules/utils.py",
45
46
  "markdown_convert/modules/validate.py",
@@ -55,7 +56,14 @@ include = [
55
56
  "markdown_convert/modules/__init__.py",
56
57
  "markdown_convert/modules/constants.py",
57
58
  "markdown_convert/modules/convert.py",
59
+ "markdown_convert/modules/transform.py",
58
60
  "markdown_convert/modules/resources.py",
59
61
  "markdown_convert/modules/utils.py",
60
62
  "markdown_convert/modules/validate.py",
61
63
  ]
64
+
65
+ [dependency-groups]
66
+ dev = [
67
+ "black>=25.11.0",
68
+ "pylint>=3.3.9",
69
+ ]