markdown_convert 1.2.16__py3-none-any.whl → 1.2.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,247 +1,275 @@
1
- """
2
- Module to convert a markdown file to a pdf file.
3
- Author: @julynx
4
- """
5
-
6
- import os
7
- import sys
8
- import time
9
- import warnings
10
- from contextlib import redirect_stderr, redirect_stdout
11
- from datetime import datetime
12
- from io import StringIO
13
- from pathlib import Path
14
-
15
- import markdown2
16
- import weasyprint
17
-
18
- from .resources import get_css_path, get_code_css_path, get_output_path
19
- from .utils import drop_duplicates
20
- from .constants import MD_EXTENSIONS
21
-
22
-
23
def _suppress_warnings():
    """
    Silence the common non-critical warning categories.

    Registers an "ignore" filter for each category listed below. Exceptions
    are unaffected; only warnings of these categories are hidden.
    """
    ignored_categories = (
        UserWarning,
        DeprecationWarning,
        FutureWarning,
        PendingDeprecationWarning,
        ImportWarning,
        ResourceWarning,
    )

    # Filters are registered one category at a time, in the order above.
    for category in ignored_categories:
        warnings.filterwarnings("ignore", category=category)
35
-
36
-
37
def _silent_pdf_generation(func, *args, **kwargs):
    """
    Run a PDF generation callable while hiding its non-critical output.

    Calls _suppress_warnings(), then runs ``func`` with stdout and stderr
    redirected to in-memory buffers. Captured stdout is discarded. Captured
    stderr is echoed to the real stderr only when it mentions one of the
    critical keywords below. Exceptions raised by ``func`` propagate unchanged.

    Args:
        func (callable): Function that performs the PDF generation.
        *args: Positional arguments forwarded to ``func``.
        **kwargs: Keyword arguments forwarded to ``func``.

    Returns:
        Whatever ``func`` returns.
    """
    _suppress_warnings()

    critical_keywords = ("error", "exception", "traceback", "failed")
    captured_stdout = StringIO()
    captured_stderr = StringIO()

    # No try/except wrapper: catching an exception only to re-raise it adds
    # nothing but an extra traceback frame.
    with redirect_stdout(captured_stdout), redirect_stderr(captured_stderr):
        result = func(*args, **kwargs)

    stderr_content = captured_stderr.getvalue()
    if stderr_content:
        # Lower-case once instead of once per keyword.
        lowered = stderr_content.lower()
        if any(keyword in lowered for keyword in critical_keywords):
            # Print only critical errors, not warnings
            print(stderr_content, file=sys.stderr)

    return result
66
-
67
-
68
def convert(md_path, css_path=None, output_path=None, *, extend_default_css=True):
    """
    Render a markdown file as a pdf file.

    Args:
        md_path (str): Path to the markdown file.
        css_path (str=None): Path to the CSS file. Falls back to the path
            returned by get_css_path().
        output_path (str=None): Path to the output file. Falls back to the
            path returned by get_output_path(md_path, None).
        extend_default_css (bool=True): Also apply the default CSS file.

    Raises:
        RuntimeError: Wraps any exception raised while converting.
    """
    if css_path is None:
        css_path = get_css_path()

    if output_path is None:
        output_path = get_output_path(md_path, None)

    # Code CSS first, then (optionally) the default CSS, then the chosen CSS.
    stylesheets = [get_code_css_path()]
    if extend_default_css:
        stylesheets.append(get_css_path())
    stylesheets.append(css_path)
    stylesheets = drop_duplicates(stylesheets)

    def _render(html):
        document = weasyprint.HTML(string=html, base_url=".")
        return document.write_pdf(target=output_path, stylesheets=list(stylesheets))

    try:
        markup = markdown2.markdown_path(md_path, extras=MD_EXTENSIONS)

        # Rendering goes through the silent wrapper to suppress warnings
        _silent_pdf_generation(_render, markup)

    except Exception as exc:
        raise RuntimeError(exc) from exc
103
-
104
-
105
def live_convert(md_path, css_path=None, output_path=None, *, extend_default_css=True):
    """
    Convert a markdown file to a pdf file, then keep watching it for changes.

    Args:
        md_path (str): Path to the markdown file.
        css_path (str=None): Path to the CSS file. Falls back to the path
            returned by get_css_path().
        output_path (str=None): Path to the output file. Falls back to the
            path returned by get_output_path(md_path, None).
        extend_default_css (bool=True): Extend the default CSS file.
    """
    resolved_css = get_css_path() if css_path is None else css_path
    resolved_output = (
        get_output_path(md_path, None) if output_path is None else output_path
    )

    # loud=True: the watcher prints a status line after every conversion.
    watcher = LiveConverter(
        md_path,
        resolved_css,
        resolved_output,
        extend_default_css=extend_default_css,
        loud=True,
    )
    watcher.observe()
125
-
126
-
127
def convert_text(md_text, css_text=None, *, extend_default_css=True):
    """
    Render markdown text as a pdf document held in memory.

    Args:
        md_text (str): Markdown text.
        css_text (str=None): CSS text. Falls back to the default CSS text.
        extend_default_css (bool=True): Also apply the default CSS text.

    Returns:
        PDF file as bytes.

    Raises:
        RuntimeError: Wraps any exception raised while converting.
    """
    default_css = Path(get_css_path()).read_text(encoding="utf-8")
    code_css = Path(get_code_css_path()).read_text(encoding="utf-8")
    chosen_css = default_css if css_text is None else css_text

    css_texts = [code_css]
    if extend_default_css:
        css_texts.append(default_css)
    css_texts.append(chosen_css)

    stylesheets = []
    for text in drop_duplicates(css_texts):
        stylesheets.append(weasyprint.CSS(string=text))

    def _render(html):
        document = weasyprint.HTML(string=html, base_url=".")
        return document.write_pdf(stylesheets=stylesheets)

    try:
        markup = markdown2.markdown(md_text, extras=MD_EXTENSIONS)

        # Rendering goes through the silent wrapper to suppress warnings
        return _silent_pdf_generation(_render, markup)

    except Exception as exc:
        raise RuntimeError(exc) from exc
164
-
165
-
166
class LiveConverter:
    """
    Class to convert a markdown file to a pdf file and watch for changes.

    Changes are detected by polling the modification times of the markdown
    file and the CSS file.
    """

    def __init__(
        self, md_path, css_path, output_path, *, extend_default_css=True, loud=False
    ):
        """
        Initialize the LiveConverter class.

        Args:
            md_path (str): Path to the markdown file.
            css_path (str): Path to the CSS file.
            output_path (str): Path to the output file.
            extend_default_css (bool): Extend the default CSS file.
            loud (bool): Print a status line after each conversion and when
                the watch loop is interrupted.
        """
        self.md_path = Path(md_path).absolute()
        self.css_path = Path(css_path).absolute()
        self.output_path = output_path
        self.extend_default_css = extend_default_css
        self.loud = loud

        # Modification times seen at the last conversion; set by observe().
        self.md_last_modified = None
        self.css_last_modified = None

    def get_last_modified_date(self, file_path):
        """
        Get the last modified date of a file.

        Args:
            file_path (str): Path to the file.

        Returns:
            Last modified time of the file, as returned by os.path.getmtime().
        """
        return os.path.getmtime(file_path)

    def _read_timestamps(self):
        """
        Return the current (markdown, CSS) modification times as a tuple.
        """
        return (
            self.get_last_modified_date(self.md_path),
            self.get_last_modified_date(self.css_path),
        )

    def write_pdf(self):
        """
        Write the pdf file and, when loud, report the update time.
        """
        convert(
            self.md_path,
            self.css_path,
            self.output_path,
            extend_default_css=self.extend_default_css,
        )
        if self.loud:
            print(f"- PDF file updated: {datetime.now()}", flush=True)

    def observe(self, poll_interval=1):
        """
        Observe the markdown and CSS files. Calls write_pdf() when a file is
        modified. Runs until interrupted with Ctrl+C.

        Args:
            poll_interval (int|float): Seconds to sleep between polls.
        """
        # Take the baseline BEFORE the initial conversion: an edit saved while
        # that conversion is running must still be seen as a change by the
        # first poll, otherwise the PDF stays stale until the next save.
        try:
            baseline = self._read_timestamps()
        except OSError:
            # Unreadable file: let write_pdf() report it the way it always has.
            baseline = None

        self.write_pdf()

        if baseline is None:
            baseline = self._read_timestamps()
        self.md_last_modified, self.css_last_modified = baseline

        try:
            while True:
                md_modified, css_modified = self._read_timestamps()

                if (
                    md_modified != self.md_last_modified
                    or css_modified != self.css_last_modified
                ):
                    self.write_pdf()

                    self.md_last_modified = md_modified
                    self.css_last_modified = css_modified

                time.sleep(poll_interval)

        except KeyboardInterrupt:
            if self.loud:
                print("\nInterrupted by user.\n", flush=True)
1
+ """
2
+ Module to convert a markdown file to a pdf file.
3
+ Author: @julynx
4
+ """
5
+
6
+ import os
7
+ import re
8
+ import time
9
+ from datetime import datetime
10
+ from pathlib import Path
11
+
12
+ import markdown2
13
+ from playwright.sync_api import sync_playwright
14
+
15
+ from .constants import MD_EXTENSIONS
16
+ from .resources import get_code_css_path, get_css_path, get_output_path
17
+ from .utils import drop_duplicates
18
+
19
+
20
def _generate_pdf_with_playwright(html_content, output_path, *, css_content=None):
    """
    Generate a PDF from HTML content using Playwright.

    Args:
        html_content (str): HTML markup to render.
        output_path (str): Path of the PDF file to write. When falsy, no file
            is written and the PDF is returned instead.
        css_content (str=None): CSS text added to the page in a style tag.

    Returns:
        PDF file as bytes when no output path is given, otherwise None.
    """
    pdf_params = {
        "format": "A4",
        "print_background": True,
        "margin": {
            "top": "20mm",
            "bottom": "20mm",
            "left": "20mm",
            "right": "20mm",
        },
    }

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        try:
            page = browser.new_page()
            page.set_content(html_content)
            if css_content:
                page.add_style_tag(content=css_content)
            # Wait for any potential resources to load
            page.wait_for_load_state("networkidle")

            if output_path:
                page.pdf(path=output_path, **pdf_params)
                return None

            return page.pdf(**pdf_params)
        finally:
            # Close the browser on every exit path, including when loading
            # the content or printing the PDF raises.
            browser.close()
52
+
53
+
54
def _get_css_content(css_sources):
    """
    Read every CSS file in order and concatenate their contents.

    Args:
        css_sources (list): List of CSS file paths.
    Returns:
        str: Combined CSS content, with a newline after each file's text.
    """
    return "".join(
        Path(css_file).read_text(encoding="utf-8") + "\n" for css_file in css_sources
    )
67
+
68
+
69
def _create_sections(html):
    """
    Wrap each h2 heading and the content that follows it in <section> tags.

    A section runs from an h2 element up to the next h2 opening tag, or to the
    end of the document for the last one. Content before the first h2 is left
    untouched.

    Args:
        html (str): HTML content.
    Returns:
        HTML content with sections wrapped in <section> tags.
    """
    # \g<1> is the h2 element itself, \g<2> everything up to the next h2.
    wrapped = "<section>\n\\g<1>\n\\g<2>\n</section>\n"
    return re.sub(
        r"(<h2.*?>.*?</h2>)(.*?)(?=(<h2.*?>|$))",
        wrapped,
        html,
        flags=re.DOTALL,
    )
85
+
86
+
87
def convert(
    md_path,
    css_path=None,
    output_path=None,
    *,
    extend_default_css=True,
    dump_html=False,
):
    """
    Render a markdown file as a pdf file.

    Args:
        md_path (str): Path to the markdown file.
        css_path (str=None): Path to the CSS file. Falls back to the path
            returned by get_css_path().
        output_path (str=None): Path to the output file. Falls back to the
            path returned by get_output_path(md_path, None).
        extend_default_css (bool=True): Also apply the default CSS file.
        dump_html (bool=False): Write the intermediate HTML next to the
            output file, using the same name with a ".html" suffix.

    Raises:
        RuntimeError: Wraps any exception raised while converting.
    """
    if css_path is None:
        css_path = get_css_path()

    if output_path is None:
        output_path = get_output_path(md_path, None)

    # Code CSS first, then (optionally) the default CSS, then the chosen CSS.
    stylesheets = [get_code_css_path()]
    if extend_default_css:
        stylesheets.append(get_css_path())
    stylesheets.append(css_path)
    stylesheets = drop_duplicates(stylesheets)

    try:
        markup = markdown2.markdown_path(md_path, extras=MD_EXTENSIONS)
        markup = _create_sections(markup)

        if dump_html:
            dump_target = Path(output_path).with_suffix(".html")
            dump_target.write_text(markup, encoding="utf-8")

        combined_css = _get_css_content(stylesheets)
        _generate_pdf_with_playwright(markup, output_path, css_content=combined_css)

    except Exception as exc:
        raise RuntimeError(exc) from exc
134
+
135
+
136
def live_convert(md_path, css_path=None, output_path=None, *, extend_default_css=True):
    """
    Convert a markdown file to a pdf file, then keep watching it for changes.

    Args:
        md_path (str): Path to the markdown file.
        css_path (str=None): Path to the CSS file. Falls back to the path
            returned by get_css_path().
        output_path (str=None): Path to the output file. Falls back to the
            path returned by get_output_path(md_path, None).
        extend_default_css (bool=True): Extend the default CSS file.
    """
    resolved_css = get_css_path() if css_path is None else css_path
    resolved_output = get_output_path(md_path, None) if output_path is None else output_path

    # loud=True: the watcher prints a status line after every conversion.
    watcher = LiveConverter(
        md_path,
        resolved_css,
        resolved_output,
        extend_default_css=extend_default_css,
        loud=True,
    )
    watcher.observe()
160
+
161
+
162
def convert_text(md_text, css_text=None, *, extend_default_css=True):
    """
    Convert markdown text to a pdf file.

    Args:
        md_text (str): Markdown text.
        css_text (str=None): CSS text. Falls back to the default CSS text.
        extend_default_css (bool=True): Extend the default CSS file.

    Returns:
        PDF file as bytes.

    Raises:
        RuntimeError: Wraps any exception raised while converting.
    """
    default_css = Path(get_css_path()).read_text(encoding="utf-8")
    code_css = Path(get_code_css_path()).read_text(encoding="utf-8")

    if css_text is None:
        css_text = default_css

    if extend_default_css:
        css_sources = [code_css, default_css, css_text]
    else:
        css_sources = [code_css, css_text]

    # css_sources holds CSS *text*, not file paths, so it must be joined
    # directly: _get_css_content() would try to open each entry as a file.
    # Duplicates are dropped as in convert(), which avoids emitting the
    # default CSS twice when css_text is not given.
    css_content = "".join(f"{css}\n" for css in drop_duplicates(css_sources))

    try:
        html = markdown2.markdown(md_text, extras=MD_EXTENSIONS)
        html = _create_sections(html)

        return _generate_pdf_with_playwright(
            html,
            None,
            css_content=css_content,
        )

    except Exception as exc:
        raise RuntimeError(exc) from exc
197
+
198
+
199
class LiveConverter:
    """
    Class to convert a markdown file to a pdf file and watch for changes.

    Changes are detected by polling the modification times of the markdown
    file and the CSS file.
    """

    def __init__(self, md_path, css_path, output_path, *, extend_default_css=True, loud=False):
        """
        Initialize the LiveConverter class.

        Args:
            md_path (str): Path to the markdown file.
            css_path (str): Path to the CSS file.
            output_path (str): Path to the output file.
            extend_default_css (bool): Extend the default CSS file.
            loud (bool): Print a status line after each conversion and when
                the watch loop is interrupted.
        """
        self.md_path = Path(md_path).absolute()
        self.css_path = Path(css_path).absolute()
        self.output_path = output_path
        self.extend_default_css = extend_default_css
        self.loud = loud

        # Modification times seen at the last conversion; set by observe().
        self.md_last_modified = None
        self.css_last_modified = None

    def get_last_modified_date(self, file_path):
        """
        Get the last modified date of a file.

        Args:
            file_path (str): Path to the file.

        Returns:
            Last modified time of the file, as returned by os.path.getmtime().
        """
        return os.path.getmtime(file_path)

    def _read_timestamps(self):
        """
        Return the current (markdown, CSS) modification times as a tuple.
        """
        md_modified = self.get_last_modified_date(self.md_path)
        css_modified = self.get_last_modified_date(self.css_path)
        return md_modified, css_modified

    def write_pdf(self):
        """
        Write the pdf file and, when loud, report the update time.
        """
        convert(
            self.md_path,
            self.css_path,
            self.output_path,
            extend_default_css=self.extend_default_css,
        )
        if self.loud:
            print(f"- PDF file updated: {datetime.now()}", flush=True)

    def observe(self, poll_interval=1):
        """
        Observe the markdown and CSS files. Calls write_pdf() when a file is
        modified. Runs until interrupted with Ctrl+C.

        Args:
            poll_interval (int|float): Seconds to sleep between polls.
        """
        # Take the baseline BEFORE the initial conversion: an edit saved while
        # that conversion is running must still be seen as a change by the
        # first poll, otherwise the PDF stays stale until the next save.
        try:
            baseline = self._read_timestamps()
        except OSError:
            # Unreadable file: let write_pdf() report it the way it always has.
            baseline = None

        self.write_pdf()

        if baseline is None:
            baseline = self._read_timestamps()
        self.md_last_modified, self.css_last_modified = baseline

        try:
            while True:
                md_modified, css_modified = self._read_timestamps()

                if md_modified != self.md_last_modified or css_modified != self.css_last_modified:
                    self.write_pdf()

                    self.md_last_modified = md_modified
                    self.css_last_modified = css_modified

                time.sleep(poll_interval)

        except KeyboardInterrupt:
            if self.loud:
                print("\nInterrupted by user.\n", flush=True)