markdown_convert 1.2.16__py3-none-any.whl → 1.2.18__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- markdown_convert/__main__.py +95 -95
- markdown_convert/code.css +73 -73
- markdown_convert/default.css +223 -373
- markdown_convert/modules/__init__.py +4 -4
- markdown_convert/modules/constants.py +23 -22
- markdown_convert/modules/convert.py +275 -247
- markdown_convert/modules/resources.py +98 -98
- markdown_convert/modules/utils.py +38 -38
- markdown_convert/modules/validate.py +61 -61
- {markdown_convert-1.2.16.dist-info → markdown_convert-1.2.18.dist-info}/METADATA +6 -7
- markdown_convert-1.2.18.dist-info/RECORD +14 -0
- {markdown_convert-1.2.16.dist-info → markdown_convert-1.2.18.dist-info}/WHEEL +1 -1
- {markdown_convert-1.2.16.dist-info → markdown_convert-1.2.18.dist-info}/licenses/LICENSE +339 -339
- markdown_convert-1.2.16.dist-info/RECORD +0 -14
- {markdown_convert-1.2.16.dist-info → markdown_convert-1.2.18.dist-info}/entry_points.txt +0 -0
|
@@ -1,247 +1,275 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Module to convert a markdown file to a pdf file.
|
|
3
|
-
Author: @julynx
|
|
4
|
-
"""
|
|
5
|
-
|
|
6
|
-
import os
|
|
7
|
-
import
|
|
8
|
-
import time
|
|
9
|
-
import
|
|
10
|
-
from
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
from
|
|
14
|
-
|
|
15
|
-
import
|
|
16
|
-
import
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
"""
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
)
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
def
|
|
205
|
-
"""
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
extend_default_css
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
1
|
+
"""
|
|
2
|
+
Module to convert a markdown file to a pdf file.
|
|
3
|
+
Author: @julynx
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import os
|
|
7
|
+
import re
|
|
8
|
+
import time
|
|
9
|
+
from datetime import datetime
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
|
|
12
|
+
import markdown2
|
|
13
|
+
from playwright.sync_api import sync_playwright
|
|
14
|
+
|
|
15
|
+
from .constants import MD_EXTENSIONS
|
|
16
|
+
from .resources import get_code_css_path, get_css_path, get_output_path
|
|
17
|
+
from .utils import drop_duplicates
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _generate_pdf_with_playwright(html_content, output_path, *, css_content=None):
    """
    Generate a PDF from HTML content using Playwright.

    Args:
        html_content (str): HTML markup to render.
        output_path (str): Path of the PDF file to write. When falsy, nothing
            is written to disk and the PDF is returned instead.
        css_content (str=None): CSS text injected into the page through a
            style tag before rendering.

    Returns:
        None when the PDF was written to output_path, otherwise the PDF
        document as bytes.
    """
    pdf_params = {
        "format": "A4",
        "print_background": True,
        "margin": {
            "top": "20mm",
            "bottom": "20mm",
            "left": "20mm",
            "right": "20mm",
        },
    }

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        try:
            page = browser.new_page()
            page.set_content(html_content)
            if css_content:
                page.add_style_tag(content=css_content)
            # Wait for any potential resources to load
            page.wait_for_load_state("networkidle")

            if output_path:
                page.pdf(path=output_path, **pdf_params)
                return None

            return page.pdf(**pdf_params)
        finally:
            # Close the browser explicitly on every exit path, including
            # failures while loading or printing the page.
            browser.close()
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def _get_css_content(css_sources):
    """
    Read every CSS file in css_sources and concatenate their contents.

    Args:
        css_sources (list): List of CSS file paths, read as UTF-8 in order.
    Returns:
        str: Contents of all files, each one followed by a newline. An empty
            list yields an empty string.
    """
    return "".join(
        Path(css_file).read_text(encoding="utf-8") + "\n" for css_file in css_sources
    )
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def _create_sections(html):
    """
    Wrap every h2 heading, together with the content that follows it up to
    the next h2 (or the end of the document), in <section> tags using a
    regular expression. Content before the first h2 is left as it is.

    Args:
        html (str): HTML content.
    Returns:
        HTML content with sections wrapped in <section> tags.
    """
    heading_block = re.compile(r"(<h2.*?>.*?</h2>)(.*?)(?=(<h2.*?>|$))", re.DOTALL)

    # Group 1 is the heading itself, group 2 the content that follows it.
    return heading_block.sub("<section>\n\\g<1>\n\\g<2>\n</section>\n", html)
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def convert(
    md_path,
    css_path=None,
    output_path=None,
    *,
    extend_default_css=True,
    dump_html=False,
):
    """
    Render a markdown file as a pdf file.

    Args:
        md_path (str): Path to the markdown file.
        css_path (str=None): Path to the CSS file. Falls back to the
            default stylesheet when omitted.
        output_path (str=None): Path to the output file. Derived from
            md_path when omitted.
        extend_default_css (bool=True): Apply the default stylesheet before
            the one given in css_path.
        dump_html (bool=False): Also write the intermediate HTML next to the
            output file, using the .html suffix.

    Raises:
        RuntimeError: If any step of the conversion fails.
    """
    css_path = get_css_path() if css_path is None else css_path
    output_path = get_output_path(md_path, None) if output_path is None else output_path

    # Later stylesheets take precedence, so the custom one goes last.
    stylesheets = [get_code_css_path()]
    if extend_default_css:
        stylesheets.append(get_css_path())
    stylesheets.append(css_path)
    stylesheets = drop_duplicates(stylesheets)

    try:
        rendered = _create_sections(
            markdown2.markdown_path(md_path, extras=MD_EXTENSIONS)
        )

        if dump_html:
            Path(output_path).with_suffix(".html").write_text(rendered, encoding="utf-8")

        stylesheet_text = _get_css_content(stylesheets)
        _generate_pdf_with_playwright(rendered, output_path, css_content=stylesheet_text)

    except Exception as exc:
        raise RuntimeError(exc) from exc
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def live_convert(md_path, css_path=None, output_path=None, *, extend_default_css=True):
    """
    Render a markdown file as a pdf file, then keep watching the sources and
    render again whenever they change.

    Args:
        md_path (str): Path to the markdown file.
        css_path (str=None): Path to the CSS file. Falls back to the
            default stylesheet when omitted.
        output_path (str=None): Path to the output file. Derived from
            md_path when omitted.
        extend_default_css (bool=True): Extend the default CSS file.
    """
    resolved_css = get_css_path() if css_path is None else css_path
    resolved_output = (
        get_output_path(md_path, None) if output_path is None else output_path
    )

    watcher = LiveConverter(
        md_path,
        resolved_css,
        resolved_output,
        extend_default_css=extend_default_css,
        loud=True,
    )
    watcher.observe()
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def convert_text(md_text, css_text=None, *, extend_default_css=True):
    """
    Convert markdown text to a pdf file.

    Args:
        md_text (str): Markdown text.
        css_text (str=None): CSS text. The default stylesheet is used when
            omitted.
        extend_default_css (bool=True): Extend the default CSS file.

    Returns:
        PDF file as bytes.

    Raises:
        RuntimeError: If rendering the markdown or generating the PDF fails.
    """
    default_css = Path(get_css_path()).read_text(encoding="utf-8")
    code_css = Path(get_code_css_path()).read_text(encoding="utf-8")

    # Later stylesheets take precedence, so the custom one goes last. The
    # default stylesheet is included once, even when it is also the fallback
    # for a missing css_text.
    css_sources = [code_css]
    if extend_default_css or css_text is None:
        css_sources.append(default_css)
    if css_text is not None:
        css_sources.append(css_text)

    # These entries are CSS text, not file paths, so they are joined directly;
    # _get_css_content would try to open each entry as a file and fail.
    css_content = "".join(f"{css}\n" for css in css_sources)

    try:
        html = markdown2.markdown(md_text, extras=MD_EXTENSIONS)
        html = _create_sections(html)

        return _generate_pdf_with_playwright(
            html,
            None,
            css_content=css_content,
        )

    except Exception as exc:
        raise RuntimeError(exc) from exc
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
class LiveConverter:
    """
    Converts a markdown file to a pdf file and regenerates the pdf whenever
    the markdown file or its CSS file is modified.
    """

    def __init__(self, md_path, css_path, output_path, *, extend_default_css=True, loud=False):
        """
        Set up the converter without touching the file system.

        Args:
            md_path (str): Path to the markdown file.
            css_path (str): Path to the CSS file.
            output_path (str): Path to the output file.
            extend_default_css (bool): Extend the default CSS file.
            loud (bool): Print a message after every update and when the
                observation is interrupted.
        """
        # Both watched files are stored as absolute paths.
        self.md_path = Path(md_path).absolute()
        self.css_path = Path(css_path).absolute()
        self.output_path = output_path

        self.extend_default_css = extend_default_css
        self.loud = loud

        # Modification times seen last; filled in by observe().
        self.md_last_modified = None
        self.css_last_modified = None

    def get_last_modified_date(self, file_path):
        """
        Look up when a file was last modified.

        Args:
            file_path (str): Path to the file.

        Returns:
            Modification time of the file as reported by the file system.
        """
        return os.stat(file_path).st_mtime

    def write_pdf(self):
        """
        Run the conversion once and, when loud, report the update time.
        """
        convert(
            self.md_path,
            self.css_path,
            self.output_path,
            extend_default_css=self.extend_default_css,
        )
        if not self.loud:
            return
        print(f"- PDF file updated: {datetime.now()}", flush=True)

    def observe(self, poll_interval=1):
        """
        Write the pdf once, then poll the markdown and CSS files and call
        write_pdf() again each time one of them is modified. Runs until it
        is interrupted from the keyboard.

        Args:
            poll_interval (int): Seconds to sleep between two checks.
        """
        self.write_pdf()

        self.md_last_modified = self.get_last_modified_date(self.md_path)
        self.css_last_modified = self.get_last_modified_date(self.css_path)

        try:
            while True:
                seen_now = (
                    self.get_last_modified_date(self.md_path),
                    self.get_last_modified_date(self.css_path),
                )

                if seen_now != (self.md_last_modified, self.css_last_modified):
                    self.write_pdf()
                    # Only remember the new times once the pdf was written.
                    self.md_last_modified, self.css_last_modified = seen_now

                time.sleep(poll_interval)

        except KeyboardInterrupt:
            if self.loud:
                print("\nInterrupted by user.\n", flush=True)
|