md2pdf-tex 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
md2pdf.py
ADDED
|
@@ -0,0 +1,416 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Convert a Markdown file with LaTeX math to a polished PDF file using KaTeX
|
|
3
|
+
for math rendering and Selenium (headless Chrome) for printing.
|
|
4
|
+
|
|
5
|
+
Usage: python3 md2pdf.py <input.md> [output.pdf]
|
|
6
|
+
"""
|
|
7
|
+
import re
|
|
8
|
+
import sys
|
|
9
|
+
import os
|
|
10
|
+
import tempfile
|
|
11
|
+
import base64
|
|
12
|
+
from selenium import webdriver
|
|
13
|
+
from selenium.webdriver.chrome.options import Options
|
|
14
|
+
from selenium.webdriver.support.ui import WebDriverWait
|
|
15
|
+
|
|
16
|
+
# Execution logic is wrapped in main() at the bottom
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
# --- Step 1: Protect math blocks from markdown processing ---
|
|
20
|
+
# Extract display math $$ ... $$ and inline math $ ... $
|
|
21
|
+
# We'll convert markdown manually since we need fine control.
|
|
22
|
+
|
|
23
|
+
def _close_open_lists(html_lines, in_list, in_sublist):
    """Append the closing tags for whichever list levels are still open."""
    if in_list:
        if in_sublist:
            html_lines.append('</ul></li>')
        html_lines.append('</ul>')


def md_to_html(md):
    """Convert a small Markdown subset to HTML while preserving LaTeX math.

    Supported block constructs: display math delimited by ``$$``, headers
    (``#``, ``##``, ``###``; deeper levels are emitted as ``<h3>``),
    unordered ``- `` lists with one level of nesting, and paragraphs made
    of contiguous lines.  Inline markup is delegated to ``process_inline``.

    Args:
        md: The Markdown source text.

    Returns:
        The HTML body fragment, one element per line, joined with newlines.
    """
    # Local import keeps this block self-sufficient; the file does not
    # import ``html`` at module level.
    from html import escape

    lines = md.split('\n')
    html_lines = []
    in_list = False
    in_sublist = False
    i = 0

    while i < len(lines):
        line = lines[i]
        stripped = line.strip()

        # Display math block
        if stripped.startswith('$$'):
            _close_open_lists(html_lines, in_list, in_sublist)
            in_list = in_sublist = False

            math_content = []
            if stripped == '$$':
                # Delimiter on its own line: collect until the closing $$.
                i += 1
                while i < len(lines) and lines[i].strip() != '$$':
                    math_content.append(lines[i])
                    i += 1
                i += 1  # skip closing $$
            elif stripped.endswith('$$'):
                # Single-line: $$...$$
                math_content.append(stripped[2:-2])
                i += 1
            else:
                # Opens with $$ but doesn't close on the same line.
                rest = stripped[2:]
                if rest:
                    math_content.append(rest)
                i += 1
                while i < len(lines) and not lines[i].strip().endswith('$$'):
                    math_content.append(lines[i])
                    i += 1
                if i < len(lines):
                    last = lines[i].strip()
                    if last != '$$':
                        math_content.append(last[:-2])
                    i += 1

            # Escape &, <, > so LaTeX such as "a<b" is not parsed as an HTML
            # tag; the browser decodes the entities back into the text node
            # that KaTeX reads.
            latex = escape('\n'.join(math_content).strip(), quote=False)
            html_lines.append(f'<div class="math-display">$${latex}$$</div>')
            continue

        # Headers: the number of leading '#' picks the level, capped at 3.
        if line.startswith('#'):
            _close_open_lists(html_lines, in_list, in_sublist)
            in_list = in_sublist = False
            title = line.lstrip('#')
            level = min(len(line) - len(title), 3)
            html_lines.append(
                f'<h{level}>{process_inline(title.strip())}</h{level}>'
            )
            i += 1
            continue

        # List items (detect indent level)
        if stripped.startswith('- '):
            leading_ws = len(line) - len(line.lstrip())
            is_sub = leading_ws >= 2 or line.startswith('\t-')
            # Drop only the "- " marker.  lstrip('- ') would also eat dashes
            # that belong to the item text (e.g. "- -5 degrees").
            item = process_inline(stripped[2:].strip())

            if is_sub:
                if not in_list:
                    html_lines.append('<ul>')
                    in_list = True
                if not in_sublist:
                    previous = html_lines[-1]
                    if previous.startswith('<li>') and previous.endswith('</li>'):
                        # Re-open the parent item so the nested list sits
                        # inside it instead of inside an empty extra <li>.
                        html_lines[-1] = previous[:-len('</li>')]
                        html_lines.append('<ul>')
                    else:
                        # No parent item exists (the list starts indented).
                        html_lines.append('<li><ul>')
                    in_sublist = True
            else:
                if in_sublist:
                    html_lines.append('</ul></li>')
                    in_sublist = False
                if not in_list:
                    html_lines.append('<ul>')
                    in_list = True
            html_lines.append(f'<li>{item}</li>')
            i += 1
            continue

        # Blank line: ends any open list and is otherwise skipped.
        if stripped == '':
            _close_open_lists(html_lines, in_list, in_sublist)
            in_list = in_sublist = False
            i += 1
            continue

        # Regular paragraph: collect contiguous lines until a blank line or
        # the start of another block construct.
        para_lines = [line]
        i += 1
        while i < len(lines):
            next_line = lines[i]
            next_stripped = next_line.strip()
            if (next_stripped == '' or
                    next_line.startswith('#') or
                    next_stripped.startswith(('- ', '$$'))):
                break
            para_lines.append(next_line)
            i += 1

        _close_open_lists(html_lines, in_list, in_sublist)
        in_list = in_sublist = False

        paragraph = ' '.join(part.strip() for part in para_lines)
        html_lines.append(f'<p>{process_inline(paragraph)}</p>')

    # Close any remaining open list
    _close_open_lists(html_lines, in_list, in_sublist)

    return '\n'.join(html_lines)
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def process_inline(text):
    """Convert inline Markdown (code, math, bold, italic) to HTML.

    Code spans and inline math are swapped for placeholders before the
    emphasis rules run, so ``*``, ``**`` and ``$`` inside them are left
    alone.  Their content is HTML-escaped so characters such as ``<`` and
    ``&`` cannot be parsed as markup.

    Args:
        text: One line or paragraph of Markdown text.

    Returns:
        The text with inline markup replaced by HTML: ``<code>`` for code
        spans, ``<span class="math-inline">`` around ``$...$`` (delimiters
        kept), ``<strong>`` for bold and ``<em>`` for italic.
    """
    # Local import keeps this block self-sufficient; the file does not
    # import ``html`` at module level.
    from html import escape

    protected = []

    def stash(fragment):
        # Park finished HTML behind a placeholder the emphasis regexes
        # cannot match.
        protected.append(fragment)
        return f'%%INLINE{len(protected) - 1}%%'

    def save_code(m):
        return stash(f'<code>{escape(m.group(1), quote=False)}</code>')

    def save_math(m):
        return stash(
            f'<span class="math-inline">{escape(m.group(0), quote=False)}</span>'
        )

    # Code spans go first so a "$" inside backticks is not taken for math.
    text = re.sub(r'`(.+?)`', save_code, text)
    # Inline math: a single "$" on each side, never part of "$$".
    text = re.sub(r'(?<!\$)\$(?!\$)(.+?)\$(?!\$)', save_math, text)

    # Bold must run before italic so "**" is not consumed as two "*".
    text = re.sub(r'\*\*(.+?)\*\*', r'<strong>\1</strong>', text)
    text = re.sub(r'\*(.+?)\*', r'<em>\1</em>', text)

    # Restore newest first: a later fragment may contain an earlier
    # placeholder (math wrapping a code span), never the other way round.
    for index in range(len(protected) - 1, -1, -1):
        text = text.replace(f'%%INLINE{index}%%', protected[index])

    return text
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
def _build_document(title, body_html):
    """Return the full HTML page: KaTeX setup, print stylesheet and body."""
    # Local import keeps this block self-sufficient; the file does not
    # import ``html`` at module level.
    from html import escape

    # The title comes from the input file name, which may contain & or <.
    safe_title = escape(title)

    return f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>{safe_title}</title>
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/katex@0.16.22/dist/katex.min.css">
<script defer src="https://cdn.jsdelivr.net/npm/katex@0.16.22/dist/katex.min.js"></script>
<script defer src="https://cdn.jsdelivr.net/npm/katex@0.16.22/dist/contrib/auto-render.min.js"
    onload="renderMathInElement(document.body, {{
        delimiters: [
            {{left: '$$', right: '$$', display: true}},
            {{left: '$', right: '$', display: false}}
        ],
        throwOnError: false
    }});"></script>
<style>
    @import url('https://fonts.googleapis.com/css2?family=Source+Serif+4:ital,wght@0,400;0,600;0,700;1,400&family=Source+Sans+3:wght@400;600;700&family=Source+Code+Pro:wght@400&display=swap');

    :root {{
        --text: #1a1a2e;
        --muted: #555;
        --accent: #2d5aa0;
        --border: #d0d7de;
        --bg-code: #f6f8fa;
    }}

    * {{ margin: 0; padding: 0; box-sizing: border-box; }}

    body {{
        font-family: 'Source Serif 4', Georgia, 'Times New Roman', serif;
        font-size: 11pt;
        line-height: 1.7;
        color: var(--text);
        max-width: 680px;
        margin: 0 auto;
        padding: 48px 32px;
    }}

    h1 {{
        font-family: 'Source Sans 3', 'Helvetica Neue', Arial, sans-serif;
        font-size: 24pt;
        font-weight: 700;
        margin: 36px 0 12px 0;
        color: var(--text);
        letter-spacing: -0.02em;
    }}

    h2 {{
        font-family: 'Source Sans 3', 'Helvetica Neue', Arial, sans-serif;
        font-size: 17pt;
        font-weight: 700;
        margin: 32px 0 8px 0;
        padding-bottom: 6px;
        border-bottom: 2px solid var(--border);
        color: var(--text);
        letter-spacing: -0.01em;
    }}

    h3 {{
        font-family: 'Source Sans 3', 'Helvetica Neue', Arial, sans-serif;
        font-size: 13pt;
        font-weight: 600;
        margin: 24px 0 6px 0;
        color: var(--accent);
    }}

    p {{
        margin: 10px 0;
        text-align: justify;
        hyphens: auto;
    }}

    ul {{
        margin: 8px 0;
        padding-left: 24px;
    }}

    li {{
        margin-bottom: 5px;
    }}

    li > ul {{
        margin-top: 4px;
    }}

    strong {{
        font-weight: 600;
    }}

    code {{
        font-family: 'Source Code Pro', 'Menlo', 'Consolas', monospace;
        font-size: 0.9em;
        background: var(--bg-code);
        padding: 1px 5px;
        border-radius: 3px;
        border: 1px solid #e1e4e8;
    }}

    .math-display {{
        margin: 20px 0;
        text-align: center;
        overflow-x: auto;
    }}

    .math-inline .katex {{
        font-size: 1.0em;
    }}

    .math-display .katex {{
        font-size: 1.15em;
    }}

    /* Print-specific styles */
    @media print {{
        body {{
            padding: 0;
            max-width: none;
        }}
        h2 {{
            page-break-after: avoid;
        }}
        .math-display {{
            page-break-inside: avoid;
        }}
    }}
</style>
</head>
<body>
{body_html}
</body>
</html>
"""


def _print_to_pdf(html_path, output_pdf):
    """Load *html_path* in headless Chrome and write the printed PDF to *output_pdf*."""
    # Local import keeps this block self-sufficient; the file does not
    # import ``pathlib`` at module level.
    from pathlib import Path

    options = Options()
    options.add_argument('--headless')
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-dev-shm-usage')

    # Point to playwright-installed chromium in test sandbox.
    # NOTE(review): this path is specific to one environment; it is only
    # used when it exists, otherwise Selenium locates Chrome itself.
    sandbox_chrome = "/home/agent/.cache/ms-playwright/chromium-1223/chrome-linux64/chrome"
    if os.path.exists(sandbox_chrome):
        options.binary_location = sandbox_chrome

    # Create driver (Selenium Manager handles chromedriver automatically)
    driver = webdriver.Chrome(options=options)
    try:
        # as_uri() yields a well-formed, percent-encoded file:// URL on
        # every platform; plain string concatenation does not.
        driver.get(Path(html_path).absolute().as_uri())

        # Wait for KaTeX to finish rendering.  A formula that fails to parse
        # is rendered as ".katex-error" (throwOnError is false), so count
        # that as rendered too instead of timing out.
        WebDriverWait(driver, 10).until(
            lambda d: d.execute_script("""
                const mathElements = document.querySelectorAll('.math-display, .math-inline');
                if (mathElements.length === 0) return true;
                return document.querySelectorAll('.katex, .katex-error').length > 0;
            """)
        )

        # Print to PDF using CDP
        print_settings = {
            "printBackground": True,
            "paperWidth": 8.27,    # A4 width in inches
            "paperHeight": 11.69,  # A4 height in inches
            "marginTop": 0.5,      # 48px margin in inches (48 / 96 = 0.5)
            "marginBottom": 0.5,
            "marginLeft": 0.5,
            "marginRight": 0.5,
        }
        result = driver.execute_cdp_cmd("Page.printToPDF", print_settings)
        pdf_data = base64.b64decode(result['data'])

        with open(output_pdf, "wb") as pdf_file:
            pdf_file.write(pdf_data)

        print(f"PDF written to {os.path.abspath(output_pdf)}")
    finally:
        driver.quit()


def main():
    """Command-line entry point: convert a Markdown file to PDF.

    Reads ``sys.argv[1]`` as the input Markdown path and the optional
    ``sys.argv[2]`` as the output path.  Without an output path the PDF is
    named after the input file and written to the current working
    directory.  Exits with status 1 on a usage error or when rendering
    fails.
    """
    if len(sys.argv) < 2:
        print("Usage: md2pdf <input.md> [output.pdf]")
        sys.exit(1)

    input_path = sys.argv[1]
    stem = os.path.splitext(os.path.basename(input_path))[0]
    output_pdf = sys.argv[2] if len(sys.argv) >= 3 else stem + ".pdf"

    # Read as UTF-8 explicitly: the generated page declares charset utf-8,
    # and the platform default encoding is not UTF-8 everywhere.
    with open(input_path, "r", encoding="utf-8") as f:
        md_text = f.read()

    title = stem.replace('_', ' ').title()
    full_html = _build_document(title, md_to_html(md_text))

    # Write HTML to a temporary file; delete=False because Chrome must open
    # it by path after this handle is closed.
    with tempfile.NamedTemporaryFile(mode='w', suffix='.html', delete=False, encoding='utf-8') as f:
        f.write(full_html)
        temp_html_path = f.name

    try:
        _print_to_pdf(temp_html_path, output_pdf)
    except Exception as e:
        # Top-level boundary: report any browser/driver failure and exit.
        print(f"Error printing PDF via Selenium: {e}", file=sys.stderr)
        sys.exit(1)
    finally:
        if os.path.exists(temp_html_path):
            os.remove(temp_html_path)


if __name__ == '__main__':
    main()
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: md2pdf-tex
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Convert Markdown with LaTeX math to PDF using KaTeX and headless Chrome
|
|
5
|
+
Author: Roberto Moura
|
|
6
|
+
Project-URL: Homepage, https://github.com/robertoffmoura/md2pdf
|
|
7
|
+
Classifier: Programming Language :: Python :: 3
|
|
8
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
9
|
+
Classifier: Operating System :: OS Independent
|
|
10
|
+
Requires-Python: >=3.7
|
|
11
|
+
Description-Content-Type: text/markdown
|
|
12
|
+
Requires-Dist: selenium
|
|
13
|
+
|
|
14
|
+
# md2pdf
|
|
15
|
+
|
|
16
|
+
A minimalist, high-fidelity Markdown-to-PDF converter written in Python. It parses Markdown files containing LaTeX mathematical formulas, renders them using KaTeX, and outputs a print-perfect PDF using a headless Chrome instance via Selenium.
|
|
17
|
+
|
|
18
|
+
## Features
|
|
19
|
+
|
|
20
|
+
- **High-Fidelity PDF Output**: Employs headless Google Chrome to print the rendered HTML layout to PDF, preserving pagination, fonts, margins, and alignments.
|
|
21
|
+
- **LaTeX Math Support**: Seamlessly renders inline math (`$...$`) and block math (`$$...$$`) via **KaTeX**.
|
|
22
|
+
- **Minimal Dependencies**: Requires only Python, Chrome/Chromium, and the Python `selenium` library.
|
|
23
|
+
- **Polished Typography**: Features premium, print-optimized font pairings (Source Serif 4, Source Sans 3, and Source Code Pro) loaded dynamically from Google Fonts.
|
|
24
|
+
- **Clean Markdown Parser**: A custom regex-based parser that translates standard Markdown elements into clean HTML while protecting LaTeX delimiters from markdown interference.
|
|
25
|
+
|
|
26
|
+
---
|
|
27
|
+
|
|
28
|
+
## Installation
|
|
29
|
+
|
|
30
|
+
### 1. Prerequisites
|
|
31
|
+
- **Python 3.x**
|
|
32
|
+
- **Google Chrome** or **Chromium** browser installed on your system.
|
|
33
|
+
|
|
34
|
+
### 2. Install Python Package
|
|
35
|
+
Install the Python dependencies via `pip`. It is recommended to use a virtual environment:
|
|
36
|
+
|
|
37
|
+
```bash
|
|
38
|
+
# Create and activate a virtual environment (optional)
|
|
39
|
+
python3 -m venv venv
|
|
40
|
+
source venv/bin/activate
|
|
41
|
+
|
|
42
|
+
# Install Selenium
|
|
43
|
+
pip install selenium
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
*Note: Selenium Manager will automatically locate and download the appropriate driver (`chromedriver`) for your Chrome version. No manual driver setup is needed.*
|
|
47
|
+
|
|
48
|
+
---
|
|
49
|
+
|
|
50
|
+
## Usage
|
|
51
|
+
|
|
52
|
+
Run the converter from your terminal:
|
|
53
|
+
|
|
54
|
+
```bash
|
|
55
|
+
python3 md2pdf.py <input.md> [output.pdf]
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
- **`<input.md>`**: The path to your input Markdown file.
|
|
59
|
+
- **`[output.pdf]`** *(Optional)*: The path for the output PDF file. If omitted, it defaults to the input file's base name with a `.pdf` extension, written to the current working directory.
|
|
60
|
+
|
|
61
|
+
### Example
|
|
62
|
+
|
|
63
|
+
Given a file `document.md` containing:
|
|
64
|
+
|
|
65
|
+
```markdown
|
|
66
|
+
# Physics Report
|
|
67
|
+
|
|
68
|
+
Let's discuss Maxwell's equations. In differential form, Faraday's law of induction is:
|
|
69
|
+
|
|
70
|
+
$$\nabla \times \mathbf{E} = -\frac{\partial \mathbf{B}}{\partial t}$$
|
|
71
|
+
|
|
72
|
+
Where:
|
|
73
|
+
- $\mathbf{E}$ is the electric field.
|
|
74
|
+
- $\mathbf{B}$ is the magnetic field.
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
Convert it using:
|
|
78
|
+
|
|
79
|
+
```bash
|
|
80
|
+
python3 md2pdf.py document.md
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
This generates `document.pdf` with properly formatted headers, bulleted lists, and beautiful, high-resolution mathematical equations.
|
|
84
|
+
|
|
85
|
+
---
|
|
86
|
+
|
|
87
|
+
## Supported Markdown Elements
|
|
88
|
+
|
|
89
|
+
- **Headings**: `#` (H1), `##` (H2), and `###` (H3).
|
|
90
|
+
- **Inline Math**: `$ ... $` for inline formulas (e.g. $E = mc^2$).
|
|
91
|
+
- **Block Math**: `$$ ... $$` for centered display equations.
|
|
92
|
+
- **Unordered Lists**: `- item` and indented `- subitem`.
|
|
93
|
+
- **Text Styling**: `**bold**`, `*italic*`, and inline `` `code` `` fragments.
|
|
94
|
+
- **Paragraphs**: Contiguous lines are automatically joined into standard justified paragraphs.
|
|
95
|
+
|
|
96
|
+
---
|
|
97
|
+
|
|
98
|
+
## How It Works
|
|
99
|
+
|
|
100
|
+
1. **Preprocessing**: The script reads the input Markdown, identifies math blocks, and translates markdown elements (headers, lists, styling) to semantic HTML tags.
|
|
101
|
+
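2. **HTML Generation**: It constructs a single HTML document that loads the **KaTeX** stylesheets/scripts from the jsDelivr CDN and the fonts from Google Fonts (so network access is required at conversion time), and applies the typography rules.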
|
|
102
|
+
3. **Rendering & Math Processing**: Selenium starts a headless Chrome browser, loads the HTML, and waits for KaTeX's auto-render extension to process the mathematical formatting.
|
|
103
|
+
4. **PDF Printing**: Headless Chrome's print-to-PDF functionality (`Page.printToPDF` via Chrome DevTools Protocol) is triggered with A4 measurements and standard margins to produce a high-fidelity document.
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
md2pdf.py,sha256=bP61rxR8Y9ishF3uCg35c26Yeo7S3HamN1pcns3IZSE,10389
|
|
2
|
+
md2pdf_tex-0.1.0.dist-info/METADATA,sha256=b3cSQNYeElIEryQfOktrBzBcG6oHNhqkl0VfUSLKx0w,4041
|
|
3
|
+
md2pdf_tex-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
4
|
+
md2pdf_tex-0.1.0.dist-info/entry_points.txt,sha256=XIFGjuiUXJ4L6dHsDEx0qC0mzoD2FOk4GQSljXl9J0A,39
|
|
5
|
+
md2pdf_tex-0.1.0.dist-info/top_level.txt,sha256=M6w-SJJb4vYpGkCZOC_PLJ47_mldkUsf0HvtDmdhJJ0,7
|
|
6
|
+
md2pdf_tex-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
md2pdf
|