@joaodotwork/md-2-pdf 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +95 -0
- package/bin/index.js +24 -0
- package/md_to_pdf.py +295 -0
- package/package.json +28 -0
- package/requirements.txt +21 -0
package/README.md
ADDED
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
# Markdown to PDF Converter with Mermaid Support
|
|
2
|
+
|
|
3
|
+
A Python utility to convert Markdown files to PDF while properly rendering Mermaid diagrams.
|
|
4
|
+
|
|
5
|
+
## Features
|
|
6
|
+
|
|
7
|
+
- Converts Markdown files to PDF
|
|
8
|
+
- Properly renders Mermaid diagrams as images in the PDF
|
|
9
|
+
- Processes individual files or entire directories
|
|
10
|
+
- Customizable output and temporary directories
|
|
11
|
+
|
|
12
|
+
## Requirements
|
|
13
|
+
|
|
14
|
+
- Python 3.6+
|
|
15
|
+
- Node.js and npm (for Mermaid CLI)
|
|
16
|
+
- Pandoc (for Markdown to PDF conversion)
|
|
17
|
+
- A PDF engine for Pandoc: XeLaTeX (preferred); pdfLaTeX, WeasyPrint, or wkhtmltopdf are used as fallbacks
|
|
18
|
+
|
|
19
|
+
## Installation
|
|
20
|
+
|
|
21
|
+
1. Ensure you have Python 3.6+ installed
|
|
22
|
+
2. Install Node.js and npm
|
|
23
|
+
3. Install Pandoc: https://pandoc.org/installing.html
|
|
24
|
+
4. Install a LaTeX distribution like TeX Live or MiKTeX
|
|
25
|
+
5. Clone this repository or download the script
|
|
26
|
+
|
|
27
|
+
If the `mmdc` command is not available, the script attempts to install the Mermaid CLI (@mermaid-js/mermaid-cli) globally with `npm install -g`.
|
|
28
|
+
|
|
29
|
+
## Usage
|
|
30
|
+
|
|
31
|
+
### Basic Usage
|
|
32
|
+
|
|
33
|
+
Convert a single Markdown file to PDF:
|
|
34
|
+
|
|
35
|
+
```bash
|
|
36
|
+
python md_to_pdf.py path/to/file.md
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
Convert all Markdown files in a directory:
|
|
40
|
+
|
|
41
|
+
```bash
|
|
42
|
+
python md_to_pdf.py path/to/directory
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
### Advanced Options
|
|
46
|
+
|
|
47
|
+
Specify output file or directory:
|
|
48
|
+
|
|
49
|
+
```bash
|
|
50
|
+
python md_to_pdf.py path/to/input.md -o path/to/output.pdf
|
|
51
|
+
python md_to_pdf.py path/to/input_dir -o path/to/output_dir
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
Specify custom temporary directory:
|
|
55
|
+
|
|
56
|
+
```bash
|
|
57
|
+
python md_to_pdf.py path/to/input.md -t path/to/temp_dir
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
Check if all dependencies are installed:
|
|
61
|
+
|
|
62
|
+
```bash
|
|
63
|
+
python md_to_pdf.py --check-only
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
## How It Works
|
|
67
|
+
|
|
68
|
+
1. The script reads the Markdown file and extracts Mermaid diagram code blocks
|
|
69
|
+
2. For each Mermaid diagram, it:
|
|
70
|
+
- Saves the diagram code to a temporary file
|
|
71
|
+
- Uses Mermaid CLI to render the diagram as an image
|
|
72
|
+
- Replaces the Mermaid code block with an image reference
|
|
73
|
+
3. The modified Markdown with image references is saved to a temporary file
|
|
74
|
+
4. Pandoc converts the modified Markdown to PDF using XeLaTeX, falling back to pdfLaTeX, WeasyPrint, or wkhtmltopdf when XeLaTeX is not installed
|
|
75
|
+
|
|
76
|
+
## Example
|
|
77
|
+
|
|
78
|
+
Input Markdown:
|
|
79
|
+
|
|
80
|
+
````markdown
|
|
81
|
+
# Sample Document
|
|
82
|
+
|
|
83
|
+
Here's a diagram:
|
|
84
|
+
|
|
85
|
+
```mermaid
|
|
86
|
+
flowchart TD
|
|
87
|
+
A[Start] --> B{Decision}
|
|
88
|
+
B -->|Yes| C[Process 1]
|
|
89
|
+
B -->|No| D[Process 2]
|
|
90
|
+
C --> E[End]
|
|
91
|
+
D --> E
|
|
92
|
+
```
|
|
93
|
+
````
|
|
94
|
+
|
|
95
|
+
This will be converted to a PDF with the diagram properly rendered.
|
package/bin/index.js
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
#!/usr/bin/env node

// Thin launcher: runs the bundled md_to_pdf.py with python3, forwarding the
// command-line arguments and mirroring the Python process's exit status.

const { spawn } = require('child_process');
const path = require('path');

// Path to the python script
const scriptPath = path.join(__dirname, '../md_to_pdf.py');

// Forward all arguments passed to this script
const args = [scriptPath, ...process.argv.slice(2)];

// Spawn the python process
// We use 'inherit' to preserve colors and output streaming
const pythonProcess = spawn('python3', args, { stdio: 'inherit' });

// 'error' fires when the process could not be spawned at all (e.g. python3 is
// not on PATH).
pythonProcess.on('error', (err) => {
  console.error('Failed to start python process:', err);
  console.error('Make sure python3 is installed and available in your PATH.');
  process.exit(1);
});

pythonProcess.on('close', (code, signal) => {
  // `code` is null when the child was terminated by a signal. Passing null to
  // process.exit() would exit with status 0 and report success, so map that
  // case to a failure status instead.
  if (code === null) {
    console.error(`python process was terminated by signal ${signal}`);
    process.exit(1);
  }
  process.exit(code);
});
|
package/md_to_pdf.py
ADDED
|
@@ -0,0 +1,295 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Markdown to PDF converter with Mermaid diagram support
|
|
4
|
+
|
|
5
|
+
This script converts markdown files to PDF while properly rendering Mermaid diagrams.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import argparse
|
|
9
|
+
import os
|
|
10
|
+
import re
|
|
11
|
+
import subprocess
|
|
12
|
+
import tempfile
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
from typing import List, Optional, Tuple
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def find_mermaid_blocks(markdown_content: str) -> List[Tuple[str, str]]:
    """Extract mermaid blocks from markdown content.

    The opening fence may be followed by trailing spaces/tabs and by either a
    Unix (LF) or a Windows (CRLF) line ending, so files saved with CRLF line
    endings are recognised as well.

    Args:
        markdown_content: The markdown text to scan

    Returns:
        A list of tuples in document order, each containing:
        - The full match (including ```mermaid delimiters)
        - The mermaid diagram code only, with surrounding whitespace stripped
    """
    # re.DOTALL lets "." span newlines; the lazy ".*?" stops at the first
    # closing fence so consecutive diagrams are not merged into one match.
    pattern = r"```mermaid[ \t]*\r?\n(.*?)```"
    return [
        (match.group(0), match.group(1).strip())
        for match in re.finditer(pattern, markdown_content, re.DOTALL)
    ]
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def _locate_mmdc() -> str:
    """Return the Mermaid CLI executable to invoke.

    Checks node_modules/.bin/mmdc in the current working directory first, then
    node_modules/.bin/mmdc next to this script and in each of its parent
    directories (presumably where npm links the CLI when it is installed as a
    dependency of this package), and finally falls back to the bare name
    ``mmdc`` so that the PATH is searched.
    """
    relative_bin = os.path.join('node_modules', '.bin', 'mmdc')

    cwd_candidate = os.path.join(os.getcwd(), relative_bin)
    if os.path.exists(cwd_candidate):
        return cwd_candidate

    current = os.path.dirname(os.path.abspath(__file__))
    while True:
        candidate = os.path.join(current, relative_bin)
        if os.path.exists(candidate):
            return candidate
        parent = os.path.dirname(current)
        if parent == current:  # reached the filesystem root
            break
        current = parent

    return 'mmdc'


def render_mermaid_diagram(mermaid_code: str, output_path: str) -> bool:
    """Render a mermaid diagram to an image file using mmdc CLI.

    The diagram source is written to a temporary ``.mmd`` file, which is
    always removed again before returning.

    Args:
        mermaid_code: The mermaid diagram code
        output_path: Path to save the rendered image

    Returns:
        True if successful, False otherwise (mmdc missing or exited non-zero)
    """
    # Write the source explicitly as UTF-8 so non-ASCII labels do not depend
    # on the platform's locale encoding.
    with tempfile.NamedTemporaryFile(
        mode='w', suffix='.mmd', delete=False, encoding='utf-8'
    ) as temp_file:
        temp_file_path = temp_file.name
        temp_file.write(mermaid_code)

    try:
        subprocess.run([
            _locate_mmdc(),
            '-i', temp_file_path,
            '-o', output_path,
            '-b', 'transparent'
        ], check=True, capture_output=True)
        return True
    except (subprocess.CalledProcessError, FileNotFoundError) as e:
        print(f"Error rendering mermaid diagram: {e}")
        # Only CalledProcessError carries captured output; FileNotFoundError
        # (mmdc not installed) has neither attribute.
        captured_stdout = getattr(e, 'stdout', None)
        captured_stderr = getattr(e, 'stderr', None)
        if captured_stdout is not None:
            print(f"stdout: {captured_stdout.decode(errors='replace')}")
        if captured_stderr is not None:
            print(f"stderr: {captured_stderr.decode(errors='replace')}")
        return False
    finally:
        os.unlink(temp_file_path)
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def replace_mermaid_with_images(
    markdown_content: str,
    mermaid_blocks: List[Tuple[str, str]],
    output_dir: str,
    base_filename: str
) -> str:
    """Replace mermaid code blocks with image references in the markdown.

    Each block is rendered to ``<output_dir>/<base_filename>_diagram_<i>.png``.
    A block whose rendering fails is left untouched in the returned text.

    Args:
        markdown_content: The original markdown content
        mermaid_blocks: List of mermaid blocks extracted from the content,
            as (full fenced block, diagram code) tuples in document order
        output_dir: Directory to save rendered images
        base_filename: Base name for generated image files

    Returns:
        Updated markdown content with image references
    """
    updated_content = markdown_content

    for i, (full_match, mermaid_code) in enumerate(mermaid_blocks):
        image_filename = f"{base_filename}_diagram_{i}.png"
        image_path = os.path.join(output_dir, image_filename)

        if render_mermaid_diagram(mermaid_code, image_path):
            # Markdown treats backslashes as escapes, so emit the path with
            # forward slashes (a no-op on POSIX, needed for Windows paths).
            image_url = Path(image_path).as_posix()
            # Blank lines around the reference keep it a standalone paragraph.
            image_ref = f"\n\n![Diagram {i + 1}]({image_url})\n\n"
            # Replace only the first remaining occurrence so that identical
            # diagrams each receive their own rendered image.
            updated_content = updated_content.replace(full_match, image_ref, 1)

    return updated_content
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def convert_markdown_to_pdf(
    input_path: str,
    output_path: Optional[str] = None,
    temp_dir: Optional[str] = None
) -> bool:
    """Convert a markdown file to PDF, rendering any mermaid diagrams.

    Mermaid blocks are rendered to images inside ``temp_dir`` and a processed
    copy of the markdown that references those images is handed to pandoc.
    When no ``temp_dir`` is given, a fresh temporary directory is created and
    removed again once the conversion has finished.

    Args:
        input_path: Path to the input markdown file
        output_path: Path for the output PDF file (optional; defaults to the
            input path with a ``.pdf`` suffix)
        temp_dir: Directory to store temporary files (optional; a
            caller-supplied directory is never deleted)

    Returns:
        True if conversion was successful, False otherwise
    """
    import shutil  # local import: used for engine lookup and temp-dir cleanup

    input_file = Path(input_path)

    # Determine output path if not provided
    if not output_path:
        output_path = str(input_file.with_suffix('.pdf'))

    # Create temp directory if not provided, remembering whether we own it
    owns_temp_dir = not temp_dir
    if owns_temp_dir:
        temp_dir = tempfile.mkdtemp()
    else:
        os.makedirs(temp_dir, exist_ok=True)

    try:
        # Read the markdown content. Pandoc expects UTF-8 input, so read and
        # write with that encoding rather than the locale default.
        with open(input_path, 'r', encoding='utf-8') as f:
            content = f.read()

        # Find and extract mermaid blocks
        mermaid_blocks = find_mermaid_blocks(content)

        if mermaid_blocks:
            # Replace mermaid blocks with image references
            base_filename = input_file.stem
            updated_content = replace_mermaid_with_images(
                content, mermaid_blocks, temp_dir, base_filename
            )

            # Write updated markdown to temporary file
            temp_md_path = os.path.join(temp_dir, f"{base_filename}_processed.md")
            with open(temp_md_path, 'w', encoding='utf-8') as f:
                f.write(updated_content)

            input_for_pandoc = temp_md_path
        else:
            input_for_pandoc = input_path

        # Determine available PDF engine: first one found on PATH, in order
        # of preference.
        candidate_engines = ('xelatex', 'pdflatex', 'weasyprint', 'wkhtmltopdf')
        pdf_engine = next(
            (engine for engine in candidate_engines if shutil.which(engine)),
            None
        )
        if pdf_engine is None:
            print("No suitable PDF engine found (xelatex, pdflatex, weasyprint, wkhtmltopdf).")
            return False

        print(f"Using PDF engine: {pdf_engine}")

        # Convert to PDF using pandoc
        try:
            subprocess.run([
                'pandoc',
                input_for_pandoc,
                '-o', output_path,
                f'--pdf-engine={pdf_engine}',
                '-V', 'geometry:margin=1in'
            ], check=True, capture_output=True)
        except FileNotFoundError as e:
            # pandoc itself is not installed / not on PATH
            print(f"Error converting markdown to PDF: {e}")
            return False
        except subprocess.CalledProcessError as e:
            print(f"Error converting markdown to PDF: {e}")
            print(f"stdout: {e.stdout.decode(errors='replace')}")
            print(f"stderr: {e.stderr.decode(errors='replace')}")
            return False

        print(f"Successfully converted {input_path} to {output_path}")
        return True
    finally:
        # The rendered images must outlive the pandoc run, so cleanup happens
        # only here, and only for a directory this call created itself.
        if owns_temp_dir:
            shutil.rmtree(temp_dir, ignore_errors=True)
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
def process_directory(
    input_dir: str,
    output_dir: Optional[str] = None,
    temp_dir: Optional[str] = None
) -> None:
    """Process all markdown files in a directory.

    Only ``*.md`` files directly inside ``input_dir`` are converted
    (sub-directories are not searched). Files are processed in sorted order.
    When no ``temp_dir`` is given, one temporary directory is created, shared
    by all conversions, and removed afterwards.

    Args:
        input_dir: Directory containing markdown files
        output_dir: Directory to save PDF files (optional; defaults to
            ``input_dir``)
        temp_dir: Directory to store temporary files (optional; a
            caller-supplied directory is never deleted)
    """
    import shutil  # local import: only needed for temp-dir cleanup

    input_dir_path = Path(input_dir)

    # Determine output directory
    if not output_dir:
        output_dir = str(input_dir_path)
    os.makedirs(output_dir, exist_ok=True)

    # Create temp directory if not provided, remembering whether we own it
    owns_temp_dir = not temp_dir
    if owns_temp_dir:
        temp_dir = tempfile.mkdtemp()
    else:
        os.makedirs(temp_dir, exist_ok=True)

    try:
        # Process each markdown file; sorting makes the order deterministic
        # (glob order depends on the filesystem).
        for md_file in sorted(input_dir_path.glob('*.md')):
            output_path = os.path.join(output_dir, f"{md_file.stem}.pdf")
            convert_markdown_to_pdf(str(md_file), output_path, temp_dir)
    finally:
        if owns_temp_dir:
            shutil.rmtree(temp_dir, ignore_errors=True)
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
def check_dependencies() -> bool:
    """Check if required dependencies are installed.

    Verifies that ``npx`` and ``pandoc`` are on the PATH, then makes sure the
    Mermaid CLI is usable: a project-local ``node_modules/.bin/mmdc`` in the
    current working directory is accepted as-is; otherwise ``mmdc --version``
    is tried and, if that fails, a global ``npm install`` of
    ``@mermaid-js/mermaid-cli`` is attempted.

    Returns:
        True if all dependencies are available, False otherwise
    """
    import shutil  # local import: portable replacement for the `which` binary

    install_hints = {
        'npx': "Please install Node.js and npm",
        'pandoc': "Please install pandoc",
    }

    for dep, hint in install_hints.items():
        if shutil.which(dep) is None:
            print(f"Required dependency not found: {dep}")
            print(hint)
            return False

    # Check for mermaid-cli
    local_mmdc = os.path.join(os.getcwd(), 'node_modules', '.bin', 'mmdc')
    if os.path.exists(local_mmdc):
        return True

    try:
        subprocess.run(
            ['mmdc', '--version'],
            check=True, capture_output=True
        )
    except (subprocess.CalledProcessError, FileNotFoundError):
        # A missing executable raises FileNotFoundError rather than
        # CalledProcessError, so both must lead to the install attempt.
        print("Mermaid CLI not found, installing...")
        try:
            subprocess.run(['npm', 'install', '-g', '@mermaid-js/mermaid-cli'], check=True)
        except (subprocess.CalledProcessError, FileNotFoundError):
            print("Failed to install @mermaid-js/mermaid-cli")
            return False

    return True
|
|
251
|
+
|
|
252
|
+
|
|
253
|
+
def main() -> None:
    """Main entry point for the script.

    Parses the command line, checks the external dependencies and converts
    the given markdown file or directory.

    Raises:
        SystemExit: with status 1 when dependencies are missing, the input
            path does not exist, or a single-file conversion fails; with
            status 2 (via argparse) when the command line is invalid.
    """
    parser = argparse.ArgumentParser(
        description='Convert markdown files to PDF with mermaid diagram support'
    )
    # The input is optional at the parser level so that `--check-only` can be
    # used on its own; it is enforced below for real conversions.
    parser.add_argument(
        'input',
        nargs='?',
        help='Input markdown file or directory'
    )
    parser.add_argument(
        '-o', '--output',
        help='Output PDF file (for single file) or directory (for directory input)'
    )
    parser.add_argument(
        '-t', '--temp-dir',
        help='Directory to store temporary files'
    )
    parser.add_argument(
        '--check-only',
        action='store_true',
        help='Only check dependencies without converting'
    )

    args = parser.parse_args()

    if not args.check_only and not args.input:
        parser.error('the following arguments are required: input')

    # Check dependencies
    if not check_dependencies():
        print("Missing dependencies. Please install the required tools.")
        # Non-zero status so wrappers (e.g. the Node launcher) see the failure.
        raise SystemExit(1)

    if args.check_only:
        print("All dependencies are installed correctly.")
        return

    # Process input
    input_path = Path(args.input)
    if input_path.is_dir():
        process_directory(args.input, args.output, args.temp_dir)
    elif input_path.is_file():
        if not convert_markdown_to_pdf(args.input, args.output, args.temp_dir):
            raise SystemExit(1)
    else:
        print(f"Input path does not exist: {args.input}")
        raise SystemExit(1)


if __name__ == "__main__":
    main()
|
package/package.json
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@joaodotwork/md-2-pdf",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "Convert markdown files to PDF with mermaid diagram support",
|
|
5
|
+
"main": "index.js",
|
|
6
|
+
"bin": {
|
|
7
|
+
"md-2-pdf": "./bin/index.js"
|
|
8
|
+
},
|
|
9
|
+
"scripts": {
|
|
10
|
+
"test": "echo \"Error: no test specified\" && exit 1"
|
|
11
|
+
},
|
|
12
|
+
"keywords": [
|
|
13
|
+
"markdown",
|
|
14
|
+
"pdf",
|
|
15
|
+
"mermaid",
|
|
16
|
+
"converter"
|
|
17
|
+
],
|
|
18
|
+
"author": "Joao",
|
|
19
|
+
"license": "ISC",
|
|
20
|
+
"dependencies": {
|
|
21
|
+
"@mermaid-js/mermaid-cli": "^11.4.2"
|
|
22
|
+
},
|
|
23
|
+
"files": [
|
|
24
|
+
"md_to_pdf.py",
|
|
25
|
+
"bin/",
|
|
26
|
+
"requirements.txt"
|
|
27
|
+
]
|
|
28
|
+
}
|
package/requirements.txt
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# Python dependencies for md_to_pdf.py
|
|
2
|
+
|
|
3
|
+
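# External Python packages (md_to_pdf.py itself imports only the standard library; weasyprint is one of the optional PDF engines it can pass to pandoc)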
|
|
4
|
+
PyPDF2>=3.0.0
|
|
5
|
+
Pillow>=10.0.0
|
|
6
|
+
markdown>=3.4.0
|
|
7
|
+
weasyprint>=60.0.0
|
|
8
|
+
|
|
9
|
+
# External system dependencies (not installed via pip):
|
|
10
|
+
# - pandoc: sudo apt-get install pandoc
|
|
11
|
+
# - texlive-xetex: sudo apt-get install texlive-xetex
|
|
12
|
+
# - Node.js/npm: Install from https://nodejs.org/ or via package manager
|
|
13
|
+
# - mermaid-cli: npm install -g @mermaid-js/mermaid-cli
|
|
14
|
+
|
|
15
|
+
# Installation Steps:
|
|
16
|
+
# 1. Install Python dependencies: pip install -r requirements.txt
|
|
17
|
+
# 2. Install system dependencies:
|
|
18
|
+
# sudo apt-get update
|
|
19
|
+
# sudo apt-get install -y pandoc texlive-xetex
|
|
20
|
+
# 3. Install Node.js dependencies:
|
|
21
|
+
# npm install -g @mermaid-js/mermaid-cli
|