@joaodotwork/md-2-pdf 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,95 @@
1
+ # Markdown to PDF Converter with Mermaid Support
2
+
3
+ A Python utility to convert Markdown files to PDF while properly rendering Mermaid diagrams.
4
+
5
+ ## Features
6
+
7
+ - Converts Markdown files to PDF
8
+ - Properly renders Mermaid diagrams as images in the PDF
9
+ - Processes individual files or entire directories
10
+ - Customizable output and temporary directories
11
+
12
+ ## Requirements
13
+
14
+ - Python 3.7+
15
+ - Node.js and npm (for Mermaid CLI)
16
+ - Pandoc (for Markdown to PDF conversion)
17
+ - LaTeX (XeLaTeX for PDF generation)
18
+
19
+ ## Installation
20
+
21
+ 1. Ensure you have Python 3.7+ installed
22
+ 2. Install Node.js and npm
23
+ 3. Install Pandoc: https://pandoc.org/installing.html
24
+ 4. Install a LaTeX distribution like TeX Live or MiKTeX
25
+ 5. Clone this repository or download the script
26
+
27
+ The script will automatically install the Mermaid CLI (@mermaid-js/mermaid-cli) globally via `npm install -g` if it is not already available.
28
+
29
+ ## Usage
30
+
31
+ ### Basic Usage
32
+
33
+ Convert a single Markdown file to PDF:
34
+
35
+ ```bash
36
+ python md_to_pdf.py path/to/file.md
37
+ ```
38
+
39
+ Convert all Markdown files in a directory:
40
+
41
+ ```bash
42
+ python md_to_pdf.py path/to/directory
43
+ ```
44
+
45
+ ### Advanced Options
46
+
47
+ Specify output file or directory:
48
+
49
+ ```bash
50
+ python md_to_pdf.py path/to/input.md -o path/to/output.pdf
51
+ python md_to_pdf.py path/to/input_dir -o path/to/output_dir
52
+ ```
53
+
54
+ Specify custom temporary directory:
55
+
56
+ ```bash
57
+ python md_to_pdf.py path/to/input.md -t path/to/temp_dir
58
+ ```
59
+
60
+ Check if all dependencies are installed:
61
+
62
+ ```bash
63
+ python md_to_pdf.py --check-only
64
+ ```
65
+
66
+ ## How It Works
67
+
68
+ 1. The script reads the Markdown file and extracts Mermaid diagram code blocks
69
+ 2. For each Mermaid diagram, it:
70
+ - Saves the diagram code to a temporary file
71
+ - Uses Mermaid CLI to render the diagram as an image
72
+ - Replaces the Mermaid code block with an image reference
73
+ 3. The modified Markdown with image references is saved to a temporary file
74
+ 4. Pandoc converts the modified Markdown to PDF using XeLaTeX
75
+
76
+ ## Example
77
+
78
+ Input Markdown:
79
+
80
+ ````markdown
81
+ # Sample Document
82
+
83
+ Here's a diagram:
84
+
85
+ ```mermaid
86
+ flowchart TD
87
+ A[Start] --> B{Decision}
88
+ B -->|Yes| C[Process 1]
89
+ B -->|No| D[Process 2]
90
+ C --> E[End]
91
+ D --> E
92
+ ```
93
+ ````
94
+
95
+ This will be converted to a PDF with the diagram properly rendered.
package/bin/index.js ADDED
@@ -0,0 +1,24 @@
1
#!/usr/bin/env node

// Thin launcher: runs the bundled md_to_pdf.py with python3, forwarding the
// command-line arguments and mirroring the Python process's exit status.

const { spawn } = require('child_process');
const path = require('path');

// Path to the python script (it lives one level above this bin/ directory)
const scriptPath = path.join(__dirname, '../md_to_pdf.py');

// Forward all arguments passed to this script
const args = [scriptPath, ...process.argv.slice(2)];

// Spawn the python process
// We use 'inherit' to preserve colors and output streaming
const pythonProcess = spawn('python3', args, { stdio: 'inherit' });

// 'error' fires when the process could not be spawned at all
pythonProcess.on('error', (err) => {
  console.error('Failed to start python process:', err);
  console.error('Make sure python3 is installed and available in your PATH.');
  process.exit(1);
});

pythonProcess.on('close', (code, signal) => {
  // `code` is null when the child was terminated by a signal. Passing null to
  // process.exit() would exit with status 0 and hide the failure.
  if (code === null) {
    console.error(`Python process was terminated by signal ${signal}.`);
    process.exit(1);
  }
  process.exit(code);
});
package/md_to_pdf.py ADDED
@@ -0,0 +1,295 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Markdown to PDF converter with Mermaid diagram support
4
+
5
+ This script converts markdown files to PDF while properly rendering Mermaid diagrams.
6
+ """
7
+
8
+ import argparse
9
+ import os
10
+ import re
11
+ import subprocess
12
+ import tempfile
13
+ from pathlib import Path
14
+ from typing import List, Optional, Tuple
15
+
16
+
17
def find_mermaid_blocks(markdown_content: str) -> List[Tuple[str, str]]:
    """Extract mermaid blocks from markdown content.

    The opening fence may be followed by trailing spaces or tabs, and the
    content may use either LF or CRLF line endings.

    Args:
        markdown_content: The markdown text to scan

    Returns:
        A list of tuples in document order, each containing:
        - The full match (including ```mermaid delimiters)
        - The mermaid diagram code only, with surrounding whitespace stripped
    """
    # Non-greedy body so every block ends at the first closing fence after it.
    pattern = r"```mermaid[ \t]*\r?\n(.*?)```"
    return [
        (match.group(0), match.group(1).strip())
        for match in re.finditer(pattern, markdown_content, re.DOTALL)
    ]
27
+
28
+
29
def render_mermaid_diagram(mermaid_code: str, output_path: str) -> bool:
    """Render a mermaid diagram to an image file using mmdc CLI.

    The diagram code is written to a temporary ``.mmd`` file, which is always
    removed again before returning.

    Args:
        mermaid_code: The mermaid diagram code
        output_path: Path to save the rendered image

    Returns:
        True if mmdc ran and exited successfully, False if it exited with an
        error or could not be started at all
    """
    # delete=False: the file has to be closed before mmdc reads it, so it is
    # removed explicitly in the finally block instead.
    temp_file = tempfile.NamedTemporaryFile(
        mode='w', suffix='.mmd', delete=False, encoding='utf-8'
    )
    temp_file_path = temp_file.name

    try:
        with temp_file:
            temp_file.write(mermaid_code)

        # Try local node_modules first, then the node_modules installed next
        # to this script (where npm puts the package's own dependencies),
        # then fall back to a global mmdc on PATH.
        script_dir = os.path.dirname(os.path.abspath(__file__))
        candidates = [
            os.path.join(os.getcwd(), 'node_modules', '.bin', 'mmdc'),
            os.path.join(script_dir, 'node_modules', '.bin', 'mmdc'),
        ]
        mmdc_path = next(
            (candidate for candidate in candidates if os.path.exists(candidate)),
            'mmdc',
        )

        subprocess.run([
            mmdc_path,
            '-i', temp_file_path,
            '-o', output_path,
            '-b', 'transparent'
        ], check=True, capture_output=True)
        return True
    except subprocess.CalledProcessError as e:
        # mmdc started but reported a failure; show what it printed.
        print(f"Error rendering mermaid diagram: {e}")
        print(f"stdout: {e.stdout.decode(errors='replace')}")
        print(f"stderr: {e.stderr.decode(errors='replace')}")
        return False
    except OSError as e:
        # mmdc is missing, not executable, or otherwise could not be started.
        print(f"Error rendering mermaid diagram: {e}")
        return False
    finally:
        os.unlink(temp_file_path)
65
+
66
+
67
def replace_mermaid_with_images(
    markdown_content: str,
    mermaid_blocks: List[Tuple[str, str]],
    output_dir: str,
    base_filename: str
) -> str:
    """Replace mermaid code blocks with image references in the markdown.

    Each block is rendered to ``<base_filename>_diagram_<index>.png`` inside
    output_dir. A block whose rendering fails is left in the markdown
    unchanged.

    Args:
        markdown_content: The original markdown content
        mermaid_blocks: List of (full match, diagram code) tuples extracted
            from the content
        output_dir: Directory to save rendered images
        base_filename: Base name for generated image files

    Returns:
        Updated markdown content with image references
    """
    updated_content = markdown_content

    for i, (full_match, mermaid_code) in enumerate(mermaid_blocks):
        image_filename = f"{base_filename}_diagram_{i}.png"
        image_path = os.path.join(output_dir, image_filename)

        if render_mermaid_diagram(mermaid_code, image_path):
            # Backslashes are escape characters in markdown, so always write
            # the reference with forward slashes (a no-op on POSIX).
            markdown_path = image_path.replace(os.sep, '/')
            image_ref = f"\n\n![Diagram {i+1}]({markdown_path})\n\n"
            # Replace one occurrence only: identical diagrams appear once per
            # entry in mermaid_blocks, and each should get its own image and
            # number instead of all being rewritten on the first pass.
            updated_content = updated_content.replace(full_match, image_ref, 1)

    return updated_content
96
+
97
+
98
def convert_markdown_to_pdf(
    input_path: str,
    output_path: Optional[str] = None,
    temp_dir: Optional[str] = None
) -> bool:
    """Convert a markdown file to PDF, rendering any mermaid diagrams.

    Args:
        input_path: Path to the input markdown file
        output_path: Path for the output PDF file (optional; defaults to the
            input path with a ``.pdf`` suffix)
        temp_dir: Directory to store temporary files (optional; when omitted a
            fresh temporary directory is created and removed again afterwards)

    Returns:
        True if conversion was successful, False otherwise
    """
    import shutil  # only needed here; keeps this function self-contained

    input_file = Path(input_path)

    # Determine output path if not provided
    if not output_path:
        output_path = str(input_file.with_suffix('.pdf'))

    # pandoc does not create missing parent directories for its output file
    os.makedirs(os.path.dirname(os.path.abspath(output_path)), exist_ok=True)

    # Create temp directory if not provided, remembering whether it is ours
    # so that a caller-supplied directory is never deleted.
    owns_temp_dir = not temp_dir
    if owns_temp_dir:
        temp_dir = tempfile.mkdtemp()
    else:
        os.makedirs(temp_dir, exist_ok=True)

    try:
        # Read the markdown content (pandoc itself expects UTF-8 input)
        with open(input_path, 'r', encoding='utf-8') as f:
            content = f.read()

        # Find and extract mermaid blocks
        mermaid_blocks = find_mermaid_blocks(content)

        if mermaid_blocks:
            # Replace mermaid blocks with image references
            base_filename = input_file.stem
            updated_content = replace_mermaid_with_images(
                content, mermaid_blocks, temp_dir, base_filename
            )

            # Write updated markdown to temporary file
            temp_md_path = os.path.join(temp_dir, f"{base_filename}_processed.md")
            with open(temp_md_path, 'w', encoding='utf-8') as f:
                f.write(updated_content)

            input_for_pandoc = temp_md_path
        else:
            input_for_pandoc = input_path

        # Determine available PDF engine, in order of preference
        engines = ('xelatex', 'pdflatex', 'weasyprint', 'wkhtmltopdf')
        pdf_engine = next(
            (engine for engine in engines if shutil.which(engine)), None
        )
        if pdf_engine is None:
            print("No suitable PDF engine found (xelatex, pdflatex, weasyprint, wkhtmltopdf).")
            return False

        print(f"Using PDF engine: {pdf_engine}")

        # Convert to PDF using pandoc
        try:
            subprocess.run([
                'pandoc',
                input_for_pandoc,
                '-o', output_path,
                f'--pdf-engine={pdf_engine}',
                '-V', 'geometry:margin=1in'
            ], check=True, capture_output=True)
        except subprocess.CalledProcessError as e:
            print(f"Error converting markdown to PDF: {e}")
            print(f"stdout: {e.stdout.decode(errors='replace')}")
            print(f"stderr: {e.stderr.decode(errors='replace')}")
            return False
        except FileNotFoundError as e:
            # pandoc itself is not installed / not on PATH
            print(f"Error converting markdown to PDF: {e}")
            return False

        print(f"Successfully converted {input_path} to {output_path}")
        return True
    finally:
        # The rendered images are only needed until pandoc has finished.
        if owns_temp_dir:
            shutil.rmtree(temp_dir, ignore_errors=True)
184
+
185
+
186
def process_directory(
    input_dir: str,
    output_dir: Optional[str] = None,
    temp_dir: Optional[str] = None
) -> None:
    """Process all markdown files in a directory.

    Only ``*.md`` files directly inside input_dir are converted (no
    recursion), in sorted order. Each PDF is named after its source file.

    Args:
        input_dir: Directory containing markdown files
        output_dir: Directory to save PDF files (optional; defaults to
            input_dir and is created if missing)
        temp_dir: Directory to store temporary files (optional; when omitted a
            fresh temporary directory is created and removed again afterwards)
    """
    import shutil  # only needed for cleaning up a self-created temp directory

    input_dir_path = Path(input_dir)

    # Determine output directory
    if not output_dir:
        output_dir = str(input_dir_path)
    os.makedirs(output_dir, exist_ok=True)

    # Create temp directory if not provided, remembering whether it is ours
    # so that a caller-supplied directory is never deleted.
    owns_temp_dir = not temp_dir
    if owns_temp_dir:
        temp_dir = tempfile.mkdtemp()
    else:
        os.makedirs(temp_dir, exist_ok=True)

    try:
        # Process each markdown file; sorted so the order is reproducible
        for md_file in sorted(input_dir_path.glob('*.md')):
            output_path = os.path.join(output_dir, f"{md_file.stem}.pdf")
            convert_markdown_to_pdf(str(md_file), output_path, temp_dir)
    finally:
        if owns_temp_dir:
            shutil.rmtree(temp_dir, ignore_errors=True)
215
+
216
+
217
def check_dependencies() -> bool:
    """Check if required dependencies are installed.

    Verifies that npx and pandoc are on PATH, then that the Mermaid CLI
    (mmdc) can be run. If mmdc is unavailable, a global install of
    @mermaid-js/mermaid-cli via npm is attempted.

    Returns:
        True if all dependencies are available, False otherwise
    """
    import shutil  # shutil.which is portable, unlike the external `which`

    install_hints = [
        ('npx', "Please install Node.js and npm"),
        ('pandoc', "Please install pandoc"),
    ]

    for dep, hint in install_hints:
        if shutil.which(dep) is None:
            print(f"Required dependency not found: {dep}")
            print(hint)
            return False

    # Check for mermaid-cli, preferring a project-local install when present
    local_mmdc = os.path.join(os.getcwd(), 'node_modules', '.bin', 'mmdc')
    mmdc_cmd = local_mmdc if os.path.exists(local_mmdc) else 'mmdc'
    try:
        subprocess.run(
            [mmdc_cmd, '--version'],
            check=True, capture_output=True
        )
    except (subprocess.CalledProcessError, OSError):
        # OSError covers the common case of mmdc not being installed at all
        # (FileNotFoundError), which CalledProcessError alone does not.
        print("Mermaid CLI not found, installing...")
        try:
            subprocess.run(['npm', 'install', '-g', '@mermaid-js/mermaid-cli'], check=True)
        except (subprocess.CalledProcessError, OSError):
            print("Failed to install @mermaid-js/mermaid-cli")
            return False

    return True
251
+
252
+
253
def main() -> None:
    """Main entry point for the script.

    Parses the command line, checks the external dependencies and converts
    the given file or directory. Exits with a non-zero status when
    dependencies are missing, the input path does not exist, or a single-file
    conversion fails, so that wrappers forwarding the exit code see the
    failure.
    """
    import sys  # only needed for the exit status

    parser = argparse.ArgumentParser(
        description='Convert markdown files to PDF with mermaid diagram support'
    )
    # Optional at the argparse level so that `--check-only` works without an
    # input path; it is enforced below for every other invocation.
    parser.add_argument(
        'input',
        nargs='?',
        help='Input markdown file or directory'
    )
    parser.add_argument(
        '-o', '--output',
        help='Output PDF file (for single file) or directory (for directory input)'
    )
    parser.add_argument(
        '-t', '--temp-dir',
        help='Directory to store temporary files'
    )
    parser.add_argument(
        '--check-only',
        action='store_true',
        help='Only check dependencies without converting'
    )

    args = parser.parse_args()

    if not args.check_only and not args.input:
        parser.error('the following arguments are required: input')

    # Check dependencies
    if not check_dependencies():
        print("Missing dependencies. Please install the required tools.")
        sys.exit(1)

    if args.check_only:
        print("All dependencies are installed correctly.")
        return

    # Process input
    input_path = Path(args.input)
    if input_path.is_dir():
        process_directory(args.input, args.output, args.temp_dir)
    elif input_path.is_file():
        if not convert_markdown_to_pdf(args.input, args.output, args.temp_dir):
            sys.exit(1)
    else:
        print(f"Input path does not exist: {args.input}")
        sys.exit(1)


if __name__ == "__main__":
    main()
package/package.json ADDED
@@ -0,0 +1,28 @@
1
+ {
2
+ "name": "@joaodotwork/md-2-pdf",
3
+ "version": "1.0.0",
4
+ "description": "Convert markdown files to PDF with mermaid diagram support",
5
+ "main": "index.js",
6
+ "bin": {
7
+ "md-2-pdf": "./bin/index.js"
8
+ },
9
+ "scripts": {
10
+ "test": "echo \"Error: no test specified\" && exit 1"
11
+ },
12
+ "keywords": [
13
+ "markdown",
14
+ "pdf",
15
+ "mermaid",
16
+ "converter"
17
+ ],
18
+ "author": "Joao",
19
+ "license": "ISC",
20
+ "dependencies": {
21
+ "@mermaid-js/mermaid-cli": "^11.4.2"
22
+ },
23
+ "files": [
24
+ "md_to_pdf.py",
25
+ "bin/",
26
+ "requirements.txt"
27
+ ]
28
+ }
package/requirements.txt ADDED
@@ -0,0 +1,21 @@
1
+ # Python dependencies for md_to_pdf.py
2
+
3
+ # External Python packages
4
+ PyPDF2>=3.0.0
5
+ Pillow>=10.0.0
6
+ markdown>=3.4.0
7
+ weasyprint>=60.0.0
8
+
9
+ # External system dependencies (not installed via pip):
10
+ # - pandoc: sudo apt-get install pandoc
11
+ # - texlive-xetex: sudo apt-get install texlive-xetex
12
+ # - Node.js/npm: Install from https://nodejs.org/ or via package manager
13
+ # - mermaid-cli: npm install -g @mermaid-js/mermaid-cli
14
+
15
+ # Installation Steps:
16
+ # 1. Install Python dependencies: pip install -r requirements.txt
17
+ # 2. Install system dependencies:
18
+ # sudo apt-get update
19
+ # sudo apt-get install -y pandoc texlive-xetex
20
+ # 3. Install Node.js dependencies:
21
+ # npm install -g @mermaid-js/mermaid-cli