pymd2pdf 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pymd2pdf-0.1.0/.gitignore +21 -0
- pymd2pdf-0.1.0/CHANGELOG.md +36 -0
- pymd2pdf-0.1.0/Makefile +49 -0
- pymd2pdf-0.1.0/PKG-INFO +203 -0
- pymd2pdf-0.1.0/README.md +192 -0
- pymd2pdf-0.1.0/docs/plugin-authoring.md +235 -0
- pymd2pdf-0.1.0/docs/themes.md +92 -0
- pymd2pdf-0.1.0/md2pdf/__init__.py +52 -0
- pymd2pdf-0.1.0/md2pdf/assets/__init__.py +1 -0
- pymd2pdf-0.1.0/md2pdf/assets/cache.py +72 -0
- pymd2pdf-0.1.0/md2pdf/assets/fallback.py +79 -0
- pymd2pdf-0.1.0/md2pdf/assets/kroki.py +70 -0
- pymd2pdf-0.1.0/md2pdf/cli.py +114 -0
- pymd2pdf-0.1.0/md2pdf/core/__init__.py +1 -0
- pymd2pdf-0.1.0/md2pdf/core/config.py +86 -0
- pymd2pdf-0.1.0/md2pdf/core/errors.py +33 -0
- pymd2pdf-0.1.0/md2pdf/core/flowables.py +156 -0
- pymd2pdf-0.1.0/md2pdf/core/layout.py +129 -0
- pymd2pdf-0.1.0/md2pdf/core/parser.py +126 -0
- pymd2pdf-0.1.0/md2pdf/core/pipeline.py +241 -0
- pymd2pdf-0.1.0/md2pdf/core/plugin_loader.py +153 -0
- pymd2pdf-0.1.0/md2pdf/core/postprocessors.py +111 -0
- pymd2pdf-0.1.0/md2pdf/core/preprocessors.py +127 -0
- pymd2pdf-0.1.0/md2pdf/core/registry.py +113 -0
- pymd2pdf-0.1.0/md2pdf/core/styles.py +54 -0
- pymd2pdf-0.1.0/md2pdf/core/tokens.py +45 -0
- pymd2pdf-0.1.0/md2pdf/core/validator.py +118 -0
- pymd2pdf-0.1.0/md2pdf/handlers/__init__.py +25 -0
- pymd2pdf-0.1.0/md2pdf/handlers/blockquote.py +45 -0
- pymd2pdf-0.1.0/md2pdf/handlers/code.py +135 -0
- pymd2pdf-0.1.0/md2pdf/handlers/heading.py +44 -0
- pymd2pdf-0.1.0/md2pdf/handlers/inline.py +94 -0
- pymd2pdf-0.1.0/md2pdf/handlers/latex.py +142 -0
- pymd2pdf-0.1.0/md2pdf/handlers/list_.py +107 -0
- pymd2pdf-0.1.0/md2pdf/handlers/mermaid.py +117 -0
- pymd2pdf-0.1.0/md2pdf/handlers/paragraph.py +18 -0
- pymd2pdf-0.1.0/md2pdf/handlers/table.py +106 -0
- pymd2pdf-0.1.0/md2pdf/handlers/thematic_break.py +26 -0
- pymd2pdf-0.1.0/md2pdf/styles/__init__.py +8 -0
- pymd2pdf-0.1.0/md2pdf/styles/default.py +176 -0
- pymd2pdf-0.1.0/md2pdf/styles/theme.py +92 -0
- pymd2pdf-0.1.0/md2pdf.toml.example +46 -0
- pymd2pdf-0.1.0/overview.md +79 -0
- pymd2pdf-0.1.0/plans/README.md +68 -0
- pymd2pdf-0.1.0/plans/phase-1-foundation.md +200 -0
- pymd2pdf-0.1.0/plans/phase-2-parser.md +175 -0
- pymd2pdf-0.1.0/plans/phase-3-handlers.md +350 -0
- pymd2pdf-0.1.0/plans/phase-4-assets.md +222 -0
- pymd2pdf-0.1.0/plans/phase-5-plugins.md +269 -0
- pymd2pdf-0.1.0/plans/phase-6-layout.md +207 -0
- pymd2pdf-0.1.0/plans/phase-7-cli.md +272 -0
- pymd2pdf-0.1.0/plans/phase-8-testing.md +238 -0
- pymd2pdf-0.1.0/pyproject.toml +75 -0
- pymd2pdf-0.1.0/tests/conftest.py +48 -0
- pymd2pdf-0.1.0/tests/fixtures/diagrams.md +18 -0
- pymd2pdf-0.1.0/tests/fixtures/front_matter.md +9 -0
- pymd2pdf-0.1.0/tests/fixtures/mixed.md +19 -0
- pymd2pdf-0.1.0/tests/fixtures/simple.md +13 -0
- pymd2pdf-0.1.0/tests/fixtures/tables.md +54 -0
- pymd2pdf-0.1.0/tests/test_assets.py +244 -0
- pymd2pdf-0.1.0/tests/test_cli.py +80 -0
- pymd2pdf-0.1.0/tests/test_config.py +81 -0
- pymd2pdf-0.1.0/tests/test_handlers.py +378 -0
- pymd2pdf-0.1.0/tests/test_layout.py +201 -0
- pymd2pdf-0.1.0/tests/test_parser.py +185 -0
- pymd2pdf-0.1.0/tests/test_pipeline.py +160 -0
- pymd2pdf-0.1.0/tests/test_plugin_loader.py +271 -0
- pymd2pdf-0.1.0/tests/test_registry.py +112 -0
- pymd2pdf-0.1.0/tests/test_theme.py +141 -0
- pymd2pdf-0.1.0/tests/test_validator.py +76 -0
- pymd2pdf-0.1.0/uv.lock +713 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*.egg-info/
|
|
5
|
+
dist/
|
|
6
|
+
build/
|
|
7
|
+
.venv/
|
|
8
|
+
|
|
9
|
+
# md2pdf runtime artifacts
|
|
10
|
+
.md2pdf_cache/
|
|
11
|
+
*.pdf
|
|
12
|
+
|
|
13
|
+
# Testing / coverage
|
|
14
|
+
.pytest_cache/
|
|
15
|
+
.coverage
|
|
16
|
+
htmlcov/
|
|
17
|
+
|
|
18
|
+
# Editors
|
|
19
|
+
.DS_Store
|
|
20
|
+
.idea/
|
|
21
|
+
.vscode/
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to the `md2pdf` project will be documented in this file.
|
|
4
|
+
|
|
5
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
|
6
|
+
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
|
+
|
|
8
|
+
## [0.1.0] - 2026-06-12
|
|
9
|
+
|
|
10
|
+
### Added
|
|
11
|
+
- Linear PDF rendering pipeline using ReportLab and mistletoe.
|
|
12
|
+
- Pre-render document validation (`DocumentValidator`) with structured error/warning types:
|
|
13
|
+
- `UNSUPPORTED_ELEMENT` for unhandled elements.
|
|
14
|
+
- `EMPTY_TABLE` and `NESTED_TABLE` for table checks.
|
|
15
|
+
- `EMPTY_DIAGRAM` for diagram blocks.
|
|
16
|
+
- Typesetting safeguards and anti-fail composition logic (`LayoutComposer`):
|
|
17
|
+
- Orphaned heading prevention via `KeepTogether`.
|
|
18
|
+
- Heading-diagram association to eliminate blank ghost pages.
|
|
19
|
+
- Page number callbacks rendering "Page X" on footers.
|
|
20
|
+
- Left vertical accent bar rendering for blockquotes using a custom `BlockQuoteBar` flowable (supports splitting across page boundaries).
|
|
21
|
+
- Bookmark metadata generation for Table of Contents targets via `BookmarkFlowable`.
|
|
22
|
+
- Extensible 4-stage plugin system:
|
|
23
|
+
- Supports entry-point (`pyproject.toml`) and config-file (`md2pdf.toml`) loading strategies.
|
|
24
|
+
- Dynamic stylesheet registry and theme override configurations.
|
|
25
|
+
- Kroki.io client integration for Mermaid diagrams and LaTeX tikz blocks.
|
|
26
|
+
- Includes SHA-256 disk caching (`.md2pdf_cache/`) and an offline mode that renders placeholders instead of calling the API.
|
|
27
|
+
- Complete test suite covering registry, handlers, parser, theme, layout, validator, and CLI integration.
|
|
28
|
+
|
|
29
|
+
### Fixed
|
|
30
|
+
- Fixed LaTeX compilation failure for block math environments (like `align*`) on the Kroki `tikz` endpoint by adding newlines to the document wrapper.
|
|
31
|
+
- Fixed diagram sizing issues and whitespace margins by implementing PIL-based auto-cropping for both LaTeX (Tikz) and Mermaid diagrams, removing empty borders and centering diagrams correctly in the PDF layout.
|
|
32
|
+
- Added a height-capping layout safeguard (max 600.0 points) to dynamically scale down tall/large diagrams and prevent ReportLab `LayoutError` crashes.
|
|
33
|
+
- Fixed inline code rendering to correctly resolve text nested within the mistletoe AST's children array.
|
|
34
|
+
- Implemented `CodeFenceHandler` using a monospaced font, thin border, and light gray background to render generic fenced code blocks.
|
|
35
|
+
- Implemented a pipeline-level fallback formatting rule that converts any unimplemented/unsupported markdown elements to monospaced debug blocks showing their token type and content.
|
|
36
|
+
|
pymd2pdf-0.1.0/Makefile
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
.DEFAULT_GOAL := help
|
|
2
|
+
|
|
3
|
+
.PHONY: fmt fmt-check lint lint-fix check fix test test-cov help
|
|
4
|
+
|
|
5
|
+
# ---------------------------------------------------------------------------
|
|
6
|
+
# Formatting
|
|
7
|
+
# ---------------------------------------------------------------------------
|
|
8
|
+
|
|
9
|
+
fmt: ## Auto-format source code with black
|
|
10
|
+
uv run black md2pdf/ tests/
|
|
11
|
+
|
|
12
|
+
fmt-check: ## Check formatting without modifying files
|
|
13
|
+
uv run black --check md2pdf/ tests/
|
|
14
|
+
|
|
15
|
+
# ---------------------------------------------------------------------------
|
|
16
|
+
# Linting
|
|
17
|
+
# ---------------------------------------------------------------------------
|
|
18
|
+
|
|
19
|
+
lint: ## Run ruff linter
|
|
20
|
+
uv run ruff check md2pdf/ tests/
|
|
21
|
+
|
|
22
|
+
lint-fix: ## Run ruff and auto-fix safe issues
|
|
23
|
+
uv run ruff check --fix md2pdf/ tests/
|
|
24
|
+
|
|
25
|
+
# ---------------------------------------------------------------------------
|
|
26
|
+
# Combined
|
|
27
|
+
# ---------------------------------------------------------------------------
|
|
28
|
+
|
|
29
|
+
check: fmt-check lint ## Run all checks (no modifications)
|
|
30
|
+
|
|
31
|
+
fix: fmt lint-fix ## Format and auto-fix everything
|
|
32
|
+
|
|
33
|
+
# ---------------------------------------------------------------------------
|
|
34
|
+
# Testing
|
|
35
|
+
# ---------------------------------------------------------------------------
|
|
36
|
+
|
|
37
|
+
test: ## Run the test suite
|
|
38
|
+
uv run pytest
|
|
39
|
+
|
|
40
|
+
test-cov: ## Run tests with coverage report
|
|
41
|
+
uv run pytest --cov=md2pdf --cov-report=term-missing
|
|
42
|
+
|
|
43
|
+
# ---------------------------------------------------------------------------
|
|
44
|
+
# Help
|
|
45
|
+
# ---------------------------------------------------------------------------
|
|
46
|
+
|
|
47
|
+
help: ## Show this help message
|
|
48
|
+
@grep -E '^[a-zA-Z_-]+:.*##' $(MAKEFILE_LIST) \
|
|
49
|
+
| awk 'BEGIN {FS = ":.*##"}; {printf " \033[36m%-14s\033[0m %s\n", $$1, $$2}'
|
pymd2pdf-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,203 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: pymd2pdf
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Automated programmatic Markdown-to-PDF typesetting engine
|
|
5
|
+
Requires-Python: >=3.11
|
|
6
|
+
Requires-Dist: mistletoe>=1.3
|
|
7
|
+
Requires-Dist: reportlab>=4.0
|
|
8
|
+
Requires-Dist: requests>=2.31
|
|
9
|
+
Requires-Dist: typer>=0.12
|
|
10
|
+
Description-Content-Type: text/markdown
|
|
11
|
+
|
|
12
|
+
# Automated Programmatic Markdown-to-PDF Typesetting Engine
|
|
13
|
+
|
|
14
|
+
`md2pdf` converts structured Markdown documents into beautiful, print-ready PDFs. Unlike other conversion tools, it does not rely on heavy system dependencies like Pandoc, Node.js, or headless Chrome/Chromium browsers. It is written in pure Python and powered by ReportLab and mistletoe.
|
|
15
|
+
|
|
16
|
+
---
|
|
17
|
+
|
|
18
|
+
## Architecture Overview
|
|
19
|
+
|
|
20
|
+
`md2pdf` is designed as a pipeline that runs in four distinct stages: preprocessing, parsing/validation, element rendering, and layout composition.
|
|
21
|
+
|
|
22
|
+
```mermaid
|
|
23
|
+
graph TD
|
|
24
|
+
Input[Markdown File] --> Pre[Preprocessors]
|
|
25
|
+
Pre --> Parser[MarkdownParser]
|
|
26
|
+
Parser --> Tokens[Token Stream]
|
|
27
|
+
Tokens --> Val[DocumentValidator]
|
|
28
|
+
Tokens --> Reg[Plugin/Handler Registry]
|
|
29
|
+
Reg --> Handlers[Element Handlers]
|
|
30
|
+
Handlers --> Kroki[Kroki API / Cache]
|
|
31
|
+
Handlers --> Flowables[ReportLab Flowables]
|
|
32
|
+
Flowables --> Layout[LayoutComposer]
|
|
33
|
+
Layout --> Post[Postprocessors]
|
|
34
|
+
Post --> Renderer[ReportLab PDF Engine]
|
|
35
|
+
Renderer --> Output[Output PDF]
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
---
|
|
39
|
+
|
|
40
|
+
## Key Features
|
|
41
|
+
|
|
42
|
+
- **Standard Elements**: Headings (H1–H6), paragraphs, lists, blockquotes, horizontal rules, and hyperlinks.
|
|
43
|
+
- **Multi-page Tables**: Tables split cleanly across page boundaries. Headers repeat at the top of every page.
|
|
44
|
+
- **Diagrams & Math Blocks**: Renders Mermaid diagrams and LaTeX math blocks via the Kroki API, with automatic transparent/white margin cropping, offline fallbacks, and SHA-256 disk caching.
|
|
45
|
+
- **Extensible Plugin System**: Load custom element handlers, text-level preprocessors, post-processors, and stylesheet/theme layers.
|
|
46
|
+
- **Typesetting Safeguards**: Implements strict "anti-fail" layout rules including orphaned heading protection, ghost page elimination, and widow/orphan line settings.
|
|
47
|
+
- **DX-First Validation**: Validation runs before rendering to flag nested tables, empty tables, empty diagrams, and unsupported elements.
|
|
48
|
+
|
|
49
|
+
---
|
|
50
|
+
|
|
51
|
+
## Tech Stack
|
|
52
|
+
|
|
53
|
+
| Component | Library/Tool | Description |
|
|
54
|
+
| :------------------- | :--------------- | :------------------------------------------------- |
|
|
55
|
+
| **Core Language** | Python >= 3.11 | Modern Python with strict type-hinting |
|
|
56
|
+
| **PDF Generation** | ReportLab >= 4.0 | Low-level document layout and flowable engine |
|
|
57
|
+
| **Markdown Parsing** | mistletoe >= 1.3 | Fast and extensible Markdown AST parser |
|
|
58
|
+
| **HTTP Requests** | requests >= 2.31 | Handles communication with Kroki API |
|
|
59
|
+
| **CLI Framework** | typer >= 0.12 | CLI builder for options and validation errors |
|
|
60
|
+
| **Image Processing** | pillow >= 10.0 | Auto-cropping and dimension detection for diagrams |
|
|
61
|
+
|
|
62
|
+
---
|
|
63
|
+
|
|
64
|
+
## Project Structure
|
|
65
|
+
|
|
66
|
+
```txt
|
|
67
|
+
md2pdf/
|
|
68
|
+
├── docs/ # Developer documentation
|
|
69
|
+
│ ├── plugin-authoring.md # Instructions for writing plugins
|
|
70
|
+
│ └── themes.md # Themes and stylesheet reference
|
|
71
|
+
├── md2pdf/ # Core source directory
|
|
72
|
+
│ ├── assets/ # Kroki client, caching, and fallback elements
|
|
73
|
+
│ ├── core/ # Engine pipeline, parser, validator, layout, registry
|
|
74
|
+
│ ├── handlers/ # Element-specific flowable generators (headings, tables, etc.)
|
|
75
|
+
│ ├── styles/ # Default stylesheet and theme configs
|
|
76
|
+
│ ├── cli.py # CLI entry point
|
|
77
|
+
│ └── __init__.py # Public API exports (convert, Config, Pipeline)
|
|
78
|
+
├── tests/ # Automated test suite
|
|
79
|
+
│ ├── fixtures/ # Markdown and configuration test files
|
|
80
|
+
│ ├── test_cli.py # CLI integration tests
|
|
81
|
+
│ └── test_pipeline.py # Pipeline validation tests
|
|
82
|
+
├── pyproject.toml # Build system and dependency declaration
|
|
83
|
+
└── README.md # Project overview
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
---
|
|
87
|
+
|
|
88
|
+
## Logic Flows
|
|
89
|
+
|
|
90
|
+
The diagram below details the sequence of execution inside the `Pipeline` class:
|
|
91
|
+
|
|
92
|
+
```mermaid
|
|
93
|
+
sequenceDiagram
|
|
94
|
+
autonumber
|
|
95
|
+
actor CLI as CLI / User
|
|
96
|
+
participant PL as Pipeline
|
|
97
|
+
participant PR as Preprocessors
|
|
98
|
+
participant MP as MarkdownParser
|
|
99
|
+
participant DV as DocumentValidator
|
|
100
|
+
participant REG as PluginRegistry
|
|
101
|
+
participant LC as LayoutComposer
|
|
102
|
+
participant PO as Postprocessors
|
|
103
|
+
participant RL as ReportLab Engine
|
|
104
|
+
|
|
105
|
+
CLI->>PL: run(raw_md)
|
|
106
|
+
PL->>PR: run_all(raw_md)
|
|
107
|
+
PR-->>PL: preprocessed_md
|
|
108
|
+
PL->>MP: parse(preprocessed_md)
|
|
109
|
+
MP-->>PL: token_stream
|
|
110
|
+
PL->>DV: validate(token_stream)
|
|
111
|
+
DV-->>PL: validation_issues (warnings/errors)
|
|
112
|
+
PL->>REG: render(token_stream)
|
|
113
|
+
REG-->>PL: raw_flowables
|
|
114
|
+
PL->>LC: compose(raw_flowables)
|
|
115
|
+
LC-->>PL: layout_guarded_flowables
|
|
116
|
+
PL->>PO: run_all(layout_guarded_flowables)
|
|
117
|
+
PO-->>PL: finalized_flowables
|
|
118
|
+
PL->>RL: build(finalized_flowables)
|
|
119
|
+
RL-->>CLI: output.pdf
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
---
|
|
123
|
+
|
|
124
|
+
## Installation & Setup
|
|
125
|
+
|
|
126
|
+
Using `uv` (recommended):
|
|
127
|
+
```bash
|
|
128
|
+
uv tool install pymd2pdf
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
Or via standard `pip`:
|
|
132
|
+
```bash
|
|
133
|
+
pip install pymd2pdf
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
To initialize the project for local development:
|
|
137
|
+
```bash
|
|
138
|
+
# Clone the repository
|
|
139
|
+
git clone https://github.com/user/md2pdf.git
|
|
140
|
+
cd md2pdf
|
|
141
|
+
|
|
142
|
+
# Create virtual environment and install dependencies
|
|
143
|
+
uv venv
|
|
144
|
+
source .venv/bin/activate
|
|
145
|
+
uv pip install -e ".[dev]"
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
---
|
|
149
|
+
|
|
150
|
+
## Usage Examples
|
|
151
|
+
|
|
152
|
+
### Command Line Interface
|
|
153
|
+
|
|
154
|
+
Convert a Markdown file:
|
|
155
|
+
```bash
|
|
156
|
+
md2pdf input.md -o output.pdf
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
Execute pre-render validation checks without producing a PDF:
|
|
160
|
+
```bash
|
|
161
|
+
md2pdf input.md --validate-only
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
Run in offline mode to avoid calling the Kroki API (each diagram is replaced in the PDF by a placeholder box showing its source code):
|
|
165
|
+
```bash
|
|
166
|
+
md2pdf input.md -o output.pdf --offline
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
### CLI Options
|
|
170
|
+
|
|
171
|
+
| Flag | Shortcut | Description |
|
|
172
|
+
| :--- | :--- | :--- |
|
|
173
|
+
| `--output` | `-o` | Path to save the output PDF file (default: `output.pdf`). |
|
|
174
|
+
| `--config` | `-c` | Path to a custom `md2pdf.toml` config file. |
|
|
175
|
+
| `--theme` | `-t` | Name of the theme to apply (default: `default`). |
|
|
176
|
+
| `--offline` | | Skip external API requests (e.g. Kroki diagram rendering) and use local placeholders. |
|
|
177
|
+
| `--validate-only` | | Execute pre-render validation checks and exit without building a PDF. |
|
|
178
|
+
| `--verbose` | `-v` | Output debug-level logging to `stderr`. |
|
|
179
|
+
|
|
180
|
+
### Programmatic Python Usage
|
|
181
|
+
|
|
182
|
+
```python
|
|
183
|
+
from md2pdf import convert, Config, Pipeline
|
|
184
|
+
|
|
185
|
+
# Option 1: Simple conversion
|
|
186
|
+
convert("input.md", "output.pdf")
|
|
187
|
+
|
|
188
|
+
# Option 2: Advanced programmatic pipeline usage
|
|
189
|
+
config = Config(
|
|
190
|
+
offline=False,
|
|
191
|
+
cache_dir=".md2pdf_cache",
|
|
192
|
+
output_file="my_document.pdf"
|
|
193
|
+
)
|
|
194
|
+
pipeline = Pipeline(config)
|
|
195
|
+
|
|
196
|
+
# Validate markdown document
|
|
197
|
+
issues = pipeline.validate("# Hello World")
|
|
198
|
+
for issue in issues:
|
|
199
|
+
print(f"[{issue.severity}] {issue.code}: {issue.message}")
|
|
200
|
+
|
|
201
|
+
# Render markdown
|
|
202
|
+
pipeline.run(raw_md="# Document Title\n\nSome body text.")
|
|
203
|
+
```
|
pymd2pdf-0.1.0/README.md
ADDED
|
@@ -0,0 +1,192 @@
|
|
|
1
|
+
# Automated Programmatic Markdown-to-PDF Typesetting Engine
|
|
2
|
+
|
|
3
|
+
`md2pdf` converts structured Markdown documents into beautiful, print-ready PDFs. Unlike other conversion tools, it does not rely on heavy system dependencies like Pandoc, Node.js, or headless Chrome/Chromium browsers. It is written in pure Python and powered by ReportLab and mistletoe.
|
|
4
|
+
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
## Architecture Overview
|
|
8
|
+
|
|
9
|
+
`md2pdf` is designed as a pipeline that runs in four distinct stages: preprocessing, parsing/validation, element rendering, and layout composition.
|
|
10
|
+
|
|
11
|
+
```mermaid
|
|
12
|
+
graph TD
|
|
13
|
+
Input[Markdown File] --> Pre[Preprocessors]
|
|
14
|
+
Pre --> Parser[MarkdownParser]
|
|
15
|
+
Parser --> Tokens[Token Stream]
|
|
16
|
+
Tokens --> Val[DocumentValidator]
|
|
17
|
+
Tokens --> Reg[Plugin/Handler Registry]
|
|
18
|
+
Reg --> Handlers[Element Handlers]
|
|
19
|
+
Handlers --> Kroki[Kroki API / Cache]
|
|
20
|
+
Handlers --> Flowables[ReportLab Flowables]
|
|
21
|
+
Flowables --> Layout[LayoutComposer]
|
|
22
|
+
Layout --> Post[Postprocessors]
|
|
23
|
+
Post --> Renderer[ReportLab PDF Engine]
|
|
24
|
+
Renderer --> Output[Output PDF]
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
---
|
|
28
|
+
|
|
29
|
+
## Key Features
|
|
30
|
+
|
|
31
|
+
- **Standard Elements**: Headings (H1–H6), paragraphs, lists, blockquotes, horizontal rules, and hyperlinks.
|
|
32
|
+
- **Multi-page Tables**: Tables split cleanly across page boundaries. Headers repeat at the top of every page.
|
|
33
|
+
- **Diagrams & Math Blocks**: Renders Mermaid diagrams and LaTeX math blocks via the Kroki API, with automatic transparent/white margin cropping, offline fallbacks, and SHA-256 disk caching.
|
|
34
|
+
- **Extensible Plugin System**: Load custom element handlers, text-level preprocessors, post-processors, and stylesheet/theme layers.
|
|
35
|
+
- **Typesetting Safeguards**: Implements strict "anti-fail" layout rules including orphaned heading protection, ghost page elimination, and widow/orphan line settings.
|
|
36
|
+
- **DX-First Validation**: Validation runs before rendering to flag nested tables, empty tables, empty diagrams, and unsupported elements.
|
|
37
|
+
|
|
38
|
+
---
|
|
39
|
+
|
|
40
|
+
## Tech Stack
|
|
41
|
+
|
|
42
|
+
| Component | Library/Tool | Description |
|
|
43
|
+
| :------------------- | :--------------- | :------------------------------------------------- |
|
|
44
|
+
| **Core Language** | Python >= 3.11 | Modern Python with strict type-hinting |
|
|
45
|
+
| **PDF Generation** | ReportLab >= 4.0 | Low-level document layout and flowable engine |
|
|
46
|
+
| **Markdown Parsing** | mistletoe >= 1.3 | Fast and extensible Markdown AST parser |
|
|
47
|
+
| **HTTP Requests** | requests >= 2.31 | Handles communication with Kroki API |
|
|
48
|
+
| **CLI Framework** | typer >= 0.12 | CLI builder for options and validation errors |
|
|
49
|
+
| **Image Processing** | pillow >= 10.0 | Auto-cropping and dimension detection for diagrams |
|
|
50
|
+
|
|
51
|
+
---
|
|
52
|
+
|
|
53
|
+
## Project Structure
|
|
54
|
+
|
|
55
|
+
```txt
|
|
56
|
+
md2pdf/
|
|
57
|
+
├── docs/ # Developer documentation
|
|
58
|
+
│ ├── plugin-authoring.md # Instructions for writing plugins
|
|
59
|
+
│ └── themes.md # Themes and stylesheet reference
|
|
60
|
+
├── md2pdf/ # Core source directory
|
|
61
|
+
│ ├── assets/ # Kroki client, caching, and fallback elements
|
|
62
|
+
│ ├── core/ # Engine pipeline, parser, validator, layout, registry
|
|
63
|
+
│ ├── handlers/ # Element-specific flowable generators (headings, tables, etc.)
|
|
64
|
+
│ ├── styles/ # Default stylesheet and theme configs
|
|
65
|
+
│ ├── cli.py # CLI entry point
|
|
66
|
+
│ └── __init__.py # Public API exports (convert, Config, Pipeline)
|
|
67
|
+
├── tests/ # Automated test suite
|
|
68
|
+
│ ├── fixtures/ # Markdown and configuration test files
|
|
69
|
+
│ ├── test_cli.py # CLI integration tests
|
|
70
|
+
│ └── test_pipeline.py # Pipeline validation tests
|
|
71
|
+
├── pyproject.toml # Build system and dependency declaration
|
|
72
|
+
└── README.md # Project overview
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
---
|
|
76
|
+
|
|
77
|
+
## Logic Flows
|
|
78
|
+
|
|
79
|
+
The diagram below details the sequence of execution inside the `Pipeline` class:
|
|
80
|
+
|
|
81
|
+
```mermaid
|
|
82
|
+
sequenceDiagram
|
|
83
|
+
autonumber
|
|
84
|
+
actor CLI as CLI / User
|
|
85
|
+
participant PL as Pipeline
|
|
86
|
+
participant PR as Preprocessors
|
|
87
|
+
participant MP as MarkdownParser
|
|
88
|
+
participant DV as DocumentValidator
|
|
89
|
+
participant REG as PluginRegistry
|
|
90
|
+
participant LC as LayoutComposer
|
|
91
|
+
participant PO as Postprocessors
|
|
92
|
+
participant RL as ReportLab Engine
|
|
93
|
+
|
|
94
|
+
CLI->>PL: run(raw_md)
|
|
95
|
+
PL->>PR: run_all(raw_md)
|
|
96
|
+
PR-->>PL: preprocessed_md
|
|
97
|
+
PL->>MP: parse(preprocessed_md)
|
|
98
|
+
MP-->>PL: token_stream
|
|
99
|
+
PL->>DV: validate(token_stream)
|
|
100
|
+
DV-->>PL: validation_issues (warnings/errors)
|
|
101
|
+
PL->>REG: render(token_stream)
|
|
102
|
+
REG-->>PL: raw_flowables
|
|
103
|
+
PL->>LC: compose(raw_flowables)
|
|
104
|
+
LC-->>PL: layout_guarded_flowables
|
|
105
|
+
PL->>PO: run_all(layout_guarded_flowables)
|
|
106
|
+
PO-->>PL: finalized_flowables
|
|
107
|
+
PL->>RL: build(finalized_flowables)
|
|
108
|
+
RL-->>CLI: output.pdf
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
---
|
|
112
|
+
|
|
113
|
+
## Installation & Setup
|
|
114
|
+
|
|
115
|
+
Using `uv` (recommended):
|
|
116
|
+
```bash
|
|
117
|
+
uv tool install pymd2pdf
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
Or via standard `pip`:
|
|
121
|
+
```bash
|
|
122
|
+
pip install pymd2pdf
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
To initialize the project for local development:
|
|
126
|
+
```bash
|
|
127
|
+
# Clone the repository
|
|
128
|
+
git clone https://github.com/user/md2pdf.git
|
|
129
|
+
cd md2pdf
|
|
130
|
+
|
|
131
|
+
# Create virtual environment and install dependencies
|
|
132
|
+
uv venv
|
|
133
|
+
source .venv/bin/activate
|
|
134
|
+
uv pip install -e ".[dev]"
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
---
|
|
138
|
+
|
|
139
|
+
## Usage Examples
|
|
140
|
+
|
|
141
|
+
### Command Line Interface
|
|
142
|
+
|
|
143
|
+
Convert a Markdown file:
|
|
144
|
+
```bash
|
|
145
|
+
md2pdf input.md -o output.pdf
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
Execute pre-render validation checks without producing a PDF:
|
|
149
|
+
```bash
|
|
150
|
+
md2pdf input.md --validate-only
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
Run in offline mode to avoid calling the Kroki API (each diagram is replaced in the PDF by a placeholder box showing its source code):
|
|
154
|
+
```bash
|
|
155
|
+
md2pdf input.md -o output.pdf --offline
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
### CLI Options
|
|
159
|
+
|
|
160
|
+
| Flag | Shortcut | Description |
|
|
161
|
+
| :--- | :--- | :--- |
|
|
162
|
+
| `--output` | `-o` | Path to save the output PDF file (default: `output.pdf`). |
|
|
163
|
+
| `--config` | `-c` | Path to a custom `md2pdf.toml` config file. |
|
|
164
|
+
| `--theme` | `-t` | Name of the theme to apply (default: `default`). |
|
|
165
|
+
| `--offline` | | Skip external API requests (e.g. Kroki diagram rendering) and use local placeholders. |
|
|
166
|
+
| `--validate-only` | | Execute pre-render validation checks and exit without building a PDF. |
|
|
167
|
+
| `--verbose` | `-v` | Output debug-level logging to `stderr`. |
|
|
168
|
+
|
|
169
|
+
### Programmatic Python Usage
|
|
170
|
+
|
|
171
|
+
```python
|
|
172
|
+
from md2pdf import convert, Config, Pipeline
|
|
173
|
+
|
|
174
|
+
# Option 1: Simple conversion
|
|
175
|
+
convert("input.md", "output.pdf")
|
|
176
|
+
|
|
177
|
+
# Option 2: Advanced programmatic pipeline usage
|
|
178
|
+
config = Config(
|
|
179
|
+
offline=False,
|
|
180
|
+
cache_dir=".md2pdf_cache",
|
|
181
|
+
output_file="my_document.pdf"
|
|
182
|
+
)
|
|
183
|
+
pipeline = Pipeline(config)
|
|
184
|
+
|
|
185
|
+
# Validate markdown document
|
|
186
|
+
issues = pipeline.validate("# Hello World")
|
|
187
|
+
for issue in issues:
|
|
188
|
+
print(f"[{issue.severity}] {issue.code}: {issue.message}")
|
|
189
|
+
|
|
190
|
+
# Render markdown
|
|
191
|
+
pipeline.run(raw_md="# Document Title\n\nSome body text.")
|
|
192
|
+
```
|