pymd2pdf 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- md2pdf/__init__.py +52 -0
- md2pdf/assets/__init__.py +1 -0
- md2pdf/assets/cache.py +72 -0
- md2pdf/assets/fallback.py +79 -0
- md2pdf/assets/kroki.py +70 -0
- md2pdf/cli.py +114 -0
- md2pdf/core/__init__.py +1 -0
- md2pdf/core/config.py +86 -0
- md2pdf/core/errors.py +33 -0
- md2pdf/core/flowables.py +156 -0
- md2pdf/core/layout.py +129 -0
- md2pdf/core/parser.py +126 -0
- md2pdf/core/pipeline.py +241 -0
- md2pdf/core/plugin_loader.py +153 -0
- md2pdf/core/postprocessors.py +111 -0
- md2pdf/core/preprocessors.py +127 -0
- md2pdf/core/registry.py +113 -0
- md2pdf/core/styles.py +54 -0
- md2pdf/core/tokens.py +45 -0
- md2pdf/core/validator.py +118 -0
- md2pdf/handlers/__init__.py +25 -0
- md2pdf/handlers/blockquote.py +45 -0
- md2pdf/handlers/code.py +135 -0
- md2pdf/handlers/heading.py +44 -0
- md2pdf/handlers/inline.py +94 -0
- md2pdf/handlers/latex.py +142 -0
- md2pdf/handlers/list_.py +107 -0
- md2pdf/handlers/mermaid.py +117 -0
- md2pdf/handlers/paragraph.py +18 -0
- md2pdf/handlers/table.py +106 -0
- md2pdf/handlers/thematic_break.py +26 -0
- md2pdf/styles/__init__.py +8 -0
- md2pdf/styles/default.py +176 -0
- md2pdf/styles/theme.py +92 -0
- pymd2pdf-0.1.0.dist-info/METADATA +203 -0
- pymd2pdf-0.1.0.dist-info/RECORD +38 -0
- pymd2pdf-0.1.0.dist-info/WHEEL +4 -0
- pymd2pdf-0.1.0.dist-info/entry_points.txt +13 -0
md2pdf/__init__.py
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
"""md2pdf: Programmatic Markdown-to-PDF typesetting engine."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from md2pdf.core.config import Config
|
|
6
|
+
from md2pdf.core.errors import (
|
|
7
|
+
ConfigError,
|
|
8
|
+
Md2PdfError,
|
|
9
|
+
ParseError,
|
|
10
|
+
RenderError,
|
|
11
|
+
ValidationIssue,
|
|
12
|
+
)
|
|
13
|
+
from md2pdf.core.pipeline import Pipeline
|
|
14
|
+
from md2pdf.core.registry import HandlerRegistry
|
|
15
|
+
|
|
16
|
+
__version__ = "0.1.0"
|
|
17
|
+
__all__ = [
|
|
18
|
+
"Config",
|
|
19
|
+
"Pipeline",
|
|
20
|
+
"HandlerRegistry",
|
|
21
|
+
"convert",
|
|
22
|
+
"ValidationIssue",
|
|
23
|
+
"Md2PdfError",
|
|
24
|
+
"ParseError",
|
|
25
|
+
"RenderError",
|
|
26
|
+
"ConfigError",
|
|
27
|
+
]
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def convert(
    src: str,
    dst: str,
    config: Config | None = None,
    registry: HandlerRegistry | None = None,
) -> None:
    """High-level API: convert a Markdown file to a PDF.

    Args:
        src: Path to the input Markdown file. It is read as UTF-8.
        dst: Path to the output PDF file.
        config: Optional Config instance. If omitted, defaults are used.
            When supplied, its ``input_file`` and ``output_file`` attributes
            are overwritten in place with *src* and *dst*.
        registry: Optional custom HandlerRegistry instance.

    Raises:
        OSError: If *src* cannot be opened or read.
        UnicodeDecodeError: If *src* is not valid UTF-8.
    """
    if config is None:
        config = Config(input_file=src, output_file=dst)
    else:
        config.input_file = src
        config.output_file = dst

    pipeline = Pipeline(config, registry)

    # Use a context manager so the file handle is closed deterministically
    # instead of being left for the garbage collector.
    with open(src, encoding="utf-8") as fh:
        raw_md = fh.read()

    pipeline.run(raw_md)
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""md2pdf.assets — diagram rendering helpers (Kroki API, disk cache, fallback)."""
|
md2pdf/assets/cache.py
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
"""Disk-based asset cache for rendered diagram images.
|
|
2
|
+
|
|
3
|
+
Cache key is SHA-256(``{diagram_type}:{source_text}``), stored as
|
|
4
|
+
``{cache_dir}/{key}.png``. Because the key is derived from the content,
|
|
5
|
+
changing the source automatically produces a new key — no explicit
|
|
6
|
+
invalidation is needed.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import hashlib
|
|
12
|
+
import logging
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
|
|
15
|
+
logger = logging.getLogger(__name__)


class AssetCache:
    """Hash-keyed disk cache for PNG diagram images.

    Args:
        cache_dir: Directory to store cached PNG files. It is created
            (including missing parents) when the cache is constructed and
            re-created on :meth:`put` if it has been removed in the meantime.
            Defaults to ``.md2pdf_cache``.
    """

    def __init__(self, cache_dir: str = ".md2pdf_cache") -> None:
        self.cache_dir = Path(cache_dir)
        self.cache_dir.mkdir(parents=True, exist_ok=True)

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def get(self, diagram_type: str, source: str) -> bytes | None:
        """Return cached PNG bytes, or ``None`` if not cached.

        Args:
            diagram_type: Kroki diagram type string (e.g. ``"mermaid"``).
            source: Raw diagram source text.

        Returns:
            PNG bytes if the entry exists in the cache, otherwise ``None``.
        """
        path = self._path(diagram_type, source)
        # EAFP: read directly instead of exists()+read_bytes(), so an entry
        # that disappears between the two calls is reported as a plain miss.
        try:
            data = path.read_bytes()
        except FileNotFoundError:
            logger.debug("Cache miss: %s", path.name)
            return None
        logger.debug("Cache hit: %s", path.name)
        return data

    def put(self, diagram_type: str, source: str, data: bytes) -> None:
        """Store *data* in the cache under the key for (*diagram_type*, *source*).

        Args:
            diagram_type: Kroki diagram type string.
            source: Raw diagram source text.
            data: PNG bytes to cache.
        """
        path = self._path(diagram_type, source)
        # The directory may have been deleted since construction (e.g. a
        # manual cache wipe); re-create it rather than failing the write.
        self.cache_dir.mkdir(parents=True, exist_ok=True)
        path.write_bytes(data)
        logger.debug("Cached %d bytes → %s", len(data), path.name)

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    def _key(self, diagram_type: str, source: str) -> str:
        """Return the SHA-256 hex digest of ``"{diagram_type}:{source}"``."""
        raw = f"{diagram_type}:{source}"
        return hashlib.sha256(raw.encode("utf-8")).hexdigest()

    def _path(self, diagram_type: str, source: str) -> Path:
        """Return the ``{cache_dir}/{key}.png`` path for the given entry."""
        return self.cache_dir / f"{self._key(diagram_type, source)}.png"
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
"""Offline / error fallback renderer for diagram blocks.
|
|
2
|
+
|
|
3
|
+
When ``Config.offline=True`` or a network/render error occurs,
|
|
4
|
+
:class:`PlaceholderBox` is returned instead of crashing the conversion run.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from reportlab.lib import colors
|
|
10
|
+
from reportlab.platypus import Flowable
|
|
11
|
+
|
|
12
|
+
_BORDER_COLOR = colors.HexColor("#aaaaaa")
_BG_COLOR = colors.HexColor("#f9f9f9")
_LABEL_COLOR = colors.HexColor("#888888")
_SOURCE_COLOR = colors.HexColor("#555555")

_LABEL_FONT = "Helvetica-Oblique"
_SOURCE_FONT = "Courier"

_LABEL_SIZE = 8
_SOURCE_SIZE = 7
_PADDING = 6
_LINE_HEIGHT = 14


class PlaceholderBox(Flowable):
    """A grey bordered box displayed when diagram rendering is unavailable.

    Shows the diagram type and a truncated preview of the source so readers
    know something was not rendered rather than silently missing content.

    Args:
        diagram_type: Kroki diagram type string (e.g. ``"mermaid"``).
        source: Raw diagram source. Truncated to 120 characters; when drawn
            it is additionally flattened to one line and shortened to fit
            inside the box.
        width: Box width in ReportLab points. Defaults to ``400``.
        height: Box height in ReportLab points. Defaults to ``80``.
    """

    _MAX_SOURCE_PREVIEW = 120

    def __init__(
        self,
        diagram_type: str,
        source: str,
        width: float = 400,
        height: float = 80,
    ) -> None:
        super().__init__()
        self.diagram_type = diagram_type
        self.source_preview = source[: self._MAX_SOURCE_PREVIEW] + (
            "..." if len(source) > self._MAX_SOURCE_PREVIEW else ""
        )
        self.width = width
        self.height = height

    # ReportLab calls wrap() to query our dimensions before draw().
    def wrap(  # noqa: ARG002
        self, available_width: float, available_height: float
    ) -> tuple[float, float]:
        """Return the fixed ``(width, height)`` of the box."""
        return self.width, self.height

    def _fit(self, text: str, font: str, size: float, max_width: float) -> str:
        """Return *text*, shortened with a trailing ``...`` if wider than *max_width*."""
        measure = self.canv.stringWidth
        if measure(text, font, size) <= max_width:
            return text
        marker = "..."
        # Drop an existing ellipsis first so we never emit "......".
        trimmed = text.rstrip(".")
        while trimmed and measure(trimmed + marker, font, size) > max_width:
            trimmed = trimmed[:-1]
        return trimmed + marker

    def draw(self) -> None:
        """Paint the box, the status label and a one-line source preview."""
        c = self.canv
        text_width = max(0.0, self.width - 2 * _PADDING)

        # Background + border
        c.setStrokeColor(_BORDER_COLOR)
        c.setFillColor(_BG_COLOR)
        c.rect(0, 0, self.width, self.height, fill=1)

        # Label line: "[mermaid diagram — offline / render failed]"
        c.setFillColor(_LABEL_COLOR)
        c.setFont(_LABEL_FONT, _LABEL_SIZE)
        label = f"[{self.diagram_type} diagram — offline / render failed]"
        c.drawString(
            _PADDING,
            self.height - _LINE_HEIGHT,
            self._fit(label, _LABEL_FONT, _LABEL_SIZE, text_width),
        )

        # Source preview line. drawString() renders a single line, so
        # newlines/tabs in the diagram source are collapsed to spaces, and
        # the text is shortened so it cannot run past the right border.
        c.setFillColor(_SOURCE_COLOR)
        c.setFont(_SOURCE_FONT, _SOURCE_SIZE)
        preview = " ".join(self.source_preview.split())
        c.drawString(
            _PADDING,
            self.height - _LINE_HEIGHT * 2,
            self._fit(preview, _SOURCE_FONT, _SOURCE_SIZE, text_width),
        )
|
md2pdf/assets/kroki.py
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
"""HTTP client for the Kroki.io diagram-rendering API.
|
|
2
|
+
|
|
3
|
+
Kroki accepts a diagram type and source text, and returns a PNG image.
|
|
4
|
+
|
|
5
|
+
Endpoint (POST form used here to avoid URL-length limits on large diagrams)::
|
|
6
|
+
|
|
7
|
+
POST https://kroki.io/{diagram_type}/png
|
|
8
|
+
Content-Type: text/plain
|
|
9
|
+
Body: <diagram source>
|
|
10
|
+
|
|
11
|
+
Supported diagram type strings used by md2pdf:
|
|
12
|
+
|
|
13
|
+
+---------------+------------------+--------------------------------------+
|
|
14
|
+
| Token type | Kroki type | Notes |
|
|
15
|
+
+===============+==================+======================================+
|
|
16
|
+
| ``Mermaid`` | ``"mermaid"`` | Flowcharts, sequence, Gantt, etc. |
|
|
17
|
+
+---------------+------------------+--------------------------------------+
|
|
18
|
+
| ``LatexBlock``| ``"tikz"`` | Requires ``\\documentclass`` wrapper |
|
|
19
|
+
+---------------+------------------+--------------------------------------+
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
from __future__ import annotations
|
|
23
|
+
|
|
24
|
+
import logging
|
|
25
|
+
|
|
26
|
+
import requests
|
|
27
|
+
|
|
28
|
+
logger = logging.getLogger(__name__)

KROKI_BASE = "https://kroki.io"


class KrokiClient:
    """Thin wrapper around the Kroki.io HTTP API.

    Args:
        base_url: Base URL of the Kroki server. Override in tests or for
            self-hosted Kroki instances. A trailing ``/`` is tolerated.
        timeout: Request timeout in seconds. Defaults to ``15``.
    """

    def __init__(self, base_url: str = KROKI_BASE, timeout: int = 15) -> None:
        self.base_url = base_url
        self.timeout = timeout
        # One session per client so consecutive renders share its state.
        self._session = requests.Session()

    def render(self, diagram_type: str, source: str) -> bytes:
        """Fetch PNG bytes from Kroki for the given *diagram_type* and *source*.

        Args:
            diagram_type: Kroki diagram type string (e.g. ``"mermaid"``).
            source: Raw diagram source text.

        Returns:
            Raw PNG bytes returned by the Kroki API.

        Raises:
            requests.HTTPError: If the server returns a non-2xx status code.
            requests.RequestException: On any connection/timeout error.
        """
        # Strip a trailing slash so "https://host/" does not yield
        # "https://host//mermaid/png".
        url = f"{self.base_url.rstrip('/')}/{diagram_type}/png"
        resp = self._session.post(
            url,
            data=source.encode("utf-8"),
            headers={"Content-Type": "text/plain"},
            timeout=self.timeout,
        )
        resp.raise_for_status()
        logger.debug("Kroki rendered %s (%d bytes)", diagram_type, len(resp.content))
        return resp.content
|
md2pdf/cli.py
ADDED
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
"""CLI entry point for md2pdf."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
import sys
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
import typer
|
|
10
|
+
|
|
11
|
+
app = typer.Typer(
|
|
12
|
+
name="md2pdf",
|
|
13
|
+
help="Convert structured Markdown files to print-ready PDFs.",
|
|
14
|
+
add_completion=False,
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _setup_logging(verbose: bool) -> None:
|
|
19
|
+
level = logging.DEBUG if verbose else logging.WARNING
|
|
20
|
+
# Clean standard logging configuration directed to stderr
|
|
21
|
+
logging.basicConfig(
|
|
22
|
+
level=level,
|
|
23
|
+
format="%(levelname)s %(name)s: %(message)s",
|
|
24
|
+
stream=sys.stderr,
|
|
25
|
+
force=True, # Override any existing configuration
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def _report_issues(issues: list) -> None:
|
|
30
|
+
for issue in issues:
|
|
31
|
+
icon = "✗" if issue.severity == "error" else "⚠"
|
|
32
|
+
line_str = f"Line {issue.line}" if issue.line is not None else "Line ?"
|
|
33
|
+
typer.echo(f"{icon} {line_str}: [{issue.code}] {issue.message}")
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
# NOTE: the docstring of ``convert`` is kept to one line on purpose -- Typer
# shows it verbatim as the command's ``--help`` text.
#
# Flow: configure logging -> load config (file values first, then CLI
# overrides) -> read the Markdown -> either validate only or render the PDF.
# Every user-facing failure exits with status 1 and a one-line message.
@app.command()
def convert(
    input: Path = typer.Argument(..., help="Path to input .md file"),  # noqa: B008
    output: Path = typer.Option(  # noqa: B008
        Path("output.pdf"), "-o", "--output", help="Output PDF path"
    ),
    config_file: Path = typer.Option(  # noqa: B008
        None, "-c", "--config", help="Path to md2pdf.toml"
    ),
    theme: str = typer.Option("default", "-t", "--theme", help="Theme name"),  # noqa: B008
    offline: bool = typer.Option(  # noqa: B008
        False, "--offline", help="Skip external API calls; use placeholders instead"
    ),
    verbose: bool = typer.Option(  # noqa: B008
        False, "-v", "--verbose", help="Enable debug logging to stderr"
    ),
    validate_only: bool = typer.Option(  # noqa: B008
        False, "--validate-only", help="Run validation but do not render"
    ),
    min_image_scale: float = typer.Option(  # noqa: B008
        None,
        "--min-image-scale",
        help="Minimum scale factor for resizing images before deferring to a new page (e.g. 0.8)",
    ),
) -> None:
    """Convert a Markdown file to a print-ready PDF."""
    _setup_logging(verbose)

    # Defer heavy imports so --help is instant even without all deps installed.
    from md2pdf.core.config import Config
    from md2pdf.core.pipeline import Pipeline
    from md2pdf.core.registry import HandlerRegistry

    # A directory "exists" but cannot be read as Markdown; reject it here
    # instead of crashing later with a traceback.
    if not input.exists() or input.is_dir():
        typer.echo(f"✗ Input file not found: {input}", err=True)
        raise typer.Exit(code=1)

    if config_file is None:
        cfg = Config()
    else:
        if not config_file.exists():
            typer.echo(f"✗ Config file not found: {config_file}", err=True)
            raise typer.Exit(code=1)
        try:
            cfg = Config.from_toml(str(config_file))
        except (OSError, ValueError) as exc:
            # tomllib.TOMLDecodeError is a ValueError subclass.
            typer.echo(f"✗ Could not load config file {config_file}: {exc}", err=True)
            raise typer.Exit(code=1) from exc

    # CLI arguments take precedence over config file values. An option left
    # at its default is not treated as "given", so it does not clobber the
    # value loaded from the config file.
    cfg.input_file = str(input)
    if config_file is None or output != Path("output.pdf"):
        cfg.output_file = str(output)
    if theme != "default":
        cfg.theme = theme
    if offline:
        cfg.offline = True
    if min_image_scale is not None:
        cfg.min_image_scale = min_image_scale

    registry = HandlerRegistry()
    pipeline = Pipeline(cfg, registry)

    try:
        raw_md = input.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError) as exc:
        typer.echo(f"✗ Could not read input file {input}: {exc}", err=True)
        raise typer.Exit(code=1) from exc

    if validate_only:
        issues = pipeline.validate(raw_md)
        _report_issues(issues)
        has_errors = any(i.severity == "error" for i in issues)
        raise typer.Exit(code=1 if has_errors else 0)

    try:
        pipeline.run(raw_md)
    except Exception as exc:
        # Top-level boundary: log the traceback, then exit with a short message.
        logging.exception("Conversion failed")
        typer.echo(f"✗ Conversion failed: {exc}", err=True)
        raise typer.Exit(code=1) from exc
    else:
        # Report the path actually written, which may come from the config file.
        typer.echo(f"✓ PDF written to: {cfg.output_file}")
|
md2pdf/core/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Core sub-package for md2pdf."""
|
md2pdf/core/config.py
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
"""Configuration dataclass for md2pdf."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import tomllib
|
|
6
|
+
from dataclasses import dataclass, field, fields
|
|
7
|
+
from typing import TYPE_CHECKING, Any
|
|
8
|
+
|
|
9
|
+
if TYPE_CHECKING:
|
|
10
|
+
pass
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@dataclass
|
|
14
|
+
class Config:
|
|
15
|
+
"""Runtime configuration for the md2pdf pipeline.
|
|
16
|
+
|
|
17
|
+
All fields map 1:1 to entries in the ``[md2pdf]`` section of
|
|
18
|
+
``md2pdf.toml``. Unknown keys in the TOML file are silently ignored
|
|
19
|
+
so that future fields can be introduced without breaking existing
|
|
20
|
+
config files.
|
|
21
|
+
|
|
22
|
+
The ``theme_config`` attribute is populated from the optional ``[theme]``
|
|
23
|
+
section and is **not** a direct TOML field — it is excluded from the
|
|
24
|
+
known-fields filter in :meth:`from_toml`.
|
|
25
|
+
|
|
26
|
+
``plugins_dict`` is populated from the ``[plugins]`` TOML section and
|
|
27
|
+
contains three optional keys: ``handlers``, ``preprocessors``, and
|
|
28
|
+
``postprocessors``, each a list of fully-qualified class paths.
|
|
29
|
+
"""
|
|
30
|
+
|
|
31
|
+
input_file: str = ""
|
|
32
|
+
output_file: str = "output.pdf"
|
|
33
|
+
theme: str = "default"
|
|
34
|
+
offline: bool = False
|
|
35
|
+
cache_dir: str = ".md2pdf_cache"
|
|
36
|
+
min_image_scale: float = 0.8
|
|
37
|
+
|
|
38
|
+
# Structured plugin config from [plugins] TOML section.
|
|
39
|
+
# Keys: "handlers", "preprocessors", "postprocessors" → list[str]
|
|
40
|
+
plugins_dict: dict = field(default_factory=dict)
|
|
41
|
+
|
|
42
|
+
# Populated from the [theme] TOML section; None means "use ThemeConfig defaults".
|
|
43
|
+
theme_config: Any = field(default=None, repr=False)
|
|
44
|
+
|
|
45
|
+
@classmethod
|
|
46
|
+
def from_toml(cls, path: str) -> Config:
|
|
47
|
+
"""Load configuration from a TOML file.
|
|
48
|
+
|
|
49
|
+
Reads the ``[md2pdf]`` table for core settings, the ``[theme]``
|
|
50
|
+
table (if present) to build a :class:`~md2pdf.styles.theme.ThemeConfig`,
|
|
51
|
+
and the ``[plugins]`` table for plugin class paths.
|
|
52
|
+
|
|
53
|
+
Args:
|
|
54
|
+
path: Filesystem path to the TOML config file.
|
|
55
|
+
|
|
56
|
+
Returns:
|
|
57
|
+
A populated Config instance.
|
|
58
|
+
|
|
59
|
+
Raises:
|
|
60
|
+
FileNotFoundError: If *path* does not exist.
|
|
61
|
+
tomllib.TOMLDecodeError: If the file is not valid TOML.
|
|
62
|
+
"""
|
|
63
|
+
with open(path, "rb") as fh:
|
|
64
|
+
data = tomllib.load(fh)
|
|
65
|
+
|
|
66
|
+
md2pdf_section: dict = data.get("md2pdf", {})
|
|
67
|
+
# ``theme_config`` and ``plugins_dict`` are not direct TOML fields.
|
|
68
|
+
known: set[str] = {f.name for f in fields(cls)} - {"theme_config", "plugins_dict"}
|
|
69
|
+
filtered = {k: v for k, v in md2pdf_section.items() if k in known}
|
|
70
|
+
|
|
71
|
+
cfg = cls(**filtered)
|
|
72
|
+
|
|
73
|
+
# Load [theme] section into a ThemeConfig (import here to avoid
|
|
74
|
+
# circular imports / hard reportlab dependency at module load time).
|
|
75
|
+
try:
|
|
76
|
+
from md2pdf.styles.theme import ThemeConfig # noqa: PLC0415
|
|
77
|
+
|
|
78
|
+
theme_data: dict = data.get("theme", {})
|
|
79
|
+
cfg.theme_config = ThemeConfig.from_dict(theme_data)
|
|
80
|
+
except Exception:
|
|
81
|
+
cfg.theme_config = None
|
|
82
|
+
|
|
83
|
+
# Load [plugins] section into plugins_dict.
|
|
84
|
+
cfg.plugins_dict = data.get("plugins", {})
|
|
85
|
+
|
|
86
|
+
return cfg
|
md2pdf/core/errors.py
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
"""Structured error and validation issue types for md2pdf."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from typing import Literal
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dataclass
class ValidationIssue:
    """A single finding (error or warning) produced by pre-render validation."""

    # Either "error" or "warning".
    severity: Literal["error", "warning"]
    # Machine-readable identifier, e.g. "UNSUPPORTED_ELEMENT", "EMPTY_TABLE",
    # "NESTED_TABLE".
    code: str
    # Human-readable description of the problem.
    message: str
    # Source line the issue refers to; None when no line is known.
    line: int | None = None
    # Kind of element the issue refers to; None when not specified.
    element_type: str | None = None
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class Md2PdfError(Exception):
    """Root of the md2pdf exception hierarchy.

    Catch this to handle any error raised by md2pdf itself.
    """


class ParseError(Md2PdfError):
    """The Markdown input could not be parsed."""


class RenderError(Md2PdfError):
    """Generating the PDF failed."""


class ConfigError(Md2PdfError):
    """The supplied configuration is invalid."""
|
md2pdf/core/flowables.py
ADDED
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
"""Custom ReportLab flowables for md2pdf typesetting safeguards."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
from typing import TYPE_CHECKING
|
|
7
|
+
|
|
8
|
+
from reportlab.lib import colors
|
|
9
|
+
from reportlab.platypus import Flowable, Image
|
|
10
|
+
|
|
11
|
+
logger = logging.getLogger(__name__)
|
|
12
|
+
|
|
13
|
+
if TYPE_CHECKING:
|
|
14
|
+
from reportlab.lib.colors import Color
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class BlockQuoteBar(Flowable):
    """Decorate another flowable with a vertical accent bar on its left edge.

    The wrapped flowable is laid out in the space remaining to the right of
    the bar plus padding. The bar is drawn as tall as the wrapped content,
    and ``wrap``/``draw``/``split`` are delegated to the wrapped flowable so
    that each fragment produced by a split gets its own bar.
    """

    def __init__(
        self,
        inner_flowable: Flowable,
        bar_color: Color | None = None,
        bar_width: float = 3.0,
        padding: float = 8.0,
    ) -> None:
        super().__init__()
        self.inner = inner_flowable
        # A falsy colour selects the default light grey.
        self.bar_color = bar_color if bar_color else colors.HexColor("#cccccc")
        self.bar_width = bar_width
        self.padding = padding
        # Real dimensions are only known after wrap().
        self.width = 0.0
        self.height = 0.0

    def wrap(self, availWidth: float, availHeight: float) -> tuple[float, float]:
        """Measure the inner flowable in the width left beside the bar."""
        gutter = self.bar_width + self.padding
        content_w, content_h = self.inner.wrap(
            max(0.0, availWidth - gutter), availHeight
        )
        self.width = content_w + self.bar_width + self.padding
        self.height = content_h
        return self.width, self.height

    def draw(self) -> None:
        """Draw the bar at the left edge, then the inner flowable beside it."""
        canvas = self.canv
        canvas.saveState()
        canvas.setFillColor(self.bar_color)
        canvas.rect(0, 0, self.bar_width, self.height, fill=1, stroke=0)
        canvas.restoreState()
        self.inner.drawOn(canvas, self.bar_width + self.padding, 0)

    def split(self, availWidth: float, availHeight: float) -> list[Flowable]:
        """Split the inner flowable and re-wrap every fragment in a bar."""
        gutter = self.bar_width + self.padding
        fragments = self.inner.split(max(0.0, availWidth - gutter), availHeight)
        if not fragments:
            return []

        wrapped: list[Flowable] = []
        for fragment in fragments:
            wrapped.append(
                BlockQuoteBar(
                    fragment,
                    bar_color=self.bar_color,
                    bar_width=self.bar_width,
                    padding=self.padding,
                )
            )
        return wrapped
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
class BookmarkFlowable(Flowable):
    """Invisible, zero-size flowable that registers a PDF bookmark.

    Placed just before a heading, it marks the page it is drawn on under
    ``key`` so Table of Contents links can use that key as their destination.
    """

    def __init__(self, key: str) -> None:
        super().__init__()
        self.key = key

    def wrap(self, availWidth: float, availHeight: float) -> tuple[float, float]:
        """Report that the flowable takes up no space."""
        return (0.0, 0.0)

    def draw(self) -> None:
        """Register ``key`` as a bookmark on the page being drawn."""
        canvas = self.canv
        canvas.bookmarkPage(self.key)
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
class ResizableImage(Image):
    """An Image subclass that dynamically fits itself into available page space.

    If the image does not fit within the remaining vertical space on the current page,
    and we are not yet on a fresh page (i.e., we have room to defer), it triggers a
    deferral by returning its original dimensions. ReportLab will then push it to
    the next page. Deferral is only attempted when height is the limiting
    dimension, because a new page offers more height but never more width.

    On a fresh page, or if we have already deferred once, it scales down proportionally
    to fit the remaining page height/width (down to a minimum scale if needed to
    prevent layout overflows).
    """

    # Track the maximum available height seen in the current rendering pass.
    # Updated dynamically on each wrap call to learn the printable page frame height.
    # NOTE: class-level, so it is shared by every instance in the process.
    max_avail_height: float = 0.0

    # Minimum scale factor before deferring rendering to the next page.
    # Read through ``self`` so instance or subclass overrides are honoured.
    min_scale: float = 0.8

    def __init__(self, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)
        self._deferred: bool = False
        # Capture the initial target width and height to serve as our scaling baseline.
        # ReportLab's Image stores these in self.drawWidth and self.drawHeight.
        self.orig_width: float = float(self.drawWidth)
        self.orig_height: float = float(self.drawHeight)

    def wrap(self, availWidth: float, availHeight: float) -> tuple[float, float]:
        """Return the size to draw at, or the original size to force a deferral.

        Args:
            availWidth: Width offered by the frame, in points.
            availHeight: Height remaining in the frame, in points.

        Returns:
            ``(drawWidth, drawHeight)`` after scaling to fit, or
            ``(orig_width, orig_height)`` when the image should move to the
            next page.
        """
        # Update the class-level maximum seen height
        ResizableImage.max_avail_height = max(ResizableImage.max_avail_height, availHeight)

        # A zero-sized image cannot be scaled (division by zero below);
        # leave it to the base class.
        if self.orig_width <= 0 or self.orig_height <= 0:
            return super().wrap(availWidth, availHeight)

        # Calculate the scale factor required to fit the remaining space
        width_scale = availWidth / self.orig_width
        height_scale = availHeight / self.orig_height
        s = max(0.01, min(1.0, width_scale, height_scale))

        # We are on a fresh page (or as close to it as possible with preceding block elements
        # in KeepTogether like heading/bookmark) if the available height is close to the max.
        # We allow a margin of 120 points for preceding titles/headings.
        is_fresh_page = availHeight >= ResizableImage.max_avail_height - 120.0

        # Deferring only helps when height is what limits the scale. If width
        # is the limit, the original height may still fit, and ReportLab would
        # then draw the unscaled image overflowing the frame.
        height_bound = height_scale < min(1.0, width_scale)

        # Scale limit check using min_scale setting
        if s >= self.min_scale or is_fresh_page or self._deferred or not height_bound:
            # We scale the image to fit
            self.drawWidth = self.orig_width * s
            self.drawHeight = self.orig_height * s
            logger.debug(
                "ResizableImage.wrap: fit. original=(%.2fx%.2f), scale=%.2f, new=(%.2fx%.2f), is_fresh=%s",
                self.orig_width,
                self.orig_height,
                s,
                self.drawWidth,
                self.drawHeight,
                is_fresh_page,
            )
            return self.drawWidth, self.drawHeight

        # Defer to the next page by returning original dimensions.
        # height_bound guarantees orig_height > availHeight, so ReportLab
        # will find this too large for the current page and push it to the next.
        self._deferred = True
        logger.debug(
            "ResizableImage.wrap: defer. original=(%.2fx%.2f), availHeight=%.2f, scale=%.2f, max_avail=%.2f",
            self.orig_width,
            self.orig_height,
            availHeight,
            s,
            ResizableImage.max_avail_height,
        )
        return self.orig_width, self.orig_height
|