markitup-py 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
markitup/__init__.py ADDED
@@ -0,0 +1,45 @@
1
+ """MarkItUp — markdown -> docx/pdf/html, the reverse of Microsoft's MarkItDown.
2
+
3
+ Quick start:
4
+
5
+ from markitup import MarkItUp
6
+ MarkItUp(theme="report").convert("doc.md", "doc.pdf")
7
+
8
+ Pipeline: markdown --parse--> IR --render--> .docx / .pdf / .html
9
+ All visual decisions live in a Theme; the renderers are mechanical.
10
+ """
11
+ import os
12
+
13
+ from .api import MarkItUp
14
+ from .theme import Theme, Watermark, Banner, Table, make_watermark
15
+ from .parse import parse
16
+ from .render_docx import render as render_docx
17
+ from .render_html import render_html
18
+ from .render_pdf import render_pdf
19
+ from .stamp import stamp
20
+ from .fonts import list_fonts, available_fonts, is_available, SAFE_FONTS
21
+
22
__version__ = "0.3.1"

# Names exported by `from markitup import *`. `make_watermark` is imported
# into this namespace alongside Theme/Watermark/Banner/Table, so it is listed
# here as well to keep the declared public API in step with the imports.
__all__ = [
    "MarkItUp", "Theme", "Watermark", "Banner", "Table", "make_watermark",
    "parse", "render_docx", "render_html", "render_pdf",
    "stamp", "convert",
    "list_fonts", "available_fonts", "is_available", "SAFE_FONTS",
]
29
+
30
+
31
def convert(markdown_text: str, out_path: str, theme="report",
            base_url: str = ".", pdf_engine: str = "weasyprint") -> str:
    """Render a markdown string to ``out_path`` in one call.

    The renderer is picked from the lower-cased extension of ``out_path``:
    ``.docx``, ``.pdf``, ``.html`` or ``.htm``.

    Args:
        markdown_text: Markdown source handed to ``parse``.
        out_path: Destination file; its extension selects the output format.
        theme: A ``Theme`` instance, or any other value, which is passed to
            ``Theme.load``.
        base_url: Forwarded to the PDF renderer only.
        pdf_engine: Forwarded to the PDF renderer as ``engine``.

    Returns:
        The value returned by the docx/pdf renderer, or ``out_path`` itself
        for HTML output.

    Raises:
        ValueError: If the extension is not one of the supported ones.
    """
    document = parse(markdown_text)
    resolved = theme if isinstance(theme, Theme) else Theme.load(theme)
    _, raw_ext = os.path.splitext(out_path)
    suffix = raw_ext.lower()

    # Guard first; the markdown has already been parsed at this point, exactly
    # as in the branch-by-branch formulation.
    if suffix not in (".docx", ".pdf", ".html", ".htm"):
        raise ValueError(f"unsupported output extension: {suffix!r}")

    if suffix == ".docx":
        return render_docx(document, resolved, out_path)
    if suffix == ".pdf":
        return render_pdf(document, resolved, out_path,
                          engine=pdf_engine, base_url=base_url)

    # HTML: render_html produces a string, so this function writes the file.
    with open(out_path, "w", encoding="utf-8") as sink:
        sink.write(render_html(document, resolved))
    return out_path
markitup/api.py ADDED
@@ -0,0 +1,145 @@
1
+ """The public, configured entry point: the MarkItUp class.
2
+
3
+ Mirrors the ergonomics of Microsoft's MarkItDown — construct once with your
4
+ preferences, then convert many files:
5
+
6
+ from markitup import MarkItUp, Watermark
7
+ m = MarkItUp(theme="report", body_font="Georgia", text_color="#222")
8
+ m.convert("doc.md", "doc.pdf")
9
+ m.convert("doc.md") # -> ./doc.docx (current working directory)
10
+
11
+ Every visual knob is optional and overrides the chosen theme.
12
+ """
13
+ from __future__ import annotations
14
+
15
+ import os
16
+ from typing import Dict, Optional, Union
17
+
18
+ from .theme import Theme, Banner, Watermark, make_watermark, norm_hex
19
+ from .parse import parse
20
+ from .render_docx import render as _render_docx
21
+ from .render_html import render_html as _render_html
22
+ from .render_pdf import render_pdf as _render_pdf
23
+ from . import stamp as _stamp_mod
24
+
25
+
26
class MarkItUp:
    """Configured markdown converter: construct once, convert many documents.

    The constructor resolves a theme and applies the keyword overrides to it.
    The result is stored on ``self.theme`` and used by every conversion;
    ``self.pdf_engine`` is forwarded to the PDF renderer.
    """

    def __init__(
        self,
        theme: Union[str, Theme] = "report",
        *,
        # fonts
        body_font: Optional[str] = None,
        heading_font: Optional[str] = None,
        mono_font: Optional[str] = None,
        # colors (accept '#RRGGBB' or 'RRGGBB')
        text_color: Optional[str] = None,
        heading_color: Optional[str] = None,
        heading_colors: Optional[Dict[int, str]] = None,
        accent_color: Optional[str] = None,
        link_color: Optional[str] = None,
        # type & page
        base_size: Optional[float] = None,
        line_height: Optional[float] = None,
        scale: Optional[float] = None,
        page_size: Optional[str] = None,
        margin_cm: Optional[float] = None,
        # structure & marks
        base_docx: Optional[str] = None,
        watermark: Union[Watermark, str, dict, None] = None,
        banner: Union[Banner, str, dict, None] = None,
        # pdf
        pdf_engine: str = "weasyprint",
    ):
        """Resolve the theme and apply per-instance overrides.

        Args:
            theme: A ``Theme`` instance, or a value passed to ``Theme.load``.
                A ``Theme`` instance is deep-copied, so the overrides below
                never modify the caller's object.
            body_font, heading_font, mono_font: Font overrides; ignored when
                falsy (``None`` or empty string).
            text_color, heading_color, accent_color, link_color: Color
                overrides, passed through ``norm_hex``; ignored when falsy.
            heading_colors: Per-level heading colors. Keys are coerced with
                ``int`` and values passed through ``norm_hex``, then merged
                into the theme's existing per-level mapping.
            base_size, line_height, margin_cm: Numeric overrides; applied
                whenever they are not ``None`` (so ``0`` is honored).
            scale: Stored as the theme's type ``ratio`` when not ``None``.
            page_size: Page size override; ignored when falsy.
            base_docx: Path stored on the theme as ``base_docx``; ignored
                when falsy.
            watermark: Converted with ``make_watermark`` when not ``None``.
            banner: ``Banner``, ``str`` or ``dict``; converted by
                ``_coerce_banner`` when not ``None``.
            pdf_engine: Engine name forwarded to the PDF renderer.
        """
        import copy

        if isinstance(theme, Theme):
            # Work on a private copy: every override below assigns into the
            # theme, and doing that on the caller's instance would leak this
            # converter's settings into any other user of that Theme.
            th = copy.deepcopy(theme)
        else:
            th = Theme.load(theme)

        if body_font:
            th.fonts.body = body_font
        if heading_font:
            th.fonts.heading = heading_font
        if mono_font:
            th.fonts.mono = mono_font

        if text_color:
            th.colors.text = norm_hex(text_color)
        if heading_color:
            th.colors.heading = norm_hex(heading_color)
        if accent_color:
            th.colors.accent = norm_hex(accent_color)
        if link_color:
            th.colors.link = norm_hex(link_color)
        if heading_colors:
            th.colors.headings.update(
                {int(k): norm_hex(v) for k, v in heading_colors.items()}
            )

        # Numeric knobs use `is not None` so an explicit 0 is still applied.
        if base_size is not None:
            th.type.base_size = base_size
        if line_height is not None:
            th.type.line_height = line_height
        if scale is not None:
            th.type.ratio = scale
        if page_size:
            th.page.size = page_size
        if margin_cm is not None:
            th.page.margin_cm = margin_cm

        if base_docx:
            th.base_docx = base_docx
        if watermark is not None:
            th.watermark = make_watermark(watermark)
        if banner is not None:
            th.banner = _coerce_banner(banner)

        self.theme = th
        self.pdf_engine = pdf_engine

    # ---- conversion -------------------------------------------------------
    def convert(self, input_path: str, output_path: Optional[str] = None,
                *, to: str = "docx") -> str:
        """Convert a markdown file.

        Args:
            input_path: Markdown file, read as UTF-8.
            output_path: Destination file. When omitted, the output is
                written to the current working directory as
                ``<input-stem>.<to>``.
            to: Extension used only when ``output_path`` is omitted; leading
                dots are stripped.

        Returns:
            The value returned by ``convert_text``.
        """
        with open(input_path, "r", encoding="utf-8") as fh:
            md = fh.read()
        if output_path is None:
            stem = os.path.splitext(os.path.basename(input_path))[0]
            output_path = os.path.join(os.getcwd(), f"{stem}.{to.lstrip('.')}")
        # The input file's own directory is passed as base_url, which is
        # forwarded to the PDF renderer.
        base_url = os.path.dirname(os.path.abspath(input_path)) or "."
        return self.convert_text(md, output_path, base_url=base_url)

    def convert_text(self, markdown_text: str, output_path: str,
                     *, base_url: str = ".") -> str:
        """Convert a markdown string to the file at ``output_path``.

        The format is taken from the lower-cased extension of
        ``output_path`` (``.docx``, ``.pdf``, ``.html`` or ``.htm``).

        Args:
            markdown_text: Markdown source handed to ``parse``.
            output_path: Destination file.
            base_url: Forwarded to the PDF renderer only.

        Returns:
            The docx/pdf renderer's return value, or ``output_path`` for
            HTML output.

        Raises:
            ValueError: If the extension is not supported.
        """
        doc = parse(markdown_text)
        ext = os.path.splitext(output_path)[1].lower()
        if ext == ".docx":
            return _render_docx(doc, self.theme, output_path)
        if ext == ".pdf":
            return _render_pdf(doc, self.theme, output_path,
                               engine=self.pdf_engine, base_url=base_url)
        if ext in (".html", ".htm"):
            with open(output_path, "w", encoding="utf-8") as fh:
                fh.write(_render_html(doc, self.theme))
            return output_path
        raise ValueError(f"unsupported output extension: {ext!r}")

    # ---- existing-file watermarking --------------------------------------
    @staticmethod
    def stamp(input_path: str, output_path: str, watermark, **kwargs) -> str:
        """Watermark an existing .pdf/.docx. See markitup.stamp for options.

        Thin pass-through to ``markitup.stamp.stamp``; all arguments and the
        return value are forwarded unchanged.
        """
        return _stamp_mod.stamp(input_path, output_path, watermark, **kwargs)
129
+
130
+
131
+ def _coerce_banner(value) -> Optional[Banner]:
132
+ if value is None:
133
+ return None
134
+ if isinstance(value, Banner):
135
+ b = value
136
+ elif isinstance(value, str):
137
+ b = Banner(text=value)
138
+ elif isinstance(value, dict):
139
+ b = Banner(**value)
140
+ else:
141
+ raise TypeError(f"banner must be Banner | str | dict | None, got {type(value)!r}")
142
+ b.color = norm_hex(b.color)
143
+ if b.bg:
144
+ b.bg = norm_hex(b.bg)
145
+ return b
markitup/cli.py ADDED
@@ -0,0 +1,131 @@
1
+ """Command-line interface: `markitup <command>`.
2
+
3
+ markitup convert in.md -o out.pdf --theme report --font Georgia
4
+ markitup convert in.md --banner "CONFIDENTIAL" --watermark DRAFT
5
+ markitup fonts
6
+ markitup stamp report.pdf -o stamped.pdf --watermark CONFIDENTIAL --position top
7
+ markitup stamp report.docx -o stamped.docx --watermark-image logo.png
8
+
9
+ If the first argument is a file path (not a command), `convert` is assumed.
10
+ """
11
+ from __future__ import annotations
12
+
13
+ import argparse
14
+ import os
15
+ import sys
16
+
17
+ from .api import MarkItUp
18
+ from .stamp import stamp as stamp_file
19
+ from .fonts import list_fonts
20
+
21
+ _COMMANDS = {"convert", "fonts", "stamp"}
22
+
23
+
24
+ def _add_style_args(p):
25
+ p.add_argument("--theme", default="report", help="theme name or path to a theme .yaml")
26
+ p.add_argument("--font", help="body font for the whole document")
27
+ p.add_argument("--heading-font", help="font for headings")
28
+ p.add_argument("--mono-font", help="font for code")
29
+ p.add_argument("--text-color", help="body text color (#RRGGBB)")
30
+ p.add_argument("--heading-color", help="heading color (#RRGGBB)")
31
+ p.add_argument("--accent-color", help="accent color (#RRGGBB)")
32
+ p.add_argument("--base-docx", help="reference .docx whose styles define the design")
33
+ p.add_argument("--banner", help="top-of-document notice, e.g. CONFIDENTIAL")
34
+ p.add_argument("--watermark", help="text watermark")
35
+ p.add_argument("--watermark-image", help="image watermark (path)")
36
+ p.add_argument("--position", default="center", choices=["center", "top", "bottom"])
37
+ p.add_argument("--pdf-engine", default="weasyprint", choices=["weasyprint", "chromium"])
38
+
39
+
40
def main(argv=None):
    """CLI entry point; returns the selected sub-command's exit code.

    Args:
        argv: Argument list without the program name. ``None`` means
            ``sys.argv[1:]``.

    If the first argument is neither a known command nor an option (it does
    not start with ``-``), ``convert`` is inserted in front of it.
    """
    cli_args = list(sys.argv[1:]) if argv is None else list(argv)
    if cli_args:
        first = cli_args[0]
        if first not in _COMMANDS and not first.startswith("-"):
            cli_args.insert(0, "convert")  # default command

    parser = argparse.ArgumentParser(prog="markitup", description="Markdown -> docx/pdf/html")
    commands = parser.add_subparsers(dest="cmd", required=True)

    # convert
    convert_cmd = commands.add_parser("convert", help="convert a markdown file")
    convert_cmd.add_argument("input", help="path to a .md file")
    convert_cmd.add_argument("-o", "--output", help="output path (.docx/.pdf/.html); default: ./<name>.docx")
    convert_cmd.add_argument("--to", default="docx", help="format when no -o given (docx/pdf/html)")
    _add_style_args(convert_cmd)

    # fonts
    commands.add_parser("fonts", help="list fonts available for rendering")

    # stamp
    stamp_cmd = commands.add_parser("stamp", help="watermark an existing .pdf/.docx")
    stamp_cmd.add_argument("input", help="existing .pdf or .docx")
    stamp_cmd.add_argument("-o", "--output", required=True, help="output path")
    stamp_cmd.add_argument("--watermark", help="text watermark")
    stamp_cmd.add_argument("--watermark-image", help="image watermark (path)")
    stamp_cmd.add_argument("--position", default="center", choices=["center", "top", "bottom"])
    stamp_cmd.add_argument("--opacity", type=float, default=0.10)
    stamp_cmd.add_argument("--rotation", type=int, default=-45)
    stamp_cmd.add_argument("--pages", default="all", help="e.g. all | 1,3 | 2-5 (PDF only)")
    stamp_cmd.add_argument("--password", help="password for an encrypted PDF")
    stamp_cmd.add_argument("--in-front", action="store_true", help="draw over content instead of behind")

    ns = parser.parse_args(cli_args)

    if ns.cmd == "stamp":
        return _cmd_stamp(ns)
    if ns.cmd == "fonts":
        return _cmd_fonts()
    return _cmd_convert(ns)
75
+
76
+
77
def _cmd_fonts():
    """Print the installed fonts and the safe .docx families; return 0."""
    report = list_fonts()
    families = report["installed"]

    print(f"Installed fonts available for PDF rendering ({len(families)}):")
    if families:
        for family in families:
            print(f" {family}")
    else:
        # An empty list is reported as fontconfig being unavailable.
        print(" (fontconfig not available on this machine)")

    print("\nCross-platform-safe families recommended for .docx:")
    for group, names in report["safe"].items():
        joined = ', '.join(names)
        print(f" {group:5}: {joined}")
    return 0
89
+
90
+
91
+ def _cmd_convert(args):
92
+ if not os.path.isfile(args.input):
93
+ print(f"input file not found: {args.input}", file=sys.stderr)
94
+ return 2
95
+ wm = None
96
+ if args.watermark_image:
97
+ wm = {"image": args.watermark_image, "position": args.position}
98
+ elif args.watermark:
99
+ wm = {"text": args.watermark, "position": args.position}
100
+ m = MarkItUp(
101
+ theme=args.theme,
102
+ body_font=args.font, heading_font=args.heading_font, mono_font=args.mono_font,
103
+ text_color=args.text_color, heading_color=args.heading_color, accent_color=args.accent_color,
104
+ base_docx=args.base_docx, watermark=wm, banner=args.banner,
105
+ pdf_engine=args.pdf_engine,
106
+ )
107
+ out = m.convert(args.input, args.output, to=args.to)
108
+ print(f"Wrote {out}")
109
+ return 0
110
+
111
+
112
+ def _cmd_stamp(args):
113
+ if not os.path.isfile(args.input):
114
+ print(f"input file not found: {args.input}", file=sys.stderr)
115
+ return 2
116
+ if args.watermark_image:
117
+ wm = {"image": args.watermark_image}
118
+ elif args.watermark:
119
+ wm = {"text": args.watermark}
120
+ else:
121
+ print("provide --watermark or --watermark-image", file=sys.stderr)
122
+ return 2
123
+ wm.update({"position": args.position, "opacity": args.opacity, "rotation": args.rotation})
124
+ out = stamp_file(args.input, args.output, wm,
125
+ password=args.password, behind=not args.in_front, pages=args.pages)
126
+ print(f"Wrote {out}")
127
+ return 0
128
+
129
+
130
# Script entry: exit the process with main()'s return value.
if __name__ == "__main__":
    raise SystemExit(main())
markitup/fonts.py ADDED
@@ -0,0 +1,61 @@
1
+ """Font discovery.
2
+
3
+ Two different realities to be honest about:
4
+
5
+ * **PDF** is rendered *here* (WeasyPrint/Chromium), so a font only works if it is
6
+ installed on this machine. `available_fonts()` queries the system via
7
+ fontconfig and tells you what will actually render.
8
+ * **DOCX** does not embed fonts by default — it stores a font *name* and Word
9
+ substitutes whatever the reader has installed. So for docx, prefer widely
10
+ available families. `SAFE_FONTS` lists cross-platform-safe choices.
11
+
12
+ `list_fonts()` returns both so callers can make an informed choice.
13
+ """
14
+ from __future__ import annotations
15
+
16
+ import shutil
17
+ import subprocess
18
+ from typing import Dict, List
19
+
20
# Families that ship on virtually all Windows/macOS systems (safe for docx),
# grouped under the keys "serif", "sans" and "mono".
SAFE_FONTS: Dict[str, List[str]] = {
    "serif": [
        "Times New Roman",
        "Georgia",
        "Cambria",
        "Garamond",
        "Book Antiqua",
    ],
    "sans": [
        "Calibri",
        "Arial",
        "Helvetica",
        "Verdana",
        "Tahoma",
        "Trebuchet MS",
        "Segoe UI",
    ],
    "mono": [
        "Consolas",
        "Courier New",
        "Lucida Console",
    ],
}
26
+
27
+
28
def available_fonts() -> List[str]:
    """Return the font families reported by ``fc-list`` on this machine.

    The result is de-duplicated and sorted case-insensitively. An empty list
    is returned when ``fc-list`` is not on PATH or when running it fails for
    any reason (including the 10-second timeout).
    """
    if shutil.which("fc-list") is None:
        return []

    command = ["fc-list", ":", "family"]
    try:
        completed = subprocess.run(command, capture_output=True, text=True, timeout=10)
        listing = completed.stdout
    except Exception:
        # Best effort: any failure to query fontconfig means "no fonts known".
        return []

    # A line may list comma-separated localized aliases; only the first is kept.
    first_names = (row.split(",")[0].strip() for row in listing.splitlines())
    unique = {name for name in first_names if name}
    return sorted(unique, key=str.lower)
46
+
47
+
48
def list_fonts() -> Dict[str, object]:
    """Bundle both font listings a caller needs to pick a font.

    Returns:
        A dict with two keys:
        - ``"installed"``: the result of ``available_fonts()``
        - ``"safe"``: the module-level ``SAFE_FONTS`` mapping (the same
          object, not a copy)
    """
    catalogue: Dict[str, object] = {}
    catalogue["installed"] = available_fonts()
    catalogue["safe"] = SAFE_FONTS
    return catalogue
56
+
57
+
58
def is_available(family: str) -> bool:
    """Report whether ``family`` appears in ``available_fonts()``.

    The comparison is case-insensitive (both sides are lower-cased).
    """
    wanted = family.lower()
    for candidate in available_fonts():
        if candidate.lower() == wanted:
            return True
    return False
markitup/ir.py ADDED
@@ -0,0 +1,122 @@
1
+ """Intermediate Representation (IR) for MarkItUp.
2
+
3
+ The IR is the contract between parsing and rendering. The parser produces a tree
4
+ of these nodes from markdown; every renderer (docx, pdf, ...) consumes the same
5
+ tree. Nodes carry *structure and intent only* — never visual styling. All visual
6
+ decisions live in the Theme.
7
+ """
8
+ from __future__ import annotations
9
+
10
+ from dataclasses import dataclass, field
11
+ from typing import List, Optional
12
+
13
+
14
+ # --- base -------------------------------------------------------------------
15
class Node:
    """Marker base class for all IR nodes."""


# --- inline nodes -----------------------------------------------------------
@dataclass
class Text(Node):
    """Inline node holding a string in ``content``."""

    content: str


@dataclass
class Strong(Node):
    """Inline container node; children default to an empty list."""

    children: List[Node] = field(default_factory=list)


@dataclass
class Emphasis(Node):
    """Inline container node; children default to an empty list."""

    children: List[Node] = field(default_factory=list)


@dataclass
class Strike(Node):
    """Inline container node; children default to an empty list."""

    children: List[Node] = field(default_factory=list)


@dataclass
class InlineCode(Node):
    """Inline node holding a string in ``content``."""

    content: str


@dataclass
class Link(Node):
    """Inline node with a required ``href`` and optional child nodes."""

    href: str
    children: List[Node] = field(default_factory=list)


@dataclass
class Image(Node):
    """Inline node with a required ``src`` and an ``alt`` defaulting to ""."""

    src: str
    alt: str = ""


@dataclass
class LineBreak(Node):
    """Field-less inline node."""


# --- block nodes ------------------------------------------------------------
@dataclass
class Heading(Node):
    """Block node with a required ``level`` and optional child nodes."""

    level: int  # 1..6
    children: List[Node] = field(default_factory=list)


@dataclass
class Paragraph(Node):
    """Block container node; children default to an empty list."""

    children: List[Node] = field(default_factory=list)


@dataclass
class CodeBlock(Node):
    """Block node with required ``content`` and an optional ``lang``."""

    content: str
    lang: Optional[str] = None


@dataclass
class BlockQuote(Node):
    """Block container node; children default to an empty list."""

    children: List[Node] = field(default_factory=list)


@dataclass
class ListItem(Node):
    """Block container node; children default to an empty list."""

    children: List[Node] = field(default_factory=list)


@dataclass
class ListNode(Node):
    """Block node for a list: ``ordered`` flag, ``start`` number and items."""

    ordered: bool = False
    start: int = 1
    items: List[ListItem] = field(default_factory=list)


@dataclass
class TableCell(Node):
    """Table cell: child nodes plus an ``align`` value."""

    children: List[Node] = field(default_factory=list)
    align: str = "left"  # left | center | right


@dataclass
class TableRow(Node):
    """Table row holding a list of ``TableCell`` objects."""

    cells: List[TableCell] = field(default_factory=list)


@dataclass
class Table(Node):
    """Table with an optional ``header`` row and a list of body ``rows``."""

    header: Optional[TableRow] = None
    rows: List[TableRow] = field(default_factory=list)


@dataclass
class ThematicBreak(Node):
    """Field-less block node."""


@dataclass
class Document(Node):
    """Root node: top-level children plus an optional ``title``."""

    children: List[Node] = field(default_factory=list)
    title: Optional[str] = None  # populated from first H1 if present