markitup-py 0.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- markitup/__init__.py +45 -0
- markitup/api.py +145 -0
- markitup/cli.py +131 -0
- markitup/fonts.py +61 -0
- markitup/ir.py +122 -0
- markitup/parse.py +196 -0
- markitup/py.typed +0 -0
- markitup/render_docx.py +521 -0
- markitup/render_html.py +274 -0
- markitup/render_pdf.py +50 -0
- markitup/stamp.py +139 -0
- markitup/theme.py +204 -0
- markitup/themes/report.yaml +44 -0
- markitup_py-0.3.1.dist-info/METADATA +205 -0
- markitup_py-0.3.1.dist-info/RECORD +19 -0
- markitup_py-0.3.1.dist-info/WHEEL +5 -0
- markitup_py-0.3.1.dist-info/entry_points.txt +2 -0
- markitup_py-0.3.1.dist-info/licenses/LICENSE +21 -0
- markitup_py-0.3.1.dist-info/top_level.txt +1 -0
markitup/__init__.py
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
"""MarkItUp — markdown -> docx/pdf/html, the reverse of Microsoft's MarkItDown.
|
|
2
|
+
|
|
3
|
+
Quick start:
|
|
4
|
+
|
|
5
|
+
from markitup import MarkItUp
|
|
6
|
+
MarkItUp(theme="report").convert("doc.md", "doc.pdf")
|
|
7
|
+
|
|
8
|
+
Pipeline: markdown --parse--> IR --render--> .docx / .pdf / .html
|
|
9
|
+
All visual decisions live in a Theme; the renderers are mechanical.
|
|
10
|
+
"""
|
|
11
|
+
import os
|
|
12
|
+
|
|
13
|
+
from .api import MarkItUp
|
|
14
|
+
from .theme import Theme, Watermark, Banner, Table, make_watermark
|
|
15
|
+
from .parse import parse
|
|
16
|
+
from .render_docx import render as render_docx
|
|
17
|
+
from .render_html import render_html
|
|
18
|
+
from .render_pdf import render_pdf
|
|
19
|
+
from .stamp import stamp
|
|
20
|
+
from .fonts import list_fonts, available_fonts, is_available, SAFE_FONTS
|
|
21
|
+
|
|
22
|
+
__version__ = "0.3.1"
|
|
23
|
+
# Public API of the package. Every name imported above for re-export is listed
# here, including `make_watermark`, which was imported but previously omitted
# (so `from markitup import *` skipped it and linters flagged the import as
# unused).
__all__ = [
    "MarkItUp", "Theme", "Watermark", "Banner", "Table", "make_watermark",
    "parse", "render_docx", "render_html", "render_pdf",
    "stamp", "convert",
    "list_fonts", "available_fonts", "is_available", "SAFE_FONTS",
]
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def convert(markdown_text: str, out_path: str, theme="report",
            base_url: str = ".", pdf_engine: str = "weasyprint") -> str:
    """Render a markdown string to `out_path`; the suffix selects the format.

    `theme` is used as-is when it is already a Theme, otherwise it is passed
    to `Theme.load`. `base_url` and `pdf_engine` are forwarded to the PDF
    renderer only.

    Returns the value produced by the docx/pdf renderer, or `out_path` for
    HTML output. Raises ValueError for any other extension.
    """
    # Parse and resolve the theme first, so those errors surface before the
    # extension check (same order as the checks below rely on).
    document = parse(markdown_text)
    resolved = Theme.load(theme) if not isinstance(theme, Theme) else theme

    _, suffix = os.path.splitext(out_path)
    ext = suffix.lower()

    if ext not in (".docx", ".pdf", ".html", ".htm"):
        raise ValueError(f"unsupported output extension: {ext!r}")

    if ext == ".docx":
        return render_docx(document, resolved, out_path)
    if ext == ".pdf":
        return render_pdf(document, resolved, out_path,
                          engine=pdf_engine, base_url=base_url)

    # .html / .htm: the HTML renderer returns text, so write it out here.
    with open(out_path, "w", encoding="utf-8") as handle:
        handle.write(render_html(document, resolved))
    return out_path
|
markitup/api.py
ADDED
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
"""The public, configured entry point: the MarkItUp class.
|
|
2
|
+
|
|
3
|
+
Mirrors the ergonomics of Microsoft's MarkItDown — construct once with your
|
|
4
|
+
preferences, then convert many files:
|
|
5
|
+
|
|
6
|
+
from markitup import MarkItUp, Watermark
|
|
7
|
+
m = MarkItUp(theme="report", body_font="Georgia", text_color="#222")
|
|
8
|
+
m.convert("doc.md", "doc.pdf")
|
|
9
|
+
m.convert("doc.md") # -> ./doc.docx (current working directory)
|
|
10
|
+
|
|
11
|
+
Every visual knob is optional and overrides the chosen theme.
|
|
12
|
+
"""
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import os
|
|
16
|
+
from typing import Dict, Optional, Union
|
|
17
|
+
|
|
18
|
+
from .theme import Theme, Banner, Watermark, make_watermark, norm_hex
|
|
19
|
+
from .parse import parse
|
|
20
|
+
from .render_docx import render as _render_docx
|
|
21
|
+
from .render_html import render_html as _render_html
|
|
22
|
+
from .render_pdf import render_pdf as _render_pdf
|
|
23
|
+
from . import stamp as _stamp_mod
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class MarkItUp:
    """Reusable converter bound to one theme and one PDF engine.

    Construct it once with a theme plus any keyword overrides, then call
    `convert` / `convert_text` as often as needed.
    """

    def __init__(
        self,
        theme: Union[str, Theme] = "report",
        *,
        # fonts
        body_font: Optional[str] = None,
        heading_font: Optional[str] = None,
        mono_font: Optional[str] = None,
        # colors (accept '#RRGGBB' or 'RRGGBB')
        text_color: Optional[str] = None,
        heading_color: Optional[str] = None,
        heading_colors: Optional[Dict[int, str]] = None,
        accent_color: Optional[str] = None,
        link_color: Optional[str] = None,
        # type & page
        base_size: Optional[float] = None,
        line_height: Optional[float] = None,
        scale: Optional[float] = None,
        page_size: Optional[str] = None,
        margin_cm: Optional[float] = None,
        # structure & marks
        base_docx: Optional[str] = None,
        watermark: Union[Watermark, str, dict, None] = None,
        banner: Union[Banner, str, dict, None] = None,
        # pdf
        pdf_engine: str = "weasyprint",
    ):
        """Resolve `theme` and apply every override that was supplied.

        `theme` is either a Theme instance or a value handed to `Theme.load`.
        String overrides are skipped when falsy (None or ""); numeric ones are
        applied whenever they are not None, so 0 is honoured. Color values are
        passed through `norm_hex`. `scale` is stored as the theme's type
        `ratio`.
        """
        import copy  # local import: only needed for the defensive copy below

        # Work on a private copy of a caller-supplied Theme so the overrides
        # below never leak into an object the caller still holds or shares
        # between several converters.
        if isinstance(theme, Theme):
            th = copy.deepcopy(theme)
        else:
            th = Theme.load(theme)

        if body_font:
            th.fonts.body = body_font
        if heading_font:
            th.fonts.heading = heading_font
        if mono_font:
            th.fonts.mono = mono_font

        if text_color:
            th.colors.text = norm_hex(text_color)
        if heading_color:
            th.colors.heading = norm_hex(heading_color)
        if accent_color:
            th.colors.accent = norm_hex(accent_color)
        if link_color:
            th.colors.link = norm_hex(link_color)
        if heading_colors:
            # Keys are coerced to int so {"1": "#000"} and {1: "#000"} both work.
            th.colors.headings.update(
                {int(k): norm_hex(v) for k, v in heading_colors.items()}
            )

        if base_size is not None:
            th.type.base_size = base_size
        if line_height is not None:
            th.type.line_height = line_height
        if scale is not None:
            th.type.ratio = scale
        if page_size:
            th.page.size = page_size
        if margin_cm is not None:
            th.page.margin_cm = margin_cm

        if base_docx:
            th.base_docx = base_docx
        if watermark is not None:
            th.watermark = make_watermark(watermark)
        if banner is not None:
            th.banner = _coerce_banner(banner)

        self.theme = th
        self.pdf_engine = pdf_engine

    # ---- conversion -------------------------------------------------------
    def convert(self, input_path: str, output_path: Optional[str] = None,
                *, to: str = "docx") -> str:
        """Convert a markdown file. If `output_path` is omitted, the output is
        written to the current working directory as <input-stem>.<to>.

        `to` is only consulted when `output_path` is None; a leading dot is
        tolerated. The input file's directory is passed on as `base_url`.
        Returns whatever `convert_text` returns.
        """
        with open(input_path, "r", encoding="utf-8") as fh:
            md = fh.read()
        if output_path is None:
            stem = os.path.splitext(os.path.basename(input_path))[0]
            output_path = os.path.join(os.getcwd(), f"{stem}.{to.lstrip('.')}")
        base_url = os.path.dirname(os.path.abspath(input_path)) or "."
        return self.convert_text(md, output_path, base_url=base_url)

    def convert_text(self, markdown_text: str, output_path: str,
                     *, base_url: str = ".") -> str:
        """Convert a markdown string to the file at output_path (format from ext).

        Supported extensions (case-insensitive): .docx, .pdf, .html, .htm.
        Returns the renderer's return value for docx/pdf and `output_path`
        for HTML. Raises ValueError for any other extension.
        """
        doc = parse(markdown_text)
        ext = os.path.splitext(output_path)[1].lower()
        if ext == ".docx":
            return _render_docx(doc, self.theme, output_path)
        if ext == ".pdf":
            return _render_pdf(doc, self.theme, output_path,
                               engine=self.pdf_engine, base_url=base_url)
        if ext in (".html", ".htm"):
            # The HTML renderer returns text; writing the file happens here.
            with open(output_path, "w", encoding="utf-8") as fh:
                fh.write(_render_html(doc, self.theme))
            return output_path
        raise ValueError(f"unsupported output extension: {ext!r}")

    # ---- existing-file watermarking --------------------------------------
    @staticmethod
    def stamp(input_path: str, output_path: str, watermark, **kwargs) -> str:
        """Watermark an existing .pdf/.docx. See markitup.stamp for options.

        Thin pass-through to `markitup.stamp.stamp`; all arguments are
        forwarded unchanged.
        """
        return _stamp_mod.stamp(input_path, output_path, watermark, **kwargs)
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def _coerce_banner(value) -> Optional[Banner]:
|
|
132
|
+
if value is None:
|
|
133
|
+
return None
|
|
134
|
+
if isinstance(value, Banner):
|
|
135
|
+
b = value
|
|
136
|
+
elif isinstance(value, str):
|
|
137
|
+
b = Banner(text=value)
|
|
138
|
+
elif isinstance(value, dict):
|
|
139
|
+
b = Banner(**value)
|
|
140
|
+
else:
|
|
141
|
+
raise TypeError(f"banner must be Banner | str | dict | None, got {type(value)!r}")
|
|
142
|
+
b.color = norm_hex(b.color)
|
|
143
|
+
if b.bg:
|
|
144
|
+
b.bg = norm_hex(b.bg)
|
|
145
|
+
return b
|
markitup/cli.py
ADDED
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
"""Command-line interface: `markitup <command>`.
|
|
2
|
+
|
|
3
|
+
markitup convert in.md -o out.pdf --theme report --font Georgia
|
|
4
|
+
markitup convert in.md --banner "CONFIDENTIAL" --watermark DRAFT
|
|
5
|
+
markitup fonts
|
|
6
|
+
markitup stamp report.pdf -o stamped.pdf --watermark CONFIDENTIAL --position top
|
|
7
|
+
markitup stamp report.docx -o stamped.docx --watermark-image logo.png
|
|
8
|
+
|
|
9
|
+
If the first argument is a file path (not a command), `convert` is assumed.
|
|
10
|
+
"""
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import argparse
|
|
14
|
+
import os
|
|
15
|
+
import sys
|
|
16
|
+
|
|
17
|
+
from .api import MarkItUp
|
|
18
|
+
from .stamp import stamp as stamp_file
|
|
19
|
+
from .fonts import list_fonts
|
|
20
|
+
|
|
21
|
+
_COMMANDS = {"convert", "fonts", "stamp"}
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _add_style_args(p):
|
|
25
|
+
p.add_argument("--theme", default="report", help="theme name or path to a theme .yaml")
|
|
26
|
+
p.add_argument("--font", help="body font for the whole document")
|
|
27
|
+
p.add_argument("--heading-font", help="font for headings")
|
|
28
|
+
p.add_argument("--mono-font", help="font for code")
|
|
29
|
+
p.add_argument("--text-color", help="body text color (#RRGGBB)")
|
|
30
|
+
p.add_argument("--heading-color", help="heading color (#RRGGBB)")
|
|
31
|
+
p.add_argument("--accent-color", help="accent color (#RRGGBB)")
|
|
32
|
+
p.add_argument("--base-docx", help="reference .docx whose styles define the design")
|
|
33
|
+
p.add_argument("--banner", help="top-of-document notice, e.g. CONFIDENTIAL")
|
|
34
|
+
p.add_argument("--watermark", help="text watermark")
|
|
35
|
+
p.add_argument("--watermark-image", help="image watermark (path)")
|
|
36
|
+
p.add_argument("--position", default="center", choices=["center", "top", "bottom"])
|
|
37
|
+
p.add_argument("--pdf-engine", default="weasyprint", choices=["weasyprint", "chromium"])
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def main(argv=None):
    """Parse the command line and run the selected sub-command.

    `argv` defaults to `sys.argv[1:]`. When the first argument is neither a
    known command nor an option, `convert` is inserted in front of it.
    Returns the sub-command's exit status.
    """
    cli_args = list(sys.argv[1:]) if argv is None else list(argv)
    if cli_args:
        first = cli_args[0]
        if not (first in _COMMANDS or first.startswith("-")):
            cli_args.insert(0, "convert")  # default command

    parser = argparse.ArgumentParser(prog="markitup", description="Markdown -> docx/pdf/html")
    commands = parser.add_subparsers(dest="cmd", required=True)

    # convert
    convert_p = commands.add_parser("convert", help="convert a markdown file")
    convert_p.add_argument("input", help="path to a .md file")
    convert_p.add_argument("-o", "--output", help="output path (.docx/.pdf/.html); default: ./<name>.docx")
    convert_p.add_argument("--to", default="docx", help="format when no -o given (docx/pdf/html)")
    _add_style_args(convert_p)

    # fonts
    commands.add_parser("fonts", help="list fonts available for rendering")

    # stamp
    stamp_p = commands.add_parser("stamp", help="watermark an existing .pdf/.docx")
    stamp_p.add_argument("input", help="existing .pdf or .docx")
    stamp_p.add_argument("-o", "--output", required=True, help="output path")
    stamp_p.add_argument("--watermark", help="text watermark")
    stamp_p.add_argument("--watermark-image", help="image watermark (path)")
    stamp_p.add_argument("--position", default="center", choices=["center", "top", "bottom"])
    stamp_p.add_argument("--opacity", type=float, default=0.10)
    stamp_p.add_argument("--rotation", type=int, default=-45)
    stamp_p.add_argument("--pages", default="all", help="e.g. all | 1,3 | 2-5 (PDF only)")
    stamp_p.add_argument("--password", help="password for an encrypted PDF")
    stamp_p.add_argument("--in-front", action="store_true", help="draw over content instead of behind")

    parsed = parser.parse_args(cli_args)

    if parsed.cmd == "stamp":
        return _cmd_stamp(parsed)
    if parsed.cmd == "fonts":
        return _cmd_fonts()
    return _cmd_convert(parsed)
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def _cmd_fonts():
    """Print the locally installed fonts and the curated safe families; return 0."""
    report = list_fonts()
    local_fonts = report["installed"]

    print(f"Installed fonts available for PDF rendering ({len(local_fonts)}):")
    if local_fonts:
        for name in local_fonts:
            print(f" {name}")
    else:
        # An empty list is reported as "no fontconfig" rather than "no fonts".
        print(" (fontconfig not available on this machine)")

    print("\nCross-platform-safe families recommended for .docx:")
    for group, fams in report["safe"].items():
        print(f" {group:5}: {', '.join(fams)}")
    return 0
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def _cmd_convert(args):
|
|
92
|
+
if not os.path.isfile(args.input):
|
|
93
|
+
print(f"input file not found: {args.input}", file=sys.stderr)
|
|
94
|
+
return 2
|
|
95
|
+
wm = None
|
|
96
|
+
if args.watermark_image:
|
|
97
|
+
wm = {"image": args.watermark_image, "position": args.position}
|
|
98
|
+
elif args.watermark:
|
|
99
|
+
wm = {"text": args.watermark, "position": args.position}
|
|
100
|
+
m = MarkItUp(
|
|
101
|
+
theme=args.theme,
|
|
102
|
+
body_font=args.font, heading_font=args.heading_font, mono_font=args.mono_font,
|
|
103
|
+
text_color=args.text_color, heading_color=args.heading_color, accent_color=args.accent_color,
|
|
104
|
+
base_docx=args.base_docx, watermark=wm, banner=args.banner,
|
|
105
|
+
pdf_engine=args.pdf_engine,
|
|
106
|
+
)
|
|
107
|
+
out = m.convert(args.input, args.output, to=args.to)
|
|
108
|
+
print(f"Wrote {out}")
|
|
109
|
+
return 0
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def _cmd_stamp(args):
|
|
113
|
+
if not os.path.isfile(args.input):
|
|
114
|
+
print(f"input file not found: {args.input}", file=sys.stderr)
|
|
115
|
+
return 2
|
|
116
|
+
if args.watermark_image:
|
|
117
|
+
wm = {"image": args.watermark_image}
|
|
118
|
+
elif args.watermark:
|
|
119
|
+
wm = {"text": args.watermark}
|
|
120
|
+
else:
|
|
121
|
+
print("provide --watermark or --watermark-image", file=sys.stderr)
|
|
122
|
+
return 2
|
|
123
|
+
wm.update({"position": args.position, "opacity": args.opacity, "rotation": args.rotation})
|
|
124
|
+
out = stamp_file(args.input, args.output, wm,
|
|
125
|
+
password=args.password, behind=not args.in_front, pages=args.pages)
|
|
126
|
+
print(f"Wrote {out}")
|
|
127
|
+
return 0
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
if __name__ == "__main__":
|
|
131
|
+
sys.exit(main())
|
markitup/fonts.py
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
"""Font discovery.
|
|
2
|
+
|
|
3
|
+
Two different realities to be honest about:
|
|
4
|
+
|
|
5
|
+
* **PDF** is rendered *here* (WeasyPrint/Chromium), so a font only works if it is
|
|
6
|
+
installed on this machine. `available_fonts()` queries the system via
|
|
7
|
+
fontconfig and tells you what will actually render.
|
|
8
|
+
* **DOCX** does not embed fonts by default — it stores a font *name* and Word
|
|
9
|
+
substitutes whatever the reader has installed. So for docx, prefer widely
|
|
10
|
+
available families. `SAFE_FONTS` lists cross-platform-safe choices.
|
|
11
|
+
|
|
12
|
+
`list_fonts()` returns both so callers can make an informed choice.
|
|
13
|
+
"""
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
import shutil
|
|
17
|
+
import subprocess
|
|
18
|
+
from typing import Dict, List
|
|
19
|
+
|
|
20
|
+
# Families that ship on virtually all Windows/macOS systems (safe for docx).
|
|
21
|
+
SAFE_FONTS: Dict[str, List[str]] = {
|
|
22
|
+
"serif": ["Times New Roman", "Georgia", "Cambria", "Garamond", "Book Antiqua"],
|
|
23
|
+
"sans": ["Calibri", "Arial", "Helvetica", "Verdana", "Tahoma", "Trebuchet MS", "Segoe UI"],
|
|
24
|
+
"mono": ["Consolas", "Courier New", "Lucida Console"],
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def available_fonts() -> List[str]:
    """List the font families fontconfig reports on this machine.

    Runs `fc-list : family` and keeps the first comma-separated name of each
    output line. The result is de-duplicated and sorted case-insensitively.
    Returns an empty list when `fc-list` is not on PATH or the call fails.
    """
    if not shutil.which("fc-list"):
        return []
    try:
        completed = subprocess.run(
            ["fc-list", ":", "family"], capture_output=True, text=True, timeout=10
        )
        listing = completed.stdout
    except Exception:
        # Best effort: any failure to query fontconfig means "nothing known".
        return []

    # a line may list comma-separated localized aliases; take the first
    primary_names = (row.split(",")[0].strip() for row in listing.splitlines())
    families = {name for name in primary_names if name}
    return sorted(families, key=str.lower)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def list_fonts() -> Dict[str, object]:
    """Bundle the two font listings a caller needs to pick a font.

    Returns: {"installed": [...], "safe": {...}}
    - installed: the result of `available_fonts()` for this machine
    - safe: the module-level `SAFE_FONTS` mapping (the same object, not a copy)
    """
    listing: Dict[str, object] = {}
    listing["installed"] = available_fonts()
    listing["safe"] = SAFE_FONTS
    return listing
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def is_available(family: str) -> bool:
    """Report whether `family` appears in `available_fonts()`, ignoring case."""
    wanted = family.lower()
    for candidate in available_fonts():
        if candidate.lower() == wanted:
            return True
    return False
|
markitup/ir.py
ADDED
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
"""Intermediate Representation (IR) for MarkItUp.
|
|
2
|
+
|
|
3
|
+
The IR is the contract between parsing and rendering. The parser produces a tree
|
|
4
|
+
of these nodes from markdown; every renderer (docx, pdf, ...) consumes the same
|
|
5
|
+
tree. Nodes carry *structure and intent only* — never visual styling. All visual
|
|
6
|
+
decisions live in the Theme.
|
|
7
|
+
"""
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from dataclasses import dataclass, field
|
|
11
|
+
from typing import List, Optional
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
# --- base -------------------------------------------------------------------
|
|
15
|
+
class Node:
    """Common base type for every IR node; it carries no data of its own."""
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
# --- inline nodes -----------------------------------------------------------
|
|
20
|
+
@dataclass
class Text(Node):
    """Inline node holding a run of text."""

    # The text itself (required).
    content: str
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@dataclass
class Strong(Node):
    """Inline container node for strongly emphasized content."""

    # Nested nodes; each instance gets its own fresh list.
    children: List[Node] = field(default_factory=list)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@dataclass
class Emphasis(Node):
    """Inline container node for emphasized content."""

    # Nested nodes; each instance gets its own fresh list.
    children: List[Node] = field(default_factory=list)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@dataclass
class Strike(Node):
    """Inline container node for struck-through content."""

    # Nested nodes; each instance gets its own fresh list.
    children: List[Node] = field(default_factory=list)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
@dataclass
class InlineCode(Node):
    """Inline node holding a code span."""

    # The code text (required).
    content: str
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
@dataclass
class Link(Node):
    """Inline link node: a target plus the nodes that form its label."""

    # Link target (required).
    href: str
    # Nested label nodes; each instance gets its own fresh list.
    children: List[Node] = field(default_factory=list)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
@dataclass
class Image(Node):
    """Inline image node."""

    # Image source (required).
    src: str
    # Alternative text; empty when none was given.
    alt: str = ""
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
@dataclass
class LineBreak(Node):
    """Field-less inline marker node for a line break."""
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
# --- block nodes ------------------------------------------------------------
|
|
63
|
+
@dataclass
class Heading(Node):
    """Block node for a heading."""

    # Heading depth, 1..6 (required; not validated here).
    level: int
    # Inline content of the heading; each instance gets its own fresh list.
    children: List[Node] = field(default_factory=list)
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
@dataclass
class Paragraph(Node):
    """Block node for a paragraph."""

    # Nested nodes; each instance gets its own fresh list.
    children: List[Node] = field(default_factory=list)
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
@dataclass
class CodeBlock(Node):
    """Block node for a block of code."""

    # The code text (required).
    content: str
    # Language tag, or None when unspecified.
    lang: Optional[str] = None
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
@dataclass
class BlockQuote(Node):
    """Block node for quoted content."""

    # Nested nodes; each instance gets its own fresh list.
    children: List[Node] = field(default_factory=list)
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
@dataclass
class ListItem(Node):
    """A single entry of a ListNode."""

    # Nested nodes; each instance gets its own fresh list.
    children: List[Node] = field(default_factory=list)
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
@dataclass
class ListNode(Node):
    """Block node for a list and its items."""

    # True for an ordered list; unordered by default.
    ordered: bool = False
    # Starting number (default 1); presumably only meaningful when ordered.
    start: int = 1
    # The list entries; each instance gets its own fresh list.
    items: List[ListItem] = field(default_factory=list)
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
@dataclass
class TableCell(Node):
    """One cell of a table row."""

    # Cell content; each instance gets its own fresh list.
    children: List[Node] = field(default_factory=list)
    # Horizontal alignment: left | center | right (not validated here).
    align: str = "left"
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
@dataclass
class TableRow(Node):
    """One row of a table."""

    # The row's cells; each instance gets its own fresh list.
    cells: List[TableCell] = field(default_factory=list)
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
@dataclass
class Table(Node):
    """Block node for a table: an optional header row plus body rows."""

    # Header row, or None when the table has none.
    header: Optional[TableRow] = None
    # Body rows; each instance gets its own fresh list.
    rows: List[TableRow] = field(default_factory=list)
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
@dataclass
class ThematicBreak(Node):
    """Field-less block marker node for a thematic break."""
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
@dataclass
class Document(Node):
    """Root of the IR tree."""

    # Top-level nodes; each instance gets its own fresh list.
    children: List[Node] = field(default_factory=list)
    # populated from first H1 if present
    title: Optional[str] = None
|