markitup-py 0.3.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- markitup_py-0.3.1/LICENSE +21 -0
- markitup_py-0.3.1/PKG-INFO +205 -0
- markitup_py-0.3.1/README.md +164 -0
- markitup_py-0.3.1/markitup/__init__.py +45 -0
- markitup_py-0.3.1/markitup/api.py +145 -0
- markitup_py-0.3.1/markitup/cli.py +131 -0
- markitup_py-0.3.1/markitup/fonts.py +61 -0
- markitup_py-0.3.1/markitup/ir.py +122 -0
- markitup_py-0.3.1/markitup/parse.py +196 -0
- markitup_py-0.3.1/markitup/py.typed +0 -0
- markitup_py-0.3.1/markitup/render_docx.py +521 -0
- markitup_py-0.3.1/markitup/render_html.py +274 -0
- markitup_py-0.3.1/markitup/render_pdf.py +50 -0
- markitup_py-0.3.1/markitup/stamp.py +139 -0
- markitup_py-0.3.1/markitup/theme.py +204 -0
- markitup_py-0.3.1/markitup/themes/report.yaml +44 -0
- markitup_py-0.3.1/markitup_py.egg-info/PKG-INFO +205 -0
- markitup_py-0.3.1/markitup_py.egg-info/SOURCES.txt +22 -0
- markitup_py-0.3.1/markitup_py.egg-info/dependency_links.txt +1 -0
- markitup_py-0.3.1/markitup_py.egg-info/entry_points.txt +2 -0
- markitup_py-0.3.1/markitup_py.egg-info/requires.txt +19 -0
- markitup_py-0.3.1/markitup_py.egg-info/top_level.txt +1 -0
- markitup_py-0.3.1/pyproject.toml +52 -0
- markitup_py-0.3.1/setup.cfg +4 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Timmy
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: markitup-py
|
|
3
|
+
Version: 0.3.1
|
|
4
|
+
Summary: The reverse of MarkItDown: turn Markdown into clean, themeable DOCX and PDF documents.
|
|
5
|
+
Author: Timmy
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/yourname/markitup
|
|
8
|
+
Project-URL: Source, https://github.com/yourname/markitup
|
|
9
|
+
Project-URL: Issues, https://github.com/yourname/markitup/issues
|
|
10
|
+
Keywords: markdown,docx,pdf,document-generation,markitdown,report
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Classifier: Topic :: Text Processing :: Markup :: Markdown
|
|
20
|
+
Classifier: Topic :: Office/Business :: Office Suites
|
|
21
|
+
Requires-Python: >=3.9
|
|
22
|
+
Description-Content-Type: text/markdown
|
|
23
|
+
License-File: LICENSE
|
|
24
|
+
Requires-Dist: markdown-it-py>=3.0
|
|
25
|
+
Requires-Dist: python-docx>=1.1
|
|
26
|
+
Requires-Dist: PyYAML>=6.0
|
|
27
|
+
Provides-Extra: pdf
|
|
28
|
+
Requires-Dist: weasyprint>=60; extra == "pdf"
|
|
29
|
+
Requires-Dist: pypdf>=4.0; extra == "pdf"
|
|
30
|
+
Requires-Dist: reportlab>=4.0; extra == "pdf"
|
|
31
|
+
Requires-Dist: Pillow>=10; extra == "pdf"
|
|
32
|
+
Provides-Extra: chromium
|
|
33
|
+
Requires-Dist: playwright>=1.40; extra == "chromium"
|
|
34
|
+
Provides-Extra: all
|
|
35
|
+
Requires-Dist: weasyprint>=60; extra == "all"
|
|
36
|
+
Requires-Dist: pypdf>=4.0; extra == "all"
|
|
37
|
+
Requires-Dist: reportlab>=4.0; extra == "all"
|
|
38
|
+
Requires-Dist: Pillow>=10; extra == "all"
|
|
39
|
+
Requires-Dist: playwright>=1.40; extra == "all"
|
|
40
|
+
Dynamic: license-file
|
|
41
|
+
|
|
42
|
+
# MarkItUp
|
|
43
|
+
|
|
44
|
+
The reverse of Microsoft's **MarkItDown**: feed it Markdown, get back clean,
|
|
45
|
+
well-designed `.docx` and `.pdf` (and `.html`).
|
|
46
|
+
|
|
47
|
+
Unlike an LLM that "codes and executes" a document on every request, MarkItUp is
|
|
48
|
+
a **deterministic pipeline**. The same Markdown + same theme always produces the
|
|
49
|
+
same document. All design decisions are front-loaded into a theme, once — they
|
|
50
|
+
are never re-derived per document.
|
|
51
|
+
|
|
52
|
+
```
|
|
53
|
+
markdown ──parse──▶ IR ──render──┬──▶ .docx (python-docx → OOXML)
|
|
54
|
+
├──▶ .html (theme → CSS)
|
|
55
|
+
└──▶ .pdf (IR → themed HTML → print engine)
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
PDF uses a **pluggable engine**: `weasyprint` (pure-Python, default) or
|
|
59
|
+
`chromium` (headless, highest fidelity). Both consume the same HTML.
|
|
60
|
+
|
|
61
|
+
## Install
|
|
62
|
+
|
|
63
|
+
```bash
|
|
64
|
+
pip install markitup-py # docx only (lightweight)
|
|
65
|
+
pip install "markitup-py[pdf]" # + PDF and existing-file watermarking
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
The import name is `markitup`. PDF needs system libs for WeasyPrint
|
|
69
|
+
(pango, cairo, gdk-pixbuf).
|
|
70
|
+
|
|
71
|
+
## Quick start (Python)
|
|
72
|
+
|
|
73
|
+
```python
|
|
74
|
+
from markitup import MarkItUp
|
|
75
|
+
|
|
76
|
+
m = MarkItUp(theme="report")
|
|
77
|
+
m.convert("doc.md", "doc.pdf")
|
|
78
|
+
m.convert("doc.md") # -> ./doc.docx (current directory)
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
Configure once, convert many. Every knob overrides the theme:
|
|
82
|
+
|
|
83
|
+
```python
|
|
84
|
+
from markitup import MarkItUp, Watermark
|
|
85
|
+
|
|
86
|
+
m = MarkItUp(
|
|
87
|
+
theme="report",
|
|
88
|
+
body_font="Georgia",
|
|
89
|
+
heading_font="Calibri",
|
|
90
|
+
text_color="#222222",
|
|
91
|
+
heading_color="#0B3D2E",
|
|
92
|
+
heading_colors={1: "#0B3D2E", 2: "#1F6FEB"}, # per-level overrides
|
|
93
|
+
accent_color="#1F6FEB",
|
|
94
|
+
base_size=11, scale=1.2, line_height=1.45,
|
|
95
|
+
page_size="A4", margin_cm=2.54,
|
|
96
|
+
banner="CONFIDENTIAL — INTERNAL USE ONLY",
|
|
97
|
+
watermark=Watermark(enabled=True, text="DRAFT", opacity=0.08, position="center"),
|
|
98
|
+
)
|
|
99
|
+
m.convert("doc.md", "doc.pdf")
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
## Fonts
|
|
103
|
+
|
|
104
|
+
DOCX stores only font *names*; the reader's machine substitutes whatever it has installed, so
|
|
105
|
+
prefer cross-platform families. PDF is rendered here, so a font must be installed
|
|
106
|
+
on this machine to appear.
|
|
107
|
+
|
|
108
|
+
```python
|
|
109
|
+
from markitup import list_fonts, is_available
|
|
110
|
+
info = list_fonts()
|
|
111
|
+
info["installed"] # families available for PDF rendering on this machine
|
|
112
|
+
info["safe"] # curated cross-platform families for .docx
|
|
113
|
+
is_available("Georgia")
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
```bash
|
|
117
|
+
markitup fonts
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
## Watermarks
|
|
121
|
+
|
|
122
|
+
A watermark is a theme token, applied identically to docx and PDF — text or image,
|
|
123
|
+
with `opacity`, `rotation`, `position` (`center`/`top`/`bottom`).
|
|
124
|
+
|
|
125
|
+
```python
|
|
126
|
+
m = MarkItUp(watermark={"text": "DRAFT", "opacity": 0.1, "rotation": -45})
|
|
127
|
+
m = MarkItUp(watermark={"image": "logo.png", "opacity": 0.12})
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
### Stamp an EXISTING file
|
|
131
|
+
|
|
132
|
+
Add a watermark to a `.pdf` or `.docx` you already have — no re-rendering. It's an
|
|
133
|
+
overlay/append, so the document's content is left intact.
|
|
134
|
+
|
|
135
|
+
```python
|
|
136
|
+
from markitup import stamp
|
|
137
|
+
|
|
138
|
+
stamp("report.pdf", "stamped.pdf", "CONFIDENTIAL",
|
|
139
|
+
position="top", opacity=0.12, pages="1-3", behind=True)
|
|
140
|
+
stamp("report.docx", "stamped.docx", {"image": "logo.png", "opacity": 0.1})
|
|
141
|
+
# encrypted PDF: pass password=...
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
```bash
|
|
145
|
+
markitup stamp report.pdf -o stamped.pdf --watermark CONFIDENTIAL --position top
|
|
146
|
+
markitup stamp report.docx -o stamped.docx --watermark-image logo.png
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
Encrypted PDFs are refused unless you supply a password. Scanned/image PDFs are
|
|
150
|
+
fine — the overlay lands on top.
|
|
151
|
+
|
|
152
|
+
## Banners
|
|
153
|
+
|
|
154
|
+
A short notice rendered in-flow at the very top of a generated document:
|
|
155
|
+
|
|
156
|
+
```python
|
|
157
|
+
from markitup import Banner
|
|
158
|
+
m = MarkItUp(banner=Banner(text="RESTRICTED", color="#FFFFFF", bg="#B00020"))
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
## Templates (`base_docx`)
|
|
162
|
+
|
|
163
|
+
Hand MarkItUp a Word file you designed — brand fonts, colors, a header/footer with
|
|
164
|
+
your logo. It opens the file, clears the body, and maps your Markdown onto *its*
|
|
165
|
+
named styles. The template owns the design; Markdown just fills it. (This is the
|
|
166
|
+
Pandoc `reference.docx` model.)
|
|
167
|
+
|
|
168
|
+
```python
|
|
169
|
+
m = MarkItUp(base_docx="brand-template.docx")
|
|
170
|
+
m.convert("doc.md", "out.docx")
|
|
171
|
+
```
|
|
172
|
+
|
|
173
|
+
## CLI
|
|
174
|
+
|
|
175
|
+
```bash
|
|
176
|
+
markitup convert in.md -o out.pdf --theme report --font Georgia
|
|
177
|
+
markitup convert in.md --banner "CONFIDENTIAL" --watermark DRAFT
|
|
178
|
+
markitup fonts
|
|
179
|
+
markitup stamp in.pdf -o out.pdf --watermark "DO NOT COPY" --position bottom --pages 1-2
|
|
180
|
+
```
|
|
181
|
+
|
|
182
|
+
## Supported Markdown
|
|
183
|
+
|
|
184
|
+
Headings, paragraphs, **bold**/*italic*/~~strike~~/`code`, links, ordered &
|
|
185
|
+
unordered lists (nested), blockquotes, fenced code blocks, GFM tables (with
|
|
186
|
+
column alignment and clean wrapping), and thematic breaks.
|
|
187
|
+
|
|
188
|
+
## Architecture
|
|
189
|
+
|
|
190
|
+
| Module | Responsibility |
|
|
191
|
+
|---------------------------|-------------------------------------------------------|
|
|
192
|
+
| `markitup/ir.py` | Intermediate Representation — structure & intent only |
|
|
193
|
+
| `markitup/theme.py` | Design tokens; computed type scale; watermark/banner |
|
|
194
|
+
| `markitup/parse.py` | markdown-it-py token stream → IR |
|
|
195
|
+
| `markitup/render_docx.py` | IR + Theme → `.docx` |
|
|
196
|
+
| `markitup/render_html.py` | IR + Theme → HTML/CSS (PDF intermediate) |
|
|
197
|
+
| `markitup/render_pdf.py` | HTML → PDF (pluggable engine) |
|
|
198
|
+
| `markitup/stamp.py` | Watermark existing `.pdf`/`.docx` |
|
|
199
|
+
| `markitup/fonts.py` | Font discovery |
|
|
200
|
+
| `markitup/api.py` | The `MarkItUp` class |
|
|
201
|
+
| `markitup/themes/*.yaml` | Named themes |
|
|
202
|
+
|
|
203
|
+
## License
|
|
204
|
+
|
|
205
|
+
MIT.
|
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
# MarkItUp
|
|
2
|
+
|
|
3
|
+
The reverse of Microsoft's **MarkItDown**: feed it Markdown, get back clean,
|
|
4
|
+
well-designed `.docx` and `.pdf` (and `.html`).
|
|
5
|
+
|
|
6
|
+
Unlike an LLM that "codes and executes" a document on every request, MarkItUp is
|
|
7
|
+
a **deterministic pipeline**. The same Markdown + same theme always produces the
|
|
8
|
+
same document. All design decisions are front-loaded into a theme, once — they
|
|
9
|
+
are never re-derived per document.
|
|
10
|
+
|
|
11
|
+
```
|
|
12
|
+
markdown ──parse──▶ IR ──render──┬──▶ .docx (python-docx → OOXML)
|
|
13
|
+
├──▶ .html (theme → CSS)
|
|
14
|
+
└──▶ .pdf (IR → themed HTML → print engine)
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
PDF uses a **pluggable engine**: `weasyprint` (pure-Python, default) or
|
|
18
|
+
`chromium` (headless, highest fidelity). Both consume the same HTML.
|
|
19
|
+
|
|
20
|
+
## Install
|
|
21
|
+
|
|
22
|
+
```bash
|
|
23
|
+
pip install markitup-py # docx only (lightweight)
|
|
24
|
+
pip install "markitup-py[pdf]" # + PDF and existing-file watermarking
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
The import name is `markitup`. PDF needs system libs for WeasyPrint
|
|
28
|
+
(pango, cairo, gdk-pixbuf).
|
|
29
|
+
|
|
30
|
+
## Quick start (Python)
|
|
31
|
+
|
|
32
|
+
```python
|
|
33
|
+
from markitup import MarkItUp
|
|
34
|
+
|
|
35
|
+
m = MarkItUp(theme="report")
|
|
36
|
+
m.convert("doc.md", "doc.pdf")
|
|
37
|
+
m.convert("doc.md") # -> ./doc.docx (current directory)
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
Configure once, convert many. Every knob overrides the theme:
|
|
41
|
+
|
|
42
|
+
```python
|
|
43
|
+
from markitup import MarkItUp, Watermark
|
|
44
|
+
|
|
45
|
+
m = MarkItUp(
|
|
46
|
+
theme="report",
|
|
47
|
+
body_font="Georgia",
|
|
48
|
+
heading_font="Calibri",
|
|
49
|
+
text_color="#222222",
|
|
50
|
+
heading_color="#0B3D2E",
|
|
51
|
+
heading_colors={1: "#0B3D2E", 2: "#1F6FEB"}, # per-level overrides
|
|
52
|
+
accent_color="#1F6FEB",
|
|
53
|
+
base_size=11, scale=1.2, line_height=1.45,
|
|
54
|
+
page_size="A4", margin_cm=2.54,
|
|
55
|
+
banner="CONFIDENTIAL — INTERNAL USE ONLY",
|
|
56
|
+
watermark=Watermark(enabled=True, text="DRAFT", opacity=0.08, position="center"),
|
|
57
|
+
)
|
|
58
|
+
m.convert("doc.md", "doc.pdf")
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
## Fonts
|
|
62
|
+
|
|
63
|
+
DOCX stores only font *names*; the reader's machine substitutes whatever it has installed, so
|
|
64
|
+
prefer cross-platform families. PDF is rendered here, so a font must be installed
|
|
65
|
+
on this machine to appear.
|
|
66
|
+
|
|
67
|
+
```python
|
|
68
|
+
from markitup import list_fonts, is_available
|
|
69
|
+
info = list_fonts()
|
|
70
|
+
info["installed"] # families available for PDF rendering on this machine
|
|
71
|
+
info["safe"] # curated cross-platform families for .docx
|
|
72
|
+
is_available("Georgia")
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
```bash
|
|
76
|
+
markitup fonts
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
## Watermarks
|
|
80
|
+
|
|
81
|
+
A watermark is a theme token, applied identically to docx and PDF — text or image,
|
|
82
|
+
with `opacity`, `rotation`, `position` (`center`/`top`/`bottom`).
|
|
83
|
+
|
|
84
|
+
```python
|
|
85
|
+
m = MarkItUp(watermark={"text": "DRAFT", "opacity": 0.1, "rotation": -45})
|
|
86
|
+
m = MarkItUp(watermark={"image": "logo.png", "opacity": 0.12})
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
### Stamp an EXISTING file
|
|
90
|
+
|
|
91
|
+
Add a watermark to a `.pdf` or `.docx` you already have — no re-rendering. It's an
|
|
92
|
+
overlay/append, so the document's content is left intact.
|
|
93
|
+
|
|
94
|
+
```python
|
|
95
|
+
from markitup import stamp
|
|
96
|
+
|
|
97
|
+
stamp("report.pdf", "stamped.pdf", "CONFIDENTIAL",
|
|
98
|
+
position="top", opacity=0.12, pages="1-3", behind=True)
|
|
99
|
+
stamp("report.docx", "stamped.docx", {"image": "logo.png", "opacity": 0.1})
|
|
100
|
+
# encrypted PDF: pass password=...
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
```bash
|
|
104
|
+
markitup stamp report.pdf -o stamped.pdf --watermark CONFIDENTIAL --position top
|
|
105
|
+
markitup stamp report.docx -o stamped.docx --watermark-image logo.png
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
Encrypted PDFs are refused unless you supply a password. Scanned/image PDFs are
|
|
109
|
+
fine — the overlay lands on top.
|
|
110
|
+
|
|
111
|
+
## Banners
|
|
112
|
+
|
|
113
|
+
A short notice rendered in-flow at the very top of a generated document:
|
|
114
|
+
|
|
115
|
+
```python
|
|
116
|
+
from markitup import Banner
|
|
117
|
+
m = MarkItUp(banner=Banner(text="RESTRICTED", color="#FFFFFF", bg="#B00020"))
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
## Templates (`base_docx`)
|
|
121
|
+
|
|
122
|
+
Hand MarkItUp a Word file you designed — brand fonts, colors, a header/footer with
|
|
123
|
+
your logo. It opens the file, clears the body, and maps your Markdown onto *its*
|
|
124
|
+
named styles. The template owns the design; Markdown just fills it. (This is the
|
|
125
|
+
Pandoc `reference.docx` model.)
|
|
126
|
+
|
|
127
|
+
```python
|
|
128
|
+
m = MarkItUp(base_docx="brand-template.docx")
|
|
129
|
+
m.convert("doc.md", "out.docx")
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
## CLI
|
|
133
|
+
|
|
134
|
+
```bash
|
|
135
|
+
markitup convert in.md -o out.pdf --theme report --font Georgia
|
|
136
|
+
markitup convert in.md --banner "CONFIDENTIAL" --watermark DRAFT
|
|
137
|
+
markitup fonts
|
|
138
|
+
markitup stamp in.pdf -o out.pdf --watermark "DO NOT COPY" --position bottom --pages 1-2
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
## Supported Markdown
|
|
142
|
+
|
|
143
|
+
Headings, paragraphs, **bold**/*italic*/~~strike~~/`code`, links, ordered &
|
|
144
|
+
unordered lists (nested), blockquotes, fenced code blocks, GFM tables (with
|
|
145
|
+
column alignment and clean wrapping), and thematic breaks.
|
|
146
|
+
|
|
147
|
+
## Architecture
|
|
148
|
+
|
|
149
|
+
| Module | Responsibility |
|
|
150
|
+
|---------------------------|-------------------------------------------------------|
|
|
151
|
+
| `markitup/ir.py` | Intermediate Representation — structure & intent only |
|
|
152
|
+
| `markitup/theme.py` | Design tokens; computed type scale; watermark/banner |
|
|
153
|
+
| `markitup/parse.py` | markdown-it-py token stream → IR |
|
|
154
|
+
| `markitup/render_docx.py` | IR + Theme → `.docx` |
|
|
155
|
+
| `markitup/render_html.py` | IR + Theme → HTML/CSS (PDF intermediate) |
|
|
156
|
+
| `markitup/render_pdf.py` | HTML → PDF (pluggable engine) |
|
|
157
|
+
| `markitup/stamp.py` | Watermark existing `.pdf`/`.docx` |
|
|
158
|
+
| `markitup/fonts.py` | Font discovery |
|
|
159
|
+
| `markitup/api.py` | The `MarkItUp` class |
|
|
160
|
+
| `markitup/themes/*.yaml` | Named themes |
|
|
161
|
+
|
|
162
|
+
## License
|
|
163
|
+
|
|
164
|
+
MIT.
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
"""MarkItUp — markdown -> docx/pdf/html, the reverse of Microsoft's MarkItDown.
|
|
2
|
+
|
|
3
|
+
Quick start:
|
|
4
|
+
|
|
5
|
+
from markitup import MarkItUp
|
|
6
|
+
MarkItUp(theme="report").convert("doc.md", "doc.pdf")
|
|
7
|
+
|
|
8
|
+
Pipeline: markdown --parse--> IR --render--> .docx / .pdf / .html
|
|
9
|
+
All visual decisions live in a Theme; the renderers are mechanical.
|
|
10
|
+
"""
|
|
11
|
+
import os
|
|
12
|
+
|
|
13
|
+
from .api import MarkItUp
|
|
14
|
+
from .theme import Theme, Watermark, Banner, Table, make_watermark
|
|
15
|
+
from .parse import parse
|
|
16
|
+
from .render_docx import render as render_docx
|
|
17
|
+
from .render_html import render_html
|
|
18
|
+
from .render_pdf import render_pdf
|
|
19
|
+
from .stamp import stamp
|
|
20
|
+
from .fonts import list_fonts, available_fonts, is_available, SAFE_FONTS
|
|
21
|
+
|
|
22
|
+
__version__ = "0.3.1"
|
|
23
|
+
__all__ = [
|
|
24
|
+
"MarkItUp", "Theme", "Watermark", "Banner", "Table",
|
|
25
|
+
"parse", "render_docx", "render_html", "render_pdf",
|
|
26
|
+
"stamp", "convert",
|
|
27
|
+
"list_fonts", "available_fonts", "is_available", "SAFE_FONTS",
|
|
28
|
+
]
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def convert(markdown_text: str, out_path: str, theme="report",
            base_url: str = ".", pdf_engine: str = "weasyprint") -> str:
    """Render a Markdown string to ``out_path`` in a single call.

    The output format is picked from the lower-cased extension of
    ``out_path``: ``.docx``, ``.pdf``, or ``.html``/``.htm``.

    Args:
        markdown_text: Markdown source, handed to ``parse``.
        out_path: Destination file; its extension selects the renderer.
        theme: A ``Theme`` instance, or a value passed to ``Theme.load``.
        base_url: Forwarded to the PDF renderer only.
        pdf_engine: Forwarded to the PDF renderer as ``engine``.

    Returns:
        The renderer's return value for ``.docx``/``.pdf``; ``out_path``
        for HTML output.

    Raises:
        ValueError: If the extension of ``out_path`` is not supported.
    """
    document = parse(markdown_text)
    resolved_theme = Theme.load(theme) if not isinstance(theme, Theme) else theme
    _, suffix = os.path.splitext(out_path)
    suffix = suffix.lower()

    if suffix == ".docx":
        return render_docx(document, resolved_theme, out_path)

    if suffix == ".pdf":
        return render_pdf(
            document,
            resolved_theme,
            out_path,
            engine=pdf_engine,
            base_url=base_url,
        )

    if suffix == ".html" or suffix == ".htm":
        html = render_html(document, resolved_theme)
        with open(out_path, "w", encoding="utf-8") as handle:
            handle.write(html)
        return out_path

    raise ValueError(f"unsupported output extension: {suffix!r}")
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
"""The public, configured entry point: the MarkItUp class.
|
|
2
|
+
|
|
3
|
+
Mirrors the ergonomics of Microsoft's MarkItDown — construct once with your
|
|
4
|
+
preferences, then convert many files:
|
|
5
|
+
|
|
6
|
+
from markitup import MarkItUp, Watermark
|
|
7
|
+
m = MarkItUp(theme="report", body_font="Georgia", text_color="#222")
|
|
8
|
+
m.convert("doc.md", "doc.pdf")
|
|
9
|
+
m.convert("doc.md") # -> ./doc.docx (current working directory)
|
|
10
|
+
|
|
11
|
+
Every visual knob is optional and overrides the chosen theme.
|
|
12
|
+
"""
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import os
|
|
16
|
+
from typing import Dict, Optional, Union
|
|
17
|
+
|
|
18
|
+
from .theme import Theme, Banner, Watermark, make_watermark, norm_hex
|
|
19
|
+
from .parse import parse
|
|
20
|
+
from .render_docx import render as _render_docx
|
|
21
|
+
from .render_html import render_html as _render_html
|
|
22
|
+
from .render_pdf import render_pdf as _render_pdf
|
|
23
|
+
from . import stamp as _stamp_mod
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class MarkItUp:
    """Markdown converter that is configured once and reused for many files.

    The constructor resolves a theme and applies any keyword overrides to
    it. The resulting theme is stored on ``self.theme`` and used by every
    later conversion. The output format of a conversion is chosen from the
    output file's extension (``.docx``, ``.pdf``, ``.html``/``.htm``).
    """

    def __init__(
        self,
        theme: Union[str, Theme] = "report",
        *,
        # fonts
        body_font: Optional[str] = None,
        heading_font: Optional[str] = None,
        mono_font: Optional[str] = None,
        # colors (accept '#RRGGBB' or 'RRGGBB')
        text_color: Optional[str] = None,
        heading_color: Optional[str] = None,
        heading_colors: Optional[Dict[int, str]] = None,
        accent_color: Optional[str] = None,
        link_color: Optional[str] = None,
        # type & page
        base_size: Optional[float] = None,
        line_height: Optional[float] = None,
        scale: Optional[float] = None,
        page_size: Optional[str] = None,
        margin_cm: Optional[float] = None,
        # structure & marks
        base_docx: Optional[str] = None,
        watermark: Union[Watermark, str, dict, None] = None,
        banner: Union[Banner, str, dict, None] = None,
        # pdf
        pdf_engine: str = "weasyprint",
    ):
        """Resolve ``theme`` and layer the keyword overrides on top of it.

        Args:
            theme: A ``Theme`` instance, or a value handed to ``Theme.load``.
                A supplied instance is copied, never modified.
            body_font: Body font override; falsy keeps the theme's value.
            heading_font: Heading font override; falsy keeps the theme's value.
            mono_font: Monospace font override; falsy keeps the theme's value.
            text_color: Text colour override, passed through ``norm_hex``.
            heading_color: Heading colour override, passed through ``norm_hex``.
            heading_colors: Per-level heading colours merged into the theme's
                existing per-level map; keys are coerced with ``int``.
            accent_color: Accent colour override, passed through ``norm_hex``.
            link_color: Link colour override, passed through ``norm_hex``.
            base_size: Stored as the theme's type ``base_size`` when not None.
            line_height: Stored as the theme's type ``line_height`` when not None.
            scale: Stored as the theme's type ``ratio`` when not None.
            page_size: Page size override; falsy keeps the theme's value.
            margin_cm: Stored as the theme's page ``margin_cm`` when not None.
            base_docx: Stored on the theme as ``base_docx`` when truthy.
            watermark: Converted with ``make_watermark`` when not None.
            banner: ``Banner``, ``str`` or ``dict``; converted when not None.
            pdf_engine: Engine name forwarded to the PDF renderer.
        """
        # Function-scope import keeps this block self-contained.
        import copy

        # The overrides below assign into the theme. Work on a private copy
        # of a caller-supplied Theme so they do not leak back into the
        # caller's object or into other MarkItUp instances sharing it.
        if isinstance(theme, Theme):
            th = copy.deepcopy(theme)
        else:
            th = Theme.load(theme)

        if body_font:
            th.fonts.body = body_font
        if heading_font:
            th.fonts.heading = heading_font
        if mono_font:
            th.fonts.mono = mono_font

        if text_color:
            th.colors.text = norm_hex(text_color)
        if heading_color:
            th.colors.heading = norm_hex(heading_color)
        if accent_color:
            th.colors.accent = norm_hex(accent_color)
        if link_color:
            th.colors.link = norm_hex(link_color)
        if heading_colors:
            th.colors.headings.update({int(k): norm_hex(v) for k, v in heading_colors.items()})

        # Numeric knobs use `is not None` so an explicit 0 is still applied.
        if base_size is not None:
            th.type.base_size = base_size
        if line_height is not None:
            th.type.line_height = line_height
        if scale is not None:
            th.type.ratio = scale
        if page_size:
            th.page.size = page_size
        if margin_cm is not None:
            th.page.margin_cm = margin_cm

        if base_docx:
            th.base_docx = base_docx
        if watermark is not None:
            th.watermark = make_watermark(watermark)
        if banner is not None:
            th.banner = _coerce_banner(banner)

        self.theme = th
        self.pdf_engine = pdf_engine

    # ---- conversion -------------------------------------------------------
    def convert(self, input_path: str, output_path: Optional[str] = None,
                *, to: str = "docx") -> str:
        """Convert a Markdown file.

        Args:
            input_path: Path of the UTF-8 Markdown file to read.
            output_path: Destination file. If omitted, the output is written
                to the current working directory as ``<input-stem>.<to>``.
            to: Extension used only when ``output_path`` is omitted; a
                leading dot is stripped.

        Returns:
            Whatever ``convert_text`` returns for the resolved output path.

        Raises:
            ValueError: If the output extension is not supported.
        """
        with open(input_path, "r", encoding="utf-8") as fh:
            md = fh.read()
        if output_path is None:
            stem = os.path.splitext(os.path.basename(input_path))[0]
            output_path = os.path.join(os.getcwd(), f"{stem}.{to.lstrip('.')}")
        # The input file's directory is forwarded as base_url; presumably the
        # PDF engine resolves relative resources against it (verify in
        # render_pdf).
        base_url = os.path.dirname(os.path.abspath(input_path)) or "."
        return self.convert_text(md, output_path, base_url=base_url)

    def convert_text(self, markdown_text: str, output_path: str,
                     *, base_url: str = ".") -> str:
        """Convert a Markdown string to the file at ``output_path``.

        The format is taken from the lower-cased extension of ``output_path``.

        Args:
            markdown_text: Markdown source, handed to ``parse``.
            output_path: Destination file; its extension selects the renderer.
            base_url: Forwarded to the PDF renderer only.

        Returns:
            The renderer's return value for ``.docx``/``.pdf``;
            ``output_path`` for HTML output.

        Raises:
            ValueError: If the extension of ``output_path`` is not supported.
        """
        doc = parse(markdown_text)
        ext = os.path.splitext(output_path)[1].lower()
        if ext == ".docx":
            return _render_docx(doc, self.theme, output_path)
        if ext == ".pdf":
            return _render_pdf(doc, self.theme, output_path,
                               engine=self.pdf_engine, base_url=base_url)
        if ext in (".html", ".htm"):
            with open(output_path, "w", encoding="utf-8") as fh:
                fh.write(_render_html(doc, self.theme))
            return output_path
        raise ValueError(f"unsupported output extension: {ext!r}")

    # ---- existing-file watermarking --------------------------------------
    @staticmethod
    def stamp(input_path: str, output_path: str, watermark, **kwargs) -> str:
        """Watermark an existing .pdf/.docx. See markitup.stamp for options.

        Thin pass-through to ``markitup.stamp.stamp``; all arguments,
        including ``**kwargs``, are forwarded unchanged.
        """
        return _stamp_mod.stamp(input_path, output_path, watermark, **kwargs)
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def _coerce_banner(value) -> Optional[Banner]:
    """Turn a user-supplied banner value into a ``Banner`` with normalised colours.

    Accepts ``None`` (returned as-is), an existing ``Banner`` (used directly
    and updated in place), a ``str`` (used as the banner text), or a ``dict``
    (expanded as keyword arguments to ``Banner``).

    Returns:
        The ``Banner`` with ``color`` passed through ``norm_hex``, and ``bg``
        as well when it is truthy; ``None`` if ``value`` is ``None``.

    Raises:
        TypeError: If ``value`` is none of the accepted types.
    """
    if value is None:
        return None

    if isinstance(value, Banner):
        banner = value
    else:
        if isinstance(value, str):
            banner = Banner(text=value)
        elif isinstance(value, dict):
            banner = Banner(**value)
        else:
            raise TypeError(f"banner must be Banner | str | dict | None, got {type(value)!r}")

    # The text colour is always normalised; the background only when set.
    banner.color = norm_hex(banner.color)
    if banner.bg:
        banner.bg = norm_hex(banner.bg)
    return banner
|