mcp-docgen 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mcp_docgen-0.1.0/LICENSE +21 -0
- mcp_docgen-0.1.0/PKG-INFO +156 -0
- mcp_docgen-0.1.0/README.md +126 -0
- mcp_docgen-0.1.0/pyproject.toml +70 -0
- mcp_docgen-0.1.0/src/mcp_docgen/__init__.py +3 -0
- mcp_docgen-0.1.0/src/mcp_docgen/blocks.py +90 -0
- mcp_docgen-0.1.0/src/mcp_docgen/docx_writer.py +146 -0
- mcp_docgen-0.1.0/src/mcp_docgen/markdown_parser.py +147 -0
- mcp_docgen-0.1.0/src/mcp_docgen/paths.py +45 -0
- mcp_docgen-0.1.0/src/mcp_docgen/pptx_writer.py +146 -0
- mcp_docgen-0.1.0/src/mcp_docgen/server.py +112 -0
- mcp_docgen-0.1.0/src/mcp_docgen/xlsx_writer.py +80 -0
mcp_docgen-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Touka Project (Otoha)
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: mcp-docgen
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Markdown-driven MCP server that generates Word (.docx), Excel (.xlsx) and PowerPoint (.pptx) documents — by the Touka project.
|
|
5
|
+
Keywords: mcp,model-context-protocol,docx,xlsx,pptx,word,excel,powerpoint,document-generation,markdown
|
|
6
|
+
Author: Otoha (Touka Project)
|
|
7
|
+
Author-email: Otoha (Touka Project) <whitekinglight@gmail.com>
|
|
8
|
+
License-Expression: MIT
|
|
9
|
+
License-File: LICENSE
|
|
10
|
+
Classifier: Development Status :: 3 - Alpha
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: Operating System :: OS Independent
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
18
|
+
Classifier: Topic :: Office/Business :: Office Suites
|
|
19
|
+
Classifier: Topic :: Text Processing :: Markup
|
|
20
|
+
Requires-Dist: python-docx>=1.1
|
|
21
|
+
Requires-Dist: openpyxl>=3.1
|
|
22
|
+
Requires-Dist: xlsxwriter>=3.2
|
|
23
|
+
Requires-Dist: python-pptx>=1.0
|
|
24
|
+
Requires-Dist: markdown-it-py>=3.0
|
|
25
|
+
Requires-Dist: mcp>=1.26
|
|
26
|
+
Maintainer: Touka Project
|
|
27
|
+
Maintainer-email: Touka Project <whitekinglight@gmail.com>
|
|
28
|
+
Requires-Python: >=3.10
|
|
29
|
+
Description-Content-Type: text/markdown
|
|
30
|
+
|
|
31
|
+
# mcp-docgen
|
|
32
|
+
|
|
33
|
+
A Markdown-driven [Model Context Protocol](https://modelcontextprotocol.io) (MCP) server
|
|
34
|
+
that turns Markdown — and structured data — into **Word (`.docx`)**,
|
|
35
|
+
**PowerPoint (`.pptx`)**, and **Excel (`.xlsx`)** files.
|
|
36
|
+
|
|
37
|
+
Built entirely on mature, permissively-licensed Python libraries
|
|
38
|
+
([`python-docx`](https://github.com/python-openxml/python-docx),
|
|
39
|
+
[`python-pptx`](https://github.com/scanny/python-pptx),
|
|
40
|
+
[`openpyxl`](https://foss.heptapod.net/openpyxl/openpyxl),
|
|
41
|
+
[`XlsxWriter`](https://github.com/jmcnamara/XlsxWriter),
|
|
42
|
+
[`markdown-it-py`](https://github.com/executablebooks/markdown-it-py)) — no proprietary
|
|
43
|
+
dependencies. **MIT licensed.**
|
|
44
|
+
|
|
45
|
+
> Part of the **Touka** project: giving AI agents the ability to produce real Office
|
|
46
|
+
> documents using only open-source building blocks.
|
|
47
|
+
|
|
48
|
+
## Why
|
|
49
|
+
|
|
50
|
+
LLMs are great at producing Markdown. `mcp-docgen` exposes three tools that convert that
|
|
51
|
+
Markdown into polished Office documents, so any MCP-capable assistant (Claude Desktop,
|
|
52
|
+
Touka, …) can hand a user a finished `.docx` / `.pptx` / `.xlsx`.
|
|
53
|
+
|
|
54
|
+
## Install & run
|
|
55
|
+
|
|
56
|
+
Once published to PyPI:
|
|
57
|
+
|
|
58
|
+
```bash
|
|
59
|
+
uvx mcp-docgen
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
From a local checkout (before publishing):
|
|
63
|
+
|
|
64
|
+
```bash
|
|
65
|
+
uv sync
|
|
66
|
+
uv run mcp-docgen
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
The server speaks MCP over **stdio**.
|
|
70
|
+
|
|
71
|
+
## MCP client configuration
|
|
72
|
+
|
|
73
|
+
```jsonc
|
|
74
|
+
{
|
|
75
|
+
"mcpServers": {
|
|
76
|
+
"docgen": {
|
|
77
|
+
"command": "uvx",
|
|
78
|
+
"args": ["mcp-docgen"],
|
|
79
|
+
"env": { "MCP_DOCGEN_OUTPUT_DIR": "/absolute/path/to/output" }
|
|
80
|
+
}
|
|
81
|
+
}
|
|
82
|
+
}
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
From a local checkout, swap the command for:
|
|
86
|
+
|
|
87
|
+
```jsonc
|
|
88
|
+
{ "command": "uv", "args": ["run", "--directory", "/path/to/mcp-docgen", "mcp-docgen"] }
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
## Tools
|
|
92
|
+
|
|
93
|
+
Each tool returns `{"path": "<absolute path of the written file>"}`.
|
|
94
|
+
|
|
95
|
+
### `create_docx(markdown, output_path, title?)`
|
|
96
|
+
|
|
97
|
+
Markdown → Word. Supports headings, **bold** / *italic* / `inline code`, bullet and
|
|
98
|
+
numbered lists (nested), tables, block quotes, fenced code blocks, and horizontal rules.
|
|
99
|
+
|
|
100
|
+
### `create_pptx(markdown, output_path, title?)`
|
|
101
|
+
|
|
102
|
+
Markdown → PowerPoint, using this slide convention:
|
|
103
|
+
|
|
104
|
+
| Markdown | Result |
|
|
105
|
+
| --- | --- |
|
|
106
|
+
| `# Heading` | starts a **new slide** (the heading becomes its title) |
|
|
107
|
+
| content below a heading | **bullet points** (nested lists indent) |
|
|
108
|
+
| `---` (horizontal rule) | an explicit **slide break** |
|
|
109
|
+
|
|
110
|
+
`title` adds a leading title slide.
|
|
111
|
+
|
|
112
|
+
### `create_xlsx(sheets, output_path)`
|
|
113
|
+
|
|
114
|
+
Structured data → Excel. `sheets` is a list of worksheets:
|
|
115
|
+
|
|
116
|
+
```json
|
|
117
|
+
[
|
|
118
|
+
{
|
|
119
|
+
"name": "Sales",
|
|
120
|
+
"rows": [["Region", "Revenue"], ["APAC", 1200000], ["EMEA", 900000]],
|
|
121
|
+
"header": true
|
|
122
|
+
}
|
|
123
|
+
]
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
Cells may be strings, numbers, booleans, or `null`. The first row is a **bold, frozen
|
|
127
|
+
header** unless the sheet sets `"header": false`.
|
|
128
|
+
|
|
129
|
+
## Output directory & safety
|
|
130
|
+
|
|
131
|
+
All files are written inside one base directory — `MCP_DOCGEN_OUTPUT_DIR`, or `./out`
|
|
132
|
+
relative to the working directory by default. `output_path` is always interpreted
|
|
133
|
+
relative to that base, and any path that tries to escape it (via `..` or an absolute
|
|
134
|
+
path) is rejected. The server makes **no network calls** and spawns **no subprocesses**.
|
|
135
|
+
|
|
136
|
+
## Examples
|
|
137
|
+
|
|
138
|
+
```bash
|
|
139
|
+
uv run python examples/generate_samples.py
|
|
140
|
+
# writes report.docx, review.pptx and sales.xlsx into examples/output/
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
## Development
|
|
144
|
+
|
|
145
|
+
```bash
|
|
146
|
+
uv sync
|
|
147
|
+
uv run pytest
|
|
148
|
+
uv run ruff check .
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
## License
|
|
152
|
+
|
|
153
|
+
MIT © 2026 Touka Project — see [LICENSE](LICENSE).
|
|
154
|
+
|
|
155
|
+
Document generation is powered by python-docx, python-pptx, openpyxl, and XlsxWriter;
|
|
156
|
+
Markdown parsing by markdown-it-py. All MIT/BSD licensed.
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
# mcp-docgen
|
|
2
|
+
|
|
3
|
+
A Markdown-driven [Model Context Protocol](https://modelcontextprotocol.io) (MCP) server
|
|
4
|
+
that turns Markdown — and structured data — into **Word (`.docx`)**,
|
|
5
|
+
**PowerPoint (`.pptx`)**, and **Excel (`.xlsx`)** files.
|
|
6
|
+
|
|
7
|
+
Built entirely on mature, permissively-licensed Python libraries
|
|
8
|
+
([`python-docx`](https://github.com/python-openxml/python-docx),
|
|
9
|
+
[`python-pptx`](https://github.com/scanny/python-pptx),
|
|
10
|
+
[`openpyxl`](https://foss.heptapod.net/openpyxl/openpyxl),
|
|
11
|
+
[`XlsxWriter`](https://github.com/jmcnamara/XlsxWriter),
|
|
12
|
+
[`markdown-it-py`](https://github.com/executablebooks/markdown-it-py)) — no proprietary
|
|
13
|
+
dependencies. **MIT licensed.**
|
|
14
|
+
|
|
15
|
+
> Part of the **Touka** project: giving AI agents the ability to produce real Office
|
|
16
|
+
> documents using only open-source building blocks.
|
|
17
|
+
|
|
18
|
+
## Why
|
|
19
|
+
|
|
20
|
+
LLMs are great at producing Markdown. `mcp-docgen` exposes three tools that convert that
|
|
21
|
+
Markdown into polished Office documents, so any MCP-capable assistant (Claude Desktop,
|
|
22
|
+
Touka, …) can hand a user a finished `.docx` / `.pptx` / `.xlsx`.
|
|
23
|
+
|
|
24
|
+
## Install & run
|
|
25
|
+
|
|
26
|
+
From PyPI:
|
|
27
|
+
|
|
28
|
+
```bash
|
|
29
|
+
uvx mcp-docgen
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
From a local checkout:
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
uv sync
|
|
36
|
+
uv run mcp-docgen
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
The server speaks MCP over **stdio**.
|
|
40
|
+
|
|
41
|
+
## MCP client configuration
|
|
42
|
+
|
|
43
|
+
```jsonc
|
|
44
|
+
{
|
|
45
|
+
"mcpServers": {
|
|
46
|
+
"docgen": {
|
|
47
|
+
"command": "uvx",
|
|
48
|
+
"args": ["mcp-docgen"],
|
|
49
|
+
"env": { "MCP_DOCGEN_OUTPUT_DIR": "/absolute/path/to/output" }
|
|
50
|
+
}
|
|
51
|
+
}
|
|
52
|
+
}
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
From a local checkout, swap the command for:
|
|
56
|
+
|
|
57
|
+
```jsonc
|
|
58
|
+
{ "command": "uv", "args": ["run", "--directory", "/path/to/mcp-docgen", "mcp-docgen"] }
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
## Tools
|
|
62
|
+
|
|
63
|
+
Each tool returns `{"path": "<absolute path of the written file>"}`.
|
|
64
|
+
|
|
65
|
+
### `create_docx(markdown, output_path, title?)`
|
|
66
|
+
|
|
67
|
+
Markdown → Word. Supports headings, **bold** / *italic* / `inline code`, bullet and
|
|
68
|
+
numbered lists (nested), tables, block quotes, fenced code blocks, and horizontal rules.
|
|
69
|
+
|
|
70
|
+
### `create_pptx(markdown, output_path, title?)`
|
|
71
|
+
|
|
72
|
+
Markdown → PowerPoint, using this slide convention:
|
|
73
|
+
|
|
74
|
+
| Markdown | Result |
|
|
75
|
+
| --- | --- |
|
|
76
|
+
| `# Heading` | starts a **new slide** (the heading becomes its title) |
|
|
77
|
+
| content below a heading | **bullet points** (nested lists indent) |
|
|
78
|
+
| `---` (horizontal rule) | an explicit **slide break** |
|
|
79
|
+
|
|
80
|
+
`title` adds a leading title slide.
|
|
81
|
+
|
|
82
|
+
### `create_xlsx(sheets, output_path)`
|
|
83
|
+
|
|
84
|
+
Structured data → Excel. `sheets` is a list of worksheets:
|
|
85
|
+
|
|
86
|
+
```json
|
|
87
|
+
[
|
|
88
|
+
{
|
|
89
|
+
"name": "Sales",
|
|
90
|
+
"rows": [["Region", "Revenue"], ["APAC", 1200000], ["EMEA", 900000]],
|
|
91
|
+
"header": true
|
|
92
|
+
}
|
|
93
|
+
]
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
Cells may be strings, numbers, booleans, or `null`. The first row is a **bold, frozen
|
|
97
|
+
header** unless the sheet sets `"header": false`.
|
|
98
|
+
|
|
99
|
+
## Output directory & safety
|
|
100
|
+
|
|
101
|
+
All files are written inside one base directory — `MCP_DOCGEN_OUTPUT_DIR`, or `./out`
|
|
102
|
+
relative to the working directory by default. `output_path` is always interpreted
|
|
103
|
+
relative to that base, and any path that tries to escape it (via `..` or an absolute
|
|
104
|
+
path) is rejected. The server makes **no network calls** and spawns **no subprocesses**.
|
|
105
|
+
|
|
106
|
+
## Examples
|
|
107
|
+
|
|
108
|
+
```bash
|
|
109
|
+
uv run python examples/generate_samples.py
|
|
110
|
+
# writes report.docx, review.pptx and sales.xlsx into examples/output/
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
## Development
|
|
114
|
+
|
|
115
|
+
```bash
|
|
116
|
+
uv sync
|
|
117
|
+
uv run pytest
|
|
118
|
+
uv run ruff check .
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
## License
|
|
122
|
+
|
|
123
|
+
MIT © 2026 Touka Project — see [LICENSE](LICENSE).
|
|
124
|
+
|
|
125
|
+
Document generation is powered by python-docx, python-pptx, openpyxl, and XlsxWriter;
|
|
126
|
+
Markdown parsing by markdown-it-py. All MIT/BSD licensed.
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "mcp-docgen"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "Markdown-driven MCP server that generates Word (.docx), Excel (.xlsx) and PowerPoint (.pptx) documents — by the Touka project."
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
requires-python = ">=3.10"
|
|
7
|
+
license = "MIT"
|
|
8
|
+
license-files = ["LICENSE"]
|
|
9
|
+
authors = [
|
|
10
|
+
{ name = "Otoha (Touka Project)", email = "whitekinglight@gmail.com" },
|
|
11
|
+
]
|
|
12
|
+
maintainers = [
|
|
13
|
+
{ name = "Touka Project", email = "whitekinglight@gmail.com" },
|
|
14
|
+
]
|
|
15
|
+
keywords = [
|
|
16
|
+
"mcp",
|
|
17
|
+
"model-context-protocol",
|
|
18
|
+
"docx",
|
|
19
|
+
"xlsx",
|
|
20
|
+
"pptx",
|
|
21
|
+
"word",
|
|
22
|
+
"excel",
|
|
23
|
+
"powerpoint",
|
|
24
|
+
"document-generation",
|
|
25
|
+
"markdown",
|
|
26
|
+
]
|
|
27
|
+
classifiers = [
|
|
28
|
+
"Development Status :: 3 - Alpha",
|
|
29
|
+
"Intended Audience :: Developers",
|
|
30
|
+
"Operating System :: OS Independent",
|
|
31
|
+
"Programming Language :: Python :: 3",
|
|
32
|
+
"Programming Language :: Python :: 3.10",
|
|
33
|
+
"Programming Language :: Python :: 3.11",
|
|
34
|
+
"Programming Language :: Python :: 3.12",
|
|
35
|
+
"Programming Language :: Python :: 3.13",
|
|
36
|
+
"Topic :: Office/Business :: Office Suites",
|
|
37
|
+
"Topic :: Text Processing :: Markup",
|
|
38
|
+
]
|
|
39
|
+
dependencies = [
|
|
40
|
+
"python-docx>=1.1",
|
|
41
|
+
"openpyxl>=3.1",
|
|
42
|
+
"xlsxwriter>=3.2",
|
|
43
|
+
"python-pptx>=1.0",
|
|
44
|
+
"markdown-it-py>=3.0",
|
|
45
|
+
"mcp>=1.26",
|
|
46
|
+
]
|
|
47
|
+
|
|
48
|
+
[project.scripts]
|
|
49
|
+
mcp-docgen = "mcp_docgen.server:main"
|
|
50
|
+
|
|
51
|
+
[dependency-groups]
|
|
52
|
+
dev = [
|
|
53
|
+
"pytest>=8.0",
|
|
54
|
+
"ruff>=0.8",
|
|
55
|
+
]
|
|
56
|
+
|
|
57
|
+
[build-system]
|
|
58
|
+
requires = ["uv_build>=0.10.5,<0.11.0"]
|
|
59
|
+
build-backend = "uv_build"
|
|
60
|
+
|
|
61
|
+
[tool.ruff]
|
|
62
|
+
line-length = 100
|
|
63
|
+
target-version = "py310"
|
|
64
|
+
|
|
65
|
+
[tool.ruff.lint]
|
|
66
|
+
select = ["E", "F", "I", "UP", "B", "SIM", "C4"]
|
|
67
|
+
|
|
68
|
+
[tool.pytest.ini_options]
|
|
69
|
+
testpaths = ["tests"]
|
|
70
|
+
addopts = "-q"
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
"""Document intermediate representation (IR).
|
|
2
|
+
|
|
3
|
+
A writer-agnostic block model produced by :mod:`mcp_docgen.markdown_parser` and
|
|
4
|
+
consumed by the docx / pptx writers. Keeping the IR in one place (SRP) lets every
|
|
5
|
+
writer share a single normalized structure instead of re-walking Markdown tokens.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from dataclasses import dataclass, field
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@dataclass
class Run:
    """An inline span of text together with its character formatting.

    Headings, paragraphs and table cells each hold a list of these.
    """

    # Literal text of the span.
    text: str
    # Character-level formatting flags; all off by default.
    bold: bool = False
    italic: bool = False
    code: bool = False
    strike: bool = False
    # Link target for the span, or ``None`` when the span is not part of a link.
    link: str | None = None
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@dataclass
class Heading:
    """A section heading; ``level`` runs from 1 (top) to 6."""

    # Heading depth, 1-6.
    level: int
    # Styled inline content of the heading.
    runs: list[Run]
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@dataclass
class Paragraph:
    """A paragraph: one block made up of inline text runs."""

    # Styled inline content, in reading order.
    runs: list[Run]
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
@dataclass
class ListItem:
    """A single list entry, holding the blocks it contains (nested lists included)."""

    # Child blocks of the item; each instance gets its own fresh list.
    blocks: list[Block] = field(default_factory=list)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
@dataclass
class ListBlock:
    """A list of items, either numbered (``ordered``) or bulleted."""

    # True for a numbered list, False for a bullet list.
    ordered: bool
    # The entries of the list, in order.
    items: list[ListItem]
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
@dataclass
class CodeBlock:
    """A block of code, whether fenced or indented in the source."""

    # Raw code text.
    text: str
    # Language tag of the block, or ``None`` when none was given.
    language: str | None = None
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
@dataclass
class BlockQuote:
    """A quotation that wraps a sequence of nested blocks."""

    # The quoted blocks, in order.
    blocks: list[Block]
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
@dataclass
class TableCell:
    """One cell of a table."""

    # Styled inline content of the cell.
    runs: list[Run]
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
@dataclass
class Table:
    """A table made of one header row plus any number of body rows."""

    # Cells of the header row.
    header: list[TableCell]
    # Body rows, each a list of cells.
    rows: list[list[TableCell]]
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
@dataclass
class ThematicBreak:
    """A horizontal rule (``---``); the pptx writer treats it as a slide break."""
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
# Union of every block-level node type in the IR: what the Markdown parser emits
# and what the docx / pptx writers consume.
Block = Heading | Paragraph | ListBlock | CodeBlock | BlockQuote | Table | ThematicBreak
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
"""Render the document IR into a Word (.docx) file via python-docx."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import contextlib
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
from docx import Document
|
|
9
|
+
from docx.oxml import OxmlElement
|
|
10
|
+
from docx.oxml.ns import qn
|
|
11
|
+
from docx.shared import Inches, Pt, RGBColor
|
|
12
|
+
|
|
13
|
+
from .blocks import (
|
|
14
|
+
BlockQuote,
|
|
15
|
+
CodeBlock,
|
|
16
|
+
Heading,
|
|
17
|
+
ListBlock,
|
|
18
|
+
Paragraph,
|
|
19
|
+
Table,
|
|
20
|
+
ThematicBreak,
|
|
21
|
+
)
|
|
22
|
+
|
|
23
|
+
# Font name applied to inline-code runs and to code-block text.
_MONO_FONT = "Consolas"
# Font colour applied to runs that carry a link target.
_LINK_COLOR = RGBColor(0x06, 0x6C, 0xC0)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def render_docx(blocks, title: str | None = None):
    """Assemble a python-docx ``Document`` from a sequence of IR blocks.

    Args:
        blocks: Iterable of IR blocks, rendered in order.
        title: Optional document title. When truthy it is written first, as a
            paragraph in the ``Title`` style.

    Returns:
        The populated ``Document``; nothing is saved to disk here.
    """
    document = Document()
    if title:
        document.add_paragraph(title, style="Title")
    for item in blocks:
        _render_block(document, item)
    return document
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def write_docx(blocks, output_path: str | Path, title: str | None = None) -> Path:
    """Render ``blocks`` to a Word document and save it at ``output_path``.

    Args:
        blocks: Iterable of IR blocks to render.
        output_path: Destination of the ``.docx`` file.
        title: Optional title forwarded to :func:`render_docx`.

    Returns:
        ``output_path`` as a :class:`~pathlib.Path`.
    """
    destination = Path(output_path)
    document = render_docx(blocks, title=title)
    document.save(str(destination))
    return destination
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def _render_block(doc, block) -> None:
    """Append one IR block to ``doc``, dispatching on the block's type.

    Blocks of a type not listed here are silently skipped.
    """
    if isinstance(block, Heading):
        heading = doc.add_paragraph(style=_heading_style(block.level))
        _add_runs(heading, block.runs)
        return
    if isinstance(block, Paragraph):
        _add_runs(doc.add_paragraph(), block.runs)
        return
    if isinstance(block, ListBlock):
        # Top-level lists start at nesting level 0.
        _render_list(doc, block, level=0)
        return
    if isinstance(block, CodeBlock):
        _render_code(doc, block)
        return
    if isinstance(block, BlockQuote):
        _render_quote(doc, block)
        return
    if isinstance(block, Table):
        _render_table(doc, block)
        return
    if isinstance(block, ThematicBreak):
        _add_horizontal_rule(doc)
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def _heading_style(level: int) -> str:
|
|
62
|
+
return f"Heading {min(max(level, 1), 9)}"
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def _add_runs(paragraph, runs, *, force_bold: bool = False) -> None:
|
|
66
|
+
for run in runs:
|
|
67
|
+
r = paragraph.add_run(run.text)
|
|
68
|
+
if run.bold or force_bold:
|
|
69
|
+
r.bold = True
|
|
70
|
+
if run.italic:
|
|
71
|
+
r.italic = True
|
|
72
|
+
if run.strike:
|
|
73
|
+
r.font.strike = True
|
|
74
|
+
if run.code:
|
|
75
|
+
r.font.name = _MONO_FONT
|
|
76
|
+
if run.link:
|
|
77
|
+
r.font.underline = True
|
|
78
|
+
r.font.color.rgb = _LINK_COLOR
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def _render_list(doc, list_block: ListBlock, level: int) -> None:
|
|
82
|
+
style = "List Number" if list_block.ordered else "List Bullet"
|
|
83
|
+
for item in list_block.items:
|
|
84
|
+
for child in item.blocks:
|
|
85
|
+
if isinstance(child, Paragraph):
|
|
86
|
+
p = doc.add_paragraph(style=style)
|
|
87
|
+
if level >= 1:
|
|
88
|
+
p.paragraph_format.left_indent = Inches(0.25 * (level + 1))
|
|
89
|
+
_add_runs(p, child.runs)
|
|
90
|
+
elif isinstance(child, ListBlock):
|
|
91
|
+
_render_list(doc, child, level + 1)
|
|
92
|
+
else:
|
|
93
|
+
_render_block(doc, child)
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def _render_code(doc, code: CodeBlock) -> None:
|
|
97
|
+
lines = code.text.split("\n")
|
|
98
|
+
if lines and lines[-1] == "":
|
|
99
|
+
lines = lines[:-1]
|
|
100
|
+
p = doc.add_paragraph()
|
|
101
|
+
for i, line in enumerate(lines):
|
|
102
|
+
r = p.add_run(line)
|
|
103
|
+
r.font.name = _MONO_FONT
|
|
104
|
+
r.font.size = Pt(9)
|
|
105
|
+
if i != len(lines) - 1:
|
|
106
|
+
r.add_break()
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def _render_quote(doc, quote: BlockQuote) -> None:
|
|
110
|
+
for child in quote.blocks:
|
|
111
|
+
if isinstance(child, Paragraph):
|
|
112
|
+
_add_runs(doc.add_paragraph(style="Quote"), child.runs)
|
|
113
|
+
else:
|
|
114
|
+
_render_block(doc, child)
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def _render_table(doc, table: Table) -> None:
|
|
118
|
+
ncols = len(table.header) or (len(table.rows[0]) if table.rows else 0)
|
|
119
|
+
if ncols == 0:
|
|
120
|
+
return
|
|
121
|
+
docx_table = doc.add_table(rows=0, cols=ncols)
|
|
122
|
+
with contextlib.suppress(KeyError): # template always ships Table Grid
|
|
123
|
+
docx_table.style = "Table Grid"
|
|
124
|
+
if table.header:
|
|
125
|
+
cells = docx_table.add_row().cells
|
|
126
|
+
for i, cell in enumerate(table.header):
|
|
127
|
+
_add_runs(cells[i].paragraphs[0], cell.runs, force_bold=True)
|
|
128
|
+
for row in table.rows:
|
|
129
|
+
cells = docx_table.add_row().cells
|
|
130
|
+
for i, cell in enumerate(row):
|
|
131
|
+
if i < ncols:
|
|
132
|
+
_add_runs(cells[i].paragraphs[0], cell.runs)
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def _add_horizontal_rule(doc) -> None:
    """Append an empty paragraph whose bottom border acts as a visual ``<hr>``."""
    paragraph = doc.add_paragraph()

    # Build <w:bottom w:val="single" w:sz="6" w:space="1" w:color="auto"/>.
    bottom = OxmlElement("w:bottom")
    for attribute, value in (
        ("w:val", "single"),
        ("w:sz", "6"),
        ("w:space", "1"),
        ("w:color", "auto"),
    ):
        bottom.set(qn(attribute), value)

    # Wrap it in <w:pBdr> and attach that to the paragraph properties.
    borders = OxmlElement("w:pBdr")
    borders.append(bottom)
    paragraph._p.get_or_add_pPr().append(borders)
|
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
"""Parse Markdown into the document IR (:mod:`mcp_docgen.blocks`).
|
|
2
|
+
|
|
3
|
+
Pure transformation: text in, ``list[Block]`` out, no IO. Built on markdown-it-py's
|
|
4
|
+
:class:`~markdown_it.tree.SyntaxTreeNode`, which turns the flat token stream into a
|
|
5
|
+
nested tree that is straightforward to walk.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from dataclasses import dataclass, replace
|
|
11
|
+
|
|
12
|
+
from markdown_it import MarkdownIt
|
|
13
|
+
from markdown_it.tree import SyntaxTreeNode
|
|
14
|
+
|
|
15
|
+
from .blocks import (
|
|
16
|
+
Block,
|
|
17
|
+
BlockQuote,
|
|
18
|
+
CodeBlock,
|
|
19
|
+
Heading,
|
|
20
|
+
ListBlock,
|
|
21
|
+
ListItem,
|
|
22
|
+
Paragraph,
|
|
23
|
+
Run,
|
|
24
|
+
Table,
|
|
25
|
+
TableCell,
|
|
26
|
+
ThematicBreak,
|
|
27
|
+
)
|
|
28
|
+
|
|
29
|
+
# Shared parser: CommonMark plus the GitHub-flavoured ``table`` and ``strikethrough``
# rules. The second positional argument to ``enable`` is ``ignoreInvalid``; passing
# ``True`` keeps construction safe across markdown-it-py versions. Linkify is
# intentionally left off to avoid the optional ``linkify-it-py`` dependency.
_MD = MarkdownIt("commonmark").enable(["table", "strikethrough"], True)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@dataclass(frozen=True)
|
|
36
|
+
class _Style:
|
|
37
|
+
"""Inline styling state carried down the recursion (internal)."""
|
|
38
|
+
|
|
39
|
+
bold: bool = False
|
|
40
|
+
italic: bool = False
|
|
41
|
+
code: bool = False
|
|
42
|
+
strike: bool = False
|
|
43
|
+
link: str | None = None
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def parse_markdown(text: str) -> list[Block]:
    """Convert Markdown source into the list of IR blocks it describes.

    A falsy ``text`` (``None`` or ``""``) is parsed as an empty document.
    """
    source = text if text else ""
    tree = SyntaxTreeNode(_MD.parse(source))
    return _blocks(tree.children)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def _blocks(nodes: list[SyntaxTreeNode]) -> list[Block]:
|
|
54
|
+
out: list[Block] = []
|
|
55
|
+
for node in nodes:
|
|
56
|
+
block = _block(node)
|
|
57
|
+
if block is not None:
|
|
58
|
+
out.append(block)
|
|
59
|
+
return out
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def _block(node: SyntaxTreeNode) -> Block | None:
|
|
63
|
+
t = node.type
|
|
64
|
+
if t == "heading":
|
|
65
|
+
return Heading(level=int(node.tag[1:]), runs=_inline_of(node))
|
|
66
|
+
if t == "paragraph":
|
|
67
|
+
return Paragraph(runs=_inline_of(node))
|
|
68
|
+
if t == "bullet_list":
|
|
69
|
+
return ListBlock(ordered=False, items=_items(node))
|
|
70
|
+
if t == "ordered_list":
|
|
71
|
+
return ListBlock(ordered=True, items=_items(node))
|
|
72
|
+
if t in ("fence", "code_block"):
|
|
73
|
+
language = (node.info or "").strip() or None
|
|
74
|
+
return CodeBlock(text=node.content, language=language)
|
|
75
|
+
if t == "blockquote":
|
|
76
|
+
return BlockQuote(blocks=_blocks(node.children))
|
|
77
|
+
if t == "table":
|
|
78
|
+
return _table(node)
|
|
79
|
+
if t == "hr":
|
|
80
|
+
return ThematicBreak()
|
|
81
|
+
return None
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def _items(list_node: SyntaxTreeNode) -> list[ListItem]:
|
|
85
|
+
return [
|
|
86
|
+
ListItem(blocks=_blocks(child.children))
|
|
87
|
+
for child in list_node.children
|
|
88
|
+
if child.type == "list_item"
|
|
89
|
+
]
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def _table(table_node: SyntaxTreeNode) -> Table:
    """Convert a ``table`` node into an IR ``Table``.

    The header comes from the ``thead`` section (the last row wins if there is
    more than one); every ``tbody`` row becomes a body row.
    """

    def cells_of(tr: SyntaxTreeNode) -> list[TableCell]:
        return [TableCell(runs=_inline_of(cell)) for cell in tr.children]

    header: list[TableCell] = []
    body: list[list[TableCell]] = []
    for section in table_node.children:
        kind = section.type
        if kind == "thead":
            for tr in section.children:
                header = cells_of(tr)
        elif kind == "tbody":
            body.extend(cells_of(tr) for tr in section.children)
    return Table(header=header, rows=body)
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def _inline_of(node: SyntaxTreeNode) -> list[Run]:
|
|
106
|
+
"""Collect styled runs from a block node wrapping a single ``inline`` child."""
|
|
107
|
+
for child in node.children:
|
|
108
|
+
if child.type == "inline":
|
|
109
|
+
return _runs(child.children, _Style())
|
|
110
|
+
return []
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def _runs(nodes: list[SyntaxTreeNode], style: _Style) -> list[Run]:
|
|
114
|
+
out: list[Run] = []
|
|
115
|
+
for node in nodes:
|
|
116
|
+
t = node.type
|
|
117
|
+
if t == "text":
|
|
118
|
+
if node.content:
|
|
119
|
+
out.append(_run(style, node.content))
|
|
120
|
+
elif t == "code_inline":
|
|
121
|
+
out.append(_run(replace(style, code=True), node.content))
|
|
122
|
+
elif t == "softbreak":
|
|
123
|
+
out.append(_run(style, " "))
|
|
124
|
+
elif t == "hardbreak":
|
|
125
|
+
out.append(_run(style, "\n"))
|
|
126
|
+
elif t == "strong":
|
|
127
|
+
out.extend(_runs(node.children, replace(style, bold=True)))
|
|
128
|
+
elif t == "em":
|
|
129
|
+
out.extend(_runs(node.children, replace(style, italic=True)))
|
|
130
|
+
elif t == "s":
|
|
131
|
+
out.extend(_runs(node.children, replace(style, strike=True)))
|
|
132
|
+
elif t == "link":
|
|
133
|
+
out.extend(_runs(node.children, replace(style, link=node.attrs.get("href"))))
|
|
134
|
+
elif node.children:
|
|
135
|
+
out.extend(_runs(node.children, style))
|
|
136
|
+
return out
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def _run(style: _Style, text: str) -> Run:
    """Build a ``Run`` for ``text`` carrying every formatting field of ``style``."""
    formatting = {
        name: getattr(style, name)
        for name in ("bold", "italic", "code", "strike", "link")
    }
    return Run(text=text, **formatting)
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
"""Output-path resolution and jail for generated files.
|
|
2
|
+
|
|
3
|
+
All writes are confined to a single base directory (``MCP_DOCGEN_OUTPUT_DIR`` or,
|
|
4
|
+
by default, ``./out`` under the working directory). Any path that resolves outside
|
|
5
|
+
the base — via ``..`` traversal or an absolute path — is rejected. This keeps a
|
|
6
|
+
server that anyone can install from writing arbitrary files on the host.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import os
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
|
|
14
|
+
# Environment variable that overrides the output base directory.
ENV_OUTPUT_DIR = "MCP_DOCGEN_OUTPUT_DIR"
# Sub-directory of the working directory used when the variable is unset or empty.
DEFAULT_SUBDIR = "out"


def output_base() -> Path:
    """Return (creating if needed) the absolute base directory for outputs.

    The directory is ``$MCP_DOCGEN_OUTPUT_DIR`` when that variable is set and
    non-empty, otherwise ``./out`` under the current working directory. The path
    is resolved so that later containment checks compare absolute paths.
    """
    raw = os.environ.get(ENV_OUTPUT_DIR)
    base = Path(raw) if raw else Path.cwd() / DEFAULT_SUBDIR
    base = base.resolve()
    base.mkdir(parents=True, exist_ok=True)
    return base


def resolve_output_path(output_path: str, expected_suffix: str) -> Path:
    """Resolve ``output_path`` inside the jail, coercing the file suffix.

    Args:
        output_path: Requested location, interpreted relative to the output base.
        expected_suffix: Required extension including the dot (e.g. ``".docx"``).
            A path with a different or missing extension has it replaced.

    Returns:
        The absolute target path; its parent directory is created if needed.

    Raises:
        ValueError: if ``output_path`` is empty.
        PermissionError: if the resolved path escapes the output base directory,
            or resolves to the base directory itself.
    """
    if not output_path or not str(output_path).strip():
        raise ValueError("output_path must be a non-empty path")
    base = output_base()
    target = (base / output_path).resolve()
    try:
        relative = target.relative_to(base)
    except ValueError as exc:
        raise PermissionError(f"output_path escapes the allowed output directory ({base})") from exc
    if not relative.parts:
        # "." or "sub/.." resolves to the base itself. Coercing the suffix would
        # then produce a *sibling* of the base (e.g. "out.docx"), outside the jail.
        raise PermissionError(
            f"output_path must name a file inside the output directory ({base}), "
            "not the directory itself"
        )
    if target.suffix.lower() != expected_suffix:
        # ``with_suffix`` keeps the parent directory, so the target stays in the jail.
        target = target.with_suffix(expected_suffix)
    target.parent.mkdir(parents=True, exist_ok=True)
    return target
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
"""Render the document IR into a PowerPoint (.pptx) deck via python-pptx.
|
|
2
|
+
|
|
3
|
+
Slide convention:
|
|
4
|
+
* ``# H1`` -> starts a new slide; the heading text becomes the slide title.
|
|
5
|
+
* content below -> bullet points in the slide body (lists nest by indent level).
|
|
6
|
+
* ``---`` (hr) -> an explicit slide break (a new, untitled slide).
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
from dataclasses import replace
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
|
|
14
|
+
from pptx import Presentation
|
|
15
|
+
|
|
16
|
+
from .blocks import (
|
|
17
|
+
BlockQuote,
|
|
18
|
+
CodeBlock,
|
|
19
|
+
Heading,
|
|
20
|
+
ListBlock,
|
|
21
|
+
Paragraph,
|
|
22
|
+
Run,
|
|
23
|
+
Table,
|
|
24
|
+
ThematicBreak,
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
_MONO_FONT = "Consolas"
|
|
28
|
+
_MAX_LEVEL = 4
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def render_pptx(blocks, title: str | None = None):
    """Build a python-pptx ``Presentation`` from IR blocks and return it.

    Args:
        blocks: Iterable of IR blocks, grouped into slides by
            ``_split_into_slides``.
        title: When truthy, a leading title slide carrying this text is added.

    Returns:
        The populated ``Presentation`` (not yet saved).
    """
    deck = Presentation()
    if title:
        _add_title_slide(deck, title)
    for heading, body_blocks in _split_into_slides(blocks):
        _add_content_slide(deck, heading, body_blocks)
    return deck
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def write_pptx(blocks, output_path: str | Path, title: str | None = None) -> Path:
    """Render ``blocks`` into a deck and save it at ``output_path``.

    Args:
        blocks: IR blocks handed to ``render_pptx``.
        output_path: Where the .pptx file is written.
        title: Optional title-slide text, forwarded to ``render_pptx``.

    Returns:
        ``output_path`` as a ``Path``.
    """
    destination = Path(output_path)
    deck = render_pptx(blocks, title=title)
    deck.save(str(destination))
    return destination
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def _split_into_slides(blocks):
|
|
49
|
+
"""Group the block stream into (title, content_blocks) slides."""
|
|
50
|
+
slides: list[tuple[str | None, list]] = []
|
|
51
|
+
cur_title: str | None = None
|
|
52
|
+
cur_blocks: list = []
|
|
53
|
+
|
|
54
|
+
def flush() -> None:
|
|
55
|
+
nonlocal cur_title, cur_blocks
|
|
56
|
+
if cur_title is not None or cur_blocks:
|
|
57
|
+
slides.append((cur_title, cur_blocks))
|
|
58
|
+
cur_title, cur_blocks = None, []
|
|
59
|
+
|
|
60
|
+
for block in blocks:
|
|
61
|
+
if isinstance(block, Heading) and block.level == 1:
|
|
62
|
+
flush()
|
|
63
|
+
cur_title = _runs_text(block.runs)
|
|
64
|
+
elif isinstance(block, ThematicBreak):
|
|
65
|
+
flush()
|
|
66
|
+
else:
|
|
67
|
+
cur_blocks.append(block)
|
|
68
|
+
flush()
|
|
69
|
+
return slides
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def _add_title_slide(prs, title: str) -> None:
    """Append a slide built from slide layout index 0 and set its title text."""
    layout = prs.slide_layouts[0]
    cover = prs.slides.add_slide(layout)
    cover.shapes.title.text = title
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def _add_content_slide(prs, title: str | None, blocks) -> None:
    """Append a slide from layout index 1 and fill its body with bullets.

    Args:
        prs: Presentation the slide is added to.
        title: Slide title; ``None`` is written as an empty string.
        blocks: Content blocks, flattened to bullet rows by ``_bullets``.
    """
    slide = prs.slides.add_slide(prs.slide_layouts[1])
    slide.shapes.title.text = title or ""
    frame = slide.placeholders[1].text_frame
    frame.clear()
    for position, (runs, level) in enumerate(_bullets(blocks, 0)):
        # The first bullet reuses the frame's existing first paragraph.
        para = frame.add_paragraph() if position else frame.paragraphs[0]
        para.level = min(level, _MAX_LEVEL)
        _write_runs(para, runs)
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def _bullets(blocks, level: int) -> list[tuple[list[Run], int]]:
|
|
91
|
+
"""Flatten content blocks into (runs, indent_level) bullet rows."""
|
|
92
|
+
out: list[tuple[list[Run], int]] = []
|
|
93
|
+
for block in blocks:
|
|
94
|
+
if isinstance(block, Paragraph):
|
|
95
|
+
out.append((block.runs, level))
|
|
96
|
+
elif isinstance(block, Heading):
|
|
97
|
+
out.append(([replace(r, bold=True) for r in block.runs], level))
|
|
98
|
+
elif isinstance(block, ListBlock):
|
|
99
|
+
for item in block.items:
|
|
100
|
+
for child in item.blocks:
|
|
101
|
+
next_level = level + 1 if isinstance(child, ListBlock) else level
|
|
102
|
+
out.extend(_bullets([child], next_level))
|
|
103
|
+
elif isinstance(block, CodeBlock):
|
|
104
|
+
for line in _code_lines(block.text):
|
|
105
|
+
out.append(([Run(text=line, code=True)], level))
|
|
106
|
+
elif isinstance(block, BlockQuote):
|
|
107
|
+
for child in block.blocks:
|
|
108
|
+
if isinstance(child, Paragraph):
|
|
109
|
+
out.append(([replace(r, italic=True) for r in child.runs], level))
|
|
110
|
+
else:
|
|
111
|
+
out.extend(_bullets([child], level))
|
|
112
|
+
elif isinstance(block, Table):
|
|
113
|
+
out.extend(_table_bullets(block, level))
|
|
114
|
+
return out
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def _table_bullets(table: Table, level: int) -> list[tuple[list[Run], int]]:
    """Render a table as bullet rows: one ``" | "``-joined line per table row.

    The header, when present, comes first; every row is emitted at ``level``.
    """

    def as_bullet(cells):
        # Join the plain text of each cell into a single run.
        joined = " | ".join(_runs_text(cell.runs) for cell in cells)
        return ([Run(text=joined)], level)

    lines = [as_bullet(table.header)] if table.header else []
    lines.extend(as_bullet(row) for row in table.rows)
    return lines
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def _code_lines(text: str) -> list[str]:
|
|
127
|
+
lines = text.split("\n")
|
|
128
|
+
if lines and lines[-1] == "":
|
|
129
|
+
lines = lines[:-1]
|
|
130
|
+
return lines
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def _write_runs(paragraph, runs) -> None:
|
|
134
|
+
for run in runs:
|
|
135
|
+
r = paragraph.add_run()
|
|
136
|
+
r.text = run.text
|
|
137
|
+
if run.bold:
|
|
138
|
+
r.font.bold = True
|
|
139
|
+
if run.italic:
|
|
140
|
+
r.font.italic = True
|
|
141
|
+
if run.code:
|
|
142
|
+
r.font.name = _MONO_FONT
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def _runs_text(runs) -> str:
|
|
146
|
+
return "".join(r.text for r in runs)
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
"""mcp-docgen: a Markdown-driven MCP server that generates Office documents.
|
|
2
|
+
|
|
3
|
+
Exposes three tools over the Model Context Protocol:
|
|
4
|
+
* ``create_docx`` — Markdown -> Word (.docx)
|
|
5
|
+
* ``create_pptx`` — Markdown -> PowerPoint (.pptx)
|
|
6
|
+
* ``create_xlsx`` — structured rows -> Excel (.xlsx)
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
from mcp.server.fastmcp import FastMCP
|
|
12
|
+
|
|
13
|
+
from .docx_writer import write_docx
|
|
14
|
+
from .markdown_parser import parse_markdown
|
|
15
|
+
from .paths import resolve_output_path
|
|
16
|
+
from .pptx_writer import write_pptx
|
|
17
|
+
from .xlsx_writer import write_xlsx
|
|
18
|
+
|
|
19
|
+
# Longest Markdown input, in characters, accepted by _check_text.
MAX_MARKDOWN_CHARS = 1_000_000
# Largest total cell count across all sheets accepted by _check_sheets.
MAX_SHEET_CELLS = 1_000_000

# Server instance on which the tool functions of this module are registered.
mcp = FastMCP("mcp-docgen")
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@mcp.tool()
def create_docx(markdown: str, output_path: str, title: str | None = None) -> dict:
    """Create a Word (.docx) document from Markdown.

    Args:
        markdown: Document body as Markdown — headings, lists, tables, bold/italic,
            inline code, fenced code blocks and block quotes are supported.
        output_path: Destination filename, relative to the server's output directory.
            The ``.docx`` suffix is enforced.
        title: Optional heading rendered with Word's "Title" style at the top.

    Returns:
        ``{"path": <absolute path of the written .docx>}``.
    """
    # NOTE(review): the docstring above is presumably published by FastMCP as the
    # tool description, so its wording is kept verbatim; confirm before editing it.
    _check_text(markdown)
    destination = resolve_output_path(output_path, ".docx")
    document_blocks = parse_markdown(markdown)
    write_docx(document_blocks, destination, title=title)
    return {"path": str(destination)}
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
@mcp.tool()
def create_pptx(markdown: str, output_path: str, title: str | None = None) -> dict:
    """Create a PowerPoint (.pptx) deck from Markdown.

    Slide convention: each top-level ``# Heading`` starts a new slide and becomes its
    title; content beneath becomes bullet points (nested lists indent); a ``---``
    horizontal rule forces a slide break.

    Args:
        markdown: Slide content as Markdown.
        output_path: Destination filename, relative to the server's output directory.
            The ``.pptx`` suffix is enforced.
        title: Optional text for a leading title slide.

    Returns:
        ``{"path": <absolute path of the written .pptx>}``.
    """
    # NOTE(review): the docstring above is presumably published by FastMCP as the
    # tool description, so its wording is kept verbatim; confirm before editing it.
    _check_text(markdown)
    destination = resolve_output_path(output_path, ".pptx")
    slide_blocks = parse_markdown(markdown)
    write_pptx(slide_blocks, destination, title=title)
    return {"path": str(destination)}
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
@mcp.tool()
def create_xlsx(sheets: list[dict], output_path: str) -> dict:
    """Create an Excel (.xlsx) workbook from structured sheet data.

    Args:
        sheets: A list of worksheets, each ``{"name": str, "rows": [[cell, ...], ...]}``.
            Cells may be strings, numbers, booleans or null. The first row of each sheet
            is a bold, frozen header unless the sheet sets ``"header": false``.
        output_path: Destination filename, relative to the server's output directory.
            The ``.xlsx`` suffix is enforced.

    Returns:
        ``{"path": <absolute path of the written .xlsx>}``.
    """
    # NOTE(review): the docstring above is presumably published by FastMCP as the
    # tool description, so its wording is kept verbatim; confirm before editing it.
    _check_sheets(sheets)
    destination = resolve_output_path(output_path, ".xlsx")
    write_xlsx(sheets, destination)
    return {"path": str(destination)}
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def _check_text(text: str) -> None:
|
|
89
|
+
if not isinstance(text, str):
|
|
90
|
+
raise ValueError("markdown must be a string")
|
|
91
|
+
if len(text) > MAX_MARKDOWN_CHARS:
|
|
92
|
+
raise ValueError(f"markdown exceeds the {MAX_MARKDOWN_CHARS}-character limit")
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def _check_sheets(sheets) -> None:
|
|
96
|
+
if not isinstance(sheets, list):
|
|
97
|
+
raise ValueError("sheets must be a list of sheet objects")
|
|
98
|
+
total = 0
|
|
99
|
+
for sheet in sheets:
|
|
100
|
+
for row in (sheet or {}).get("rows") or []:
|
|
101
|
+
total += len(row)
|
|
102
|
+
if total > MAX_SHEET_CELLS:
|
|
103
|
+
raise ValueError(f"sheets exceed the {MAX_SHEET_CELLS}-cell limit")
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def main() -> None:
    """Run the MCP server over stdio; this is the console entry point."""
    mcp.run()
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
# Start the server when this module is executed as a script.
if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
"""Render structured sheet data into an Excel (.xlsx) workbook via openpyxl.
|
|
2
|
+
|
|
3
|
+
Input shape (JSON-friendly), one dict per worksheet::
|
|
4
|
+
|
|
5
|
+
[{"name": "Sheet1", "rows": [["Header A", "Header B"], ["a1", "b1"]], "header": true}]
|
|
6
|
+
|
|
7
|
+
``rows`` is a list of rows, each a list of cell values (str / int / float / bool / null).
|
|
8
|
+
The first row is treated as a bold, frozen header unless ``header`` is ``false``.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
|
|
15
|
+
from openpyxl import Workbook
|
|
16
|
+
from openpyxl.styles import Font
|
|
17
|
+
from openpyxl.utils import get_column_letter
|
|
18
|
+
|
|
19
|
+
_INVALID_TITLE_CHARS = set("[]:*?/\\")
|
|
20
|
+
_MAX_TITLE = 31
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def render_xlsx(sheets):
    """Build and return an openpyxl ``Workbook`` from structured sheet data.

    Args:
        sheets: List of sheet dicts with optional ``"name"``, ``"rows"`` and
            ``"header"`` keys. A missing name becomes ``Sheet<position>``; the
            first row is styled as a header unless ``"header"`` is falsy.

    Returns:
        The populated workbook. When ``sheets`` is empty or ``None`` the
        workbook keeps the single sheet that ``Workbook()`` creates.
    """
    wb = Workbook()
    placeholder = wb.active
    used: set[str] = set()
    for index, sheet in enumerate(sheets or []):
        name = sheet.get("name") or f"Sheet{index + 1}"
        rows = sheet.get("rows") or []
        header = sheet.get("header", True)
        if placeholder is not None:
            # Drop the auto-created sheet before adding the first real one.
            # While it is present, a requested title equal to its own title
            # collides with it and openpyxl renames the new sheet.
            wb.remove(placeholder)
            placeholder = None
        ws = wb.create_sheet(title=_safe_title(name, used))
        for row in rows:
            ws.append(list(row))
        if header and rows:
            _style_header(ws, len(rows[0]))
        _autosize(ws, rows)
    return wb
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def write_xlsx(sheets, output_path: str | Path) -> Path:
    """Render ``sheets`` into a workbook and save it at ``output_path``.

    Returns:
        ``output_path`` as a ``Path``.
    """
    destination = Path(output_path)
    workbook = render_xlsx(sheets)
    workbook.save(str(destination))
    return destination
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _safe_title(name: str, used: set[str]) -> str:
    """Return a worksheet title derived from ``name`` that is not yet in ``used``.

    Characters in ``_INVALID_TITLE_CHARS`` are replaced by spaces, the result
    is stripped and cut to ``_MAX_TITLE`` characters, and an empty result
    becomes ``"Sheet"``. On a clash a `` (2)``, `` (3)``, ... suffix is added,
    shortening the base so the title still fits in ``_MAX_TITLE`` characters.

    Args:
        name: Requested title; converted with ``str()``.
        used: Titles already handed out. The returned title is added to it.

    Returns:
        The sanitized, unique title.
    """
    cleaned = "".join(" " if c in _INVALID_TITLE_CHARS else c for c in str(name)).strip()
    cleaned = cleaned[:_MAX_TITLE] or "Sheet"
    # Compare case-insensitively: titles that differ only in case still clash
    # in the workbook, and letting openpyxl resolve the clash can push the
    # title past the length limit.
    taken = {title.lower() for title in used}
    base = cleaned
    counter = 2
    while cleaned.lower() in taken:
        suffix = f" ({counter})"
        cleaned = base[: _MAX_TITLE - len(suffix)] + suffix
        counter += 1
    used.add(cleaned)
    return cleaned
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def _style_header(ws, ncols: int) -> None:
    """Make the first ``ncols`` cells of row 1 bold and freeze panes at ``A2``."""
    header_font = Font(bold=True)
    for offset in range(ncols):
        ws.cell(row=1, column=offset + 1).font = header_font
    ws.freeze_panes = "A2"
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def _autosize(ws, rows) -> None:
|
|
73
|
+
widths: dict[int, int] = {}
|
|
74
|
+
for row in rows:
|
|
75
|
+
for idx, cell in enumerate(row, start=1):
|
|
76
|
+
length = len(str(cell)) if cell is not None else 0
|
|
77
|
+
if length > widths.get(idx, 0):
|
|
78
|
+
widths[idx] = length
|
|
79
|
+
for idx, length in widths.items():
|
|
80
|
+
ws.column_dimensions[get_column_letter(idx)].width = min(max(length + 2, 8), 60)
|