mcp-docgen 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Touka Project (Otoha)
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,156 @@
1
+ Metadata-Version: 2.4
2
+ Name: mcp-docgen
3
+ Version: 0.1.0
4
+ Summary: Markdown-driven MCP server that generates Word (.docx), Excel (.xlsx) and PowerPoint (.pptx) documents — by the Touka project.
5
+ Keywords: mcp,model-context-protocol,docx,xlsx,pptx,word,excel,powerpoint,document-generation,markdown
6
+ Author: Otoha (Touka Project)
7
+ Author-email: Otoha (Touka Project) <whitekinglight@gmail.com>
8
+ License-Expression: MIT
9
+ License-File: LICENSE
10
+ Classifier: Development Status :: 3 - Alpha
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: Operating System :: OS Independent
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Programming Language :: Python :: 3.10
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: Programming Language :: Python :: 3.13
18
+ Classifier: Topic :: Office/Business :: Office Suites
19
+ Classifier: Topic :: Text Processing :: Markup
20
+ Requires-Dist: python-docx>=1.1
21
+ Requires-Dist: openpyxl>=3.1
22
+ Requires-Dist: xlsxwriter>=3.2
23
+ Requires-Dist: python-pptx>=1.0
24
+ Requires-Dist: markdown-it-py>=3.0
25
+ Requires-Dist: mcp>=1.26
26
+ Maintainer: Touka Project
27
+ Maintainer-email: Touka Project <whitekinglight@gmail.com>
28
+ Requires-Python: >=3.10
29
+ Description-Content-Type: text/markdown
30
+
31
+ # mcp-docgen
32
+
33
+ A Markdown-driven [Model Context Protocol](https://modelcontextprotocol.io) (MCP) server
34
+ that turns Markdown — and structured data — into **Word (`.docx`)**,
35
+ **PowerPoint (`.pptx`)**, and **Excel (`.xlsx`)** files.
36
+
37
+ Built entirely on mature, permissively-licensed Python libraries
38
+ ([`python-docx`](https://github.com/python-openxml/python-docx),
39
+ [`python-pptx`](https://github.com/scanny/python-pptx),
40
+ [`openpyxl`](https://foss.heptapod.net/openpyxl/openpyxl),
41
+ [`XlsxWriter`](https://github.com/jmcnamara/XlsxWriter),
42
+ [`markdown-it-py`](https://github.com/executablebooks/markdown-it-py)) — no proprietary
43
+ dependencies. **MIT licensed.**
44
+
45
+ > Part of the **Touka** project: giving AI agents the ability to produce real Office
46
+ > documents using only open-source building blocks.
47
+
48
+ ## Why
49
+
50
+ LLMs are great at producing Markdown. `mcp-docgen` exposes three tools that convert that
51
+ Markdown into polished Office documents, so any MCP-capable assistant (Claude Desktop,
52
+ Touka, …) can hand a user a finished `.docx` / `.pptx` / `.xlsx`.
53
+
54
+ ## Install & run
55
+
56
+ Once published to PyPI:
57
+
58
+ ```bash
59
+ uvx mcp-docgen
60
+ ```
61
+
62
+ From a local checkout (before publishing):
63
+
64
+ ```bash
65
+ uv sync
66
+ uv run mcp-docgen
67
+ ```
68
+
69
+ The server speaks MCP over **stdio**.
70
+
71
+ ## MCP client configuration
72
+
73
+ ```jsonc
74
+ {
75
+ "mcpServers": {
76
+ "docgen": {
77
+ "command": "uvx",
78
+ "args": ["mcp-docgen"],
79
+ "env": { "MCP_DOCGEN_OUTPUT_DIR": "/absolute/path/to/output" }
80
+ }
81
+ }
82
+ }
83
+ ```
84
+
85
+ From a local checkout, swap the command for:
86
+
87
+ ```jsonc
88
+ { "command": "uv", "args": ["run", "--directory", "/path/to/mcp-docgen", "mcp-docgen"] }
89
+ ```
90
+
91
+ ## Tools
92
+
93
+ Each tool returns `{"path": "<absolute path of the written file>"}`.
94
+
95
+ ### `create_docx(markdown, output_path, title?)`
96
+
97
+ Markdown → Word. Supports headings, **bold** / *italic* / `inline code`, bullet and
98
+ numbered lists (nested), tables, block quotes, fenced code blocks, and horizontal rules.
99
+
100
+ ### `create_pptx(markdown, output_path, title?)`
101
+
102
+ Markdown → PowerPoint, using this slide convention:
103
+
104
+ | Markdown | Result |
105
+ | --- | --- |
106
+ | `# Heading` | starts a **new slide** (the heading becomes its title) |
107
+ | content below a heading | **bullet points** (nested lists indent) |
108
+ | `---` (horizontal rule) | an explicit **slide break** |
109
+
110
+ `title` adds a leading title slide.
111
+
112
+ ### `create_xlsx(sheets, output_path)`
113
+
114
+ Structured data → Excel. `sheets` is a list of worksheets:
115
+
116
+ ```json
117
+ [
118
+ {
119
+ "name": "Sales",
120
+ "rows": [["Region", "Revenue"], ["APAC", 1200000], ["EMEA", 900000]],
121
+ "header": true
122
+ }
123
+ ]
124
+ ```
125
+
126
+ Cells may be strings, numbers, booleans, or `null`. The first row is a **bold, frozen
127
+ header** unless the sheet sets `"header": false`.
128
+
129
+ ## Output directory & safety
130
+
131
+ All files are written inside one base directory — `MCP_DOCGEN_OUTPUT_DIR`, or `./out`
132
+ relative to the working directory by default. `output_path` is always interpreted
133
+ relative to that base, and any path that tries to escape it (via `..` or an absolute
134
+ path) is rejected. The server makes **no network calls** and spawns **no subprocesses**.
135
+
136
+ ## Examples
137
+
138
+ ```bash
139
+ uv run python examples/generate_samples.py
140
+ # writes report.docx, review.pptx and sales.xlsx into examples/output/
141
+ ```
142
+
143
+ ## Development
144
+
145
+ ```bash
146
+ uv sync
147
+ uv run pytest
148
+ uv run ruff check .
149
+ ```
150
+
151
+ ## License
152
+
153
+ MIT © 2026 Touka Project — see [LICENSE](LICENSE).
154
+
155
+ Document generation is powered by python-docx, python-pptx, openpyxl, and XlsxWriter;
156
+ Markdown parsing by markdown-it-py. All MIT/BSD licensed.
@@ -0,0 +1,126 @@
1
+ # mcp-docgen
2
+
3
+ A Markdown-driven [Model Context Protocol](https://modelcontextprotocol.io) (MCP) server
4
+ that turns Markdown — and structured data — into **Word (`.docx`)**,
5
+ **PowerPoint (`.pptx`)**, and **Excel (`.xlsx`)** files.
6
+
7
+ Built entirely on mature, permissively-licensed Python libraries
8
+ ([`python-docx`](https://github.com/python-openxml/python-docx),
9
+ [`python-pptx`](https://github.com/scanny/python-pptx),
10
+ [`openpyxl`](https://foss.heptapod.net/openpyxl/openpyxl),
11
+ [`XlsxWriter`](https://github.com/jmcnamara/XlsxWriter),
12
+ [`markdown-it-py`](https://github.com/executablebooks/markdown-it-py)) — no proprietary
13
+ dependencies. **MIT licensed.**
14
+
15
+ > Part of the **Touka** project: giving AI agents the ability to produce real Office
16
+ > documents using only open-source building blocks.
17
+
18
+ ## Why
19
+
20
+ LLMs are great at producing Markdown. `mcp-docgen` exposes three tools that convert that
21
+ Markdown into polished Office documents, so any MCP-capable assistant (Claude Desktop,
22
+ Touka, …) can hand a user a finished `.docx` / `.pptx` / `.xlsx`.
23
+
24
+ ## Install & run
25
+
26
+ Once published to PyPI:
27
+
28
+ ```bash
29
+ uvx mcp-docgen
30
+ ```
31
+
32
+ From a local checkout (before publishing):
33
+
34
+ ```bash
35
+ uv sync
36
+ uv run mcp-docgen
37
+ ```
38
+
39
+ The server speaks MCP over **stdio**.
40
+
41
+ ## MCP client configuration
42
+
43
+ ```jsonc
44
+ {
45
+ "mcpServers": {
46
+ "docgen": {
47
+ "command": "uvx",
48
+ "args": ["mcp-docgen"],
49
+ "env": { "MCP_DOCGEN_OUTPUT_DIR": "/absolute/path/to/output" }
50
+ }
51
+ }
52
+ }
53
+ ```
54
+
55
+ From a local checkout, swap the command for:
56
+
57
+ ```jsonc
58
+ { "command": "uv", "args": ["run", "--directory", "/path/to/mcp-docgen", "mcp-docgen"] }
59
+ ```
60
+
61
+ ## Tools
62
+
63
+ Each tool returns `{"path": "<absolute path of the written file>"}`.
64
+
65
+ ### `create_docx(markdown, output_path, title?)`
66
+
67
+ Markdown → Word. Supports headings, **bold** / *italic* / `inline code`, bullet and
68
+ numbered lists (nested), tables, block quotes, fenced code blocks, and horizontal rules.
69
+
70
+ ### `create_pptx(markdown, output_path, title?)`
71
+
72
+ Markdown → PowerPoint, using this slide convention:
73
+
74
+ | Markdown | Result |
75
+ | --- | --- |
76
+ | `# Heading` | starts a **new slide** (the heading becomes its title) |
77
+ | content below a heading | **bullet points** (nested lists indent) |
78
+ | `---` (horizontal rule) | an explicit **slide break** |
79
+
80
+ `title` adds a leading title slide.
81
+
82
+ ### `create_xlsx(sheets, output_path)`
83
+
84
+ Structured data → Excel. `sheets` is a list of worksheets:
85
+
86
+ ```json
87
+ [
88
+ {
89
+ "name": "Sales",
90
+ "rows": [["Region", "Revenue"], ["APAC", 1200000], ["EMEA", 900000]],
91
+ "header": true
92
+ }
93
+ ]
94
+ ```
95
+
96
+ Cells may be strings, numbers, booleans, or `null`. The first row is a **bold, frozen
97
+ header** unless the sheet sets `"header": false`.
98
+
99
+ ## Output directory & safety
100
+
101
+ All files are written inside one base directory — `MCP_DOCGEN_OUTPUT_DIR`, or `./out`
102
+ relative to the working directory by default. `output_path` is always interpreted
103
+ relative to that base, and any path that tries to escape it (via `..` or an absolute
104
+ path) is rejected. The server makes **no network calls** and spawns **no subprocesses**.
105
+
106
+ ## Examples
107
+
108
+ ```bash
109
+ uv run python examples/generate_samples.py
110
+ # writes report.docx, review.pptx and sales.xlsx into examples/output/
111
+ ```
112
+
113
+ ## Development
114
+
115
+ ```bash
116
+ uv sync
117
+ uv run pytest
118
+ uv run ruff check .
119
+ ```
120
+
121
+ ## License
122
+
123
+ MIT © 2026 Touka Project — see [LICENSE](LICENSE).
124
+
125
+ Document generation is powered by python-docx, python-pptx, openpyxl, and XlsxWriter;
126
+ Markdown parsing by markdown-it-py. All MIT/BSD licensed.
@@ -0,0 +1,70 @@
1
+ [project]
2
+ name = "mcp-docgen"
3
+ version = "0.1.0"
4
+ description = "Markdown-driven MCP server that generates Word (.docx), Excel (.xlsx) and PowerPoint (.pptx) documents — by the Touka project."
5
+ readme = "README.md"
6
+ requires-python = ">=3.10"
7
+ license = "MIT"
8
+ license-files = ["LICENSE"]
9
+ authors = [
10
+ { name = "Otoha (Touka Project)", email = "whitekinglight@gmail.com" },
11
+ ]
12
+ maintainers = [
13
+ { name = "Touka Project", email = "whitekinglight@gmail.com" },
14
+ ]
15
+ keywords = [
16
+ "mcp",
17
+ "model-context-protocol",
18
+ "docx",
19
+ "xlsx",
20
+ "pptx",
21
+ "word",
22
+ "excel",
23
+ "powerpoint",
24
+ "document-generation",
25
+ "markdown",
26
+ ]
27
+ classifiers = [
28
+ "Development Status :: 3 - Alpha",
29
+ "Intended Audience :: Developers",
30
+ "Operating System :: OS Independent",
31
+ "Programming Language :: Python :: 3",
32
+ "Programming Language :: Python :: 3.10",
33
+ "Programming Language :: Python :: 3.11",
34
+ "Programming Language :: Python :: 3.12",
35
+ "Programming Language :: Python :: 3.13",
36
+ "Topic :: Office/Business :: Office Suites",
37
+ "Topic :: Text Processing :: Markup",
38
+ ]
39
+ dependencies = [
40
+ "python-docx>=1.1",
41
+ "openpyxl>=3.1",
42
+ "xlsxwriter>=3.2",
43
+ "python-pptx>=1.0",
44
+ "markdown-it-py>=3.0",
45
+ "mcp>=1.26",
46
+ ]
47
+
48
+ [project.scripts]
49
+ mcp-docgen = "mcp_docgen.server:main"
50
+
51
+ [dependency-groups]
52
+ dev = [
53
+ "pytest>=8.0",
54
+ "ruff>=0.8",
55
+ ]
56
+
57
+ [build-system]
58
+ requires = ["uv_build>=0.10.5,<0.11.0"]
59
+ build-backend = "uv_build"
60
+
61
+ [tool.ruff]
62
+ line-length = 100
63
+ target-version = "py310"
64
+
65
+ [tool.ruff.lint]
66
+ select = ["E", "F", "I", "UP", "B", "SIM", "C4"]
67
+
68
+ [tool.pytest.ini_options]
69
+ testpaths = ["tests"]
70
+ addopts = "-q"
@@ -0,0 +1,3 @@
1
+ """mcp-docgen: a Markdown-driven MCP server for .docx / .xlsx / .pptx generation."""
2
+
3
+ __version__ = "0.1.0"
@@ -0,0 +1,90 @@
1
+ """Document intermediate representation (IR).
2
+
3
+ A writer-agnostic block model produced by :mod:`mcp_docgen.markdown_parser` and
4
+ consumed by the docx / pptx writers. Keeping the IR in one place (SRP) lets every
5
+ writer share a single normalized structure instead of re-walking Markdown tokens.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from dataclasses import dataclass, field
11
+
12
+
13
@dataclass
class Run:
    """An inline span of text together with its character formatting flags."""

    text: str  # literal characters of the span
    bold: bool = False
    italic: bool = False
    code: bool = False  # inline-code span
    strike: bool = False
    link: str | None = None  # link target carried by the span, if any
23
+
24
+
25
@dataclass
class Heading:
    """A heading block: a nesting ``level`` (1-6) plus its inline runs."""

    level: int  # 1 is the outermost heading
    runs: list[Run]  # styled text making up the heading
31
+
32
+
33
@dataclass
class Paragraph:
    """A paragraph block made of inline runs."""

    runs: list[Run]  # styled text in reading order
38
+
39
+
40
@dataclass
class ListItem:
    """A single list entry, holding the blocks it contains (nested lists included)."""

    # Each instance gets its own fresh list via the factory.
    blocks: list[Block] = field(default_factory=list)
45
+
46
+
47
@dataclass
class ListBlock:
    """A list block, either numbered (``ordered=True``) or bulleted."""

    ordered: bool
    items: list[ListItem]  # entries in document order
53
+
54
+
55
@dataclass
class CodeBlock:
    """A block of literal code text (fenced or indented in the source)."""

    text: str  # raw code, newlines included
    language: str | None = None  # info string of the fence, when one was given
61
+
62
+
63
@dataclass
class BlockQuote:
    """A quotation block that wraps other blocks."""

    blocks: list[Block]  # quoted content in document order
68
+
69
+
70
@dataclass
class TableCell:
    """One cell of a table, expressed as inline runs."""

    runs: list[Run]  # styled text inside the cell
75
+
76
+
77
@dataclass
class Table:
    """A table block: one header row followed by any number of body rows."""

    header: list[TableCell]  # cells of the header row
    rows: list[list[TableCell]]  # body rows, each a list of cells
83
+
84
+
85
@dataclass
class ThematicBreak:
    """A ``---`` horizontal rule; the pptx writer treats it as a slide break."""
88
+
89
+
90
# Union of every block type that can appear in the IR.
Block = (
    Heading
    | Paragraph
    | ListBlock
    | CodeBlock
    | BlockQuote
    | Table
    | ThematicBreak
)
@@ -0,0 +1,146 @@
1
+ """Render the document IR into a Word (.docx) file via python-docx."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import contextlib
6
+ from pathlib import Path
7
+
8
+ from docx import Document
9
+ from docx.oxml import OxmlElement
10
+ from docx.oxml.ns import qn
11
+ from docx.shared import Inches, Pt, RGBColor
12
+
13
+ from .blocks import (
14
+ BlockQuote,
15
+ CodeBlock,
16
+ Heading,
17
+ ListBlock,
18
+ Paragraph,
19
+ Table,
20
+ ThematicBreak,
21
+ )
22
+
23
# Typeface applied to inline code and code-block runs.
_MONO_FONT = "Consolas"
# Font colour applied to runs that carry a link target.
_LINK_COLOR = RGBColor(0x06, 0x6C, 0xC0)
25
+
26
+
27
def render_docx(blocks, title: str | None = None):
    """Assemble a python-docx ``Document`` from a sequence of IR blocks.

    Args:
        blocks: Iterable of IR blocks, rendered in order.
        title: Optional text written first as a paragraph styled "Title";
            skipped when falsy.

    Returns:
        The populated ``Document`` (nothing is saved here).
    """
    document = Document()
    if title:
        document.add_paragraph(title, style="Title")
    for item in blocks:
        _render_block(document, item)
    return document
35
+
36
+
37
def write_docx(blocks, output_path: str | Path, title: str | None = None) -> Path:
    """Render ``blocks`` to a Word document and save it at ``output_path``.

    Returns:
        ``output_path`` as a ``Path``.
    """
    destination = Path(output_path)
    document = render_docx(blocks, title=title)
    document.save(str(destination))
    return destination
42
+
43
+
44
def _render_block(doc, block) -> None:
    """Append one IR block to ``doc`` using the renderer that matches its type.

    Headings and paragraphs are written directly; every other known block type is
    handed to its dedicated helper. A block of an unrecognised type is ignored.
    """
    if isinstance(block, Heading):
        heading_para = doc.add_paragraph(style=_heading_style(block.level))
        _add_runs(heading_para, block.runs)
        return
    if isinstance(block, Paragraph):
        _add_runs(doc.add_paragraph(), block.runs)
        return
    if isinstance(block, ListBlock):
        _render_list(doc, block, level=0)
        return
    if isinstance(block, CodeBlock):
        _render_code(doc, block)
        return
    if isinstance(block, BlockQuote):
        _render_quote(doc, block)
        return
    if isinstance(block, Table):
        _render_table(doc, block)
        return
    if isinstance(block, ThematicBreak):
        _add_horizontal_rule(doc)
59
+
60
+
61
def _heading_style(level: int) -> str:
    """Return the ``"Heading N"`` style name, clamping ``level`` into 1-9."""
    if level < 1:
        clamped = 1
    elif level > 9:
        clamped = 9
    else:
        clamped = level
    return f"Heading {clamped}"
63
+
64
+
65
def _add_runs(paragraph, runs, *, force_bold: bool = False) -> None:
    """Append each IR run to ``paragraph`` and copy its formatting across.

    Args:
        paragraph: Target paragraph; one run is added per IR run.
        runs: IR runs to write, in order.
        force_bold: When true, every run is made bold regardless of its own flag.
    """
    for source in runs:
        written = paragraph.add_run(source.text)
        if force_bold or source.bold:
            written.bold = True
        if source.italic:
            written.italic = True
        if source.strike:
            written.font.strike = True
        if source.code:
            written.font.name = _MONO_FONT
        if source.link:
            # Only the look of a link is applied; the target itself is not written.
            link_font = written.font
            link_font.underline = True
            link_font.color.rgb = _LINK_COLOR
79
+
80
+
81
def _render_list(doc, list_block: ListBlock, level: int) -> None:
    """Write a list (and any lists nested in it) as styled paragraphs.

    Args:
        doc: Document being built.
        list_block: The list to render.
        level: Nesting depth, 0 for a top-level list. Depths of 1 and above get an
            explicit left indent of 0.25 inch per level counted from 1.
    """
    style_name = "List Bullet" if not list_block.ordered else "List Number"
    children = (child for item in list_block.items for child in item.blocks)
    for child in children:
        if isinstance(child, ListBlock):
            _render_list(doc, child, level + 1)
        elif isinstance(child, Paragraph):
            entry = doc.add_paragraph(style=style_name)
            if level >= 1:
                entry.paragraph_format.left_indent = Inches(0.25 * (level + 1))
            _add_runs(entry, child.runs)
        else:
            _render_block(doc, child)
94
+
95
+
96
def _render_code(doc, code: CodeBlock) -> None:
    """Write a code block as one paragraph of 9pt monospace runs, one run per line."""
    code_lines = code.text.split("\n")
    # Drop the empty tail left by a trailing newline.
    if code_lines[-1:] == [""]:
        code_lines = code_lines[:-1]
    paragraph = doc.add_paragraph()
    last_index = len(code_lines) - 1
    for index, line in enumerate(code_lines):
        mono_run = paragraph.add_run(line)
        mono_run.font.name = _MONO_FONT
        mono_run.font.size = Pt(9)
        # A break after every line except the last.
        if index < last_index:
            mono_run.add_break()
107
+
108
+
109
def _render_quote(doc, quote: BlockQuote) -> None:
    """Write a block quote: paragraphs use the "Quote" style, other blocks render normally."""
    for child in quote.blocks:
        if not isinstance(child, Paragraph):
            _render_block(doc, child)
            continue
        quoted = doc.add_paragraph(style="Quote")
        _add_runs(quoted, child.runs)
115
+
116
+
117
def _render_table(doc, table: Table) -> None:
    """Write an IR table as a Word table with a bold header row.

    The column count comes from the header, or from the first body row when there
    is no header. A table with no columns is skipped, and body cells beyond the
    column count are dropped.
    """
    if table.header:
        ncols = len(table.header)
    elif table.rows:
        ncols = len(table.rows[0])
    else:
        ncols = 0
    if ncols == 0:
        return

    docx_table = doc.add_table(rows=0, cols=ncols)
    with contextlib.suppress(KeyError):  # template always ships Table Grid
        docx_table.style = "Table Grid"

    if table.header:
        header_cells = docx_table.add_row().cells
        for col, cell in enumerate(table.header):
            _add_runs(header_cells[col].paragraphs[0], cell.runs, force_bold=True)

    for row in table.rows:
        row_cells = docx_table.add_row().cells
        for col, cell in enumerate(row[:ncols]):
            _add_runs(row_cells[col].paragraphs[0], cell.runs)
133
+
134
+
135
def _add_horizontal_rule(doc) -> None:
    """Add an empty paragraph whose bottom border stands in for an ``<hr>``."""
    rule_paragraph = doc.add_paragraph()
    bottom_edge = OxmlElement("w:bottom")
    border_attrs = (
        ("w:val", "single"),
        ("w:sz", "6"),
        ("w:space", "1"),
        ("w:color", "auto"),
    )
    for attr_name, attr_value in border_attrs:
        bottom_edge.set(qn(attr_name), attr_value)
    border_group = OxmlElement("w:pBdr")
    border_group.append(bottom_edge)
    rule_paragraph._p.get_or_add_pPr().append(border_group)
@@ -0,0 +1,147 @@
1
+ """Parse Markdown into the document IR (:mod:`mcp_docgen.blocks`).
2
+
3
+ Pure transformation: text in, ``list[Block]`` out, no IO. Built on markdown-it-py's
4
+ :class:`~markdown_it.tree.SyntaxTreeNode`, which turns the flat token stream into a
5
+ nested tree that is straightforward to walk.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from dataclasses import dataclass, replace
11
+
12
+ from markdown_it import MarkdownIt
13
+ from markdown_it.tree import SyntaxTreeNode
14
+
15
+ from .blocks import (
16
+ Block,
17
+ BlockQuote,
18
+ CodeBlock,
19
+ Heading,
20
+ ListBlock,
21
+ ListItem,
22
+ Paragraph,
23
+ Run,
24
+ Table,
25
+ TableCell,
26
+ ThematicBreak,
27
+ )
28
+
29
# Shared parser: the CommonMark preset plus the GitHub-flavoured ``table`` and
# ``strikethrough`` rules. ``ignoreInvalid=True`` keeps construction safe across
# markdown-it-py versions; linkify is intentionally left off to avoid the optional
# ``linkify-it-py`` dependency.
_MD = MarkdownIt("commonmark").enable(["table", "strikethrough"], ignoreInvalid=True)
33
+
34
+
35
@dataclass(frozen=True)
class _Style:
    """Immutable inline-formatting state passed down while walking inline nodes."""

    bold: bool = False
    italic: bool = False
    code: bool = False
    strike: bool = False
    link: str | None = None  # href of the enclosing link, if any
44
+
45
+
46
def parse_markdown(text: str) -> list[Block]:
    """Convert Markdown source into IR blocks.

    A falsy ``text`` (``None`` or ``""``) is parsed as the empty string.
    """
    source = text if text else ""
    tree = SyntaxTreeNode(_MD.parse(source))
    return _blocks(tree.children)
51
+
52
+
53
def _blocks(nodes: list[SyntaxTreeNode]) -> list[Block]:
    """Convert sibling tree nodes to IR blocks, dropping nodes with no IR equivalent."""
    converted = (_block(node) for node in nodes)
    return [entry for entry in converted if entry is not None]
60
+
61
+
62
def _block(node: SyntaxTreeNode) -> Block | None:
    """Convert one block-level tree node to its IR block.

    Returns:
        The IR block, or ``None`` when the node type has no IR counterpart.
    """
    kind = node.type
    if kind == "heading":
        # ``tag`` looks like "h2"; the digits after the first character are the level.
        return Heading(level=int(node.tag[1:]), runs=_inline_of(node))
    if kind == "paragraph":
        return Paragraph(runs=_inline_of(node))
    if kind in ("bullet_list", "ordered_list"):
        return ListBlock(ordered=(kind == "ordered_list"), items=_items(node))
    if kind in ("fence", "code_block"):
        info = (node.info or "").strip()
        return CodeBlock(text=node.content, language=info or None)
    if kind == "blockquote":
        return BlockQuote(blocks=_blocks(node.children))
    if kind == "table":
        return _table(node)
    if kind == "hr":
        return ThematicBreak()
    return None
82
+
83
+
84
def _items(list_node: SyntaxTreeNode) -> list[ListItem]:
    """Build one ``ListItem`` per ``list_item`` child of a list node."""
    entries: list[ListItem] = []
    for child in list_node.children:
        if child.type != "list_item":
            continue
        entries.append(ListItem(blocks=_blocks(child.children)))
    return entries
90
+
91
+
92
def _table(table_node: SyntaxTreeNode) -> Table:
    """Convert a table node into an IR ``Table``.

    Rows under ``thead`` supply the header (the last such row wins); rows under
    ``tbody`` become body rows in order.
    """

    def cells_of(tr: SyntaxTreeNode) -> list[TableCell]:
        return [TableCell(runs=_inline_of(cell)) for cell in tr.children]

    header_cells: list[TableCell] = []
    body_rows: list[list[TableCell]] = []
    for section in table_node.children:
        if section.type == "thead":
            for tr in section.children:
                header_cells = cells_of(tr)
        elif section.type == "tbody":
            body_rows.extend(cells_of(tr) for tr in section.children)
    return Table(header=header_cells, rows=body_rows)
103
+
104
+
105
def _inline_of(node: SyntaxTreeNode) -> list[Run]:
    """Return the styled runs of the first ``inline`` child of ``node``.

    Yields an empty list when ``node`` has no ``inline`` child.
    """
    inline = next((child for child in node.children if child.type == "inline"), None)
    if inline is None:
        return []
    return _runs(inline.children, _Style())
111
+
112
+
113
def _runs(nodes: list[SyntaxTreeNode], style: _Style) -> list[Run]:
    """Flatten inline nodes into runs, accumulating formatting while descending.

    Args:
        nodes: Sibling inline nodes.
        style: Formatting inherited from enclosing nodes.

    Returns:
        Runs in reading order. Empty text nodes produce nothing; a soft break
        becomes a single space and a hard break becomes ``"\\n"``.
    """
    # Wrapper node type -> the style flag it switches on for its children.
    toggles = {"strong": "bold", "em": "italic", "s": "strike"}
    # Break node type -> the text that replaces it.
    breaks = {"softbreak": " ", "hardbreak": "\n"}

    collected: list[Run] = []
    for node in nodes:
        kind = node.type
        if kind == "text":
            if node.content:
                collected.append(_run(style, node.content))
        elif kind == "code_inline":
            collected.append(_run(replace(style, code=True), node.content))
        elif kind in breaks:
            collected.append(_run(style, breaks[kind]))
        elif kind in toggles:
            collected += _runs(node.children, replace(style, **{toggles[kind]: True}))
        elif kind == "link":
            href = node.attrs.get("href")
            collected += _runs(node.children, replace(style, link=href))
        elif node.children:
            # Unknown wrapper: keep its text, add no formatting.
            collected += _runs(node.children, style)
    return collected
137
+
138
+
139
def _run(style: _Style, text: str) -> Run:
    """Build a ``Run`` for ``text`` carrying every flag of ``style``."""
    # Positional order follows Run's fields: text, bold, italic, code, strike, link.
    return Run(text, style.bold, style.italic, style.code, style.strike, style.link)
@@ -0,0 +1,45 @@
1
+ """Output-path resolution and jail for generated files.
2
+
3
+ All writes are confined to a single base directory (``MCP_DOCGEN_OUTPUT_DIR`` or,
4
+ by default, ``./out`` under the working directory). Any path that resolves outside
5
+ the base — via ``..`` traversal or an absolute path — is rejected. This keeps a
6
+ server that anyone can install from writing arbitrary files on the host.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import os
12
+ from pathlib import Path
13
+
14
# Environment variable naming the directory that all generated files go into.
ENV_OUTPUT_DIR = "MCP_DOCGEN_OUTPUT_DIR"
# Sub-directory of the working directory used when the variable is unset or empty.
DEFAULT_SUBDIR = "out"


def output_base() -> Path:
    """Return (creating if needed) the absolute base directory for outputs.

    The directory comes from ``MCP_DOCGEN_OUTPUT_DIR`` when set and non-empty,
    otherwise it is ``./out`` under the current working directory.
    """
    raw = os.environ.get(ENV_OUTPUT_DIR)
    base = Path(raw) if raw else Path.cwd() / DEFAULT_SUBDIR
    base = base.resolve()
    base.mkdir(parents=True, exist_ok=True)
    return base


def resolve_output_path(output_path: str, expected_suffix: str) -> Path:
    """Resolve ``output_path`` inside the jail, coercing the file suffix.

    Args:
        output_path: Destination, interpreted relative to the output base directory.
        expected_suffix: Suffix (including the dot, lower-case) the returned path
            must carry; a different or missing suffix is replaced.

    Returns:
        The absolute target path. Its parent directory is created if missing.

    Raises:
        ValueError: if ``output_path`` is empty, or if it resolves to the output
            base directory itself rather than to a file inside it.
        PermissionError: if the resolved path escapes the output base directory.
    """
    if not output_path or not str(output_path).strip():
        raise ValueError("output_path must be a non-empty path")
    base = output_base()
    target = (base / output_path).resolve()
    try:
        target.relative_to(base)
    except ValueError as exc:
        raise PermissionError(f"output_path escapes the allowed output directory ({base})") from exc
    # "." or "sub/.." resolve to the base directory itself. Coercing the suffix on
    # that path would rename the directory component and produce a sibling such as
    # "<base>.docx", i.e. a file outside the jail, so reject it before coercion.
    if target == base:
        raise ValueError("output_path must name a file inside the output directory")
    if target.suffix.lower() != expected_suffix:
        # Only the final component changes, so the result stays under ``base``.
        target = target.with_suffix(expected_suffix)
    target.parent.mkdir(parents=True, exist_ok=True)
    return target
@@ -0,0 +1,146 @@
1
+ """Render the document IR into a PowerPoint (.pptx) deck via python-pptx.
2
+
3
+ Slide convention:
4
+ * ``# H1`` -> starts a new slide; the heading text becomes the slide title.
5
+ * content below -> bullet points in the slide body (lists nest by indent level).
6
+ * ``---`` (hr) -> an explicit slide break (a new, untitled slide).
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from dataclasses import replace
12
+ from pathlib import Path
13
+
14
+ from pptx import Presentation
15
+
16
+ from .blocks import (
17
+ BlockQuote,
18
+ CodeBlock,
19
+ Heading,
20
+ ListBlock,
21
+ Paragraph,
22
+ Run,
23
+ Table,
24
+ ThematicBreak,
25
+ )
26
+
27
# Typeface applied to runs flagged as code.
_MONO_FONT = "Consolas"
# Deepest paragraph indent level assigned to a bullet; deeper nesting is capped here.
_MAX_LEVEL = 4
29
+
30
+
31
def render_pptx(blocks, title: str | None = None):
    """Assemble a python-pptx ``Presentation`` from IR blocks.

    Args:
        blocks: IR blocks; they are grouped into slides before rendering.
        title: Optional text for a leading title slide; skipped when falsy.

    Returns:
        The populated ``Presentation`` (nothing is saved here).
    """
    deck = Presentation()
    if title:
        _add_title_slide(deck, title)
    for heading_text, slide_blocks in _split_into_slides(blocks):
        _add_content_slide(deck, heading_text, slide_blocks)
    return deck
39
+
40
+
41
def write_pptx(blocks, output_path: str | Path, title: str | None = None) -> Path:
    """Render ``blocks`` to a deck and save it at ``output_path``.

    Returns:
        ``output_path`` as a ``Path``.
    """
    destination = Path(output_path)
    deck = render_pptx(blocks, title=title)
    deck.save(str(destination))
    return destination
46
+
47
+
48
def _split_into_slides(blocks):
    """Partition the block stream into ``(title, content_blocks)`` slides.

    A level-1 heading closes the current slide and titles the next one; a thematic
    break closes the current slide and starts an untitled one. A slide is emitted
    only when it has a title or at least one content block.
    """
    slides: list[tuple[str | None, list]] = []
    pending_title: str | None = None
    pending_blocks: list = []

    for block in blocks:
        opens_titled = isinstance(block, Heading) and block.level == 1
        if not opens_titled and not isinstance(block, ThematicBreak):
            pending_blocks.append(block)
            continue
        # Boundary reached: emit what has accumulated, then start afresh.
        if pending_title is not None or pending_blocks:
            slides.append((pending_title, pending_blocks))
        pending_title = _runs_text(block.runs) if opens_titled else None
        pending_blocks = []

    if pending_title is not None or pending_blocks:
        slides.append((pending_title, pending_blocks))
    return slides
70
+
71
+
72
def _add_title_slide(prs, title: str) -> None:
    """Append a slide built from layout 0 and set its title text."""
    layout = prs.slide_layouts[0]
    prs.slides.add_slide(layout).shapes.title.text = title
75
+
76
+
77
def _add_content_slide(prs, title: str | None, blocks) -> None:
    """Append a slide built from layout 1 and fill its body with bullets.

    Args:
        prs: Presentation being built.
        title: Slide title; ``None`` or empty leaves the title blank.
        blocks: IR content blocks flattened into bullet paragraphs.
    """
    slide = prs.slides.add_slide(prs.slide_layouts[1])
    slide.shapes.title.text = title if title else ""
    frame = slide.placeholders[1].text_frame
    frame.clear()
    for index, (runs, level) in enumerate(_bullets(blocks, 0)):
        # The first bullet reuses the paragraph the frame already holds.
        para = frame.paragraphs[0] if index == 0 else frame.add_paragraph()
        para.level = min(level, _MAX_LEVEL)
        _write_runs(para, runs)
88
+
89
+
90
def _bullets(blocks, level: int) -> list[tuple[list[Run], int]]:
    """Flatten content blocks into ``(runs, indent_level)`` bullet rows.

    Paragraphs pass through unchanged, headings are made bold, quoted paragraphs
    are made italic, code yields one row per line, and tables yield one row per
    table row. Lists nested inside a list are emitted one level deeper.
    """
    rows: list[tuple[list[Run], int]] = []
    for block in blocks:
        if isinstance(block, Paragraph):
            rows.append((block.runs, level))
        elif isinstance(block, Heading):
            bold_runs = [replace(run, bold=True) for run in block.runs]
            rows.append((bold_runs, level))
        elif isinstance(block, ListBlock):
            for item in block.items:
                for child in item.blocks:
                    depth = level
                    if isinstance(child, ListBlock):
                        depth = level + 1
                    rows += _bullets([child], depth)
        elif isinstance(block, CodeBlock):
            rows.extend(
                ([Run(text=line, code=True)], level) for line in _code_lines(block.text)
            )
        elif isinstance(block, BlockQuote):
            for child in block.blocks:
                if not isinstance(child, Paragraph):
                    rows += _bullets([child], level)
                    continue
                italic_runs = [replace(run, italic=True) for run in child.runs]
                rows.append((italic_runs, level))
        elif isinstance(block, Table):
            rows += _table_bullets(block, level)
    return rows
115
+
116
+
117
def _table_bullets(table: Table, level: int) -> list[tuple[list[Run], int]]:
    """Turn a table into bullet rows, joining each row's cell text with ``" | "``.

    The header row comes first when the table has one.
    """

    def joined(cells) -> Run:
        return Run(text=" | ".join(_runs_text(cell.runs) for cell in cells))

    table_rows = ([table.header] if table.header else []) + list(table.rows)
    return [([joined(cells)], level) for cells in table_rows]
124
+
125
+
126
def _code_lines(text: str) -> list[str]:
    """Split code into lines, dropping the empty tail left by a trailing newline."""
    parts = text.split("\n")
    return parts[:-1] if parts[-1] == "" else parts
131
+
132
+
133
def _write_runs(paragraph, runs) -> None:
    """Append each IR run to a slide paragraph, applying bold, italic and code font."""
    for source in runs:
        written = paragraph.add_run()
        written.text = source.text
        font = written.font
        if source.bold:
            font.bold = True
        if source.italic:
            font.italic = True
        if source.code:
            font.name = _MONO_FONT
143
+
144
+
145
def _runs_text(runs) -> str:
    """Concatenate the ``text`` of every run, ignoring formatting."""
    pieces = [run.text for run in runs]
    return "".join(pieces)
@@ -0,0 +1,112 @@
1
+ """mcp-docgen: a Markdown-driven MCP server that generates Office documents.
2
+
3
+ Exposes three tools over the Model Context Protocol:
4
+ * ``create_docx`` — Markdown -> Word (.docx)
5
+ * ``create_pptx`` — Markdown -> PowerPoint (.pptx)
6
+ * ``create_xlsx`` — structured rows -> Excel (.xlsx)
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from mcp.server.fastmcp import FastMCP
12
+
13
+ from .docx_writer import write_docx
14
+ from .markdown_parser import parse_markdown
15
+ from .paths import resolve_output_path
16
+ from .pptx_writer import write_pptx
17
+ from .xlsx_writer import write_xlsx
18
+
19
# Longest Markdown input (in characters) accepted by ``_check_text``.
MAX_MARKDOWN_CHARS = 1_000_000
# Largest total cell count, summed over all sheets, accepted by ``_check_sheets``.
MAX_SHEET_CELLS = 1_000_000

# Server instance on which the ``@mcp.tool()`` functions below are registered.
mcp = FastMCP("mcp-docgen")
23
+
24
+
25
@mcp.tool()
def create_docx(markdown: str, output_path: str, title: str | None = None) -> dict:
    """Create a Word (.docx) document from Markdown.

    Args:
        markdown: Document body as Markdown — headings, lists, tables, bold/italic,
            inline code, fenced code blocks and block quotes are supported.
        output_path: Destination filename, relative to the server's output directory.
            The ``.docx`` suffix is enforced.
        title: Optional heading rendered with Word's "Title" style at the top.

    Returns:
        ``{"path": <absolute path of the written .docx>}``.
    """
    # NOTE(review): the docstring above is presumably what FastMCP advertises
    # to clients as the tool description, so its wording is kept verbatim.
    _check_text(markdown)  # reject bad input before resolving paths or parsing
    destination = resolve_output_path(output_path, ".docx")
    document = parse_markdown(markdown)
    write_docx(document, destination, title=title)
    return {"path": str(destination)}
43
+
44
+
45
@mcp.tool()
def create_pptx(markdown: str, output_path: str, title: str | None = None) -> dict:
    """Create a PowerPoint (.pptx) deck from Markdown.

    Slide convention: each top-level ``# Heading`` starts a new slide and becomes its
    title; content beneath becomes bullet points (nested lists indent); a ``---``
    horizontal rule forces a slide break.

    Args:
        markdown: Slide content as Markdown.
        output_path: Destination filename, relative to the server's output directory.
            The ``.pptx`` suffix is enforced.
        title: Optional text for a leading title slide.

    Returns:
        ``{"path": <absolute path of the written .pptx>}``.
    """
    # NOTE(review): the docstring above is presumably what FastMCP advertises
    # to clients as the tool description, so its wording is kept verbatim.
    _check_text(markdown)  # reject bad input before resolving paths or parsing
    destination = resolve_output_path(output_path, ".pptx")
    deck = parse_markdown(markdown)
    write_pptx(deck, destination, title=title)
    return {"path": str(destination)}
66
+
67
+
68
@mcp.tool()
def create_xlsx(sheets: list[dict], output_path: str) -> dict:
    """Create an Excel (.xlsx) workbook from structured sheet data.

    Args:
        sheets: A list of worksheets, each ``{"name": str, "rows": [[cell, ...], ...]}``.
            Cells may be strings, numbers, booleans or null. The first row of each sheet
            is a bold, frozen header unless the sheet sets ``"header": false``.
        output_path: Destination filename, relative to the server's output directory.
            The ``.xlsx`` suffix is enforced.

    Returns:
        ``{"path": <absolute path of the written .xlsx>}``.
    """
    # NOTE(review): the docstring above is presumably what FastMCP advertises
    # to clients as the tool description, so its wording is kept verbatim.
    _check_sheets(sheets)  # enforce the size limit before touching the filesystem
    destination = resolve_output_path(output_path, ".xlsx")
    write_xlsx(sheets, destination)
    return {"path": str(destination)}
86
+
87
+
88
+ def _check_text(text: str) -> None:
89
+ if not isinstance(text, str):
90
+ raise ValueError("markdown must be a string")
91
+ if len(text) > MAX_MARKDOWN_CHARS:
92
+ raise ValueError(f"markdown exceeds the {MAX_MARKDOWN_CHARS}-character limit")
93
+
94
+
95
+ def _check_sheets(sheets) -> None:
96
+ if not isinstance(sheets, list):
97
+ raise ValueError("sheets must be a list of sheet objects")
98
+ total = 0
99
+ for sheet in sheets:
100
+ for row in (sheet or {}).get("rows") or []:
101
+ total += len(row)
102
+ if total > MAX_SHEET_CELLS:
103
+ raise ValueError(f"sheets exceed the {MAX_SHEET_CELLS}-cell limit")
104
+
105
+
106
def main() -> None:
    """Console entry point: start the MCP server, serving over stdio."""
    mcp.run()


# Also allow running the module directly as a script.
if __name__ == "__main__":
    main()
@@ -0,0 +1,80 @@
1
+ """Render structured sheet data into an Excel (.xlsx) workbook via openpyxl.
2
+
3
+ Input shape (JSON-friendly), one dict per worksheet::
4
+
5
+ [{"name": "Sheet1", "rows": [["Header A", "Header B"], ["a1", "b1"]], "header": true}]
6
+
7
+ ``rows`` is a list of rows, each a list of cell values (str / int / float / bool / null).
8
+ The first row is treated as a bold, frozen header unless ``header`` is ``false``.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ from pathlib import Path
14
+
15
+ from openpyxl import Workbook
16
+ from openpyxl.styles import Font
17
+ from openpyxl.utils import get_column_letter
18
+
19
+ _INVALID_TITLE_CHARS = set("[]:*?/\\")
20
+ _MAX_TITLE = 31
21
+
22
+
23
def render_xlsx(sheets):
    """Build and return an openpyxl ``Workbook`` from structured sheet data.

    Args:
        sheets: Iterable of sheet dicts. Each may carry ``"name"`` (falls back
            to ``Sheet<N>``), ``"rows"`` (a list of rows of cell values) and
            ``"header"`` (defaults to true: the first row is bolded and
            frozen). ``None`` or an empty iterable leaves the workbook with
            only its default empty sheet.

    Returns:
        The populated ``Workbook``; nothing is written to disk.
    """
    wb = Workbook()
    specs = list(sheets or [])
    if specs:
        # Remove the auto-created sheet *before* adding ours. openpyxl renames
        # a new sheet whose title clashes with an existing one, so while the
        # default "Sheet" is still present a requested "Sheet" (also the
        # fallback title from _safe_title) would come out as "Sheet1".
        wb.remove(wb.active)
    used: set[str] = set()
    for index, sheet in enumerate(specs):
        name = sheet.get("name") or f"Sheet{index + 1}"
        rows = sheet.get("rows") or []
        header = sheet.get("header", True)
        ws = wb.create_sheet(title=_safe_title(name, used))
        for row in rows:
            ws.append(list(row))
        if header and rows:
            # The header spans as many columns as the first row has cells.
            _style_header(ws, len(rows[0]))
        _autosize(ws, rows)
    return wb
43
+
44
+
45
def write_xlsx(sheets, output_path: str | Path) -> Path:
    """Render ``sheets`` into a workbook, save it at ``output_path``, return that path.

    The return value is ``output_path`` converted to a ``Path``.
    """
    destination = Path(output_path)
    workbook = render_xlsx(sheets)
    workbook.save(str(destination))
    return destination
50
+
51
+
52
def _safe_title(name: str, used: set[str]) -> str:
    """Return a worksheet title derived from ``name`` that is valid and not yet used.

    Characters in ``_INVALID_TITLE_CHARS`` become spaces; the result is stripped
    and cut to ``_MAX_TITLE`` characters, falling back to ``"Sheet"`` when
    nothing is left. If the title is already taken, `` (2)``, `` (3)``, ... is
    appended, shortening the base so the total still fits in ``_MAX_TITLE``.

    Uniqueness is checked case-insensitively because spreadsheet sheet names
    are case-insensitive: a title differing only in case from an earlier one
    would otherwise be renamed again by openpyxl, possibly past the length
    limit.

    Args:
        name: Requested title; converted with ``str()``.
        used: Titles already handed out. The returned title is added to it.

    Returns:
        The sanitized, unique title.
    """
    cleaned = "".join(" " if c in _INVALID_TITLE_CHARS else c for c in str(name)).strip()
    cleaned = cleaned[:_MAX_TITLE] or "Sheet"
    # Compare on case-folded forms but keep ``used`` holding the real titles.
    taken = {title.casefold() for title in used}
    base = cleaned
    counter = 2
    while cleaned.casefold() in taken:
        suffix = f" ({counter})"
        cleaned = base[: _MAX_TITLE - len(suffix)] + suffix
        counter += 1
    used.add(cleaned)
    return cleaned
63
+
64
+
65
def _style_header(ws, ncols: int) -> None:
    """Bold the first ``ncols`` cells of row 1 and freeze the panes at ``A2``."""
    header_font = Font(bold=True)
    for offset in range(ncols):
        ws.cell(row=1, column=offset + 1).font = header_font
    # Freezing at A2 keeps row 1 in view while the sheet scrolls.
    ws.freeze_panes = "A2"
70
+
71
+
72
+ def _autosize(ws, rows) -> None:
73
+ widths: dict[int, int] = {}
74
+ for row in rows:
75
+ for idx, cell in enumerate(row, start=1):
76
+ length = len(str(cell)) if cell is not None else 0
77
+ if length > widths.get(idx, 0):
78
+ widths[idx] = length
79
+ for idx, length in widths.items():
80
+ ws.column_dimensions[get_column_letter(idx)].width = min(max(length + 2, 8), 60)