markdocx 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,12 @@
1
+ # Python-generated files
2
+ __pycache__/
3
+ *.py[oc]
4
+ build/
5
+ dist/
6
+ wheels/
7
+ *.egg-info
8
+
9
+ # Virtual environments
10
+ .venv
11
+
12
+
@@ -0,0 +1 @@
1
+ 3.14
markdocx-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 shynneri
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,182 @@
1
+ Metadata-Version: 2.4
2
+ Name: markdocx
3
+ Version: 0.1.0
4
+ Summary: Convert AI-generated Markdown textbooks to polished DOCX with native math equations and syntax-highlighted code
5
+ License-Expression: MIT
6
+ License-File: LICENSE
7
+ Requires-Python: >=3.14
8
+ Requires-Dist: latex2mathml==3.78.1
9
+ Requires-Dist: lxml==6.0.2
10
+ Requires-Dist: markdown-it-py==4.0.0
11
+ Requires-Dist: matplotlib==3.10.8
12
+ Requires-Dist: mdit-py-plugins==0.5.0
13
+ Requires-Dist: pillow==12.1.0
14
+ Requires-Dist: pygments==2.19.2
15
+ Requires-Dist: python-docx==1.2.0
16
+ Description-Content-Type: text/markdown
17
+
18
+ <div align="center">
19
+
20
+ # MD to DOCX
21
+
22
+ **A Markdown-to-Word converter built for AI-generated textbooks**
23
+
24
+ Convert Markdown files — complete with LaTeX math, syntax-highlighted code, tables, and images — into polished `.docx` documents in one command.
25
+
26
+ [![Python 3.14+](https://img.shields.io/badge/python-3.14%2B-blue.svg)](https://www.python.org/downloads/)
27
+ [![License: MIT](https://img.shields.io/badge/license-MIT-green.svg)](LICENSE)
28
+
29
+ </div>
30
+
31
+ ---
32
+
33
+ ## Why This Exists
34
+
35
+ Large language models (ChatGPT, Claude, Gemini, …) produce great Markdown, but the journey from `.md` to a well-formatted **Word document** is painful:
36
+
37
+ - LaTeX formulas become plain text or broken images
38
+ - Code blocks lose their highlighting
39
+ - Tables, lists, and blockquotes need manual reformatting
40
+
41
+ **MD to DOCX** bridges that gap. Feed it a Markdown file that follows a few simple rules and get a publication-ready `.docx` — math rendered as native Word OMML equations, code with VS Code–style colors, and everything else properly formatted.
42
+
43
+ ## Features
44
+
45
+ | Category | What you get |
46
+ |:---------|:-------------|
47
+ | **Math** | Inline (`$...$`) and display (`$$...$$`) LaTeX → native OMML equations in Word |
48
+ | **Code** | 30+ languages with Pygments syntax highlighting, VS Code light theme, language labels |
49
+ | **Tables** | Auto-formatted Table Grid — bold header row, left/center/right alignment, inline math in cells |
50
+ | **Lists** | Bullet (•◦▪) and numbered lists, up to 6 nesting levels |
51
+ | **Other** | Blockquotes, horizontal rules, clickable hyperlinks, local images, footnotes |
52
+
53
+ ## Quick Start
54
+
55
+ ### Installation
56
+
57
+ ```bash
58
+ git clone https://github.com/<your-username>/md_to_docx.git
59
+ cd md_to_docx
60
+
61
+ # Using uv (recommended)
62
+ uv sync
63
+
64
+ # Or using pip
65
+ pip install -r requirements.txt
66
+ ```
67
+
68
+ ### Usage
69
+
70
+ ```bash
71
+ # Convert a single file
72
+ python main.py input.md
73
+ python main.py input.md -o output.docx
74
+
75
+ # Convert an entire directory
76
+ python main.py ./chapters/ -o ./output/
77
+
78
+ # Recursively search subdirectories
79
+ python main.py ./chapters/ -o ./output/ -r
80
+
81
+ # Verbose logging
82
+ python main.py input.md -v
83
+ ```
84
+
85
+ ### CLI Options
86
+
87
+ | Flag | Description |
88
+ |:-----|:------------|
89
+ | `input` | Markdown file or directory to convert |
90
+ | `-o, --output` | Output file or directory path |
91
+ | `-r, --recursive` | Recursively find `.md` files in subdirectories |
92
+ | `-v, --verbose` | Show detailed processing logs |
93
+
94
+ ## How It Works
95
+
96
+ ```
97
+ Markdown file
98
+
99
+
100
+ md_parser.py ─── markdown-it-py tokenizer
101
+
102
+
103
+ docx_builder.py ─── walks the token stream, builds Word elements
104
+ ├── math_renderer.py ─── LaTeX → MathML → OMML (native Word equations)
105
+ ├── code_renderer.py ─── Pygments lexer → colored Word runs
106
+ └── styles.py ─── fonts, colors, spacing presets
107
+
108
+
109
+ .docx file ─── python-docx output
110
+ ```
111
+
112
+ ### Math Pipeline
113
+
114
+ LaTeX is converted to **native OMML** (Office Math Markup Language), not images. This means formulas are editable, scale perfectly, and look like they were typed in Word's equation editor.
115
+
116
+ ```
117
+ LaTeX string → latex2mathml → MathML → XSLT → OMML → Word paragraph
118
+ ```
119
+
120
+ ### Code Pipeline
121
+
122
+ ```
123
+ Source code → Pygments lexer + VS Code theme → colored Word runs inside a shaded table cell
124
+ ```
125
+
126
+ ## Project Structure
127
+
128
+ ```
129
+ md_to_docx/
130
+ ├── main.py # CLI entry point
131
+ ├── pyproject.toml # Project metadata & dependencies
132
+ ├── requirements.txt # Pip-compatible dependency list
133
+ ├── converter/
134
+ │ ├── __init__.py
135
+ │ ├── core.py # Top-level orchestrator
136
+ │ ├── md_parser.py # Markdown → token stream
137
+ │ ├── math_renderer.py # LaTeX → OMML (native Word math)
138
+ │ ├── code_renderer.py # Code → syntax-highlighted Word runs
139
+ │ ├── docx_builder.py # Token stream → DOCX elements
140
+ │ └── styles.py # Fonts, colors, and layout presets
141
+ └── rule/
142
+ ├── ai_gen_doc_rule.md # AI writing rules (Vietnamese)
143
+ └── ai_gen_doc_rule_en.md # AI writing rules (English)
144
+ ```
145
+
146
+ ## Dependencies
147
+
148
+ | Package | Version | Role |
149
+ |:--------|:--------|:-----|
150
+ | [python-docx](https://python-docx.readthedocs.io/) | 1.2.0 | DOCX generation |
151
+ | [markdown-it-py](https://github.com/executablebooks/markdown-it-py) | 4.0.0 | Markdown parsing |
152
+ | [mdit-py-plugins](https://github.com/executablebooks/mdit-py-plugins) | 0.5.0 | Math & footnote plugins |
153
+ | [latex2mathml](https://github.com/roniemartinez/latex2mathml) | 3.78.1 | LaTeX → MathML conversion |
154
+ | [lxml](https://lxml.de/) | 6.0.2 | XML/XSLT processing |
155
+ | [Pygments](https://pygments.org/) | 2.19.2 | Syntax highlighting |
156
+ | [matplotlib](https://matplotlib.org/) | 3.10.8 | LaTeX rendering (fallback) |
157
+ | [Pillow](https://python-pillow.org/) | 12.1.0 | Image processing |
158
+
159
+ ## AI Writing Rules
160
+
161
+ The `rule/` directory contains detailed guidelines for prompting AI models to produce Markdown that converts cleanly:
162
+
163
+ | File | Language | Description |
164
+ |:-----|:---------|:------------|
165
+ | `rule/ai_gen_doc_rule.md` | Vietnamese | Full rule set — heading structure, LaTeX constraints, code block format, tables, etc. |
166
+ | `rule/ai_gen_doc_rule_en.md` | English | Same rules, English version |
167
+
168
+ **How to use:** Paste the contents of the appropriate rule file into your AI system prompt (or at the start of the conversation) before asking it to write textbook content.
169
+
170
+ ## Contributing
171
+
172
+ Contributions are welcome. Please open an issue first to discuss what you'd like to change.
173
+
174
+ 1. Fork the repository
175
+ 2. Create a feature branch (`git checkout -b feature/amazing-feature`)
176
+ 3. Commit your changes (`git commit -m 'Add amazing feature'`)
177
+ 4. Push to the branch (`git push origin feature/amazing-feature`)
178
+ 5. Open a Pull Request
179
+
180
+ ## License
181
+
182
+ This project is licensed under the MIT License — see the [LICENSE](LICENSE) file for details.
@@ -0,0 +1,165 @@
1
+ <div align="center">
2
+
3
+ # MD to DOCX
4
+
5
+ **A Markdown-to-Word converter built for AI-generated textbooks**
6
+
7
+ Convert Markdown files — complete with LaTeX math, syntax-highlighted code, tables, and images — into polished `.docx` documents in one command.
8
+
9
+ [![Python 3.14+](https://img.shields.io/badge/python-3.14%2B-blue.svg)](https://www.python.org/downloads/)
10
+ [![License: MIT](https://img.shields.io/badge/license-MIT-green.svg)](LICENSE)
11
+
12
+ </div>
13
+
14
+ ---
15
+
16
+ ## Why This Exists
17
+
18
+ Large language models (ChatGPT, Claude, Gemini, …) produce great Markdown, but the journey from `.md` to a well-formatted **Word document** is painful:
19
+
20
+ - LaTeX formulas become plain text or broken images
21
+ - Code blocks lose their highlighting
22
+ - Tables, lists, and blockquotes need manual reformatting
23
+
24
+ **MD to DOCX** bridges that gap. Feed it a Markdown file that follows a few simple rules and get a publication-ready `.docx` — math rendered as native Word OMML equations, code with VS Code–style colors, and everything else properly formatted.
25
+
26
+ ## Features
27
+
28
+ | Category | What you get |
29
+ |:---------|:-------------|
30
+ | **Math** | Inline (`$...$`) and display (`$$...$$`) LaTeX → native OMML equations in Word |
31
+ | **Code** | 30+ languages with Pygments syntax highlighting, VS Code light theme, language labels |
32
+ | **Tables** | Auto-formatted Table Grid — bold header row, left/center/right alignment, inline math in cells |
33
+ | **Lists** | Bullet (•◦▪) and numbered lists, up to 6 nesting levels |
34
+ | **Other** | Blockquotes, horizontal rules, clickable hyperlinks, local images, footnotes |
35
+
36
+ ## Quick Start
37
+
38
+ ### Installation
39
+
40
+ ```bash
41
+ git clone https://github.com/<your-username>/md_to_docx.git
42
+ cd md_to_docx
43
+
44
+ # Using uv (recommended)
45
+ uv sync
46
+
47
+ # Or using pip
48
+ pip install -r requirements.txt
49
+ ```
50
+
51
+ ### Usage
52
+
53
+ ```bash
54
+ # Convert a single file
55
+ python main.py input.md
56
+ python main.py input.md -o output.docx
57
+
58
+ # Convert an entire directory
59
+ python main.py ./chapters/ -o ./output/
60
+
61
+ # Recursively search subdirectories
62
+ python main.py ./chapters/ -o ./output/ -r
63
+
64
+ # Verbose logging
65
+ python main.py input.md -v
66
+ ```
67
+
68
+ ### CLI Options
69
+
70
+ | Flag | Description |
71
+ |:-----|:------------|
72
+ | `input` | Markdown file or directory to convert |
73
+ | `-o, --output` | Output file or directory path |
74
+ | `-r, --recursive` | Recursively find `.md` files in subdirectories |
75
+ | `-v, --verbose` | Show detailed processing logs |
76
+
77
+ ## How It Works
78
+
79
+ ```
80
+ Markdown file
81
+
82
+
83
+ md_parser.py ─── markdown-it-py tokenizer
84
+
85
+
86
+ docx_builder.py ─── walks the token stream, builds Word elements
87
+ ├── math_renderer.py ─── LaTeX → MathML → OMML (native Word equations)
88
+ ├── code_renderer.py ─── Pygments lexer → colored Word runs
89
+ └── styles.py ─── fonts, colors, spacing presets
90
+
91
+
92
+ .docx file ─── python-docx output
93
+ ```
94
+
95
+ ### Math Pipeline
96
+
97
+ LaTeX is converted to **native OMML** (Office Math Markup Language), not images. This means formulas are editable, scale perfectly, and look like they were typed in Word's equation editor.
98
+
99
+ ```
100
+ LaTeX string → latex2mathml → MathML → XSLT → OMML → Word paragraph
101
+ ```
102
+
103
+ ### Code Pipeline
104
+
105
+ ```
106
+ Source code → Pygments lexer + VS Code theme → colored Word runs inside a shaded table cell
107
+ ```
108
+
109
+ ## Project Structure
110
+
111
+ ```
112
+ md_to_docx/
113
+ ├── main.py # CLI entry point
114
+ ├── pyproject.toml # Project metadata & dependencies
115
+ ├── requirements.txt # Pip-compatible dependency list
116
+ ├── converter/
117
+ │ ├── __init__.py
118
+ │ ├── core.py # Top-level orchestrator
119
+ │ ├── md_parser.py # Markdown → token stream
120
+ │ ├── math_renderer.py # LaTeX → OMML (native Word math)
121
+ │ ├── code_renderer.py # Code → syntax-highlighted Word runs
122
+ │ ├── docx_builder.py # Token stream → DOCX elements
123
+ │ └── styles.py # Fonts, colors, and layout presets
124
+ └── rule/
125
+ ├── ai_gen_doc_rule.md # AI writing rules (Vietnamese)
126
+ └── ai_gen_doc_rule_en.md # AI writing rules (English)
127
+ ```
128
+
129
+ ## Dependencies
130
+
131
+ | Package | Version | Role |
132
+ |:--------|:--------|:-----|
133
+ | [python-docx](https://python-docx.readthedocs.io/) | 1.2.0 | DOCX generation |
134
+ | [markdown-it-py](https://github.com/executablebooks/markdown-it-py) | 4.0.0 | Markdown parsing |
135
+ | [mdit-py-plugins](https://github.com/executablebooks/mdit-py-plugins) | 0.5.0 | Math & footnote plugins |
136
+ | [latex2mathml](https://github.com/roniemartinez/latex2mathml) | 3.78.1 | LaTeX → MathML conversion |
137
+ | [lxml](https://lxml.de/) | 6.0.2 | XML/XSLT processing |
138
+ | [Pygments](https://pygments.org/) | 2.19.2 | Syntax highlighting |
139
+ | [matplotlib](https://matplotlib.org/) | 3.10.8 | LaTeX rendering (fallback) |
140
+ | [Pillow](https://python-pillow.org/) | 12.1.0 | Image processing |
141
+
142
+ ## AI Writing Rules
143
+
144
+ The `rule/` directory contains detailed guidelines for prompting AI models to produce Markdown that converts cleanly:
145
+
146
+ | File | Language | Description |
147
+ |:-----|:---------|:------------|
148
+ | `rule/ai_gen_doc_rule.md` | Vietnamese | Full rule set — heading structure, LaTeX constraints, code block format, tables, etc. |
149
+ | `rule/ai_gen_doc_rule_en.md` | English | Same rules, English version |
150
+
151
+ **How to use:** Paste the contents of the appropriate rule file into your AI system prompt (or at the start of the conversation) before asking it to write textbook content.
152
+
153
+ ## Contributing
154
+
155
+ Contributions are welcome. Please open an issue first to discuss what you'd like to change.
156
+
157
+ 1. Fork the repository
158
+ 2. Create a feature branch (`git checkout -b feature/amazing-feature`)
159
+ 3. Commit your changes (`git commit -m 'Add amazing feature'`)
160
+ 4. Push to the branch (`git push origin feature/amazing-feature`)
161
+ 5. Open a Pull Request
162
+
163
+ ## License
164
+
165
+ This project is licensed under the MIT License — see the [LICENSE](LICENSE) file for details.
markdocx-0.1.0/main.py ADDED
@@ -0,0 +1,12 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Convenience entry point — delegates to markdocx.cli.main().
4
+
5
+ After installing the package (pip install -e .), you can also run:
6
+ markdocx input.md -o output.docx
7
+ """
8
+
9
+ from markdocx.cli import main
10
+
11
+ if __name__ == "__main__":
12
+ main()
@@ -0,0 +1,27 @@
1
+ [project]
2
+ name = "markdocx"
3
+ version = "0.1.0"
4
+ description = "Convert AI-generated Markdown textbooks to polished DOCX with native math equations and syntax-highlighted code"
5
+ readme = "README.md"
6
+ license = "MIT"
7
+ requires-python = ">=3.14"
8
+ dependencies = [
9
+ "latex2mathml==3.78.1",
10
+ "lxml==6.0.2",
11
+ "markdown-it-py==4.0.0",
12
+ "matplotlib==3.10.8",
13
+ "mdit-py-plugins==0.5.0",
14
+ "pillow==12.1.0",
15
+ "pygments==2.19.2",
16
+ "python-docx==1.2.0",
17
+ ]
18
+
19
+ [project.scripts]
20
+ markdocx = "markdocx.cli:main"
21
+
22
+ [build-system]
23
+ requires = ["hatchling"]
24
+ build-backend = "hatchling.build"
25
+
26
+ [tool.hatch.build.targets.wheel]
27
+ packages = ["src/markdocx"]
@@ -0,0 +1,11 @@
1
+ # MD to DOCX Converter - Dependencies
2
+ # Latest versions as of 2026-02-08
3
+
4
+ python-docx==1.2.0
5
+ markdown-it-py==4.0.0
6
+ mdit-py-plugins==0.5.0
7
+ matplotlib==3.10.8
8
+ Pillow==12.1.0
9
+ Pygments==2.19.2
10
+ latex2mathml==3.78.1
11
+ lxml==6.0.2