marksmith 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- marksmith-0.1.0/.github/workflows/ci.yml +51 -0
- marksmith-0.1.0/.github/workflows/publish.yml +51 -0
- marksmith-0.1.0/.gitignore +207 -0
- marksmith-0.1.0/LICENSE +21 -0
- marksmith-0.1.0/PKG-INFO +204 -0
- marksmith-0.1.0/README.md +151 -0
- marksmith-0.1.0/marksmith/__init__.py +3 -0
- marksmith-0.1.0/marksmith/__main__.py +6 -0
- marksmith-0.1.0/marksmith/cli.py +108 -0
- marksmith-0.1.0/marksmith/convert.py +357 -0
- marksmith-0.1.0/pyproject.toml +64 -0
- marksmith-0.1.0/tests/__init__.py +0 -0
- marksmith-0.1.0/tests/test_convert.py +160 -0
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
# Continuous integration: run the test suite on every supported Python
# version and lint the code base with ruff.
name: CI

on:
  push:
    branches: ["main"]
  pull_request:
    branches: ["main"]

# Least privilege: these jobs only need to read the repository contents.
permissions:
  contents: read

jobs:
  test:
    name: Test (Python ${{ matrix.python-version }})
    runs-on: ubuntu-latest

    strategy:
      # Let every Python version finish so one failure does not hide others.
      fail-fast: false
      matrix:
        python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]

    steps:
      - uses: actions/checkout@v4

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install .[dev]

      - name: Run tests
        run: pytest

  lint:
    name: Lint
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.x"

      - name: Install ruff
        run: pip install ruff

      - name: Run ruff
        run: ruff check .
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
# Release pipeline: build the distributions once, then upload them to PyPI
# from a separate job that holds only the OIDC permission.
name: Publish to PyPI

on:
  release:
    types:
      - published

jobs:
  build:
    name: Build distribution
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.x"

      - name: Install build dependencies
        run: python -m pip install --upgrade build

      - name: Build package
        run: python -m build

      # Hand the built files over to the publish job.
      - name: Upload distribution artifacts
        uses: actions/upload-artifact@v4
        with:
          name: dist
          path: dist/

  publish:
    name: Publish to PyPI
    needs: build
    runs-on: ubuntu-latest
    environment:
      name: pypi
      url: https://pypi.org/project/marksmith/

    permissions:
      id-token: write  # Required for OIDC Trusted Publisher authentication

    steps:
      - name: Download distribution artifacts
        uses: actions/download-artifact@v4
        with:
          name: dist
          path: dist/

      - name: Publish to PyPI
        uses: pypa/gh-action-pypi-publish@release/v1
|
|
@@ -0,0 +1,207 @@
|
|
|
1
|
+
# Byte-compiled / optimized / DLL files
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[codz]
|
|
4
|
+
*$py.class
|
|
5
|
+
|
|
6
|
+
# C extensions
|
|
7
|
+
*.so
|
|
8
|
+
|
|
9
|
+
# Distribution / packaging
|
|
10
|
+
.Python
|
|
11
|
+
build/
|
|
12
|
+
develop-eggs/
|
|
13
|
+
dist/
|
|
14
|
+
downloads/
|
|
15
|
+
eggs/
|
|
16
|
+
.eggs/
|
|
17
|
+
lib/
|
|
18
|
+
lib64/
|
|
19
|
+
parts/
|
|
20
|
+
sdist/
|
|
21
|
+
var/
|
|
22
|
+
wheels/
|
|
23
|
+
share/python-wheels/
|
|
24
|
+
*.egg-info/
|
|
25
|
+
.installed.cfg
|
|
26
|
+
*.egg
|
|
27
|
+
MANIFEST
|
|
28
|
+
|
|
29
|
+
# PyInstaller
|
|
30
|
+
# Usually these files are written by a python script from a template
|
|
31
|
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
|
32
|
+
*.manifest
|
|
33
|
+
*.spec
|
|
34
|
+
|
|
35
|
+
# Installer logs
|
|
36
|
+
pip-log.txt
|
|
37
|
+
pip-delete-this-directory.txt
|
|
38
|
+
|
|
39
|
+
# Unit test / coverage reports
|
|
40
|
+
htmlcov/
|
|
41
|
+
.tox/
|
|
42
|
+
.nox/
|
|
43
|
+
.coverage
|
|
44
|
+
.coverage.*
|
|
45
|
+
.cache
|
|
46
|
+
nosetests.xml
|
|
47
|
+
coverage.xml
|
|
48
|
+
*.cover
|
|
49
|
+
*.py.cover
|
|
50
|
+
.hypothesis/
|
|
51
|
+
.pytest_cache/
|
|
52
|
+
cover/
|
|
53
|
+
|
|
54
|
+
# Translations
|
|
55
|
+
*.mo
|
|
56
|
+
*.pot
|
|
57
|
+
|
|
58
|
+
# Django stuff:
|
|
59
|
+
*.log
|
|
60
|
+
local_settings.py
|
|
61
|
+
db.sqlite3
|
|
62
|
+
db.sqlite3-journal
|
|
63
|
+
|
|
64
|
+
# Flask stuff:
|
|
65
|
+
instance/
|
|
66
|
+
.webassets-cache
|
|
67
|
+
|
|
68
|
+
# Scrapy stuff:
|
|
69
|
+
.scrapy
|
|
70
|
+
|
|
71
|
+
# Sphinx documentation
|
|
72
|
+
docs/_build/
|
|
73
|
+
|
|
74
|
+
# PyBuilder
|
|
75
|
+
.pybuilder/
|
|
76
|
+
target/
|
|
77
|
+
|
|
78
|
+
# Jupyter Notebook
|
|
79
|
+
.ipynb_checkpoints
|
|
80
|
+
|
|
81
|
+
# IPython
|
|
82
|
+
profile_default/
|
|
83
|
+
ipython_config.py
|
|
84
|
+
|
|
85
|
+
# pyenv
|
|
86
|
+
# For a library or package, you might want to ignore these files since the code is
|
|
87
|
+
# intended to run in multiple environments; otherwise, check them in:
|
|
88
|
+
# .python-version
|
|
89
|
+
|
|
90
|
+
# pipenv
|
|
91
|
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
|
92
|
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
|
93
|
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
|
94
|
+
# install all needed dependencies.
|
|
95
|
+
#Pipfile.lock
|
|
96
|
+
|
|
97
|
+
# UV
|
|
98
|
+
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
|
|
99
|
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
|
100
|
+
# commonly ignored for libraries.
|
|
101
|
+
#uv.lock
|
|
102
|
+
|
|
103
|
+
# poetry
|
|
104
|
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
|
105
|
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
|
106
|
+
# commonly ignored for libraries.
|
|
107
|
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
|
108
|
+
#poetry.lock
|
|
109
|
+
#poetry.toml
|
|
110
|
+
|
|
111
|
+
# pdm
|
|
112
|
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
|
113
|
+
# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
|
|
114
|
+
# https://pdm-project.org/en/latest/usage/project/#working-with-version-control
|
|
115
|
+
#pdm.lock
|
|
116
|
+
#pdm.toml
|
|
117
|
+
.pdm-python
|
|
118
|
+
.pdm-build/
|
|
119
|
+
|
|
120
|
+
# pixi
|
|
121
|
+
# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
|
|
122
|
+
#pixi.lock
|
|
123
|
+
# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
|
|
124
|
+
# in the .venv directory. It is recommended not to include this directory in version control.
|
|
125
|
+
.pixi
|
|
126
|
+
|
|
127
|
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
|
128
|
+
__pypackages__/
|
|
129
|
+
|
|
130
|
+
# Celery stuff
|
|
131
|
+
celerybeat-schedule
|
|
132
|
+
celerybeat.pid
|
|
133
|
+
|
|
134
|
+
# SageMath parsed files
|
|
135
|
+
*.sage.py
|
|
136
|
+
|
|
137
|
+
# Environments
|
|
138
|
+
.env
|
|
139
|
+
.envrc
|
|
140
|
+
.venv
|
|
141
|
+
env/
|
|
142
|
+
venv/
|
|
143
|
+
ENV/
|
|
144
|
+
env.bak/
|
|
145
|
+
venv.bak/
|
|
146
|
+
|
|
147
|
+
# Spyder project settings
|
|
148
|
+
.spyderproject
|
|
149
|
+
.spyproject
|
|
150
|
+
|
|
151
|
+
# Rope project settings
|
|
152
|
+
.ropeproject
|
|
153
|
+
|
|
154
|
+
# mkdocs documentation
|
|
155
|
+
/site
|
|
156
|
+
|
|
157
|
+
# mypy
|
|
158
|
+
.mypy_cache/
|
|
159
|
+
.dmypy.json
|
|
160
|
+
dmypy.json
|
|
161
|
+
|
|
162
|
+
# Pyre type checker
|
|
163
|
+
.pyre/
|
|
164
|
+
|
|
165
|
+
# pytype static type analyzer
|
|
166
|
+
.pytype/
|
|
167
|
+
|
|
168
|
+
# Cython debug symbols
|
|
169
|
+
cython_debug/
|
|
170
|
+
|
|
171
|
+
# PyCharm
|
|
172
|
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
|
173
|
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
|
174
|
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
|
175
|
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
|
176
|
+
#.idea/
|
|
177
|
+
|
|
178
|
+
# Abstra
|
|
179
|
+
# Abstra is an AI-powered process automation framework.
|
|
180
|
+
# Ignore directories containing user credentials, local state, and settings.
|
|
181
|
+
# Learn more at https://abstra.io/docs
|
|
182
|
+
.abstra/
|
|
183
|
+
|
|
184
|
+
# Visual Studio Code
|
|
185
|
+
# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
|
|
186
|
+
# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
|
|
187
|
+
# and can be added to the global gitignore or merged into this file. However, if you prefer,
|
|
188
|
+
# you could uncomment the following to ignore the entire vscode folder
|
|
189
|
+
# .vscode/
|
|
190
|
+
|
|
191
|
+
# Ruff stuff:
|
|
192
|
+
.ruff_cache/
|
|
193
|
+
|
|
194
|
+
# PyPI configuration file
|
|
195
|
+
.pypirc
|
|
196
|
+
|
|
197
|
+
# Cursor
|
|
198
|
+
# Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
|
|
199
|
+
# exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
|
|
200
|
+
# refer to https://docs.cursor.com/context/ignore-files
|
|
201
|
+
.cursorignore
|
|
202
|
+
.cursorindexingignore
|
|
203
|
+
|
|
204
|
+
# Marimo
|
|
205
|
+
marimo/_static/
|
|
206
|
+
marimo/_lsp/
|
|
207
|
+
__marimo__/
|
marksmith-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 tkdpython
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
marksmith-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,204 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: marksmith
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A markdown toolbox
|
|
5
|
+
Project-URL: Homepage, https://github.com/tkdpython/marksmith
|
|
6
|
+
Project-URL: Repository, https://github.com/tkdpython/marksmith
|
|
7
|
+
Project-URL: Issues, https://github.com/tkdpython/marksmith/issues
|
|
8
|
+
License: MIT License
|
|
9
|
+
|
|
10
|
+
Copyright (c) 2026 tkdpython
|
|
11
|
+
|
|
12
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
13
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
14
|
+
in the Software without restriction, including without limitation the rights
|
|
15
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
16
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
17
|
+
furnished to do so, subject to the following conditions:
|
|
18
|
+
|
|
19
|
+
The above copyright notice and this permission notice shall be included in all
|
|
20
|
+
copies or substantial portions of the Software.
|
|
21
|
+
|
|
22
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
23
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
24
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
25
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
26
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
27
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
28
|
+
SOFTWARE.
|
|
29
|
+
License-File: LICENSE
|
|
30
|
+
Keywords: formatting,markdown,text,toolbox
|
|
31
|
+
Classifier: Development Status :: 3 - Alpha
|
|
32
|
+
Classifier: Intended Audience :: Developers
|
|
33
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
34
|
+
Classifier: Operating System :: OS Independent
|
|
35
|
+
Classifier: Programming Language :: Python :: 3
|
|
36
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
37
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
38
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
39
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
40
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
41
|
+
Classifier: Topic :: Text Processing :: Markup
|
|
42
|
+
Requires-Python: >=3.9
|
|
43
|
+
Requires-Dist: beautifulsoup4>=4.12
|
|
44
|
+
Requires-Dist: markdown>=3.5
|
|
45
|
+
Requires-Dist: python-docx>=1.1
|
|
46
|
+
Requires-Dist: python-frontmatter>=1.0
|
|
47
|
+
Provides-Extra: dev
|
|
48
|
+
Requires-Dist: pytest>=7; extra == 'dev'
|
|
49
|
+
Requires-Dist: ruff; extra == 'dev'
|
|
50
|
+
Provides-Extra: template
|
|
51
|
+
Requires-Dist: docxtpl>=0.16; extra == 'template'
|
|
52
|
+
Description-Content-Type: text/markdown
|
|
53
|
+
|
|
54
|
+
# marksmith
|
|
55
|
+
|
|
56
|
+
> A Markdown toolbox — write docs in Markdown, ship them as polished DOCX.
|
|
57
|
+
|
|
58
|
+
[![PyPI version](https://img.shields.io/pypi/v/marksmith.svg)](https://pypi.org/project/marksmith/)
|
|
59
|
+
[![Python versions](https://img.shields.io/pypi/pyversions/marksmith.svg)](https://pypi.org/project/marksmith/)
|
|
60
|
+
[![CI](https://github.com/tkdpython/marksmith/actions/workflows/ci.yml/badge.svg)](https://github.com/tkdpython/marksmith/actions/workflows/ci.yml)
|
|
61
|
+
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE)
|
|
62
|
+
|
|
63
|
+
## Installation
|
|
64
|
+
|
|
65
|
+
```bash
|
|
66
|
+
pip install marksmith
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
Optional extras for template support *(coming soon)*:
|
|
70
|
+
|
|
71
|
+
```bash
|
|
72
|
+
pip install "marksmith[template]"
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
---
|
|
76
|
+
|
|
77
|
+
## Quick start
|
|
78
|
+
|
|
79
|
+
```bash
|
|
80
|
+
# Using the installed script
|
|
81
|
+
marksmith convert my-doc.md output.docx
|
|
82
|
+
|
|
83
|
+
# Or via python -m
|
|
84
|
+
python -m marksmith convert my-doc.md output.docx
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
---
|
|
88
|
+
|
|
89
|
+
## Markdown front-matter
|
|
90
|
+
|
|
91
|
+
You can add a YAML front-matter block at the top of your Markdown file.
|
|
92
|
+
The metadata is written to the DOCX core properties (title, author, etc.)
|
|
93
|
+
and will be used to populate template placeholders once template support
|
|
94
|
+
is available.
|
|
95
|
+
|
|
96
|
+
```markdown
|
|
97
|
+
---
|
|
98
|
+
title: My Document
|
|
99
|
+
version: 1.0
|
|
100
|
+
author: Paul Cummings
|
|
101
|
+
date: 2026-03-16
|
|
102
|
+
classification: Internal
|
|
103
|
+
---
|
|
104
|
+
|
|
105
|
+
# My Document
|
|
106
|
+
|
|
107
|
+
Content goes here...
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
---
|
|
111
|
+
|
|
112
|
+
## Supported Markdown elements
|
|
113
|
+
|
|
114
|
+
| Element | Status |
|
|
115
|
+
| --- | --- |
|
|
116
|
+
| Headings H1 – H6 | ✅ |
|
|
117
|
+
| Bold / italic / inline code | ✅ |
|
|
118
|
+
| Fenced and indented code blocks | ✅ |
|
|
119
|
+
| Unordered lists (nested) | ✅ |
|
|
120
|
+
| Ordered lists (nested) | ✅ |
|
|
121
|
+
| Block-quotes | ✅ |
|
|
122
|
+
| Tables | ✅ |
|
|
123
|
+
| Thematic breaks (horizontal rules) | ✅ |
|
|
124
|
+
| Strikethrough | ✅ |
|
|
125
|
+
| Links (text rendered, no hyperlink) | ⚠️ |
|
|
126
|
+
| Images | ⚠️ placeholder text only |
|
|
127
|
+
| Inline HTML | ➖ ignored |
|
|
128
|
+
|
|
129
|
+
---
|
|
130
|
+
|
|
131
|
+
## Roadmap
|
|
132
|
+
|
|
133
|
+
### Template support *(next milestone)*
|
|
134
|
+
|
|
135
|
+
The goal is to allow teams to maintain brand-consistent DOCX output without
|
|
136
|
+
leaving Markdown. The workflow will be:
|
|
137
|
+
|
|
138
|
+
1. A corporate `.docx` template carries the company's styles, logo, header,
|
|
139
|
+
footer, and cover page. Jinja2-style tags act as placeholders:
|
|
140
|
+
|
|
141
|
+
```jinja
|
|
142
|
+
{{ title }} {{ version }} {{ author }}
|
|
143
|
+
{{ date }} {{ classification }}
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
2. A special `{{ marksmith_content }}` tag marks the exact point in the
|
|
147
|
+
template where the converted Markdown body will be inserted.
|
|
148
|
+
|
|
149
|
+
3. Run the conversion:
|
|
150
|
+
|
|
151
|
+
```bash
|
|
152
|
+
marksmith convert my-doc.md output.docx --template company-template.docx
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
marksmith will:
|
|
156
|
+
- Render all front-matter metadata into the Jinja2 placeholders.
|
|
157
|
+
- Convert the Markdown body to DOCX-native content.
|
|
158
|
+
- Insert the converted content at `{{ marksmith_content }}`.
|
|
159
|
+
- Save the merged document as `output.docx`.
|
|
160
|
+
|
|
161
|
+
Implemented via [`docxtpl`](https://docxtpl.readthedocs.io/) — install the
|
|
162
|
+
`marksmith[template]` extra when this ships.
|
|
163
|
+
|
|
164
|
+
### Planned future actions
|
|
165
|
+
|
|
166
|
+
| Action | Description |
|
|
167
|
+
| --- | --- |
|
|
168
|
+
| `convert` | Markdown → DOCX *(available now)* |
|
|
169
|
+
| `convert --template` | Merge into branded DOCX template *(coming soon)* |
|
|
170
|
+
| `lint` | Validate Markdown style and structure |
|
|
171
|
+
| `toc` | Generate / update table of contents |
|
|
172
|
+
| `diff` | Show structural diff between two Markdown files |
|
|
173
|
+
|
|
174
|
+
---
|
|
175
|
+
|
|
176
|
+
## Development
|
|
177
|
+
|
|
178
|
+
```bash
|
|
179
|
+
git clone https://github.com/tkdpython/marksmith
|
|
180
|
+
cd marksmith
|
|
181
|
+
pip install -e ".[dev]"
|
|
182
|
+
|
|
183
|
+
# Run tests
|
|
184
|
+
pytest
|
|
185
|
+
|
|
186
|
+
# Lint
|
|
187
|
+
ruff check .
|
|
188
|
+
```
|
|
189
|
+
|
|
190
|
+
---
|
|
191
|
+
|
|
192
|
+
## Releasing
|
|
193
|
+
|
|
194
|
+
1. Bump `__version__` in `marksmith/__init__.py`.
|
|
195
|
+
2. Commit and push.
|
|
196
|
+
3. Create a GitHub Release with a tag matching the version (e.g. `v0.2.0`).
|
|
197
|
+
4. The [publish workflow](.github/workflows/publish.yml) fires automatically
|
|
198
|
+
and publishes to PyPI via OIDC Trusted Publisher — no API tokens needed.
|
|
199
|
+
|
|
200
|
+
---
|
|
201
|
+
|
|
202
|
+
## License
|
|
203
|
+
|
|
204
|
+
MIT — see [LICENSE](LICENSE).
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
# marksmith
|
|
2
|
+
|
|
3
|
+
> A Markdown toolbox — write docs in Markdown, ship them as polished DOCX.
|
|
4
|
+
|
|
5
|
+
[![PyPI version](https://img.shields.io/pypi/v/marksmith.svg)](https://pypi.org/project/marksmith/)
|
|
6
|
+
[![Python versions](https://img.shields.io/pypi/pyversions/marksmith.svg)](https://pypi.org/project/marksmith/)
|
|
7
|
+
[![CI](https://github.com/tkdpython/marksmith/actions/workflows/ci.yml/badge.svg)](https://github.com/tkdpython/marksmith/actions/workflows/ci.yml)
|
|
8
|
+
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE)
|
|
9
|
+
|
|
10
|
+
## Installation
|
|
11
|
+
|
|
12
|
+
```bash
|
|
13
|
+
pip install marksmith
|
|
14
|
+
```
|
|
15
|
+
|
|
16
|
+
Optional extras for template support *(coming soon)*:
|
|
17
|
+
|
|
18
|
+
```bash
|
|
19
|
+
pip install "marksmith[template]"
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
---
|
|
23
|
+
|
|
24
|
+
## Quick start
|
|
25
|
+
|
|
26
|
+
```bash
|
|
27
|
+
# Using the installed script
|
|
28
|
+
marksmith convert my-doc.md output.docx
|
|
29
|
+
|
|
30
|
+
# Or via python -m
|
|
31
|
+
python -m marksmith convert my-doc.md output.docx
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
---
|
|
35
|
+
|
|
36
|
+
## Markdown front-matter
|
|
37
|
+
|
|
38
|
+
You can add a YAML front-matter block at the top of your Markdown file.
|
|
39
|
+
The metadata is written to the DOCX core properties (title, author, etc.)
|
|
40
|
+
and will be used to populate template placeholders once template support
|
|
41
|
+
is available.
|
|
42
|
+
|
|
43
|
+
```markdown
|
|
44
|
+
---
|
|
45
|
+
title: My Document
|
|
46
|
+
version: 1.0
|
|
47
|
+
author: Paul Cummings
|
|
48
|
+
date: 2026-03-16
|
|
49
|
+
classification: Internal
|
|
50
|
+
---
|
|
51
|
+
|
|
52
|
+
# My Document
|
|
53
|
+
|
|
54
|
+
Content goes here...
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
---
|
|
58
|
+
|
|
59
|
+
## Supported Markdown elements
|
|
60
|
+
|
|
61
|
+
| Element | Status |
|
|
62
|
+
| --- | --- |
|
|
63
|
+
| Headings H1 – H6 | ✅ |
|
|
64
|
+
| Bold / italic / inline code | ✅ |
|
|
65
|
+
| Fenced and indented code blocks | ✅ |
|
|
66
|
+
| Unordered lists (nested) | ✅ |
|
|
67
|
+
| Ordered lists (nested) | ✅ |
|
|
68
|
+
| Block-quotes | ✅ |
|
|
69
|
+
| Tables | ✅ |
|
|
70
|
+
| Thematic breaks (horizontal rules) | ✅ |
|
|
71
|
+
| Strikethrough | ✅ |
|
|
72
|
+
| Links (text rendered, no hyperlink) | ⚠️ |
|
|
73
|
+
| Images | ⚠️ placeholder text only |
|
|
74
|
+
| Inline HTML | ➖ ignored |
|
|
75
|
+
|
|
76
|
+
---
|
|
77
|
+
|
|
78
|
+
## Roadmap
|
|
79
|
+
|
|
80
|
+
### Template support *(next milestone)*
|
|
81
|
+
|
|
82
|
+
The goal is to allow teams to maintain brand-consistent DOCX output without
|
|
83
|
+
leaving Markdown. The workflow will be:
|
|
84
|
+
|
|
85
|
+
1. A corporate `.docx` template carries the company's styles, logo, header,
|
|
86
|
+
footer, and cover page. Jinja2-style tags act as placeholders:
|
|
87
|
+
|
|
88
|
+
```jinja
|
|
89
|
+
{{ title }} {{ version }} {{ author }}
|
|
90
|
+
{{ date }} {{ classification }}
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
2. A special `{{ marksmith_content }}` tag marks the exact point in the
|
|
94
|
+
template where the converted Markdown body will be inserted.
|
|
95
|
+
|
|
96
|
+
3. Run the conversion:
|
|
97
|
+
|
|
98
|
+
```bash
|
|
99
|
+
marksmith convert my-doc.md output.docx --template company-template.docx
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
marksmith will:
|
|
103
|
+
- Render all front-matter metadata into the Jinja2 placeholders.
|
|
104
|
+
- Convert the Markdown body to DOCX-native content.
|
|
105
|
+
- Insert the converted content at `{{ marksmith_content }}`.
|
|
106
|
+
- Save the merged document as `output.docx`.
|
|
107
|
+
|
|
108
|
+
Implemented via [`docxtpl`](https://docxtpl.readthedocs.io/) — install the
|
|
109
|
+
`marksmith[template]` extra when this ships.
|
|
110
|
+
|
|
111
|
+
### Planned future actions
|
|
112
|
+
|
|
113
|
+
| Action | Description |
|
|
114
|
+
| --- | --- |
|
|
115
|
+
| `convert` | Markdown → DOCX *(available now)* |
|
|
116
|
+
| `convert --template` | Merge into branded DOCX template *(coming soon)* |
|
|
117
|
+
| `lint` | Validate Markdown style and structure |
|
|
118
|
+
| `toc` | Generate / update table of contents |
|
|
119
|
+
| `diff` | Show structural diff between two Markdown files |
|
|
120
|
+
|
|
121
|
+
---
|
|
122
|
+
|
|
123
|
+
## Development
|
|
124
|
+
|
|
125
|
+
```bash
|
|
126
|
+
git clone https://github.com/tkdpython/marksmith
|
|
127
|
+
cd marksmith
|
|
128
|
+
pip install -e ".[dev]"
|
|
129
|
+
|
|
130
|
+
# Run tests
|
|
131
|
+
pytest
|
|
132
|
+
|
|
133
|
+
# Lint
|
|
134
|
+
ruff check .
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
---
|
|
138
|
+
|
|
139
|
+
## Releasing
|
|
140
|
+
|
|
141
|
+
1. Bump `__version__` in `marksmith/__init__.py`.
|
|
142
|
+
2. Commit and push.
|
|
143
|
+
3. Create a GitHub Release with a tag matching the version (e.g. `v0.2.0`).
|
|
144
|
+
4. The [publish workflow](.github/workflows/publish.yml) fires automatically
|
|
145
|
+
and publishes to PyPI via OIDC Trusted Publisher — no API tokens needed.
|
|
146
|
+
|
|
147
|
+
---
|
|
148
|
+
|
|
149
|
+
## License
|
|
150
|
+
|
|
151
|
+
MIT — see [LICENSE](LICENSE).
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
"""Command-line interface for marksmith.
|
|
2
|
+
|
|
3
|
+
Invoked as::
|
|
4
|
+
|
|
5
|
+
marksmith <action> [args...]
|
|
6
|
+
python -m marksmith <action> [args...]
|
|
7
|
+
|
|
8
|
+
Actions are registered as argparse sub-commands. As the tool grows, new
|
|
9
|
+
actions are added by creating a new sub-parser block and a corresponding
|
|
10
|
+
handler function (or module).
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
import argparse
|
|
14
|
+
import sys
|
|
15
|
+
|
|
16
|
+
from marksmith import __version__
|
|
17
|
+
from marksmith.convert import md_to_docx
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def main(argv: "list[str] | None" = None) -> None:
    """Entry point for the marksmith CLI.

    Builds the top-level parser and the ``convert`` sub-command, parses the
    command line and dispatches to the handler registered for the chosen
    action.

    Parameters
    ----------
    argv:
        Argument strings to parse, without the program name.  When ``None``
        (the default) argparse falls back to ``sys.argv[1:]``, so callers
        that invoke ``main()`` with no arguments behave exactly as before.
        Passing an explicit list allows the CLI to be driven from code.
    """
    parser = argparse.ArgumentParser(
        prog="marksmith",
        description="marksmith — a Markdown toolbox.",
    )
    parser.add_argument(
        "--version",
        action="version",
        version=f"%(prog)s {__version__}",
    )

    subparsers = parser.add_subparsers(
        dest="action",
        metavar="<action>",
        title="actions",
    )
    # An action is mandatory: argparse reports an error when none is given.
    subparsers.required = True

    # ── convert ──────────────────────────────────────────────────────────────
    convert_parser = subparsers.add_parser(
        "convert",
        help="Convert a Markdown file to another format (currently: .docx)",
        description=(
            "Convert a Markdown file to a DOCX document.\n\n"
            "FRONT-MATTER METADATA\n"
            " The Markdown file may begin with a YAML front-matter block\n"
            " (between '---' delimiters) containing document metadata:\n\n"
            " ---\n"
            " title: My Document\n"
            " version: 1.0\n"
            " author: Paul Cummings\n"
            " date: 2026-03-16\n"
            " classification: Internal\n"
            " ---\n\n"
            " Without a template, metadata is written to the DOCX core\n"
            " properties (title, author, etc.).\n\n"
            "TEMPLATE SUPPORT (coming soon — requires: pip install marksmith[template])\n"
            " Provide a .docx template containing Jinja2-style placeholders\n"
            " sourced from the front-matter metadata, e.g.:\n\n"
            " {{ title }} {{ version }} {{ author }} {{ date }}\n\n"
            " Plus a special tag marking where the Markdown body is inserted:\n\n"
            " {{ marksmith_content }}\n\n"
            " Rendered via docxtpl (python-docx-template)."
        ),
        # Keep the hand-formatted description above verbatim in --help output.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    convert_parser.add_argument(
        "input",
        help="Path to the input Markdown (.md) file",
    )
    convert_parser.add_argument(
        "output",
        help="Path to the output file (.docx)",
    )
    convert_parser.add_argument(
        "--template",
        metavar="TEMPLATE",
        default=None,
        help=(
            "Path to a .docx template file. Placeholders in the template "
            "are filled from YAML front-matter metadata and the converted "
            "Markdown body is inserted at {{ marksmith_content }}. "
            "(Requires: pip install marksmith[template] — not yet implemented.)"
        ),
    )
    # The handler is stored on the namespace so dispatch below stays generic.
    convert_parser.set_defaults(func=_cmd_convert)

    # ── future actions go here ────────────────────────────────────────────────
    # e.g. lint, diff, toc, etc.

    args = parser.parse_args(argv)
    args.func(args)
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def _cmd_convert(args: argparse.Namespace) -> None:
    """Handle the 'convert' action.

    Converts ``args.input`` to ``args.output`` (forwarding ``args.template``)
    and prints a confirmation on success.  Any failure is reported on stderr
    and ends the process: exit status 2 for ``NotImplementedError``, 1 for a
    ``FileNotFoundError`` or any other exception.
    """
    # Only the conversion itself is guarded, so a problem while *printing*
    # the confirmation can never be misreported as a failed conversion.
    try:
        md_to_docx(args.input, args.output, template_path=args.template)
    except FileNotFoundError as exc:
        print(f"Error: {exc}", file=sys.stderr)  # noqa: T201
        sys.exit(1)
    except NotImplementedError as exc:
        print(f"Not yet implemented: {exc}", file=sys.stderr)  # noqa: T201
        sys.exit(2)
    except Exception as exc:  # noqa: BLE001
        print(f"Conversion failed: {exc}", file=sys.stderr)  # noqa: T201
        sys.exit(1)

    try:
        print(f"\u2713 Converted '{args.input}' \u2192 '{args.output}'")  # noqa: T201
    except UnicodeEncodeError:
        # The output encoding cannot represent the check mark / arrow (or a
        # character in one of the paths): fall back to a plain variant and
        # replace whatever still cannot be encoded.
        encoding = sys.stdout.encoding or "ascii"
        plain = f"Converted '{args.input}' -> '{args.output}'"
        print(plain.encode(encoding, errors="replace").decode(encoding))  # noqa: T201
|
|
@@ -0,0 +1,357 @@
|
|
|
1
|
+
"""Markdown to DOCX conversion for marksmith.
|
|
2
|
+
|
|
3
|
+
Pipeline
|
|
4
|
+
--------
|
|
5
|
+
Markdown file → parse YAML front-matter → render Markdown body to HTML
|
|
6
|
+
→ walk HTML tree → write python-docx Document → save .docx
|
|
7
|
+
|
|
8
|
+
Supported Markdown elements (first implementation)
|
|
9
|
+
---------------------------------------------------
|
|
10
|
+
* Headings H1 – H6
|
|
11
|
+
* Paragraphs with inline bold, italic, inline-code and links
|
|
12
|
+
* Fenced and indented code blocks
|
|
13
|
+
* Unordered and ordered lists (nested up to three levels)
|
|
14
|
+
* Block-quotes
|
|
15
|
+
* Tables (with bold header row)
|
|
16
|
+
* Thematic breaks (horizontal rules)
|
|
17
|
+
|
|
18
|
+
YAML Front-matter
|
|
19
|
+
-----------------
|
|
20
|
+
The Markdown file may start with a YAML front-matter block delimited by
|
|
21
|
+
``---`` lines. Recognised keys are written to the DOCX core properties:
|
|
22
|
+
|
|
23
|
+
---
|
|
24
|
+
title: My Document
|
|
25
|
+
version: 1.0
|
|
26
|
+
author: Paul Cummings
|
|
27
|
+
date: 2026-03-16
|
|
28
|
+
classification: Internal
|
|
29
|
+
---
|
|
30
|
+
|
|
31
|
+
All keys will be available for template rendering once template support is implemented (see *Template support* below).
|
|
32
|
+
|
|
33
|
+
Template support (PLANNED — not yet implemented)
|
|
34
|
+
-------------------------------------------------
|
|
35
|
+
A future ``--template`` option will accept a ``.docx`` file containing
|
|
36
|
+
Jinja2-style placeholders populated from the front-matter metadata:
|
|
37
|
+
|
|
38
|
+
{{ title }} {{ version }} {{ author }} {{ date }}
|
|
39
|
+
|
|
40
|
+
A special ``{{ marksmith_content }}`` tag marks the point in the template
|
|
41
|
+
at which the converted Markdown body will be inserted. This will be
|
|
42
|
+
implemented using ``docxtpl`` (python-docx-template) and requires the
|
|
43
|
+
optional ``marksmith[template]`` extras.
|
|
44
|
+
|
|
45
|
+
The intended workflow for producing branded company documents is:
|
|
46
|
+
|
|
47
|
+
1. Author writes content in plain Markdown with a YAML front-matter header.
|
|
48
|
+
2. A corporate ``.docx`` template carries the company's styles, logo, header,
|
|
49
|
+
footer, and cover page—with Jinja2 tags as placeholders.
|
|
50
|
+
3. ``marksmith convert doc.md output.docx --template company.docx``
|
|
51
|
+
merges metadata into the template and inserts the body at
|
|
52
|
+
``{{ marksmith_content }}``.
|
|
53
|
+
"""
|
|
54
|
+
|
|
55
|
+
from __future__ import annotations
|
|
56
|
+
|
|
57
|
+
import contextlib
|
|
58
|
+
import os
|
|
59
|
+
from pathlib import Path
|
|
60
|
+
|
|
61
|
+
import frontmatter
|
|
62
|
+
import markdown
|
|
63
|
+
from bs4 import BeautifulSoup, NavigableString, Tag
|
|
64
|
+
from docx import Document
|
|
65
|
+
from docx.oxml import OxmlElement
|
|
66
|
+
from docx.oxml.ns import qn
|
|
67
|
+
from docx.shared import Inches, Pt
|
|
68
|
+
|
|
69
|
+
# Python-Markdown extensions enabled for all conversions; the list is passed
# unchanged as the ``extensions`` argument of ``markdown.markdown``.
_MD_EXTENSIONS = ["tables", "fenced_code", "sane_lists"]

# Maximum list-nesting depth supported by the default DOCX styles.  Deeper
# lists are clamped to this level when the list style name is chosen.
_MAX_LIST_DEPTH = 3
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
# ── Public API ────────────────────────────────────────────────────────────────
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def md_to_docx(
    input_path: str,
    output_path: str,
    template_path: str | None = None,
) -> None:
    """Render the Markdown file at *input_path* as a DOCX file at *output_path*.

    Parameters
    ----------
    input_path:
        Location of the Markdown source; it must be an existing file.
    output_path:
        Where the ``.docx`` result is written.  Missing parent directories
        are created on demand.
    template_path:
        Reserved for the planned ``.docx`` template feature.  Any value other
        than ``None`` currently raises ``NotImplementedError``.

    Raises
    ------
    FileNotFoundError
        If *input_path* is not an existing file.
    ValueError
        If the suffix of *output_path* is not ``.docx`` (case-insensitive).
    NotImplementedError
        If *template_path* is given.
    """
    source = str(input_path)
    destination = str(output_path)

    # Validation order is fixed: input file, then output format, then template.
    if not os.path.isfile(source):
        raise FileNotFoundError(f"Input file not found: {source}")

    target = Path(destination)
    extension = target.suffix.lower()
    if extension != ".docx":
        raise ValueError(
            f"Unsupported output format '{extension}'. Only .docx is currently supported."
        )

    if template_path is not None:
        raise NotImplementedError(
            "Template support is not yet implemented. "
            "It will be available in a future release via 'pip install marksmith[template]'."
        )

    raw_text = Path(source).read_text(encoding="utf-8")

    # Pipeline: front matter split -> Markdown to HTML -> HTML to DOCX.
    front_matter, markdown_body = _parse_frontmatter(raw_text)
    document = _html_to_docx(_md_to_html(markdown_body))
    _apply_core_properties(document, front_matter)

    target.parent.mkdir(parents=True, exist_ok=True)
    document.save(destination)
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
# ── Internal helpers ──────────────────────────────────────────────────────────
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def _parse_frontmatter(raw: str) -> tuple[dict, str]:
    """Split *raw* into its front-matter metadata and its Markdown body.

    Returns a ``(metadata, body)`` pair; the metadata is copied into a plain
    ``dict`` so callers do not hold on to the parser's own mapping.
    """
    parsed = frontmatter.loads(raw)
    metadata = dict(parsed.metadata)
    return metadata, parsed.content
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def _md_to_html(md_text: str) -> str:
    """Convert Markdown source into HTML using the module's extension set."""
    rendered = markdown.markdown(md_text, extensions=_MD_EXTENSIONS)
    return rendered
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def _apply_core_properties(doc: Document, metadata: dict) -> None:
|
|
140
|
+
"""Write recognised front-matter keys to DOCX core properties."""
|
|
141
|
+
props = doc.core_properties
|
|
142
|
+
if "title" in metadata:
|
|
143
|
+
props.title = str(metadata["title"])
|
|
144
|
+
if "author" in metadata:
|
|
145
|
+
props.author = str(metadata["author"])
|
|
146
|
+
if "description" in metadata:
|
|
147
|
+
props.description = str(metadata["description"])
|
|
148
|
+
if "version" in metadata:
|
|
149
|
+
props.revision = str(metadata["version"])
|
|
150
|
+
if "keywords" in metadata:
|
|
151
|
+
props.keywords = str(metadata["keywords"])
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def _html_to_docx(html: str) -> Document:
    """Create a new :class:`docx.Document` populated from *html*.

    The HTML is parsed with the ``html.parser`` backend and each top-level
    node is handed to ``_process_block`` in document order.
    """
    document = Document()
    parsed = BeautifulSoup(html, "html.parser")

    for node in parsed.children:
        _process_block(document, node, depth=1)

    return document
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
# ── Block-level element processors ───────────────────────────────────────────
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def _process_block(doc: Document, element: Tag | NavigableString, depth: int = 1) -> None:
    """Dispatch a single block-level HTML node to the appropriate handler.

    Parameters
    ----------
    doc:
        Document that receives the rendered content.
    element:
        A top-level node of the parsed HTML: either bare text or a tag.
    depth:
        List nesting depth, forwarded to ``_process_list`` for ``<ul>``/``<ol>``.
    """
    if isinstance(element, NavigableString):
        # Text between blocks is normally just whitespace; only non-blank
        # text is turned into a paragraph.
        text = str(element).strip()
        if text:
            doc.add_paragraph(text)
        return

    tag = element.name

    if tag in ("h1", "h2", "h3", "h4", "h5", "h6"):
        # Add the children run by run.  ``get_text(strip=True)`` strips every
        # text node and joins them without a separator, which turned
        # "Hello <em>big</em> world" into "Hellobigworld" and dropped the
        # inline formatting.
        heading = doc.add_heading(level=int(tag[1]))
        _add_inline_content(heading, element)

    elif tag == "p":
        para = doc.add_paragraph()
        _add_inline_content(para, element)

    elif tag in ("ul", "ol"):
        _process_list(doc, element, ordered=(tag == "ol"), depth=depth)

    elif tag == "pre":
        code_el = element.find("code")
        code_text = code_el.get_text() if code_el else element.get_text()
        para = doc.add_paragraph(style="Normal")
        # Drop the newline(s) that end the code text; in the run they would
        # become an empty last line under the code.
        run = para.add_run(code_text.rstrip("\n"))
        run.font.name = "Courier New"
        run.font.size = Pt(9)

    elif tag == "blockquote":
        _process_blockquote(doc, element)

    elif tag == "table":
        _process_table(doc, element)

    elif tag == "hr":
        _add_horizontal_rule(doc)

    # Silently ignore unknown / structural tags (html, body, div, etc.)
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
def _is_blank_text(node) -> bool:
    """Return True when *node* is a text node holding only whitespace."""
    return isinstance(node, NavigableString) and not str(node).strip()


def _process_list(
    doc: Document,
    element: Tag,
    ordered: bool = False,
    depth: int = 1,
) -> None:
    """Recursively render an HTML ``<ul>`` or ``<ol>`` into list paragraphs.

    Parameters
    ----------
    doc:
        Document that receives one paragraph per list item.
    element:
        The ``<ul>`` or ``<ol>`` tag; only its direct ``<li>`` children are
        rendered at this level.
    ordered:
        ``True`` selects the "List Number" styles, ``False`` "List Bullet".
    depth:
        One-based nesting level.  It is clamped to ``1.._MAX_LIST_DEPTH``
        when the style name is chosen, so deeper lists reuse the last style.
    """
    level = max(1, min(depth, _MAX_LIST_DEPTH))
    base_style = "List Number" if ordered else "List Bullet"
    style = base_style if level == 1 else f"{base_style} {level}"

    for li in element.find_all("li", recursive=False):
        para = doc.add_paragraph(style=style)

        # Separate the item's own content from the lists nested inside it.
        inline_nodes = []
        nested_lists = []
        for child in li.children:
            if getattr(child, "name", None) in ("ul", "ol"):
                nested_lists.append(child)
            else:
                inline_nodes.append(child)

        # Whitespace at the edges of an item is markup layout, not content.
        # Left in place, each newline would become a line break in the
        # list paragraph.
        while inline_nodes and _is_blank_text(inline_nodes[0]):
            inline_nodes.pop(0)
        while inline_nodes and _is_blank_text(inline_nodes[-1]):
            inline_nodes.pop()

        last_index = len(inline_nodes) - 1
        for index, node in enumerate(inline_nodes):
            if isinstance(node, NavigableString):
                text = str(node)
                if index == 0:
                    text = text.lstrip()
                if index == last_index:
                    text = text.rstrip()
                if text:
                    para.add_run(text)
            else:
                _add_inline_content_node(para, node)

        # Nested lists follow their parent item, one level deeper.
        for nested in nested_lists:
            _process_list(doc, nested, ordered=(nested.name == "ol"), depth=depth + 1)
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
def _process_blockquote(doc: Document, element: Tag) -> None:
    """Render a ``<blockquote>`` as indented italic paragraphs.

    ``<p>`` children and bare text become paragraphs indented by 0.4 inch
    with every run set to italic.  Any other child tag (list, code block,
    table, nested quote, ...) is passed to ``_process_block`` so its content
    is kept instead of being dropped; such content is rendered with that
    handler's normal formatting.
    """
    for child in element.children:
        if isinstance(child, NavigableString):
            text = str(child).strip()
            if not text:
                continue
            para = doc.add_paragraph(text)
        elif child.name == "p":
            para = doc.add_paragraph()
            _add_inline_content(para, child)
        else:
            _process_block(doc, child)
            continue

        para.paragraph_format.left_indent = Inches(0.4)
        # Italicise every run of the paragraph rather than only the runs the
        # inline helper happens to return, so text nested inside other
        # inline tags is covered as well.
        for run in para.runs:
            run.italic = True
|
|
256
|
+
|
|
257
|
+
|
|
258
|
+
def _process_table(doc: Document, element: Tag) -> None:
    """Convert an HTML ``<table>`` into a DOCX table of the same shape.

    The table is as wide as its widest row.  Nothing is added when the HTML
    table has no rows or no cells.  Header cells (``<th>``) are rendered in
    bold.
    """
    html_rows = element.find_all("tr")
    if not html_rows:
        return

    cells_by_row = [html_row.find_all(["td", "th"]) for html_row in html_rows]
    width = max(len(html_cells) for html_cells in cells_by_row)
    if width == 0:
        return

    table = doc.add_table(rows=len(html_rows), cols=width)
    # The style lookup can raise KeyError; in that case the default look is kept.
    with contextlib.suppress(KeyError):
        table.style = "Table Grid"

    for docx_row, html_cells in zip(table.rows, cells_by_row):
        docx_cells = docx_row.cells
        for docx_cell, html_cell in zip(docx_cells, html_cells):
            docx_cell.text = ""
            para = docx_cell.paragraphs[0]
            _add_inline_content(para, html_cell)
            if html_cell.name != "th":
                continue
            for run in para.runs:
                run.bold = True
|
|
282
|
+
|
|
283
|
+
|
|
284
|
+
def _add_horizontal_rule(doc: Document) -> None:
    """Append an empty paragraph whose bottom border acts as a horizontal rule."""
    border = OxmlElement("w:bottom")
    border_attributes = (
        ("w:val", "single"),
        ("w:sz", "6"),
        ("w:space", "1"),
        ("w:color", "auto"),
    )
    for attribute, value in border_attributes:
        border.set(qn(attribute), value)

    borders = OxmlElement("w:pBdr")
    borders.append(border)

    # Attach the border definition to the new paragraph's properties element.
    paragraph = doc.add_paragraph()
    paragraph._p.get_or_add_pPr().append(borders)
|
|
296
|
+
|
|
297
|
+
|
|
298
|
+
# ── Inline content helpers ────────────────────────────────────────────────────
|
|
299
|
+
|
|
300
|
+
|
|
301
|
+
def _add_inline_content(para, element: Tag) -> None:
    """Append every direct child of *element* to *para* as inline content.

    Children are handled one at a time, in document order, by
    ``_add_inline_content_node``.
    """
    for node in element.children:
        _add_inline_content_node(para, node)
|
|
305
|
+
|
|
306
|
+
|
|
307
|
+
# Inline tags that map directly to one character format.
_INLINE_TAG_FORMATS = {
    "strong": "bold",
    "b": "bold",
    "em": "italic",
    "i": "italic",
    "del": "strike",
    "s": "strike",
    # TODO: Add proper OOXML hyperlink support (requires relationship injection).
    "a": "underline",
}


def _format_run(run, formats):
    """Apply the accumulated inline *formats* to *run* and return it."""
    if "bold" in formats:
        run.bold = True
    if "italic" in formats:
        run.italic = True
    if "underline" in formats:
        run.underline = True
    if "strike" in formats:
        run.font.strike = True
    if "code" in formats:
        run.font.name = "Courier New"
        run.font.size = Pt(9)
    return run


def _render_inline(para, node, formats):
    """Add *node* to *para*, carrying the formats of its enclosing tags.

    Returns the last run added, or ``None`` if no run was created.
    """
    if isinstance(node, NavigableString):
        text = str(node)
        return _format_run(para.add_run(text), formats) if text else None

    tag = node.name

    if tag == "code":
        # Inline code is a leaf: its text is taken verbatim.
        return _format_run(para.add_run(node.get_text()), formats | {"code"})

    if tag == "img":
        # TODO: Embed images via run.add_picture() once image download/path
        # resolution is implemented.
        placeholder = para.add_run(f"[image: {node.get('alt', '')}]")
        return _format_run(placeholder, formats | {"italic"})

    # Formatting tags add their format; any other inline tag (span, abbr,
    # etc.) passes the current formats through to its children.
    own_format = _INLINE_TAG_FORMATS.get(tag)
    child_formats = formats | {own_format} if own_format else formats

    last_run = None
    for child in node.children:
        run = _render_inline(para, child, child_formats)
        if run is not None:
            last_run = run
    return last_run


def _add_inline_content_node(para, node):
    """Add a single inline node (NavigableString or Tag) to *para*.

    Nested formatting is preserved: each text fragment becomes its own run
    carrying the formats of all enclosing tags, so
    ``<strong><em>x</em></strong>`` is rendered bold *and* italic.

    Supported tags: ``strong``/``b`` (bold), ``em``/``i`` (italic), ``code``
    (Courier New, 9 pt), ``del``/``s`` (strikethrough), ``a`` (underlined
    text, no real hyperlink yet) and ``img`` (an italic ``[image: alt]``
    placeholder).  Other tags are transparent containers.

    Returns the last Run added, or None if no run was created.
    """
    return _render_inline(para, node, frozenset())
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "marksmith"
|
|
7
|
+
dynamic = ["version"]
|
|
8
|
+
description = "A markdown toolbox"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = { file = "LICENSE" }
|
|
11
|
+
requires-python = ">=3.9"
|
|
12
|
+
keywords = ["markdown", "toolbox", "text", "formatting"]
|
|
13
|
+
classifiers = [
|
|
14
|
+
"Development Status :: 3 - Alpha",
|
|
15
|
+
"Intended Audience :: Developers",
|
|
16
|
+
"License :: OSI Approved :: MIT License",
|
|
17
|
+
"Operating System :: OS Independent",
|
|
18
|
+
"Programming Language :: Python :: 3",
|
|
19
|
+
"Programming Language :: Python :: 3.9",
|
|
20
|
+
"Programming Language :: Python :: 3.10",
|
|
21
|
+
"Programming Language :: Python :: 3.11",
|
|
22
|
+
"Programming Language :: Python :: 3.12",
|
|
23
|
+
"Programming Language :: Python :: 3.13",
|
|
24
|
+
"Topic :: Text Processing :: Markup",
|
|
25
|
+
]
|
|
26
|
+
dependencies = [
|
|
27
|
+
"markdown>=3.5",
|
|
28
|
+
"python-frontmatter>=1.0",
|
|
29
|
+
"python-docx>=1.1",
|
|
30
|
+
"beautifulsoup4>=4.12",
|
|
31
|
+
]
|
|
32
|
+
|
|
33
|
+
[project.scripts]
|
|
34
|
+
marksmith = "marksmith.cli:main"
|
|
35
|
+
|
|
36
|
+
[project.optional-dependencies]
|
|
37
|
+
template = [
|
|
38
|
+
"docxtpl>=0.16",
|
|
39
|
+
]
|
|
40
|
+
dev = [
|
|
41
|
+
"pytest>=7",
|
|
42
|
+
"ruff",
|
|
43
|
+
]
|
|
44
|
+
|
|
45
|
+
[project.urls]
|
|
46
|
+
Homepage = "https://github.com/tkdpython/marksmith"
|
|
47
|
+
Repository = "https://github.com/tkdpython/marksmith"
|
|
48
|
+
Issues = "https://github.com/tkdpython/marksmith/issues"
|
|
49
|
+
|
|
50
|
+
[tool.hatch.version]
|
|
51
|
+
path = "marksmith/__init__.py"
|
|
52
|
+
|
|
53
|
+
[tool.ruff]
|
|
54
|
+
line-length = 100
|
|
55
|
+
|
|
56
|
+
[tool.ruff.lint]
|
|
57
|
+
select = ["E", "F", "W", "I"]
|
|
58
|
+
ignore = ["E501"]
|
|
59
|
+
|
|
60
|
+
[tool.ruff.lint.per-file-ignores]
|
|
61
|
+
"tests/**" = ["S101"] # assert is expected in pytest tests
|
|
62
|
+
|
|
63
|
+
[tool.pytest.ini_options]
|
|
64
|
+
testpaths = ["tests"]
|
|
File without changes
|
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
"""Tests for marksmith.convert."""
|
|
2
|
+
|
|
3
|
+
import pytest
|
|
4
|
+
|
|
5
|
+
from marksmith.convert import _md_to_html, _parse_frontmatter, md_to_docx
|
|
6
|
+
|
|
7
|
+
# ── _parse_frontmatter ────────────────────────────────────────────────────────
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def test_parse_frontmatter_extracts_metadata():
    """Scalar front-matter values are parsed into their YAML types."""
    source = "---\ntitle: My Doc\nversion: 1.0\n---\n# Hello"
    metadata, _ = _parse_frontmatter(source)
    assert metadata["title"] == "My Doc"
    assert metadata["version"] == 1.0
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def test_parse_frontmatter_returns_body():
    """The Markdown after the front matter is returned as the body."""
    source = "---\ntitle: Test\n---\n# Hello\n\nSome text."
    content = _parse_frontmatter(source)[1]
    assert "# Hello" in content
    assert "Some text." in content
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def test_parse_frontmatter_no_frontmatter():
    """Input without a front-matter block yields empty metadata."""
    source = "# Hello\n\nJust content, no metadata."
    metadata, content = _parse_frontmatter(source)
    assert metadata == {}
    assert "# Hello" in content
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def test_parse_frontmatter_empty_frontmatter():
    """An empty front-matter block yields empty metadata and keeps the body."""
    source = "---\n---\n# Hello"
    metadata, content = _parse_frontmatter(source)
    assert metadata == {}
    assert "# Hello" in content
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
# ── _md_to_html ───────────────────────────────────────────────────────────────
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def test_md_to_html_heading():
    """A level-one heading renders as an ``<h1>`` element."""
    rendered = _md_to_html("# Hello World")
    assert "<h1>Hello World</h1>" in rendered
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def test_md_to_html_bold():
    """Double asterisks render as a ``<strong>`` element."""
    rendered = _md_to_html("This is **bold** text.")
    assert "<strong>bold</strong>" in rendered
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def test_md_to_html_code_block():
    """A fenced code block renders as ``<pre>`` wrapping ``<code>``."""
    rendered = _md_to_html("```python\nprint('hi')\n```")
    assert "<pre>" in rendered
    # The opening tag may carry class="language-python", so match the prefix only.
    assert "<code" in rendered
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def test_md_to_html_table():
    """A pipe table renders as a ``<table>`` element."""
    source = "| A | B |\n|---|---|\n| 1 | 2 |"
    rendered = _md_to_html(source)
    assert "<table>" in rendered
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def test_md_to_html_unordered_list():
    """Dash-prefixed lines render as ``<ul>`` with ``<li>`` items."""
    rendered = _md_to_html("- item one\n- item two")
    assert "<ul>" in rendered
    assert "<li>" in rendered
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def test_md_to_html_ordered_list():
    """Numbered lines render as an ``<ol>`` element."""
    rendered = _md_to_html("1. first\n2. second")
    assert "<ol>" in rendered
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
# ── md_to_docx ────────────────────────────────────────────────────────────────
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def test_md_to_docx_file_not_found():
    """A missing input file raises ``FileNotFoundError`` naming that file."""
    # ``match`` is a regular expression, so the dot is escaped to require a
    # literal "." in the message.
    with pytest.raises(FileNotFoundError, match=r"nonexistent\.md"):
        md_to_docx("nonexistent.md", "output.docx")
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def test_md_to_docx_unsupported_format(tmp_path):
    """An output suffix other than ``.docx`` raises ``ValueError`` naming it."""
    md_file = tmp_path / "test.md"
    md_file.write_text("# Hello", encoding="utf-8")
    # ``match`` is a regular expression; an unescaped ".txt" would accept any
    # character followed by "txt".
    with pytest.raises(ValueError, match=r"\.txt"):
        md_to_docx(str(md_file), str(tmp_path / "output.txt"))
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def test_md_to_docx_template_raises_not_implemented(tmp_path):
    """Passing ``template_path`` raises ``NotImplementedError``."""
    source = tmp_path / "test.md"
    source.write_text("# Hello")
    destination = tmp_path / "output.docx"
    with pytest.raises(NotImplementedError):
        md_to_docx(str(source), str(destination), template_path="tmpl.docx")
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def test_md_to_docx_creates_file(tmp_path):
    """A basic conversion writes a non-empty output file."""
    source = tmp_path / "test.md"
    source.write_text("# Hello\n\nThis is a paragraph.")
    destination = tmp_path / "output.docx"

    md_to_docx(str(source), str(destination))

    assert destination.exists()
    assert destination.stat().st_size > 0
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def test_md_to_docx_with_frontmatter(tmp_path):
    """Front-matter ``title`` and ``author`` reach the DOCX core properties."""
    # Imported here because only this test reads a generated document back.
    from docx import Document

    md_file = tmp_path / "test.md"
    md_file.write_text(
        "---\ntitle: My Doc\nauthor: Test Author\n---\n# Hello",
        encoding="utf-8",
    )
    output_file = tmp_path / "output.docx"
    md_to_docx(str(md_file), str(output_file))
    assert output_file.exists()

    # Checking only that the file exists would pass even if the metadata
    # were never written, so read it back.
    props = Document(str(output_file)).core_properties
    assert props.title == "My Doc"
    assert props.author == "Test Author"
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def test_md_to_docx_creates_parent_dirs(tmp_path):
    """Missing parent directories of the output path are created."""
    source = tmp_path / "test.md"
    source.write_text("# Hello")
    destination = tmp_path / "subdir" / "nested" / "output.docx"

    md_to_docx(str(source), str(destination))

    assert destination.exists()
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def test_md_to_docx_all_block_elements(tmp_path):
    """Smoke test covering all supported block-level elements."""
    # The nested bullet is indented by four spaces, the indentation
    # Python-Markdown needs to treat it as a nested list.
    md = """---
title: Smoke Test
---

# Heading 1

## Heading 2

### Heading 3

A paragraph with **bold**, *italic*, `inline code`, and ~~strikethrough~~.

- Bullet one
- Bullet two
    - Nested bullet

1. First
2. Second

> A blockquote paragraph.

```python
def hello():
    print("hello")
```

| Col A | Col B |
| --- | --- |
| val 1 | val 2 |

---
"""
    md_file = tmp_path / "smoke.md"
    # The converter reads its input as UTF-8, so write it the same way.
    md_file.write_text(md, encoding="utf-8")
    output_file = tmp_path / "smoke.docx"
    md_to_docx(str(md_file), str(output_file))
    assert output_file.exists()
    assert output_file.stat().st_size > 0
|