docx-plus 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docx_plus-0.1.0/.gitignore +53 -0
- docx_plus-0.1.0/LICENSE +21 -0
- docx_plus-0.1.0/PKG-INFO +265 -0
- docx_plus-0.1.0/README.md +227 -0
- docx_plus-0.1.0/SPEC.md +969 -0
- docx_plus-0.1.0/docx_plus/__init__.py +9 -0
- docx_plus-0.1.0/docx_plus/_testing/__init__.py +1 -0
- docx_plus-0.1.0/docx_plus/_testing/ooxml_asserts.py +133 -0
- docx_plus-0.1.0/docx_plus/controls/__init__.py +35 -0
- docx_plus-0.1.0/docx_plus/controls/builder.py +404 -0
- docx_plus-0.1.0/docx_plus/controls/read.py +528 -0
- docx_plus-0.1.0/docx_plus/core/__init__.py +45 -0
- docx_plus-0.1.0/docx_plus/core/ids.py +130 -0
- docx_plus-0.1.0/docx_plus/core/ns.py +64 -0
- docx_plus-0.1.0/docx_plus/core/oxml.py +106 -0
- docx_plus-0.1.0/docx_plus/core/parts.py +14 -0
- docx_plus-0.1.0/docx_plus/examples/__init__.py +18 -0
- docx_plus-0.1.0/docx_plus/examples/build_form.py +131 -0
- docx_plus-0.1.0/docx_plus/examples/inspect_document.py +147 -0
- docx_plus-0.1.0/docx_plus/examples/populate_form.py +121 -0
- docx_plus-0.1.0/docx_plus/examples/restyle_existing.py +119 -0
- docx_plus-0.1.0/docx_plus/fields/__init__.py +27 -0
- docx_plus-0.1.0/docx_plus/fields/simple.py +186 -0
- docx_plus-0.1.0/docx_plus/fields/update.py +92 -0
- docx_plus-0.1.0/docx_plus/protection/__init__.py +24 -0
- docx_plus-0.1.0/docx_plus/protection/document.py +141 -0
- docx_plus-0.1.0/docx_plus/py.typed +0 -0
- docx_plus-0.1.0/docx_plus/styles/__init__.py +49 -0
- docx_plus-0.1.0/docx_plus/styles/inspect.py +789 -0
- docx_plus-0.1.0/docx_plus/styles/modify.py +2405 -0
- docx_plus-0.1.0/docx_plus/styles/theme.py +313 -0
- docx_plus-0.1.0/pyproject.toml +126 -0
- docx_plus-0.1.0/tests/__init__.py +0 -0
- docx_plus-0.1.0/tests/conftest.py +48 -0
- docx_plus-0.1.0/tests/fixtures/__init__.py +0 -0
- docx_plus-0.1.0/tests/fixtures/build_fixtures.py +230 -0
- docx_plus-0.1.0/tests/fixtures/word_samples/README.md +85 -0
- docx_plus-0.1.0/tests/fixtures/word_samples/sample-1.docx +0 -0
- docx_plus-0.1.0/tests/fixtures/word_samples/sample-2.docx +0 -0
- docx_plus-0.1.0/tests/test_cascade_numbering.py +113 -0
- docx_plus-0.1.0/tests/test_cascade_provenance.py +176 -0
- docx_plus-0.1.0/tests/test_cascade_run_target.py +117 -0
- docx_plus-0.1.0/tests/test_cascade_toggles.py +189 -0
- docx_plus-0.1.0/tests/test_controls_builder.py +340 -0
- docx_plus-0.1.0/tests/test_controls_read.py +311 -0
- docx_plus-0.1.0/tests/test_core_ids.py +108 -0
- docx_plus-0.1.0/tests/test_core_ns.py +46 -0
- docx_plus-0.1.0/tests/test_core_oxml.py +63 -0
- docx_plus-0.1.0/tests/test_examples_libreoffice.py +98 -0
- docx_plus-0.1.0/tests/test_examples_smoke.py +67 -0
- docx_plus-0.1.0/tests/test_fields.py +317 -0
- docx_plus-0.1.0/tests/test_import_invariant.py +54 -0
- docx_plus-0.1.0/tests/test_integration_smoke.py +44 -0
- docx_plus-0.1.0/tests/test_protection.py +186 -0
- docx_plus-0.1.0/tests/test_smoke.py +10 -0
- docx_plus-0.1.0/tests/test_styles_inspect.py +317 -0
- docx_plus-0.1.0/tests/test_styles_modify.py +932 -0
- docx_plus-0.1.0/tests/test_styles_theme.py +157 -0
- docx_plus-0.1.0/tests/test_theme_edge_cases.py +175 -0
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
*.so
|
|
6
|
+
.Python
|
|
7
|
+
|
|
8
|
+
# Distribution / packaging
|
|
9
|
+
build/
|
|
10
|
+
dist/
|
|
11
|
+
*.egg-info/
|
|
12
|
+
*.egg
|
|
13
|
+
wheels/
|
|
14
|
+
.eggs/
|
|
15
|
+
|
|
16
|
+
# Environments
|
|
17
|
+
.venv/
|
|
18
|
+
venv/
|
|
19
|
+
env/
|
|
20
|
+
ENV/
|
|
21
|
+
|
|
22
|
+
# Testing / coverage
|
|
23
|
+
.pytest_cache/
|
|
24
|
+
.mypy_cache/
|
|
25
|
+
.ruff_cache/
|
|
26
|
+
.coverage
|
|
27
|
+
.coverage.*
|
|
28
|
+
coverage.xml
|
|
29
|
+
htmlcov/
|
|
30
|
+
.tox/
|
|
31
|
+
.nox/
|
|
32
|
+
|
|
33
|
+
# Generated fixtures (rebuilt by tests/fixtures/build_fixtures.py)
|
|
34
|
+
tests/fixtures/*.docx
|
|
35
|
+
|
|
36
|
+
# Build artifacts from docs
|
|
37
|
+
site/
|
|
38
|
+
docs/_build/
|
|
39
|
+
|
|
40
|
+
# IDE / editor
|
|
41
|
+
.idea/
|
|
42
|
+
.vscode/
|
|
43
|
+
*.swp
|
|
44
|
+
*.swo
|
|
45
|
+
.DS_Store
|
|
46
|
+
|
|
47
|
+
# uv
|
|
48
|
+
# Keep uv.lock committed; ignore uv's local cache only
|
|
49
|
+
.uv-cache/
|
|
50
|
+
|
|
51
|
+
# Reference artifacts (extracted from docx-skill-files.zip for inspection)
|
|
52
|
+
.skill-ref/
|
|
53
|
+
docx-skill-files.zip
|
docx_plus-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Tom Villani, PhD
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
docx_plus-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,265 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: docx_plus
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: OOXML-level extensions for python-docx: style cascade, content controls, fields.
|
|
5
|
+
Project-URL: Homepage, https://github.com/thomas-villani/docx-plus
|
|
6
|
+
Project-URL: Documentation, https://thomas-villani.github.io/docx-plus/
|
|
7
|
+
Project-URL: Repository, https://github.com/thomas-villani/docx-plus
|
|
8
|
+
Project-URL: Issues, https://github.com/thomas-villani/docx-plus/issues
|
|
9
|
+
Author-email: "Tom Villani, PhD" <tomrhobus@gmail.com>
|
|
10
|
+
License: MIT
|
|
11
|
+
License-File: LICENSE
|
|
12
|
+
Keywords: docx,forms,ooxml,python-docx,styles,word
|
|
13
|
+
Classifier: Development Status :: 3 - Alpha
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
15
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
16
|
+
Classifier: Operating System :: OS Independent
|
|
17
|
+
Classifier: Programming Language :: Python :: 3
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
22
|
+
Classifier: Topic :: Office/Business
|
|
23
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
24
|
+
Classifier: Typing :: Typed
|
|
25
|
+
Requires-Python: >=3.10
|
|
26
|
+
Requires-Dist: lxml>=4.9
|
|
27
|
+
Requires-Dist: python-docx>=1.0.0
|
|
28
|
+
Provides-Extra: dev
|
|
29
|
+
Requires-Dist: lxml-stubs>=0.5; extra == 'dev'
|
|
30
|
+
Requires-Dist: mkdocs-material>=9.5; extra == 'dev'
|
|
31
|
+
Requires-Dist: mkdocs>=1.6; extra == 'dev'
|
|
32
|
+
Requires-Dist: mkdocstrings[python]>=0.26; extra == 'dev'
|
|
33
|
+
Requires-Dist: mypy>=1.10; extra == 'dev'
|
|
34
|
+
Requires-Dist: pytest-cov>=5.0; extra == 'dev'
|
|
35
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
36
|
+
Requires-Dist: ruff>=0.5; extra == 'dev'
|
|
37
|
+
Description-Content-Type: text/markdown
|
|
38
|
+
|
|
39
|
+
# docx_plus
|
|
40
|
+
|
|
41
|
+
OOXML-level extensions for [python-docx](https://python-docx.readthedocs.io/).
|
|
42
|
+
Composes with python-docx rather than replacing it: callers keep their
|
|
43
|
+
`Document` object and use `docx_plus` for the operations python-docx
|
|
44
|
+
can't reach.
|
|
45
|
+
|
|
46
|
+
**v0.1 capabilities**:
|
|
47
|
+
|
|
48
|
+
- **Style cascade**: read the effective formatting that would apply to
|
|
49
|
+
any paragraph/run/cell, with per-field provenance; modify styles in
|
|
50
|
+
the Word-native way rather than scattering direct formatting.
|
|
51
|
+
- **Content controls**: build text / dropdown / date / checkbox
|
|
52
|
+
controls with `FormBuilder`; read their values back; round-trip them
|
|
53
|
+
through save/reopen.
|
|
54
|
+
- **Fields**: insert PAGE / NUMPAGES / DATE / generic complex fields;
|
|
55
|
+
mark fields dirty so Word recalculates them on next open.
|
|
56
|
+
- **Protection**: enforce form-fill, read-only, comments-only, or
|
|
57
|
+
tracked-changes mode at the document level.
|
|
58
|
+
|
|
59
|
+
> **Status:** v0.1 complete and released as version 0.1.0. Read
|
|
60
|
+
> [`SPEC.md`](SPEC.md) for the API contract and
|
|
61
|
+
> [`IMPLEMENTATION.md`](IMPLEMENTATION.md) for the build plan.
|
|
62
|
+
|
|
63
|
+
## Install (development)
|
|
64
|
+
|
|
65
|
+
```bash
|
|
66
|
+
git clone https://github.com/thomas-villani/docx-plus.git
|
|
67
|
+
cd docx-plus
|
|
68
|
+
uv sync --extra dev # or: pip install -e ".[dev]"
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
## 60-second quickstart
|
|
72
|
+
|
|
73
|
+
### Inspect: why does this paragraph look the way it does?
|
|
74
|
+
|
|
75
|
+
```python
|
|
76
|
+
from docx import Document
|
|
77
|
+
from docx_plus.styles import resolve_effective_formatting
|
|
78
|
+
|
|
79
|
+
doc = Document("report.docx")
|
|
80
|
+
p = doc.paragraphs[0]
|
|
81
|
+
|
|
82
|
+
resolved = resolve_effective_formatting(p, include_provenance=True)
|
|
83
|
+
print(resolved.style_name) # e.g. "Title"
|
|
84
|
+
print(resolved.font_size) # e.g. 28.0 (points)
|
|
85
|
+
print(resolved.bold) # True / False / None
|
|
86
|
+
print(resolved.provenance["font_size"]) # FormattingSource(layer='paragraphStyle', ...)
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
`ResolvedFormatting` carries every formatting field that the OOXML
|
|
90
|
+
cascade can set — `font_name`, `font_size`, `bold`, `italic`, `color_rgb`,
|
|
91
|
+
`alignment`, `indent_*`, `spacing_*`, `line_spacing`, plus run-level
|
|
92
|
+
toggles. With `include_provenance=True`, every populated field is
|
|
93
|
+
keyed in `.provenance` to the cascade layer (and style ID) that
|
|
94
|
+
contributed it. That's how you answer "why is this paragraph 14pt
|
|
95
|
+
italic?" — the provenance tells you exactly which style in the
|
|
96
|
+
`basedOn` chain set the size and whether the italic came from XOR-ing toggle properties across the cascade.
|
|
97
|
+
|
|
98
|
+
### Modify: define a custom heading and apply it
|
|
99
|
+
|
|
100
|
+
```python
|
|
101
|
+
from docx import Document
|
|
102
|
+
from docx_plus.styles import create_style, apply_style
|
|
103
|
+
|
|
104
|
+
doc = Document()
|
|
105
|
+
create_style(
|
|
106
|
+
doc, "BrandHeading",
|
|
107
|
+
style_type="paragraph",
|
|
108
|
+
based_on="Heading1",
|
|
109
|
+
font_name="Inter",
|
|
110
|
+
font_size=18.0,
|
|
111
|
+
color_rgb="2F5496",
|
|
112
|
+
bold=True,
|
|
113
|
+
spacing_after=240,
|
|
114
|
+
)
|
|
115
|
+
|
|
116
|
+
p = doc.add_paragraph("Hello, world")
|
|
117
|
+
apply_style(p, "BrandHeading")
|
|
118
|
+
doc.save("out.docx")
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
This is the Word-native workflow: define a style, apply it. Changing
|
|
122
|
+
the style later changes every paragraph that uses it, not just the
|
|
123
|
+
ones you remember to update.
|
|
124
|
+
|
|
125
|
+
### Ensure: materialise a built-in latent style
|
|
126
|
+
|
|
127
|
+
Word's built-ins (`Heading1`–`Heading9`, `Title`, `Quote`, `TOC1`–`TOC9`,
|
|
128
|
+
`FootnoteText`, `BlockText`, `PlainText`, …) are *latent* — defined by
|
|
129
|
+
Word's defaults but not actually present in `styles.xml` until they're
|
|
130
|
+
used. `ensure_style` knows about **107** of them, with defaults
|
|
131
|
+
extracted from real Word-saved samples (not guessed):
|
|
132
|
+
|
|
133
|
+
```python
|
|
134
|
+
from docx import Document
|
|
135
|
+
from docx_plus.styles import ensure_style, apply_style
|
|
136
|
+
|
|
137
|
+
doc = Document()
|
|
138
|
+
ensure_style(doc, "Heading1") # idempotent — materialises if absent
|
|
139
|
+
ensure_style(doc, "Heading1") # ...no-op the second time
|
|
140
|
+
ensure_style(doc, "TOC2") # also works for less-common built-ins
|
|
141
|
+
ensure_style(doc, "BlockText")
|
|
142
|
+
apply_style(doc.add_paragraph("Intro"), "Heading1")
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
The full list is tiered in [Architecture §5](https://thomas-villani.github.io/docx-plus/ARCHITECTURE/#5-built-in-styles-table)
|
|
146
|
+
— tiers Core and A–G together cover essentially every style a Word user reaches for.
|
|
147
|
+
|
|
148
|
+
For documents authored elsewhere where IDs may not match (e.g. style
|
|
149
|
+
named `"Heading 1"` with a space), `ensure_style(doc, "Heading1",
|
|
150
|
+
match_existing=True)` will find the existing definition via case- and
|
|
151
|
+
space-insensitive matching; alternatively, use [`remap_styles`](https://thomas-villani.github.io/docx-plus/ARCHITECTURE/#4-style-remapping-phase-35)
|
|
152
|
+
for document-wide normalisation.
|
|
153
|
+
|
|
154
|
+
### Forms: build a fillable document with `FormBuilder`
|
|
155
|
+
|
|
156
|
+
```python
|
|
157
|
+
from docx_plus.controls import FormBuilder
|
|
158
|
+
|
|
159
|
+
fb = FormBuilder() # or FormBuilder("template.docx")
|
|
160
|
+
fb.doc.add_heading("New employee form", level=1)
|
|
161
|
+
|
|
162
|
+
p = fb.doc.add_paragraph("Full name: ")
|
|
163
|
+
fb.add_text_control(p, tag="full_name", placeholder="Type your name")
|
|
164
|
+
|
|
165
|
+
p = fb.doc.add_paragraph("Department: ")
|
|
166
|
+
fb.add_dropdown(p, tag="dept", items=["Engineering", "Design", "Ops"])
|
|
167
|
+
|
|
168
|
+
p = fb.doc.add_paragraph("Start date: ")
|
|
169
|
+
fb.add_date_picker(p, tag="start_date", date_format="M/d/yyyy")
|
|
170
|
+
|
|
171
|
+
p = fb.doc.add_paragraph("Remote? ")
|
|
172
|
+
fb.add_checkbox(p, tag="remote", checked=False)
|
|
173
|
+
|
|
174
|
+
fb.save("form.docx")
|
|
175
|
+
```
|
|
176
|
+
|
|
177
|
+
Read or update an existing form's values with `read_controls` /
|
|
178
|
+
`set_control_value`:
|
|
179
|
+
|
|
180
|
+
```python
|
|
181
|
+
from docx import Document
|
|
182
|
+
from docx_plus.controls import read_controls, set_control_value
|
|
183
|
+
|
|
184
|
+
doc = Document("form.docx")
|
|
185
|
+
set_control_value(doc, "full_name", "Ada Lovelace")
|
|
186
|
+
set_control_value(doc, "dept", "Engineering")
|
|
187
|
+
doc.save("form_filled.docx")
|
|
188
|
+
|
|
189
|
+
values = read_controls(Document("form_filled.docx"))
|
|
190
|
+
print(values["full_name"].value) # 'Ada Lovelace'
|
|
191
|
+
print(values["dept"].value) # 'Engineering'
|
|
192
|
+
```
|
|
193
|
+
|
|
194
|
+
### Fields and protection: page numbers + lock-down
|
|
195
|
+
|
|
196
|
+
```python
|
|
197
|
+
from docx import Document
|
|
198
|
+
from docx_plus.fields import add_page_number_field, mark_fields_dirty
|
|
199
|
+
from docx_plus.protection import protect_document
|
|
200
|
+
|
|
201
|
+
doc = Document()
|
|
202
|
+
p = doc.add_paragraph("Page ")
|
|
203
|
+
add_page_number_field(p)
|
|
204
|
+
p.add_run(" of ")
|
|
205
|
+
add_page_number_field(p, field="NUMPAGES")
|
|
206
|
+
|
|
207
|
+
mark_fields_dirty(doc) # Word recalculates fields on open
|
|
208
|
+
protect_document(doc, mode="forms") # only content controls editable
|
|
209
|
+
|
|
210
|
+
doc.save("report.docx")
|
|
211
|
+
```
|
|
212
|
+
|
|
213
|
+
`add_date_field` and the generic `add_field(instruction=..., initial_text=...)`
|
|
214
|
+
cover dates and any other complex field (TOC, REF, MERGEFIELD, …).
|
|
215
|
+
`unprotect_document(doc)` removes any protection;
|
|
216
|
+
`is_protected(doc)` is a one-liner predicate.
|
|
217
|
+
|
|
218
|
+
## What's next
|
|
219
|
+
|
|
220
|
+
v0.1 ships the four capabilities listed at the top of this README.
|
|
221
|
+
The [`v0.2 deferred list`](https://thomas-villani.github.io/docx-plus/ARCHITECTURE/)
|
|
222
|
+
(SPEC §15) tracks what comes after — anchored comments, footnotes /
|
|
223
|
+
endnotes, bookmarks and cross-references, a `sections/` API for
|
|
224
|
+
columns and mid-document section breaks, content-control data binding
|
|
225
|
+
to Custom XML Parts, theme writing, and password-protected forms.
|
|
226
|
+
Open an issue if your use case needs any of these and you'd like to
|
|
227
|
+
help shape the design.
|
|
228
|
+
|
|
229
|
+
<details>
|
|
230
|
+
<summary>Build phases (for contributors)</summary>
|
|
231
|
+
|
|
232
|
+
| Phase | Deliverable | Status |
|
|
233
|
+
|---|---|---|
|
|
234
|
+
| 1 | Foundation (`core/ns`, `core/oxml`, `core/ids`, `_testing/`) | ✓ complete |
|
|
235
|
+
| 2 | Style inspection (`styles/inspect`, `styles/theme`) | ✓ complete |
|
|
236
|
+
| 3 | Style modification (`styles/modify`) | ✓ complete |
|
|
237
|
+
| 3.5 | Style remapping (`find_matching_style`, `remap_styles`, `ensure_style(match_existing=)`) | ✓ complete |
|
|
238
|
+
| 4 | Content controls (`controls/`) | ✓ complete |
|
|
239
|
+
| 5 | Fields + document protection (`fields/`, `protection/`) | ✓ complete |
|
|
240
|
+
| 6 | Polish — examples, headless LibreOffice smoke tests, CI doc build | ✓ complete |
|
|
241
|
+
|
|
242
|
+
</details>
|
|
243
|
+
|
|
244
|
+
## Documentation
|
|
245
|
+
|
|
246
|
+
Full docs (rendered by [MkDocs](https://www.mkdocs.org) +
|
|
247
|
+
[mkdocstrings](https://mkdocstrings.github.io)) are published at
|
|
248
|
+
<https://thomas-villani.github.io/docx-plus/>.
|
|
249
|
+
|
|
250
|
+
- [Architecture](https://thomas-villani.github.io/docx-plus/ARCHITECTURE/)
|
|
251
|
+
— module layout, cascade algorithm, schema-strict insertion, error
|
|
252
|
+
hierarchy, invariants
|
|
253
|
+
- [API Index](https://thomas-villani.github.io/docx-plus/API/) —
|
|
254
|
+
hand-curated index of every public symbol with links to the
|
|
255
|
+
auto-generated reference
|
|
256
|
+
- [Test Gaps](https://thomas-villani.github.io/docx-plus/TEST_GAPS/) —
|
|
257
|
+
honest accounting of where the test suite has real holes (snapshot
|
|
258
|
+
at the end of Phase 5)
|
|
259
|
+
- Per-module API reference lives under
|
|
260
|
+
<https://thomas-villani.github.io/docx-plus/reference/>;
|
|
261
|
+
run `uv run mkdocs serve` to browse it locally.
|
|
262
|
+
|
|
263
|
+
## License
|
|
264
|
+
|
|
265
|
+
MIT. Copyright (c) 2026 Tom Villani, PhD. See [`LICENSE`](LICENSE).
|
|
@@ -0,0 +1,227 @@
|
|
|
1
|
+
# docx_plus
|
|
2
|
+
|
|
3
|
+
OOXML-level extensions for [python-docx](https://python-docx.readthedocs.io/).
|
|
4
|
+
Composes with python-docx rather than replacing it: callers keep their
|
|
5
|
+
`Document` object and use `docx_plus` for the operations python-docx
|
|
6
|
+
can't reach.
|
|
7
|
+
|
|
8
|
+
**v0.1 capabilities**:
|
|
9
|
+
|
|
10
|
+
- **Style cascade**: read the effective formatting that would apply to
|
|
11
|
+
any paragraph/run/cell, with per-field provenance; modify styles in
|
|
12
|
+
the Word-native way rather than scattering direct formatting.
|
|
13
|
+
- **Content controls**: build text / dropdown / date / checkbox
|
|
14
|
+
controls with `FormBuilder`; read their values back; round-trip them
|
|
15
|
+
through save/reopen.
|
|
16
|
+
- **Fields**: insert PAGE / NUMPAGES / DATE / generic complex fields;
|
|
17
|
+
mark fields dirty so Word recalculates them on next open.
|
|
18
|
+
- **Protection**: enforce form-fill, read-only, comments-only, or
|
|
19
|
+
tracked-changes mode at the document level.
|
|
20
|
+
|
|
21
|
+
> **Status:** v0.1 complete and released as version 0.1.0. Read
|
|
22
|
+
> [`SPEC.md`](SPEC.md) for the API contract and
|
|
23
|
+
> [`IMPLEMENTATION.md`](IMPLEMENTATION.md) for the build plan.
|
|
24
|
+
|
|
25
|
+
## Install (development)
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
git clone https://github.com/thomas-villani/docx-plus.git
|
|
29
|
+
cd docx-plus
|
|
30
|
+
uv sync --extra dev # or: pip install -e ".[dev]"
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
## 60-second quickstart
|
|
34
|
+
|
|
35
|
+
### Inspect: why does this paragraph look the way it does?
|
|
36
|
+
|
|
37
|
+
```python
|
|
38
|
+
from docx import Document
|
|
39
|
+
from docx_plus.styles import resolve_effective_formatting
|
|
40
|
+
|
|
41
|
+
doc = Document("report.docx")
|
|
42
|
+
p = doc.paragraphs[0]
|
|
43
|
+
|
|
44
|
+
resolved = resolve_effective_formatting(p, include_provenance=True)
|
|
45
|
+
print(resolved.style_name) # e.g. "Title"
|
|
46
|
+
print(resolved.font_size) # e.g. 28.0 (points)
|
|
47
|
+
print(resolved.bold) # True / False / None
|
|
48
|
+
print(resolved.provenance["font_size"]) # FormattingSource(layer='paragraphStyle', ...)
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
`ResolvedFormatting` carries every formatting field that the OOXML
|
|
52
|
+
cascade can set — `font_name`, `font_size`, `bold`, `italic`, `color_rgb`,
|
|
53
|
+
`alignment`, `indent_*`, `spacing_*`, `line_spacing`, plus run-level
|
|
54
|
+
toggles. With `include_provenance=True`, every populated field is
|
|
55
|
+
keyed in `.provenance` to the cascade layer (and style ID) that
|
|
56
|
+
contributed it. That's how you answer "why is this paragraph 14pt
|
|
57
|
+
italic?" — the provenance tells you exactly which style in the
|
|
58
|
+
`basedOn` chain set the size and whether the italic came from XOR-ing toggle properties across the cascade.
|
|
59
|
+
|
|
60
|
+
### Modify: define a custom heading and apply it
|
|
61
|
+
|
|
62
|
+
```python
|
|
63
|
+
from docx import Document
|
|
64
|
+
from docx_plus.styles import create_style, apply_style
|
|
65
|
+
|
|
66
|
+
doc = Document()
|
|
67
|
+
create_style(
|
|
68
|
+
doc, "BrandHeading",
|
|
69
|
+
style_type="paragraph",
|
|
70
|
+
based_on="Heading1",
|
|
71
|
+
font_name="Inter",
|
|
72
|
+
font_size=18.0,
|
|
73
|
+
color_rgb="2F5496",
|
|
74
|
+
bold=True,
|
|
75
|
+
spacing_after=240,
|
|
76
|
+
)
|
|
77
|
+
|
|
78
|
+
p = doc.add_paragraph("Hello, world")
|
|
79
|
+
apply_style(p, "BrandHeading")
|
|
80
|
+
doc.save("out.docx")
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
This is the Word-native workflow: define a style, apply it. Changing
|
|
84
|
+
the style later changes every paragraph that uses it, not just the
|
|
85
|
+
ones you remember to update.
|
|
86
|
+
|
|
87
|
+
### Ensure: materialise a built-in latent style
|
|
88
|
+
|
|
89
|
+
Word's built-ins (`Heading1`–`Heading9`, `Title`, `Quote`, `TOC1`–`TOC9`,
|
|
90
|
+
`FootnoteText`, `BlockText`, `PlainText`, …) are *latent* — defined by
|
|
91
|
+
Word's defaults but not actually present in `styles.xml` until they're
|
|
92
|
+
used. `ensure_style` knows about **107** of them, with defaults
|
|
93
|
+
extracted from real Word-saved samples (not guessed):
|
|
94
|
+
|
|
95
|
+
```python
|
|
96
|
+
from docx import Document
|
|
97
|
+
from docx_plus.styles import ensure_style, apply_style
|
|
98
|
+
|
|
99
|
+
doc = Document()
|
|
100
|
+
ensure_style(doc, "Heading1") # idempotent — materialises if absent
|
|
101
|
+
ensure_style(doc, "Heading1") # ...no-op the second time
|
|
102
|
+
ensure_style(doc, "TOC2") # also works for less-common built-ins
|
|
103
|
+
ensure_style(doc, "BlockText")
|
|
104
|
+
apply_style(doc.add_paragraph("Intro"), "Heading1")
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
The full list is tiered in [Architecture §5](https://thomas-villani.github.io/docx-plus/ARCHITECTURE/#5-built-in-styles-table)
|
|
108
|
+
— tiers Core and A–G together cover essentially every style a Word user reaches for.
|
|
109
|
+
|
|
110
|
+
For documents authored elsewhere where IDs may not match (e.g. style
|
|
111
|
+
named `"Heading 1"` with a space), `ensure_style(doc, "Heading1",
|
|
112
|
+
match_existing=True)` will find the existing definition via case- and
|
|
113
|
+
space-insensitive matching; alternatively, use [`remap_styles`](https://thomas-villani.github.io/docx-plus/ARCHITECTURE/#4-style-remapping-phase-35)
|
|
114
|
+
for document-wide normalisation.
|
|
115
|
+
|
|
116
|
+
### Forms: build a fillable document with `FormBuilder`
|
|
117
|
+
|
|
118
|
+
```python
|
|
119
|
+
from docx_plus.controls import FormBuilder
|
|
120
|
+
|
|
121
|
+
fb = FormBuilder() # or FormBuilder("template.docx")
|
|
122
|
+
fb.doc.add_heading("New employee form", level=1)
|
|
123
|
+
|
|
124
|
+
p = fb.doc.add_paragraph("Full name: ")
|
|
125
|
+
fb.add_text_control(p, tag="full_name", placeholder="Type your name")
|
|
126
|
+
|
|
127
|
+
p = fb.doc.add_paragraph("Department: ")
|
|
128
|
+
fb.add_dropdown(p, tag="dept", items=["Engineering", "Design", "Ops"])
|
|
129
|
+
|
|
130
|
+
p = fb.doc.add_paragraph("Start date: ")
|
|
131
|
+
fb.add_date_picker(p, tag="start_date", date_format="M/d/yyyy")
|
|
132
|
+
|
|
133
|
+
p = fb.doc.add_paragraph("Remote? ")
|
|
134
|
+
fb.add_checkbox(p, tag="remote", checked=False)
|
|
135
|
+
|
|
136
|
+
fb.save("form.docx")
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
Read or update an existing form's values with `read_controls` /
|
|
140
|
+
`set_control_value`:
|
|
141
|
+
|
|
142
|
+
```python
|
|
143
|
+
from docx import Document
|
|
144
|
+
from docx_plus.controls import read_controls, set_control_value
|
|
145
|
+
|
|
146
|
+
doc = Document("form.docx")
|
|
147
|
+
set_control_value(doc, "full_name", "Ada Lovelace")
|
|
148
|
+
set_control_value(doc, "dept", "Engineering")
|
|
149
|
+
doc.save("form_filled.docx")
|
|
150
|
+
|
|
151
|
+
values = read_controls(Document("form_filled.docx"))
|
|
152
|
+
print(values["full_name"].value) # 'Ada Lovelace'
|
|
153
|
+
print(values["dept"].value) # 'Engineering'
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
### Fields and protection: page numbers + lock-down
|
|
157
|
+
|
|
158
|
+
```python
|
|
159
|
+
from docx import Document
|
|
160
|
+
from docx_plus.fields import add_page_number_field, mark_fields_dirty
|
|
161
|
+
from docx_plus.protection import protect_document
|
|
162
|
+
|
|
163
|
+
doc = Document()
|
|
164
|
+
p = doc.add_paragraph("Page ")
|
|
165
|
+
add_page_number_field(p)
|
|
166
|
+
p.add_run(" of ")
|
|
167
|
+
add_page_number_field(p, field="NUMPAGES")
|
|
168
|
+
|
|
169
|
+
mark_fields_dirty(doc) # Word recalculates fields on open
|
|
170
|
+
protect_document(doc, mode="forms") # only content controls editable
|
|
171
|
+
|
|
172
|
+
doc.save("report.docx")
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
`add_date_field` and the generic `add_field(instruction=..., initial_text=...)`
|
|
176
|
+
cover dates and any other complex field (TOC, REF, MERGEFIELD, …).
|
|
177
|
+
`unprotect_document(doc)` removes any protection;
|
|
178
|
+
`is_protected(doc)` is a one-liner predicate.
|
|
179
|
+
|
|
180
|
+
## What's next
|
|
181
|
+
|
|
182
|
+
v0.1 ships the four capabilities listed at the top of this README.
|
|
183
|
+
The [`v0.2 deferred list`](https://thomas-villani.github.io/docx-plus/ARCHITECTURE/)
|
|
184
|
+
(SPEC §15) tracks what comes after — anchored comments, footnotes /
|
|
185
|
+
endnotes, bookmarks and cross-references, a `sections/` API for
|
|
186
|
+
columns and mid-document section breaks, content-control data binding
|
|
187
|
+
to Custom XML Parts, theme writing, and password-protected forms.
|
|
188
|
+
Open an issue if your use case needs any of these and you'd like to
|
|
189
|
+
help shape the design.
|
|
190
|
+
|
|
191
|
+
<details>
|
|
192
|
+
<summary>Build phases (for contributors)</summary>
|
|
193
|
+
|
|
194
|
+
| Phase | Deliverable | Status |
|
|
195
|
+
|---|---|---|
|
|
196
|
+
| 1 | Foundation (`core/ns`, `core/oxml`, `core/ids`, `_testing/`) | ✓ complete |
|
|
197
|
+
| 2 | Style inspection (`styles/inspect`, `styles/theme`) | ✓ complete |
|
|
198
|
+
| 3 | Style modification (`styles/modify`) | ✓ complete |
|
|
199
|
+
| 3.5 | Style remapping (`find_matching_style`, `remap_styles`, `ensure_style(match_existing=)`) | ✓ complete |
|
|
200
|
+
| 4 | Content controls (`controls/`) | ✓ complete |
|
|
201
|
+
| 5 | Fields + document protection (`fields/`, `protection/`) | ✓ complete |
|
|
202
|
+
| 6 | Polish — examples, headless LibreOffice smoke tests, CI doc build | ✓ complete |
|
|
203
|
+
|
|
204
|
+
</details>
|
|
205
|
+
|
|
206
|
+
## Documentation
|
|
207
|
+
|
|
208
|
+
Full docs (rendered by [MkDocs](https://www.mkdocs.org) +
|
|
209
|
+
[mkdocstrings](https://mkdocstrings.github.io)) are published at
|
|
210
|
+
<https://thomas-villani.github.io/docx-plus/>.
|
|
211
|
+
|
|
212
|
+
- [Architecture](https://thomas-villani.github.io/docx-plus/ARCHITECTURE/)
|
|
213
|
+
— module layout, cascade algorithm, schema-strict insertion, error
|
|
214
|
+
hierarchy, invariants
|
|
215
|
+
- [API Index](https://thomas-villani.github.io/docx-plus/API/) —
|
|
216
|
+
hand-curated index of every public symbol with links to the
|
|
217
|
+
auto-generated reference
|
|
218
|
+
- [Test Gaps](https://thomas-villani.github.io/docx-plus/TEST_GAPS/) —
|
|
219
|
+
honest accounting of where the test suite has real holes (snapshot
|
|
220
|
+
at the end of Phase 5)
|
|
221
|
+
- Per-module API reference lives under
|
|
222
|
+
<https://thomas-villani.github.io/docx-plus/reference/>;
|
|
223
|
+
run `uv run mkdocs serve` to browse it locally.
|
|
224
|
+
|
|
225
|
+
## License
|
|
226
|
+
|
|
227
|
+
MIT. Copyright (c) 2026 Tom Villani, PhD. See [`LICENSE`](LICENSE).
|