docx-plus 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
docx_plus/__init__.py ADDED
@@ -0,0 +1,9 @@
1
+ """docx_plus — OOXML-level extensions for python-docx.
2
+
3
+ See ``SPEC.md`` at the project root for the public API contract.
4
+ """
5
+
6
+ from docx_plus.core import DocxPlusError
7
+
8
+ __all__ = ["DocxPlusError"]
9
+ __version__ = "0.1.0"
@@ -0,0 +1 @@
1
+ """Internal test helpers. Not part of the public API."""
@@ -0,0 +1,133 @@
1
+ """Shared OOXML assertion helpers used across the test suite.
2
+
3
+ Internal API — not part of the public surface. Built out lazily as later
4
+ phases introduce call sites (SPEC §10).
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from typing import TYPE_CHECKING, Any
10
+
11
+ from lxml import etree
12
+
13
+ from docx_plus.controls.read import ControlType, _classify_sdt
14
+ from docx_plus.core.ns import qn
15
+ from docx_plus.core.oxml import xpath
16
+
17
+ if TYPE_CHECKING:
18
+ from docx.document import Document
19
+
20
+
21
def assert_ids_unique(doc: Document) -> None:
    """Assert every ``w:id`` on a ``w:sdt`` in the document body is unique.

    Only ``w:sdt/w:sdtPr/w:id`` elements reachable from ``doc.element.body``
    are inspected. An id element with no ``w:val`` attribute, or whose value
    does not parse as an integer, is skipped rather than reported.

    Args:
        doc: python-docx Document to inspect.

    Raises:
        AssertionError: If any ID appears more than once.
    """
    seen: dict[int, int] = {}
    for id_el in xpath(doc.element.body, ".//w:sdt/w:sdtPr/w:id"):
        raw = id_el.get(qn("w:val"))
        if raw is None:
            continue
        try:
            value = int(raw)
        except ValueError:
            continue
        seen[value] = seen.get(value, 0) + 1
    duplicates = {v: count for v, count in seen.items() if count > 1}
    # Raise explicitly rather than via ``assert``: assert statements are
    # removed under ``python -O``, which would turn this helper into a no-op.
    if duplicates:
        raise AssertionError(f"duplicate SDT w:id values: {duplicates}")
42
+
43
+
44
def assert_style_defined(doc: Document, style_id: str) -> None:
    """Assert ``style_id`` is materialized in ``word/styles.xml``.

    The lookup is an XPath match on ``w:style[@w:styleId=...]`` anywhere under
    ``doc.styles.element``; the id is passed as an XPath variable, so it needs
    no quoting or escaping by the caller.

    Args:
        doc: python-docx Document to inspect.
        style_id: The style's ``w:styleId`` attribute value.

    Raises:
        AssertionError: If no ``w:style`` element with that id exists.
    """
    styles_element = doc.styles.element
    matches = xpath(styles_element, ".//w:style[@w:styleId=$sid]", sid=style_id)
    # Explicit raise (not ``assert``) so the check still runs under
    # ``python -O``, where assert statements are compiled away.
    if not matches:
        raise AssertionError(f"style {style_id!r} not defined in styles.xml")
57
+
58
+
59
def count_controls(
    doc: Document,
    control_type: ControlType | None = None,
) -> int:
    """Return how many recognised content controls the document body holds.

    Every ``w:sdt`` under ``doc.element.body`` is classified with
    ``_classify_sdt``; elements it cannot classify (``None``) never count.

    Args:
        doc: python-docx Document to inspect.
        control_type: Restrict the tally to this control type
            (``"text"``, ``"dropdown"``, ``"combobox"``, ``"date"``,
            ``"checkbox"``). ``None`` (default) tallies every recognised SDT.

    Returns:
        The number of matching content controls.
    """
    root: Any = doc.element.body
    # Lazily classify each real element the XPath query yields.
    kinds = (
        _classify_sdt(node)
        for node in xpath(root, ".//w:sdt")
        if isinstance(node, etree._Element)
    )
    return sum(
        1
        for kind in kinds
        if kind is not None and (control_type is None or kind == control_type)
    )
85
+
86
+
87
def assert_protected(doc: Document, mode: str | None = None) -> None:
    """Assert ``w:documentProtection`` is enforced in ``settings.xml``.

    The comparison is literal: ``w:enforcement`` must be exactly ``"1"`` and,
    when ``mode`` is given, ``w:edit`` must equal it exactly.

    Args:
        doc: python-docx Document to inspect.
        mode: If given, also assert ``w:edit`` matches this value (one of
            ``"forms"``, ``"readOnly"``, ``"comments"``, ``"trackedChanges"``).
            ``None`` (default) only checks presence + enforcement.

    Raises:
        AssertionError: If protection is absent, enforcement is not ``"1"``,
            or ``mode`` was supplied and does not match.
    """
    # Each check raises explicitly instead of using ``assert`` so that the
    # helper keeps working under ``python -O`` (which strips assert statements).
    settings = doc.settings.element
    element = settings.find(qn("w:documentProtection"))
    if element is None:
        raise AssertionError("w:documentProtection is not present in settings.xml")
    enforcement = element.get(qn("w:enforcement"))
    if enforcement != "1":
        raise AssertionError(f"w:enforcement is {enforcement!r}, expected '1'")
    if mode is not None:
        actual = element.get(qn("w:edit"))
        if actual != mode:
            raise AssertionError(f"w:edit is {actual!r}, expected {mode!r}")
108
+
109
+
110
def assert_field_dirty(doc: Document) -> None:
    """Assert ``w:updateFields="true"`` is set in ``settings.xml``.

    The comparison is literal: only the exact string ``"true"`` in ``w:val``
    passes; any other value, or a missing attribute, fails.

    Args:
        doc: python-docx Document to inspect.

    Raises:
        AssertionError: If the element is absent or its ``w:val`` is not
            ``"true"``.
    """
    # Explicit raises (not ``assert``) so the checks survive ``python -O``.
    settings = doc.settings.element
    element = settings.find(qn("w:updateFields"))
    if element is None:
        raise AssertionError("w:updateFields is not present in settings.xml")
    value = element.get(qn("w:val"))
    if value != "true":
        raise AssertionError(f"w:updateFields/@w:val is {value!r}, expected 'true'")
125
+
126
+
127
+ __all__ = [
128
+ "assert_field_dirty",
129
+ "assert_ids_unique",
130
+ "assert_protected",
131
+ "assert_style_defined",
132
+ "count_controls",
133
+ ]
@@ -0,0 +1,35 @@
1
+ """Content controls / fillable forms (Phase 4)."""
2
+
3
+ from docx_plus.controls.builder import (
4
+ DropdownItem,
5
+ FormBuilder,
6
+ InvalidDropdownItemError,
7
+ MissingNamespaceError,
8
+ )
9
+ from docx_plus.controls.read import (
10
+ ControlNotFoundError,
11
+ ControlType,
12
+ ControlTypeError,
13
+ ControlValue,
14
+ DuplicateTagError,
15
+ ValueNotInListError,
16
+ clear_control,
17
+ read_controls,
18
+ set_control_value,
19
+ )
20
+
21
+ __all__ = [
22
+ "ControlNotFoundError",
23
+ "ControlType",
24
+ "ControlTypeError",
25
+ "ControlValue",
26
+ "DropdownItem",
27
+ "DuplicateTagError",
28
+ "FormBuilder",
29
+ "InvalidDropdownItemError",
30
+ "MissingNamespaceError",
31
+ "ValueNotInListError",
32
+ "clear_control",
33
+ "read_controls",
34
+ "set_control_value",
35
+ ]
@@ -0,0 +1,404 @@
1
+ """Build Word content controls (SDTs) — text, dropdown, date, checkbox.
2
+
3
+ python-docx stops at the paragraph/run layer; content controls are ``w:sdt``
4
+ elements that have to be synthesised at the lxml level. :class:`FormBuilder`
5
+ wraps a python-docx :class:`~docx.document.Document` and provides ``add_*``
6
+ methods that emit valid ``w:sdt`` blocks and append them inline to a
7
+ paragraph.
8
+
9
+ The builder handles the three failure modes the docx-forms skill prototype
10
+ identified:
11
+
12
+ 1. ``w:id`` collisions — every id flows through :class:`IdRegistry`.
13
+ 2. The latent ``PlaceholderText`` style — materialised on construction so the
14
+ grey placeholder text actually renders.
15
+ 3. ``w14`` namespace declaration on the document root — required by
16
+ ``w14:checkbox``; verified at construction time.
17
+
18
+ This module imports only from ``docx_plus.core`` (SPEC §9.1). The
19
+ ``PlaceholderText`` style definition is duplicated here intentionally rather
20
+ than reused from :mod:`docx_plus.styles.modify`.
21
+ """
22
+
23
+ from __future__ import annotations
24
+
25
+ import os
26
+ from typing import TYPE_CHECKING, Any
27
+
28
+ from docx import Document
29
+ from lxml import etree
30
+
31
+ from docx_plus.core import DocxPlusError
32
+ from docx_plus.core.ids import IdRegistry
33
+ from docx_plus.core.ns import W14
34
+ from docx_plus.core.oxml import el, sub, xpath
35
+
36
+ if TYPE_CHECKING:
37
+ from docx.document import Document as DocxDocument
38
+ from docx.text.paragraph import Paragraph
39
+
40
# One dropdown entry: either a plain string (used as both the display text and
# the stored value) or a ``(display, value)`` pair of strings.
DropdownItem = str | tuple[str, str]
41
+
42
+
43
+ # --------------------------------------------------------------------------
44
+ # Errors.
45
+ # --------------------------------------------------------------------------
46
+
47
+
48
class MissingNamespaceError(DocxPlusError):
    """Raised when a required namespace is not declared on the document root.

    In this module it is raised by ``_verify_w14_declared`` when no entry in
    the root element's ``nsmap`` carries the ``w14`` namespace URI.
    """
50
+
51
+
52
class InvalidDropdownItemError(DocxPlusError, TypeError):
    """Raised when a dropdown ``items`` entry is not a ``str`` or ``(str, str)``.

    Subclasses ``TypeError`` so existing ``except TypeError:`` clauses still
    catch it; also subclasses :class:`DocxPlusError` per SPEC §9.7, so a
    handler for the package-wide base error catches it as well.
    """
58
+
59
+
60
+ # --------------------------------------------------------------------------
61
+ # Module constants — match Word's defaults so the rendered controls look
62
+ # right before the user touches them in Word.
63
+ # --------------------------------------------------------------------------
64
+
65
# ``w:styleId`` and ``w:name`` of the character style applied to placeholder
# runs; the style is created on demand by ``_ensure_placeholder_style``.
_PLACEHOLDER_STYLE_ID = "PlaceholderText"
_PLACEHOLDER_STYLE_NAME = "Placeholder Text"

# Match Word's default checkbox glyphs (MS Gothic, U+2612 / U+2610).
# The *_GLYPH constants are the literal characters written into the visible
# run; the *_HEX constants are the same code points as hex strings, written
# into ``w14:checkedState`` / ``w14:uncheckedState``.
_CHECKBOX_CHECKED_GLYPH = "☒"
_CHECKBOX_UNCHECKED_GLYPH = "☐"
_CHECKBOX_CHECKED_HEX = "2612"
_CHECKBOX_UNCHECKED_HEX = "2610"
# Font named on both the checkbox states and the glyph run's ``w:rFonts``.
_CHECKBOX_FONT = "MS Gothic"
74
+
75
+
76
+ # --------------------------------------------------------------------------
77
+ # FormBuilder.
78
+ # --------------------------------------------------------------------------
79
+
80
+
81
class FormBuilder:
    """Wrap a python-docx Document and add fillable content controls.

    ``self.doc`` is the underlying :class:`~docx.document.Document` — use it
    for ordinary document construction (headings, paragraphs, tables). Use the
    ``add_*`` methods to drop content controls into paragraphs you have made.

    Each ``add_*`` method appends the SDT *inline* at the end of the paragraph
    you pass, so put the field's label in the paragraph text first.
    """

    doc: DocxDocument

    def __init__(
        self,
        document_or_path: DocxDocument | str | os.PathLike[str] | None = None,
        *,
        id_registry: IdRegistry | None = None,
    ) -> None:
        """Open or wrap a document and prepare the builder state.

        Args:
            document_or_path: An open :class:`~docx.document.Document`, a path
                to a ``.docx`` file to open, or ``None`` to start a blank
                document.
            id_registry: An existing :class:`IdRegistry` to share with other
                builders. ``None`` (default) creates a fresh registry seeded
                from the document's existing SDT ids.

        Raises:
            MissingNamespaceError: If the document root does not declare the
                ``w14`` namespace (required by ``w14:checkbox``). Fresh
                python-docx documents always declare it.
        """
        if document_or_path is None:
            self.doc = Document()
        elif isinstance(document_or_path, (str, os.PathLike)):
            self.doc = Document(os.fspath(document_or_path))
        else:
            # Anything else is taken to be an already-open Document.
            self.doc = document_or_path

        self._id_registry = id_registry if id_registry is not None else IdRegistry(self.doc)
        _verify_w14_declared(self.doc)
        _ensure_placeholder_style(self.doc)

    # -- public control builders ----------------------------------------------

    def add_text_control(
        self,
        paragraph: Paragraph,
        *,
        tag: str,
        alias: str | None = None,
        placeholder: str = "Click to enter text",
        multiline: bool = False,
    ) -> etree._Element:
        """Append an inline plain-text content control to ``paragraph``.

        Args:
            paragraph: The python-docx paragraph to append into.
            tag: Stable machine-readable identifier for the control.
            alias: Optional human-friendly label shown in Word's UI.
            placeholder: The grey "click here" prompt rendered inside the
                empty control.
            multiline: If ``True``, allow hard line breaks inside the control
                (use for addresses, comment boxes).

        Returns:
            The created ``w:sdt`` element.
        """
        sdt, sdt_pr, sdt_content = self._new_sdt(tag=tag, alias=alias)
        sub(sdt_pr, "w:showingPlcHdr")
        text_attrs: dict[str, str] = {"w:multiLine": "1"} if multiline else {}
        sub(sdt_pr, "w:text", **text_attrs)

        return self._attach(paragraph, sdt, sdt_content, _placeholder_run(placeholder))

    def add_dropdown(
        self,
        paragraph: Paragraph,
        *,
        tag: str,
        items: list[DropdownItem],
        alias: str | None = None,
        placeholder: str = "Choose an item",
        editable: bool = False,
    ) -> etree._Element:
        """Append a dropdown (or combobox) content control to ``paragraph``.

        Args:
            paragraph: The python-docx paragraph to append into.
            tag: Stable machine-readable identifier for the control.
            items: A list of either plain strings, or ``(display, value)``
                tuples when the stored value should differ from the shown
                label.
            alias: Optional human-friendly label shown in Word's UI.
            placeholder: The "Choose an item" prompt rendered inside the
                empty control. A placeholder list-item with empty value is
                also added as the first dropdown entry.
            editable: If ``True``, produce a ``w:comboBox`` (user may type a
                value not in the list) instead of a ``w:dropDownList``.

        Returns:
            The created ``w:sdt`` element.

        Raises:
            InvalidDropdownItemError: If ``items`` contains anything that is
                not a string or a 2-tuple of strings. This is a ``TypeError``
                subclass, so ``except TypeError:`` also catches it.
        """
        # Validate every item before touching the id registry, so a bad entry
        # neither consumes an id nor leaves a half-built SDT behind.
        entries = [_normalise_dropdown_item(raw_item) for raw_item in items]

        sdt, sdt_pr, sdt_content = self._new_sdt(tag=tag, alias=alias)
        sub(sdt_pr, "w:showingPlcHdr")
        list_tag = "w:comboBox" if editable else "w:dropDownList"
        list_el = sub(sdt_pr, list_tag)

        # The first entry mirrors the placeholder prompt and stores an empty value.
        sub(list_el, "w:listItem", **{"w:displayText": placeholder, "w:value": ""})
        for display, value in entries:
            sub(list_el, "w:listItem", **{"w:displayText": display, "w:value": value})

        return self._attach(paragraph, sdt, sdt_content, _placeholder_run(placeholder))

    def add_date_picker(
        self,
        paragraph: Paragraph,
        *,
        tag: str,
        alias: str | None = None,
        placeholder: str = "Click to select a date",
        date_format: str = "M/d/yyyy",
        lcid: str = "en-US",
    ) -> etree._Element:
        """Append a date-picker content control to ``paragraph``.

        Args:
            paragraph: The python-docx paragraph to append into.
            tag: Stable machine-readable identifier for the control.
            alias: Optional human-friendly label shown in Word's UI.
            placeholder: The grey "click here" prompt rendered inside the
                empty control.
            date_format: Word's date-format string (e.g. ``"M/d/yyyy"``,
                ``"dddd, MMMM d, yyyy"``).
            lcid: Locale identifier (BCP-47 form, e.g. ``"en-US"``).

        Returns:
            The created ``w:sdt`` element.
        """
        sdt, sdt_pr, sdt_content = self._new_sdt(tag=tag, alias=alias)
        sub(sdt_pr, "w:showingPlcHdr")
        date_el = sub(sdt_pr, "w:date")
        sub(date_el, "w:dateFormat", **{"w:val": date_format})
        sub(date_el, "w:lid", **{"w:val": lcid})
        sub(date_el, "w:storeMappedDataAs", **{"w:val": "dateTime"})
        sub(date_el, "w:calendar", **{"w:val": "gregorian"})

        return self._attach(paragraph, sdt, sdt_content, _placeholder_run(placeholder))

    def add_checkbox(
        self,
        paragraph: Paragraph,
        *,
        tag: str,
        alias: str | None = None,
        checked: bool = False,
    ) -> etree._Element:
        """Append a Word 2010+ ``w14:checkbox`` content control to ``paragraph``.

        The visible glyph and the ``w14:checked`` flag are kept in sync, so
        the box renders correctly even before Word ever opens the file.

        Args:
            paragraph: The python-docx paragraph to append into.
            tag: Stable machine-readable identifier for the control.
            alias: Optional human-friendly label shown in Word's UI.
            checked: Initial checked state.

        Returns:
            The created ``w:sdt`` element.
        """
        sdt, sdt_pr, sdt_content = self._new_sdt(tag=tag, alias=alias)
        checkbox = sub(sdt_pr, "w14:checkbox")
        sub(checkbox, "w14:checked", **{"w14:val": "1" if checked else "0"})
        sub(
            checkbox,
            "w14:checkedState",
            **{"w14:val": _CHECKBOX_CHECKED_HEX, "w14:font": _CHECKBOX_FONT},
        )
        sub(
            checkbox,
            "w14:uncheckedState",
            **{"w14:val": _CHECKBOX_UNCHECKED_HEX, "w14:font": _CHECKBOX_FONT},
        )

        glyph = _CHECKBOX_CHECKED_GLYPH if checked else _CHECKBOX_UNCHECKED_GLYPH
        return self._attach(paragraph, sdt, sdt_content, _checkbox_glyph_run(glyph))

    def save(self, path: str | os.PathLike[str]) -> str:
        """Save the wrapped document to ``path`` and return the path as a string."""
        target = os.fspath(path)
        self.doc.save(target)
        return target

    # -- internals ------------------------------------------------------------

    def _new_sdt(
        self,
        *,
        tag: str,
        alias: str | None,
    ) -> tuple[etree._Element, etree._Element, etree._Element]:
        """Build the shared ``w:sdt``/``w:sdtPr``/``w:sdtContent`` scaffold.

        sdtPr child order matches the docx-forms skill prototype:
        ``[alias?], tag, id, [showingPlcHdr], <type-marker>``. Caller appends
        showingPlcHdr and the type marker (and finally the populated
        sdtContent) in that order.

        Returns:
            ``(sdt, sdt_pr, sdt_content)``. ``sdt_pr`` is already a child of
            ``sdt``; ``sdt_content`` is still detached.
        """
        sdt = el("w:sdt")
        sdt_pr = sub(sdt, "w:sdtPr")

        if alias is not None:
            sub(sdt_pr, "w:alias", **{"w:val": alias})
        sub(sdt_pr, "w:tag", **{"w:val": tag})
        sub(sdt_pr, "w:id", **{"w:val": str(self._id_registry.next())})

        sdt_content = el("w:sdtContent")
        return sdt, sdt_pr, sdt_content

    @staticmethod
    def _attach(
        paragraph: Paragraph,
        sdt: etree._Element,
        sdt_content: etree._Element,
        run: etree._Element,
    ) -> etree._Element:
        """Put ``run`` in ``sdt_content``, close the SDT, and append it to ``paragraph``."""
        sdt_content.append(run)
        sdt.append(sdt_content)
        paragraph._p.append(sdt)
        return sdt
321
+
322
+
323
+ # --------------------------------------------------------------------------
324
+ # Module-level helpers (private).
325
+ # --------------------------------------------------------------------------
326
+
327
+
328
def _placeholder_run(text: str) -> etree._Element:
    """Build a ``w:r`` carrying the ``PlaceholderText`` rStyle and ``text``.

    Args:
        text: The prompt text placed in the run's ``w:t`` element.

    Returns:
        The detached ``w:r`` element; the caller appends it to ``w:sdtContent``.
    """
    run = el("w:r")
    rpr = sub(run, "w:rPr")
    sub(rpr, "w:rStyle", **{"w:val": _PLACEHOLDER_STYLE_ID})
    text_el = sub(run, "w:t")
    text_el.text = text
    # Without xml:space="preserve", consumers may drop leading/trailing
    # whitespace in w:t; only mark it when the text actually needs it so
    # ordinary prompts serialise exactly as before.
    if text != text.strip():
        text_el.set("{http://www.w3.org/XML/1998/namespace}space", "preserve")
    return run
336
+
337
+
338
def _checkbox_glyph_run(glyph: str) -> etree._Element:
    """Return a ``w:r`` whose text is ``glyph``, set in the checkbox font.

    The run's ``w:rFonts`` names the same font for the ``w:ascii``,
    ``w:hAnsi`` and ``w:eastAsia`` slots.
    """
    font_attrs = dict.fromkeys(("w:ascii", "w:hAnsi", "w:eastAsia"), _CHECKBOX_FONT)
    glyph_run = el("w:r")
    sub(sub(glyph_run, "w:rPr"), "w:rFonts", **font_attrs)
    sub(glyph_run, "w:t").text = glyph
    return glyph_run
354
+
355
+
356
+ def _normalise_dropdown_item(raw: DropdownItem) -> tuple[str, str]:
357
+ """Convert ``raw`` into a ``(display, value)`` pair."""
358
+ if isinstance(raw, str):
359
+ return raw, raw
360
+ if isinstance(raw, tuple) and len(raw) == 2 and all(isinstance(p, str) for p in raw):
361
+ return raw[0], raw[1]
362
+ raise InvalidDropdownItemError(
363
+ f"dropdown item must be str or (str, str) tuple; got {type(raw).__name__}: {raw!r}",
364
+ )
365
+
366
+
367
def _verify_w14_declared(doc: DocxDocument) -> None:
    """Ensure the document root's ``nsmap`` contains the ``w14`` namespace URI.

    Raises:
        MissingNamespaceError: If no prefix on the root maps to that URI.
    """
    declared_uris = doc.element.nsmap.values()
    if W14 in declared_uris:
        return
    raise MissingNamespaceError(
        "document root does not declare the w14 namespace; "
        "w14:checkbox controls cannot be authored. Expected nsmap entry "
        f"with URI {W14!r}.",
    )
377
+
378
+
379
def _ensure_placeholder_style(doc: DocxDocument) -> None:
    """Add the ``PlaceholderText`` character style unless it already exists.

    Kept separate from :func:`docx_plus.styles.modify.ensure_style` per
    SPEC §9.1 (controls/ may not import styles/). The definition mirrors
    Word's default.
    """
    root: Any = doc.styles.element
    existing = xpath(root, "./w:style[@w:styleId=$sid]", sid=_PLACEHOLDER_STYLE_ID)
    if any(isinstance(node, etree._Element) for node in existing):
        return

    definition = el(
        "w:style",
        **{"w:type": "character", "w:styleId": _PLACEHOLDER_STYLE_ID},
    )
    # Children are appended in the order listed; an empty mapping means the
    # element carries no attributes.
    children: tuple[tuple[str, dict[str, str]], ...] = (
        ("w:name", {"w:val": _PLACEHOLDER_STYLE_NAME}),
        ("w:basedOn", {"w:val": "DefaultParagraphFont"}),
        ("w:uiPriority", {"w:val": "99"}),
        ("w:semiHidden", {}),
        ("w:unhideWhenUsed", {}),
    )
    for child_tag, child_attrs in children:
        sub(definition, child_tag, **child_attrs)
    sub(sub(definition, "w:rPr"), "w:color", **{"w:val": "808080"})
    root.append(definition)
402
+
403
+
404
+ __all__ = ["DropdownItem", "FormBuilder", "InvalidDropdownItemError", "MissingNamespaceError"]