docxrender 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,369 @@
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path
4
+ from typing import Any, cast
5
+
6
+ from docx.document import Document as DocxDocument
7
+ from docx.enum.table import WD_TABLE_ALIGNMENT
8
+ from docx.enum.text import WD_ALIGN_PARAGRAPH, WD_BREAK
9
+ from docx.oxml import OxmlElement # pyright: ignore[reportUnknownVariableType]
10
+ from docx.oxml.ns import qn
11
+ from docx.shared import Cm, Inches, Pt
12
+ from docx.table import Table
13
+ from docx.text.paragraph import Paragraph
14
+ from docx.text.run import Run
15
+
16
+ from docxrender.contracts import DocxStyle
17
+ from docxrender.markdown import (
18
+ MarkdownBlock,
19
+ MarkdownHeading,
20
+ MarkdownImage,
21
+ MarkdownOrderedList,
22
+ MarkdownPageBreak,
23
+ MarkdownParagraph,
24
+ MarkdownSpacer,
25
+ MarkdownTable,
26
+ )
27
+
28
+
29
def insert_markdown_blocks(
    document: DocxDocument,
    markdown_blocks: tuple[MarkdownBlock, ...],
    *,
    anchor_token: str,
    dir_base: Path,
    style: DocxStyle,
) -> None:
    """Render parsed Markdown blocks into ``document``.

    If a paragraph whose stripped text equals ``anchor_token`` is found, every
    block is inserted directly before it and the anchor paragraph is removed
    afterwards.  Without such a paragraph the blocks are appended to the end
    of the document instead.

    Args:
        document: Target document.
        markdown_blocks: Blocks to render, in order.
        anchor_token: Placeholder text marking the insertion point.
        dir_base: Directory against which image paths are resolved.
        style: Fonts, sizes and spacing applied to the rendered content.
    """
    anchor = _find_anchor_paragraph(document, anchor_token)
    if anchor is not None:
        for block in markdown_blocks:
            _insert_block_before_anchor(
                document,
                anchor,
                block,
                dir_base=dir_base,
                style=style,
            )
        # The placeholder is no longer needed once the content is in place.
        _remove_paragraph(anchor)
        return

    for block in markdown_blocks:
        _append_block(document, block, dir_base=dir_base, style=style)
56
+
57
+
58
+ def _find_anchor_paragraph(
59
+ document: DocxDocument,
60
+ anchor_token: str,
61
+ ) -> Paragraph | None:
62
+ for paragraph in document.paragraphs:
63
+ if paragraph.text.strip() == anchor_token:
64
+ return paragraph
65
+ return None
66
+
67
+
68
+ def _append_block(
69
+ document: DocxDocument,
70
+ block: MarkdownBlock,
71
+ *,
72
+ dir_base: Path,
73
+ style: DocxStyle,
74
+ ) -> None:
75
+ match block:
76
+ case MarkdownHeading(level=level, text=text):
77
+ paragraph = document.add_heading(level=level)
78
+ _write_heading(paragraph, text, level=level, style=style)
79
+ case MarkdownParagraph(text=text):
80
+ paragraph = document.add_paragraph()
81
+ _apply_paragraph_style(paragraph, text=text, style=style)
82
+ _write_text_with_line_breaks(
83
+ paragraph,
84
+ text,
85
+ style=style,
86
+ size_pt=_paragraph_text_size(text, style=style),
87
+ )
88
+ case MarkdownOrderedList(items=items):
89
+ for item in items:
90
+ paragraph = document.add_paragraph(style="List Number")
91
+ _apply_ordered_list_style(paragraph)
92
+ _write_text_with_line_breaks(paragraph, item, style=style)
93
+ case MarkdownTable(rows=rows):
94
+ _append_table(document, rows, style=style)
95
+ case MarkdownImage(path=path, caption=caption, width_pct=width_pct):
96
+ _append_image(
97
+ document,
98
+ dir_base / path,
99
+ caption=caption,
100
+ width_pct=width_pct,
101
+ style=style,
102
+ )
103
+ case MarkdownPageBreak():
104
+ document.add_page_break()
105
+ case MarkdownSpacer():
106
+ document.add_paragraph()
107
+
108
+
109
+ def _insert_block_before_anchor(
110
+ document: DocxDocument,
111
+ anchor: Paragraph,
112
+ block: MarkdownBlock,
113
+ *,
114
+ dir_base: Path,
115
+ style: DocxStyle,
116
+ ) -> None:
117
+ match block:
118
+ case MarkdownHeading(level=level, text=text):
119
+ paragraph = anchor.insert_paragraph_before(style=f"Heading {level}")
120
+ _write_heading(paragraph, text, level=level, style=style)
121
+ case MarkdownParagraph(text=text):
122
+ paragraph = anchor.insert_paragraph_before()
123
+ _apply_paragraph_style(paragraph, text=text, style=style)
124
+ _write_text_with_line_breaks(
125
+ paragraph,
126
+ text,
127
+ style=style,
128
+ size_pt=_paragraph_text_size(text, style=style),
129
+ )
130
+ case MarkdownOrderedList(items=items):
131
+ for item in items:
132
+ paragraph = anchor.insert_paragraph_before(style="List Number")
133
+ _apply_ordered_list_style(paragraph)
134
+ _write_text_with_line_breaks(paragraph, item, style=style)
135
+ case MarkdownTable(rows=rows):
136
+ table = _append_table(document, rows, style=style)
137
+ _insert_table_before_anchor(anchor, table)
138
+ case MarkdownImage(path=path, caption=caption, width_pct=width_pct):
139
+ paragraph_image = anchor.insert_paragraph_before()
140
+ _add_picture(paragraph_image, dir_base / path, width_pct=width_pct)
141
+ if caption:
142
+ paragraph_caption = anchor.insert_paragraph_before()
143
+ paragraph_caption.alignment = WD_ALIGN_PARAGRAPH.CENTER
144
+ _write_text_with_line_breaks(
145
+ paragraph_caption,
146
+ caption,
147
+ style=style,
148
+ size_pt=style.sizes.pt_caption,
149
+ )
150
+ case MarkdownPageBreak():
151
+ anchor.insert_paragraph_before().add_run().add_break(WD_BREAK.PAGE)
152
+ case MarkdownSpacer():
153
+ anchor.insert_paragraph_before()
154
+
155
+
156
def _write_heading(
    paragraph: Paragraph,
    text: str,
    *,
    level: int,
    style: DocxStyle,
) -> None:
    """Write ``text`` into ``paragraph`` as a bold, left-aligned heading.

    Indentation is reset (zero left indent, no first-line indent).  The font
    size comes from ``style.sizes.pt_heading_by_level`` and falls back to the
    body size when the level has no entry.
    """
    layout = paragraph.paragraph_format
    layout.left_indent = Pt(0)
    layout.first_line_indent = None
    paragraph.alignment = WD_ALIGN_PARAGRAPH.LEFT

    heading_run = paragraph.add_run(text)
    heading_size = style.sizes.pt_heading_by_level.get(level, style.sizes.pt_body)
    _apply_run_style(
        heading_run,
        style=style,
        size_pt=heading_size,
        east_asia_font=style.fonts.font_name_heading_east_asia,
    )
    heading_run.bold = True
174
+
175
+
176
def _write_text_with_line_breaks(
    paragraph: Paragraph,
    text: str,
    *,
    style: DocxStyle,
    size_pt: float | None = None,
) -> None:
    """Write ``text`` into ``paragraph``, one styled run per line.

    The text is split on ``"\\n"``; consecutive lines are separated by a line
    break carried in its own run.  When ``size_pt`` is ``None`` the body size
    from ``style`` is used.
    """
    if size_pt is None:
        size_effective = style.sizes.pt_body
    else:
        size_effective = size_pt

    is_first_line = True
    for line in text.split("\n"):
        if not is_first_line:
            paragraph.add_run().add_break(WD_BREAK.LINE)
        is_first_line = False
        line_run = paragraph.add_run(line)
        _apply_run_style(line_run, style=style, size_pt=size_effective)
189
+
190
+
191
def _apply_run_style(
    run: Run,
    *,
    style: DocxStyle,
    size_pt: float,
    east_asia_font: str | None = None,
) -> None:
    """Apply the Latin font, font size and East Asian font to ``run``.

    The East Asian font is written straight to the ``w:eastAsia`` attribute of
    the run's ``rFonts`` element.  When ``east_asia_font`` is empty or
    ``None`` the body East Asian font from ``style`` is used.
    """
    font = run.font
    font.name = style.fonts.font_name_latin
    font.size = Pt(size_pt)

    fonts_element = cast(Any, run)._element.get_or_add_rPr().get_or_add_rFonts()
    east_asia_name = east_asia_font or style.fonts.font_name_body_east_asia
    fonts_element.set(qn("w:eastAsia"), east_asia_name)
206
+
207
+
208
def _apply_paragraph_style(
    paragraph: Paragraph,
    *,
    text: str,
    style: DocxStyle,
) -> None:
    """Set spacing, indentation and line spacing for a text paragraph.

    Space before/after is always zero.  Note paragraphs (see
    ``_is_note_text``) get no first-line indent, the note line spacing and
    left alignment; all other paragraphs get the configured first-line indent
    and the body line spacing.
    """
    layout = paragraph.paragraph_format
    layout.space_before = Pt(0)
    layout.space_after = Pt(0)

    if _is_note_text(text, style=style):
        layout.first_line_indent = None
        layout.line_spacing = style.paragraph.line_spacing_note
        paragraph.alignment = WD_ALIGN_PARAGRAPH.LEFT
    else:
        layout.first_line_indent = Cm(style.paragraph.first_line_indent_cm)
        layout.line_spacing = style.paragraph.line_spacing_body
225
+
226
+
227
def _apply_ordered_list_style(paragraph: Paragraph) -> None:
    """Give a list paragraph a 0.74 cm hanging indent."""
    layout = paragraph.paragraph_format
    # Left indent and a negative first-line indent of equal magnitude.
    layout.left_indent = Cm(0.74)
    layout.first_line_indent = Cm(-0.74)
230
+
231
+
232
+ def _is_note_text(text: str, *, style: DocxStyle) -> bool:
233
+ return text.strip().startswith(style.paragraph.note_prefixes)
234
+
235
+
236
def _paragraph_text_size(text: str, *, style: DocxStyle) -> float:
    """Return the caption size for note text and the body size otherwise."""
    sizes = style.sizes
    return sizes.pt_caption if _is_note_text(text, style=style) else sizes.pt_body
240
+
241
+
242
def _append_table(
    document: DocxDocument,
    rows: tuple[tuple[str, ...], ...],
    *,
    style: DocxStyle,
) -> Table:
    """Add a left-aligned three-line table to ``document`` and return it.

    The column count is the length of the longest row; cells a shorter row
    does not cover stay empty.  Runs in the first row are made bold.  With no
    rows at all, a table with zero rows and columns is added.
    """
    if not rows:
        return document.add_table(rows=0, cols=0)

    width = max(len(cells) for cells in rows)
    table = document.add_table(rows=len(rows), cols=width)
    table.alignment = WD_TABLE_ALIGNMENT.LEFT
    _apply_three_line_table_borders(table, style=style)

    for row_number, cells in enumerate(rows):
        is_header_row = row_number == 0
        for col_number, cell_text in enumerate(cells):
            # A cell's first paragraph receives the text.
            target = table.cell(row_number, col_number).paragraphs[0]
            target.paragraph_format.first_line_indent = None
            target.paragraph_format.line_spacing = style.table.line_spacing
            _write_text_with_line_breaks(
                target,
                cell_text,
                style=style,
                size_pt=style.sizes.pt_table,
            )
            if is_header_row:
                for header_run in target.runs:
                    header_run.bold = True
    return table
269
+
270
+
271
def _append_image(
    document: DocxDocument,
    path_image: Path,
    *,
    caption: str,
    width_pct: float,
    style: DocxStyle,
) -> None:
    """Append an image paragraph, followed by a centred caption if one is given."""
    picture_holder = document.add_paragraph()
    _add_picture(picture_holder, path_image, width_pct=width_pct)
    if not caption:
        return

    caption_holder = document.add_paragraph()
    caption_holder.alignment = WD_ALIGN_PARAGRAPH.CENTER
    _write_text_with_line_breaks(
        caption_holder,
        caption,
        style=style,
        size_pt=style.sizes.pt_caption,
    )
290
+
291
+
292
def _add_picture(paragraph: Paragraph, path_image: Path, *, width_pct: float) -> None:
    """Add the image to ``paragraph`` in a new run.

    ``width_pct`` is a percentage of a 6-inch reference width; the resulting
    fraction is clamped to the range 0.1 to 1.0.
    """
    fraction = max(0.1, min(width_pct / 100.0, 1.0))
    picture_width = Inches(6.0 * fraction)
    picture_run = paragraph.add_run()
    picture_run.add_picture(str(path_image), width=picture_width)
295
+
296
+
297
def _insert_table_before_anchor(anchor: Paragraph, table: Table) -> None:
    """Place the table's XML element directly before the anchor's element."""
    anchor_element = cast(Any, anchor)._p
    table_element = cast(Any, table)._tbl
    anchor_element.addprevious(table_element)
299
+
300
+
301
def _remove_paragraph(paragraph: Paragraph) -> None:
    """Detach the paragraph's XML element from its parent element."""
    node = cast(Any, paragraph)._element
    parent = node.getparent()
    parent.remove(node)
304
+
305
+
306
def _apply_three_line_table_borders(table: Table, *, style: DocxStyle) -> None:
    """Draw a three-line layout on ``table``: top, header and bottom rules.

    Only the top and bottom edges of cells are written.  All of them are
    first set to ``nil``; the first row then gets a main-size top rule and a
    header-size bottom rule, and, when there is more than one row, the last
    row gets a main-size bottom rule.
    """

    def set_edge(cell: Any, edge_name: str, value: str, size: str) -> None:
        _set_cell_border(
            cell,
            edge_name=edge_name,
            value=value,
            size=size,
            style=style,
        )

    # Clear every horizontal rule before drawing the three wanted ones.
    for row in table.rows:
        for cell in row.cells:
            set_edge(cell, "top", "nil", "0")
            set_edge(cell, "bottom", "nil", "0")

    if not table.rows:
        return

    for cell in table.rows[0].cells:
        set_edge(cell, "top", "single", style.table.border_size_main)
        set_edge(cell, "bottom", "single", style.table.border_size_header)

    if len(table.rows) > 1:
        for cell in table.rows[-1].cells:
            set_edge(cell, "bottom", "single", style.table.border_size_main)
343
+
344
+
345
def _set_cell_border(
    cell: Any,
    *,
    edge_name: str,
    value: str,
    size: str,
    style: DocxStyle,
) -> None:
    """Set one border edge (for example ``top``) on a table cell.

    The ``w:tcBorders`` container and the edge element are created when they
    do not exist yet; the border colour comes from ``style.table``.
    """
    cell_properties = cell._tc.get_or_add_tcPr()  # noqa: SLF001
    container: Any = cell_properties.first_child_found_in("w:tcBorders")
    if container is None:
        container = cast(Any, OxmlElement("w:tcBorders"))
        cell_properties.append(container)

    edge_tag = f"w:{edge_name}"
    edge_element: Any = container.find(qn(edge_tag))
    if edge_element is None:
        edge_element = cast(Any, OxmlElement(edge_tag))
        container.append(edge_element)

    _set_xml_border(
        edge_element,
        value=value,
        size=size,
        color=style.table.border_color,
    )
363
+
364
+
365
def _set_xml_border(edge: Any, *, value: str, size: str, color: str) -> None:
    """Write the ``w:val``, ``w:sz``, ``w:space`` and ``w:color`` attributes.

    ``w:space`` is always set to ``"0"``.
    """
    attributes = (
        ("w:val", value),
        ("w:sz", size),
        ("w:space", "0"),
        ("w:color", color),
    )
    for attribute_name, attribute_value in attributes:
        edge.set(qn(attribute_name), attribute_value)
@@ -0,0 +1,141 @@
1
+ """DOCX field preparation helpers."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+ import tempfile
7
+ import zipfile
8
+ from collections.abc import Callable
9
+ from pathlib import Path
10
+
11
# Package parts that are scanned for fields: the main document plus numbered
# header and footer parts.
DOCX_FIELD_PART_PATTERN = re.compile(r"word/(?:document|header\d+|footer\d+)\.xml$")
# The settings part, which holds the ``w:updateFields`` flag.
DOCX_SETTING_PART_PATTERN = re.compile(r"word/settings\.xml$")
# One ``w:p`` element, matched lazily up to the next closing tag.
DOCX_PARAGRAPH_PATTERN = re.compile(r"<w:p\b.*?</w:p>", re.S)


def _compile_field_char_run_pattern(field_char_type: str) -> re.Pattern[str]:
    """Compile a pattern for one ``w:r`` containing a ``w:fldChar`` of a type.

    The lookahead-guarded filler keeps the match from running past the
    closing tag of the run it started in.
    """
    within_run = r"(?:(?!</w:r>).)*?"
    return re.compile(
        r"<w:r\b[^>]*>"
        + within_run
        + rf"<w:fldChar\b[^>]*\bw:fldCharType=\"{field_char_type}\"[^>]*/>"
        + within_run
        + r"</w:r>",
        re.S,
    )


DOCX_FIELD_BEGIN_RUN_PATTERN = _compile_field_char_run_pattern("begin")
DOCX_FIELD_SEPARATE_RUN_PATTERN = _compile_field_char_run_pattern("separate")
DOCX_FIELD_END_RUN_PATTERN = _compile_field_char_run_pattern("end")
44
+
45
+
46
def write_docx_field_update_markers(file_docx: Path) -> None:
    """Rewrite ``file_docx`` so its fields are flagged for updating.

    The settings part gets ``w:updateFields`` set to true and every field
    part (document, headers, footers) gets ``w:dirty`` on its ``w:fldChar``
    elements.  All other parts are copied unchanged.
    """

    def edit_part(filename: str, data: bytes) -> bytes:
        if DOCX_SETTING_PART_PATTERN.fullmatch(filename):
            transform = _ensure_update_fields_setting
        elif DOCX_FIELD_PART_PATTERN.fullmatch(filename):
            transform = _ensure_field_dirty_attrs
        else:
            return data
        return transform(data.decode("utf-8")).encode("utf-8")

    _rewrite_docx_zip(file_docx, edit_part)
55
+
56
+
57
def write_frozen_docx_fields(file_docx: Path) -> None:
    """Rewrite ``file_docx`` with its update markers and field markup removed.

    The settings part loses its ``w:updateFields`` element.  Field parts
    (document, headers, footers) lose their ``w:dirty`` attributes, the runs
    holding field characters and the field instruction text.  All other parts
    are copied unchanged.
    """

    def edit_part(filename: str, data: bytes) -> bytes:
        if DOCX_SETTING_PART_PATTERN.fullmatch(filename):
            text_settings = data.decode("utf-8")
            return _strip_docx_update_fields_setting(text_settings).encode("utf-8")
        if not DOCX_FIELD_PART_PATTERN.fullmatch(filename):
            return data
        text_clean = _strip_docx_field_dirty_attrs(data.decode("utf-8"))
        return _freeze_docx_field_result_runs(text_clean).encode("utf-8")

    _rewrite_docx_zip(file_docx, edit_part)
71
+
72
+
73
+ def _rewrite_docx_zip(
74
+ file_docx: Path,
75
+ edit_part: Callable[[str, bytes], bytes],
76
+ ) -> None:
77
+ with tempfile.NamedTemporaryFile(
78
+ suffix=".docx",
79
+ delete=False,
80
+ dir=file_docx.parent,
81
+ ) as file_tmp:
82
+ path_tmp = Path(file_tmp.name)
83
+ try:
84
+ with (
85
+ zipfile.ZipFile(file_docx, "r") as zip_in,
86
+ zipfile.ZipFile(path_tmp, "w", compression=zipfile.ZIP_DEFLATED) as zip_out,
87
+ ):
88
+ for item in zip_in.infolist():
89
+ zip_out.writestr(item, edit_part(item.filename, zip_in.read(item)))
90
+ path_tmp.replace(file_docx)
91
+ finally:
92
+ if path_tmp.exists():
93
+ path_tmp.unlink()
94
+
95
+
96
+ def _ensure_update_fields_setting(text_settings_xml: str) -> str:
97
+ if "<w:updateFields" in text_settings_xml:
98
+ return re.sub(
99
+ r"<w:updateFields\b[^>]*/>",
100
+ '<w:updateFields w:val="true"/>',
101
+ text_settings_xml,
102
+ )
103
+ return text_settings_xml.replace(
104
+ "</w:settings>",
105
+ '<w:updateFields w:val="true"/></w:settings>',
106
+ )
107
+
108
+
109
+ def _strip_docx_update_fields_setting(text_settings_xml: str) -> str:
110
+ return re.sub(r"<w:updateFields\b[^>]*/>", "", text_settings_xml)
111
+
112
+
113
+ def _ensure_field_dirty_attrs(text_part_xml: str) -> str:
114
+ return re.sub(
115
+ r"<w:fldChar\b(?![^>]*/?w:dirty=)",
116
+ '<w:fldChar w:dirty="true"',
117
+ text_part_xml,
118
+ )
119
+
120
+
121
+ def _strip_docx_field_dirty_attrs(text_part_xml: str) -> str:
122
+ return re.sub(r"\s+w:dirty=\"[^\"]*\"", "", text_part_xml)
123
+
124
+
125
def _freeze_docx_field_result_runs(text_part_xml: str) -> str:
    """Apply ``_freeze_field_paragraph_match`` to every paragraph in the part."""
    freeze = _freeze_field_paragraph_match
    text_frozen = DOCX_PARAGRAPH_PATTERN.sub(freeze, text_part_xml)
    return text_frozen
127
+
128
+
129
def _freeze_field_paragraph_match(match_paragraph: re.Match[str]) -> str:
    """Return the matched paragraph with its field markup removed.

    Paragraphs without any ``w:fldChar`` are returned unchanged.  Otherwise
    the runs holding begin, separate and end field characters are dropped
    together with all ``w:instrText`` elements; the remaining runs are kept.
    """
    text_paragraph = match_paragraph.group(0)
    if "<w:fldChar" not in text_paragraph:
        return text_paragraph
    for pattern_run in (
        DOCX_FIELD_BEGIN_RUN_PATTERN,
        DOCX_FIELD_SEPARATE_RUN_PATTERN,
        DOCX_FIELD_END_RUN_PATTERN,
    ):
        text_paragraph = pattern_run.sub("", text_paragraph)
    # re.S lets an instruction that contains a newline be removed as well,
    # in line with the other field patterns in this module.
    return re.sub(
        r"<w:instrText\b[^>]*>.*?</w:instrText>",
        "",
        text_paragraph,
        flags=re.S,
    )
@@ -0,0 +1,113 @@
1
+ """Optional DOCX field refresh helpers."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+ import time
7
+ import zipfile
8
+ from html import unescape
9
+ from pathlib import Path
10
+
11
+ from docxrender.contracts import DocxFieldRefreshOptions
12
+ from docxrender.docx.fields import DOCX_FIELD_PART_PATTERN, write_frozen_docx_fields
13
+
14
# Filler that may span anything except the start of an "end" field character.
_BEFORE_FIELD_END = r"(?:(?!<w:fldChar\b[^>]*\bw:fldCharType=\"end\").)*?"

# A complete field whose instruction text contains the word TOC; the markup
# between the "separate" and "end" field characters is captured as ``result``.
TOC_FIELD_PATTERN = re.compile(
    "".join(
        (
            r"<w:fldChar\b[^>]*\bw:fldCharType=\"begin\"[^>]*/>",
            _BEFORE_FIELD_END,
            r"<w:instrText\b[^>]*>[^<]*\bTOC\b[^<]*</w:instrText>",
            _BEFORE_FIELD_END,
            r"<w:fldChar\b[^>]*\bw:fldCharType=\"separate\"[^>]*/>",
            r"(?P<result>.*?)",
            r"<w:fldChar\b[^>]*\bw:fldCharType=\"end\"[^>]*/>",
        )
    ),
    re.S,
)
# The content of one ``w:t`` element, captured as ``text``.
TEXT_RUN_PATTERN = re.compile(r"<w:t\b[^>]*>(?P<text>.*?)</w:t>", re.S)
27
+
28
+
29
def refresh_docx_fields(
    file_docx: Path,
    *,
    options: DocxFieldRefreshOptions | None,
) -> None:
    """Refresh the fields of ``file_docx`` through UNO; no-op without options.

    The refreshed document goes to ``options.file_out_docx_refreshed`` when
    that is set and otherwise to ``file_docx`` itself.  After the refresh the
    function waits for the output file to become stable, optionally requires
    a materialized TOC result and optionally freezes the fields.
    """
    if options is None:
        return

    target = options.file_out_docx_refreshed or file_docx

    # Imported here rather than at module level, so the UNO module is only
    # loaded when a refresh is actually requested.
    from docxrender.pdf_uno import refresh_docx_with_uno

    refresh_docx_with_uno(
        file_in_docx=file_docx,
        file_out_docx=target,
        options=options,
    )
    wait_for_refreshed_docx(target, options=options)

    if options.should_require_toc:
        validate_docx_toc_result(target)
    if options.should_freeze_fields:
        write_frozen_docx_fields(target)
50
+
51
+
52
def wait_for_refreshed_docx(
    file_docx: Path,
    *,
    options: DocxFieldRefreshOptions,
) -> None:
    """Block until ``file_docx`` exists, is non-empty and has stopped changing.

    The file counts as stable once its size and modification time have been
    identical for ``options.stable_checks`` consecutive observations (at
    least one), polled every ``options.poll_interval_seconds``.

    Raises:
        TimeoutError: If the file is not stable within
            ``options.timeout_seconds``.
    """
    deadline = time.monotonic() + options.timeout_seconds
    stable_checks_required = max(options.stable_checks, 1)
    poll_interval = max(options.poll_interval_seconds, 0.0)
    stable_checks_seen = 0
    stat_previous: tuple[int, int] | None = None

    while time.monotonic() <= deadline:
        # Take size and mtime from one stat() result so they describe the
        # same state, and treat a file that vanishes mid-check as "not there
        # yet" instead of aborting the wait.
        try:
            stat_result = file_docx.stat() if file_docx.is_file() else None
        except (FileNotFoundError, NotADirectoryError):
            stat_result = None

        if stat_result is not None and stat_result.st_size > 0:
            stat_current = (stat_result.st_size, stat_result.st_mtime_ns)
            if stat_current == stat_previous:
                stable_checks_seen += 1
            else:
                stable_checks_seen = 1
                stat_previous = stat_current
            if stable_checks_seen >= stable_checks_required:
                return
        time.sleep(poll_interval)

    raise TimeoutError(
        "Refreshed DOCX did not become stable before timeout: "
        f"file_docx={file_docx.resolve()} "
        f"timeout_seconds={options.timeout_seconds} "
        f"stable_checks={options.stable_checks}"
    )
80
+
81
+
82
def validate_docx_toc_result(file_docx: Path) -> None:
    """Raise ``RuntimeError`` unless ``file_docx`` has a materialized TOC result."""
    if not has_materialized_toc_result(file_docx):
        raise RuntimeError(
            "DOCX TOC result was not materialized after field refresh: "
            f"file_docx={file_docx.resolve()}"
        )
89
+
90
+
91
def has_materialized_toc_result(file_docx: Path) -> bool:
    """Return whether any TOC field in the document has a non-blank result.

    All field parts are scanned; a TOC field counts once the text extracted
    from its result section is not just whitespace.
    """
    return any(
        extract_text_from_field_result(toc_field.group("result")).strip()
        for text_part in read_docx_field_parts(file_docx)
        for toc_field in TOC_FIELD_PATTERN.finditer(text_part)
    )
97
+
98
+
99
def read_docx_field_parts(file_docx: Path) -> tuple[str, ...]:
    """Return the UTF-8 decoded text of every field part in the archive.

    Field parts are the members whose name fully matches
    ``DOCX_FIELD_PART_PATTERN``; they are returned in archive order.
    """
    with zipfile.ZipFile(file_docx, "r") as archive:
        return tuple(
            archive.read(member_name).decode("utf-8")
            for member_name in archive.namelist()
            if DOCX_FIELD_PART_PATTERN.fullmatch(member_name)
        )
106
+
107
+
108
def extract_text_from_field_result(text_result_xml: str) -> str:
    """Concatenate the unescaped content of every ``w:t`` element in the XML."""
    pieces = (
        unescape(text_run.group("text"))
        for text_run in TEXT_RUN_PATTERN.finditer(text_result_xml)
    )
    return "".join(pieces)