offagent 0.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. offagent/__init__.py +3 -0
  2. offagent/__main__.py +5 -0
  3. offagent/adapters/__init__.py +1 -0
  4. offagent/adapters/docx_adapter.py +1237 -0
  5. offagent/adapters/embedding_provider.py +132 -0
  6. offagent/adapters/pptx_adapter.py +940 -0
  7. offagent/adapters/xlsx_adapter.py +1266 -0
  8. offagent/app/__init__.py +1 -0
  9. offagent/app/progress.py +52 -0
  10. offagent/app/services.py +4267 -0
  11. offagent/config.py +287 -0
  12. offagent/domain/__init__.py +1 -0
  13. offagent/domain/locators.py +444 -0
  14. offagent/domain/models.py +477 -0
  15. offagent/domain/text_fragments.py +136 -0
  16. offagent/errors.py +29 -0
  17. offagent/indexing/__init__.py +1 -0
  18. offagent/indexing/store.py +795 -0
  19. offagent/interfaces/__init__.py +1 -0
  20. offagent/interfaces/cli.py +438 -0
  21. offagent/interfaces/cli_output.py +139 -0
  22. offagent/interfaces/cli_progress.py +120 -0
  23. offagent/interfaces/mcp.py +1145 -0
  24. offagent/interfaces/mcp_converters.py +80 -0
  25. offagent/interfaces/mcp_models.py +923 -0
  26. offagent/objects/__init__.py +3 -0
  27. offagent/objects/base.py +26 -0
  28. offagent/objects/docx_objects.py +951 -0
  29. offagent/objects/pptx_objects.py +895 -0
  30. offagent/objects/xlsx_objects.py +962 -0
  31. offagent/path_policy.py +42 -0
  32. offagent/storage/__init__.py +1 -0
  33. offagent/storage/versioning.py +31 -0
  34. offagent-0.10.0.dist-info/METADATA +546 -0
  35. offagent-0.10.0.dist-info/RECORD +39 -0
  36. offagent-0.10.0.dist-info/WHEEL +5 -0
  37. offagent-0.10.0.dist-info/entry_points.txt +2 -0
  38. offagent-0.10.0.dist-info/licenses/LICENSE +21 -0
  39. offagent-0.10.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1237 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+ from pathlib import Path
5
+
6
+ from offagent.domain.locators import parse_locator, to_v2_locator
7
+ from offagent.domain.models import (
8
+ BlockStyle,
9
+ BlockBundle,
10
+ DocxRun,
11
+ DocxParagraph,
12
+ DocxTableCell,
13
+ DocxTable,
14
+ DocumentBlock,
15
+ DocumentRef,
16
+ InlineFragment,
17
+ InlineStyle,
18
+ IndexedItem,
19
+ SectionPayload,
20
+ StructureSection,
21
+ TextContainerSnapshot,
22
+ VisibleTextRange,
23
+ )
24
+ from offagent.domain.text_fragments import (
25
+ apply_style_to_range,
26
+ fragment_text,
27
+ normalize_fragments,
28
+ )
29
+ from offagent.errors import (
30
+ InvalidArgumentsError,
31
+ TargetNotEditableError,
32
+ TargetNotFoundError,
33
+ )
34
+
35
+ try:
36
+ from docx import Document
37
+ from docx.enum.text import WD_ALIGN_PARAGRAPH, WD_COLOR_INDEX
38
+ from docx.oxml.table import CT_Tbl
39
+ from docx.oxml.text.paragraph import CT_P
40
+ from docx.shared import Pt, RGBColor
41
+ from docx.table import Table
42
+ from docx.text.paragraph import Paragraph
43
+ from docx.text.run import Run
44
+ except ModuleNotFoundError: # pragma: no cover - exercised through dependency checks
45
+ Document = None
46
+ WD_ALIGN_PARAGRAPH = None
47
+ WD_COLOR_INDEX = None
48
+ CT_Tbl = None
49
+ CT_P = None
50
+ Pt = None
51
+ RGBColor = None
52
+ Table = None
53
+ Paragraph = None
54
+ Run = None
55
+
56
+ RunFormatting = InlineStyle
57
+
58
+
59
@dataclass(frozen=True)
class ResolvedParagraphTarget:
    """Immutable result of resolving a locator to a body-level paragraph."""

    # Position among all body-level blocks (paragraphs and tables together).
    block_index: int
    # Position counting body-level paragraphs only.
    paragraph_index: int
    # The python-docx paragraph object the locator resolved to.
    paragraph: Paragraph
64
+
65
+
66
@dataclass(frozen=True)
class ResolvedTableCellTarget:
    """Immutable result of resolving a locator to a cell of a body-level table."""

    # Position of the table among all body-level blocks.
    block_index: int
    # Position of the table counting body-level tables only.
    table_index: int
    # Row and column of the addressed cell inside that table.
    row_index: int
    column_index: int
    # The python-docx table object that contains the cell.
    table: Table
73
+
74
+
75
+ ResolvedTarget = ResolvedParagraphTarget | ResolvedTableCellTarget
76
+
77
+
78
def extract_document(document_path: Path) -> list[IndexedItem]:
    """Build one indexable item per body-level paragraph of a DOCX file.

    Args:
        document_path: Location of the DOCX file to read.

    Returns:
        ``IndexedItem`` objects in document order. Each item uses its
        ``para:<paragraph_index>`` locator as both ``item_id`` and ``locator``.
    """

    def _as_item(entry: DocxParagraph) -> IndexedItem:
        # The locator doubles as the item id.
        key = f"para:{entry.paragraph_index}"
        return IndexedItem(
            item_id=key,
            item_type="paragraph",
            locator=key,
            preview=entry.preview,
            content_text=entry.text,
            metadata={
                "paragraph_index": entry.paragraph_index,
                "block_index": entry.block_index,
                "style_name": entry.style_name,
                "is_heading": entry.is_heading,
            },
        )

    return [_as_item(entry) for entry in get_paragraphs(document_path)]
100
+
101
+
102
def build_embedding_text(item: IndexedItem, document_path: Path) -> str:
    """Return the text to embed for ``item``.

    Args:
        item: Indexed item whose ``content_text`` is returned.
        document_path: Accepted but not used by this adapter.

    Returns:
        ``item.content_text`` unchanged.
    """
    del document_path  # deliberately unused
    embedding_text = item.content_text
    return embedding_text
105
+
106
+
107
def read_paragraph(document_path: Path, item_id: str) -> str:
    """Return the text of the paragraph addressed by ``item_id``.

    Args:
        document_path: Location of the DOCX file to read.
        item_id: Paragraph id of the form ``para:<index>``.

    Returns:
        The paragraph's text.
    """
    document = _open_document(document_path)
    return _resolve_paragraph(document, item_id).text
110
+
111
+
112
def replace_paragraph(
    document_path: Path, item_id: str, text: str, output_path: Path | None = None
) -> Path:
    """Replace a paragraph's content with a single run holding ``text``.

    The formatting of the paragraph's first run (if any) is captured before
    the content is removed and re-applied to the replacement run.

    Args:
        document_path: Location of the DOCX file to edit.
        item_id: Paragraph id of the form ``para:<index>``.
        text: New paragraph text.
        output_path: Where to save; the source file is overwritten when omitted.

    Returns:
        The path the document was saved to.
    """
    docx_file = _open_document(document_path)
    target = _resolve_paragraph(docx_file, item_id)

    # Remember the first run's look so the replacement text inherits it.
    existing_runs = target.runs
    first_run = existing_runs[0] if existing_runs else None
    saved_style = _capture_run_formatting(first_run)

    _clear_paragraph(target)
    _apply_run_formatting(target.add_run(text), saved_style)

    destination = _target_path(document_path, output_path)
    docx_file.save(destination)
    return destination
124
+
125
+
126
def append_paragraph(
    document_path: Path, item_id: str, text: str, output_path: Path | None = None
) -> Path:
    """Append ``text`` to the end of an existing paragraph.

    The text is concatenated onto the paragraph's last run; a paragraph
    without runs gets a new run instead.

    Args:
        document_path: Location of the DOCX file to edit.
        item_id: Paragraph id of the form ``para:<index>``.
        text: Text to append.
        output_path: Where to save; the source file is overwritten when omitted.

    Returns:
        The path the document was saved to.
    """
    docx_file = _open_document(document_path)
    target = _resolve_paragraph(docx_file, item_id)

    runs = target.runs
    if not runs:
        target.add_run(text)
    else:
        last_run = runs[-1]
        last_run.text = f"{last_run.text}{text}"

    destination = _target_path(document_path, output_path)
    docx_file.save(destination)
    return destination
138
+
139
+
140
def make_table_cell_locator(table_index: int, row_index: int, column_index: int) -> str:
    """Format a ``table:<table>:cell:<row>:<column>`` locator string."""
    return "table:{}:cell:{}:{}".format(table_index, row_index, column_index)
142
+
143
+
144
def parse_table_cell_locator(locator: str) -> tuple[int, int, int]:
    """Split a ``table:<table>:cell:<row>:<column>`` locator into its indices.

    Args:
        locator: Locator string to parse.

    Returns:
        ``(table_index, row_index, column_index)``.

    Raises:
        InvalidArgumentsError: If the locator does not have the expected
            five-part shape or one of the index parts is not an integer.
    """
    segments = locator.split(":")
    well_formed = (
        len(segments) == 5 and segments[0] == "table" and segments[2] == "cell"
    )
    if not well_formed:
        raise InvalidArgumentsError(f"Unsupported DOCX table cell locator: {locator}")

    try:
        # Positions 1, 3 and 4 hold the table, row and column numbers.
        table_index, row_index, column_index = (
            int(segments[position]) for position in (1, 3, 4)
        )
    except ValueError as exc:
        raise InvalidArgumentsError(
            f"Invalid DOCX table cell locator: {locator}"
        ) from exc
    return table_index, row_index, column_index
157
+
158
+
159
def resolve_structure(document_path: Path) -> tuple[StructureSection, ...]:
    """List every body-level paragraph and table as a structure section.

    Paragraphs are located as ``para:<paragraph_index>``; tables are located
    by the locator of their ``0:0`` cell.

    Args:
        document_path: Location of the DOCX file to read.

    Returns:
        One ``StructureSection`` per block, in document order.
    """
    document = _open_document(document_path)
    outline: list[StructureSection] = []

    # Paragraphs and tables are numbered independently of the block position.
    paragraphs_seen = 0
    tables_seen = 0
    for position, (kind, node) in enumerate(_iter_blocks(document)):
        if kind != "paragraph":
            table_info = _table_model(node, position, tables_seen)
            widest_row = max((len(row) for row in table_info.rows), default=0)
            outline.append(
                StructureSection(
                    locator=make_table_cell_locator(tables_seen, 0, 0),
                    section_type="table",
                    preview=table_info.preview,
                    metadata={
                        "block_index": position,
                        "block_type": "table",
                        "table_index": tables_seen,
                        "row_count": len(table_info.rows),
                        "column_count": widest_row,
                    },
                )
            )
            tables_seen += 1
            continue

        paragraph_info = _paragraph_model(node, position, paragraphs_seen)
        outline.append(
            StructureSection(
                locator=f"para:{paragraphs_seen}",
                section_type="paragraph",
                preview=paragraph_info.preview,
                metadata={
                    "block_index": position,
                    "block_type": "paragraph",
                    "paragraph_index": paragraphs_seen,
                    "style_name": paragraph_info.style_name,
                    "is_heading": paragraph_info.is_heading,
                },
            )
        )
        paragraphs_seen += 1

    return tuple(outline)
205
+
206
+
207
def get_section(document_path: Path, locator: str) -> SectionPayload:
    """Return the full payload for the paragraph or table a locator points at.

    A table-cell locator yields the payload of the whole containing table,
    and the returned locator is that table's ``0:0`` cell locator.

    Args:
        document_path: Location of the DOCX file to read.
        locator: ``para:<index>`` or ``table:<t>:cell:<r>:<c>`` locator.

    Returns:
        A ``SectionPayload`` describing the paragraph (text, style, runs) or
        the table (rows and per-cell entries).
    """
    document = _open_document(document_path)
    target = _resolve_locator(document, locator)
    reference = _document_ref(document_path)

    if not isinstance(target, ResolvedParagraphTarget):
        table_info = _table_model(
            target.table, target.block_index, target.table_index
        )
        cell_entries: list[DocxTableCell] = []
        for row_index, row in enumerate(target.table.rows):
            for column_index, cell in enumerate(row.cells):
                cell_entries.append(
                    DocxTableCell(
                        locator=make_table_cell_locator(
                            target.table_index, row_index, column_index
                        ),
                        row_index=row_index,
                        column_index=column_index,
                        text=cell.text,
                        metadata={},
                    )
                )
        return SectionPayload(
            document=reference,
            locator=make_table_cell_locator(target.table_index, 0, 0),
            section_type="table",
            preview=table_info.preview,
            metadata={
                "block_index": target.block_index,
                "block_type": "table",
                "table_index": target.table_index,
                "row_count": len(table_info.rows),
                "column_count": max(
                    (len(row) for row in table_info.rows), default=0
                ),
            },
            block_type="table",
            rows=table_info.rows,
            table_cells=tuple(cell_entries),
        )

    paragraph_info = _paragraph_model(
        target.paragraph, target.block_index, target.paragraph_index
    )
    run_models = tuple(_run_model(run) for run in target.paragraph.runs)
    return SectionPayload(
        document=reference,
        locator=f"para:{target.paragraph_index}",
        section_type="paragraph",
        preview=paragraph_info.preview,
        metadata={
            "block_index": target.block_index,
            "block_type": "paragraph",
            "paragraph_index": target.paragraph_index,
        },
        block_type="paragraph",
        text=paragraph_info.text,
        style_name=paragraph_info.style_name,
        is_heading=paragraph_info.is_heading,
        runs=run_models,
    )
267
+
268
+
269
def read_node(document_path: Path, locator: str) -> tuple[str, str, dict[str, object]]:
    """Read the text of a paragraph or of a single table cell.

    Args:
        document_path: Location of the DOCX file to read.
        locator: ``para:<index>`` or ``table:<t>:cell:<r>:<c>`` locator.

    Returns:
        ``(node_type, text, metadata)`` where ``node_type`` is ``"paragraph"``
        or ``"table_cell"`` and ``metadata`` holds the resolved indices (plus
        style information for paragraphs).
    """
    document = _open_document(document_path)
    target = _resolve_locator(document, locator)

    if not isinstance(target, ResolvedParagraphTarget):
        row = target.table.rows[target.row_index]
        cell = row.cells[target.column_index]
        cell_metadata = {
            "block_index": target.block_index,
            "table_index": target.table_index,
            "row_index": target.row_index,
            "column_index": target.column_index,
        }
        return ("table_cell", cell.text, cell_metadata)

    paragraph_info = _paragraph_model(
        target.paragraph, target.block_index, target.paragraph_index
    )
    paragraph_metadata = {
        "block_index": target.block_index,
        "paragraph_index": target.paragraph_index,
        "style_name": paragraph_info.style_name,
        "is_heading": paragraph_info.is_heading,
    }
    return ("paragraph", paragraph_info.text, paragraph_metadata)
301
+
302
+
303
def write_node(
    document_path: Path, locator: str, text: str, output_path: Path | None = None
) -> Path:
    """Overwrite the text of a paragraph or of a single table cell.

    Args:
        document_path: Location of the DOCX file to edit.
        locator: ``para:<index>`` or ``table:<t>:cell:<r>:<c>`` locator.
        text: New text for the target.
        output_path: Where to save; the source file is overwritten when omitted.

    Returns:
        The path the document was saved to.
    """
    document = _open_document(document_path)
    target = _resolve_locator(document, locator)

    if not isinstance(target, ResolvedParagraphTarget):
        row = target.table.rows[target.row_index]
        row.cells[target.column_index].text = text
        destination = _target_path(document_path, output_path)
        document.save(destination)
        return destination

    # Paragraph writes are delegated; replace_paragraph opens the file itself.
    paragraph_id = f"para:{target.paragraph_index}"
    return replace_paragraph(document_path, paragraph_id, text, output_path)
319
+
320
+
321
def read_paragraph_fragments(
    document_path: Path, locator: str
) -> TextContainerSnapshot:
    """Read a paragraph as a sequence of styled inline fragments.

    Args:
        document_path: Location of the DOCX file to read.
        locator: Locator that ``_canonical_docx_locator`` resolves to the
            three components ``("docx", "para", <index>)``.

    Returns:
        A snapshot carrying the canonical locator, the combined text and the
        fragments.

    Raises:
        InvalidArgumentsError: If the locator is not a paragraph locator.
    """
    document = _open_document(document_path)
    canonical, components = _canonical_docx_locator(locator)
    is_paragraph_locator = len(components) == 3 and components[:2] == ("docx", "para")
    if not is_paragraph_locator:
        raise InvalidArgumentsError("DOCX fragment reads require a paragraph locator.")

    index_token = components[2]
    paragraph = _resolve_paragraph(document, f"para:{index_token}")
    fragments = _read_docx_paragraph_fragments(paragraph)
    return TextContainerSnapshot(
        locator=canonical,
        object_type="paragraph",
        text=fragment_text(fragments),
        fragments=fragments,
        metadata={"paragraph_index": int(index_token)},
    )
338
+
339
+
340
def rewrite_paragraph_fragments(
    document_path: Path,
    locator: str,
    fragments: list[InlineFragment] | tuple[InlineFragment, ...],
    output_path: Path | None = None,
) -> tuple[Path, str, TextContainerSnapshot]:
    """Replace a paragraph's runs with the given styled fragments and save.

    Args:
        document_path: Location of the DOCX file to edit.
        locator: Locator that ``_canonical_docx_locator`` resolves to the
            three components ``("docx", "para", <index>)``.
        fragments: Fragments to write; they are normalised before use.
        output_path: Where to save; the source file is overwritten when omitted.

    Returns:
        ``(saved_path, canonical_locator, snapshot)`` where the snapshot is
        built from the normalised fragments.

    Raises:
        InvalidArgumentsError: If the locator is not a paragraph locator.
    """
    document = _open_document(document_path)
    canonical, components = _canonical_docx_locator(locator)
    is_paragraph_locator = len(components) == 3 and components[:2] == ("docx", "para")
    if not is_paragraph_locator:
        raise InvalidArgumentsError("DOCX fragment writes require a paragraph locator.")

    index_token = components[2]
    paragraph = _resolve_paragraph(document, f"para:{index_token}")
    cleaned = normalize_fragments(fragments)
    _rewrite_docx_paragraph(paragraph, cleaned)

    destination = _target_path(document_path, output_path)
    document.save(destination)

    result = TextContainerSnapshot(
        locator=canonical,
        object_type="paragraph",
        text=fragment_text(cleaned),
        fragments=cleaned,
        metadata={"paragraph_index": int(index_token)},
    )
    return destination, canonical, result
364
+
365
+
366
def insert_paragraph(
    document_path: Path,
    text: str,
    *,
    style_name: str | None = None,
    after_locator: str | None = None,
    output_path: Path | None = None,
) -> tuple[Path, str]:
    """Insert a new paragraph at the end of the body or after a given block.

    Args:
        document_path: Location of the DOCX file to edit.
        text: Text of the new paragraph.
        style_name: Optional paragraph style to apply.
        after_locator: Paragraph or table-cell locator to insert after. A
            table-cell locator places the paragraph after the whole table.
            When omitted the paragraph is appended to the document.
        output_path: Where to save; the source file is overwritten when omitted.

    Returns:
        ``(saved_path, "para:<index>")`` for the new paragraph.

    Raises:
        InvalidArgumentsError: If ``style_name`` is not accepted by the document.
    """
    if after_locator is None:
        # Only the saved path is needed; the returned block index is ignored.
        target_path, _ = append_paragraph_block(
            document_path,
            text,
            style_name=style_name,
            output_path=output_path,
        )
        # The appended paragraph is the last one in the saved file.
        last_index = len(get_paragraphs(target_path)) - 1
        return target_path, f"para:{last_index}"

    document = _open_document(document_path)
    resolved = _resolve_locator(document, after_locator)
    if isinstance(resolved, ResolvedParagraphTarget):
        paragraph_index = resolved.paragraph_index + 1
        anchor_element = resolved.paragraph._element
    else:
        # Presumably the number of paragraphs preceding the table block;
        # verify against _paragraphs_before_block.
        paragraph_index = _paragraphs_before_block(document, resolved.block_index)
        anchor_element = resolved.table._element

    # Create the element in the body, then reposition it after the anchor.
    new_element = document.element.body.add_p()
    anchor_element.addnext(new_element)
    inserted = Paragraph(new_element, document)
    inserted.add_run(text)
    if style_name is not None:
        try:
            inserted.style = style_name
        except (KeyError, ValueError) as exc:
            raise InvalidArgumentsError(
                f"Unknown DOCX paragraph style: {style_name}"
            ) from exc

    target_path = _target_path(document_path, output_path)
    document.save(target_path)
    return target_path, f"para:{paragraph_index}"
412
+
413
+
414
def create_docx(output_path: Path) -> Path:
    """Create a new document from the default template and save it.

    Args:
        output_path: Destination of the new DOCX file.

    Returns:
        ``output_path`` unchanged.
    """
    _open_document_from_default_template().save(output_path)
    return output_path
418
+
419
+
420
def add_paragraph(
    document_path: Path,
    text: str,
    output_path: Path | None = None,
) -> tuple[Path, str]:
    """Append a paragraph and report its locator in v2 form.

    Args:
        document_path: Location of the DOCX file to edit.
        text: Text of the new paragraph.
        output_path: Where to save; the source file is overwritten when omitted.

    Returns:
        ``(saved_path, locator)`` where the locator is the result of passing
        the ``para:<index>`` locator through ``to_v2_locator``.
    """
    saved_path, legacy = insert_paragraph(document_path, text, output_path=output_path)
    v2_locator = to_v2_locator(legacy, file_type="docx")
    return saved_path, v2_locator
431
+
432
+
433
def add_heading(
    document_path: Path,
    text: str,
    level: int,
    output_path: Path | None = None,
) -> tuple[Path, str]:
    """Append a heading paragraph to the end of the document.

    Args:
        document_path: Location of the DOCX file to edit.
        text: Heading text.
        level: Heading level, 1 through 9.
        output_path: Where to save; the source file is overwritten when omitted.

    Returns:
        ``(saved_path, "docx:para:<index>")`` for the new heading.

    Raises:
        InvalidArgumentsError: If ``level`` is outside 1..9.
        TargetNotEditableError: If the document has no ``Heading <level>``
            style to apply.
    """
    if level < 1 or level > 9:
        raise InvalidArgumentsError("DOCX heading level must be between 1 and 9.")

    document = _open_document(document_path)
    style_name = f"Heading {level}"
    try:
        document.add_heading(text, level=level)
    except KeyError as exc:
        # python-docx looks the heading style up by name and raises KeyError
        # when the document lacks it; report that as a domain error.
        raise TargetNotEditableError(
            f"DOCX style {style_name!r} is not available in the document."
        ) from exc

    # The heading was appended last, so it is the final body-level paragraph.
    paragraph_index = (
        sum(1 for block_type, _ in _iter_blocks(document) if block_type == "paragraph")
        - 1
    )
    target_path = _target_path(document_path, output_path)
    document.save(target_path)
    return target_path, f"docx:para:{paragraph_index}"
451
+
452
+
453
def add_table(
    document_path: Path,
    rows: int,
    columns: int,
    output_path: Path | None = None,
) -> tuple[Path, str]:
    """Append an empty table to the end of the document.

    Args:
        document_path: Location of the DOCX file to edit.
        rows: Number of rows, at least 1.
        columns: Number of columns, at least 1.
        output_path: Where to save; the source file is overwritten when omitted.

    Returns:
        ``(saved_path, "docx:table:<index>")`` for the new table.

    Raises:
        InvalidArgumentsError: If ``rows`` or ``columns`` is below 1.
    """
    if rows < 1 or columns < 1:
        raise InvalidArgumentsError("DOCX table rows and columns must be positive.")

    document = _open_document(document_path)
    # Count existing tables first: that count is the new table's index.
    existing_tables = [kind for kind, _ in _iter_blocks(document) if kind == "table"]
    table_index = len(existing_tables)
    document.add_table(rows=rows, cols=columns)

    destination = _target_path(document_path, output_path)
    document.save(destination)
    return destination, f"docx:table:{table_index}"
470
+
471
+
472
def style_run(
    document_path: Path,
    locator: str,
    style: InlineStyle,
    clear_fields: list[str] | tuple[str, ...],
    output_path: Path | None = None,
) -> tuple[Path, str, dict[str, object]]:
    """Apply inline styling to a single run of a paragraph.

    Args:
        document_path: Location of the DOCX file to edit.
        locator: Locator that ``_canonical_docx_locator`` resolves to the five
            components ``("docx", "para", <index>, "run", <run index>)``.
        style: Inline style values to apply.
        clear_fields: Style field names to clear; normalised against
            ``_INLINE_STYLE_FIELDS``.
        output_path: Where to save; the source file is overwritten when omitted.

    Returns:
        ``(saved_path, canonical_locator, {"cleared_fields": ...})``.

    Raises:
        InvalidArgumentsError: If the locator is not a run locator.
        TargetNotFoundError: If the run index is out of range.
    """
    document = _open_document(document_path)
    canonical, components = _canonical_docx_locator(locator)
    is_run_locator = (
        len(components) == 5
        and components[:2] == ("docx", "para")
        and components[3] == "run"
    )
    if not is_run_locator:
        raise InvalidArgumentsError("DOCX inline styling requires a run locator.")

    paragraph_token = components[2]
    paragraph = _resolve_paragraph(document, f"para:{paragraph_token}")
    run_index = _parse_int_component(components[4], locator)
    try:
        run = paragraph.runs[run_index]
    except IndexError as exc:
        raise TargetNotFoundError(
            f"Run {run_index} does not exist in paragraph {paragraph_token}."
        ) from exc

    cleared_fields = _normalize_clear_fields(clear_fields, _INLINE_STYLE_FIELDS)
    _apply_docx_inline_style(run, style, cleared_fields)

    destination = _target_path(document_path, output_path)
    document.save(destination)
    return destination, canonical, {"cleared_fields": cleared_fields}
502
+
503
+
504
def style_paragraph_range(
    document_path: Path,
    locator: str,
    text_range: VisibleTextRange,
    style: InlineStyle,
    clear_fields: list[str] | tuple[str, ...],
    output_path: Path | None = None,
) -> tuple[Path, str, dict[str, object]]:
    """Apply inline styling to a character range of a paragraph.

    The paragraph is read as fragments, the style is applied to the range,
    and the paragraph is rewritten from the resulting fragments.

    Args:
        document_path: Location of the DOCX file to edit.
        locator: Paragraph locator.
        text_range: Range to style; its ``start`` and ``end`` are echoed back.
        style: Inline style values to apply.
        clear_fields: Style field names to clear; normalised against
            ``_INLINE_STYLE_FIELDS``.
        output_path: Where to save; the source file is overwritten when omitted.

    Returns:
        ``(saved_path, canonical_locator, details)`` where ``details`` holds
        the cleared fields, the range and the rewritten paragraph text.
    """
    current = read_paragraph_fragments(document_path, locator)
    cleared_fields = _normalize_clear_fields(clear_fields, _INLINE_STYLE_FIELDS)
    restyled = apply_style_to_range(
        current.fragments, text_range, style=style, clear_fields=cleared_fields
    )
    target_path, canonical, rewritten = rewrite_paragraph_fragments(
        document_path, locator, restyled, output_path=output_path
    )
    details = {
        "cleared_fields": cleared_fields,
        "range": {"start": text_range.start, "end": text_range.end},
        "text": rewritten.text,
    }
    return target_path, canonical, details
532
+
533
+
534
def style_paragraph(
    document_path: Path,
    locator: str,
    style: BlockStyle,
    clear_fields: list[str] | tuple[str, ...],
    output_path: Path | None = None,
) -> tuple[Path, str, dict[str, object]]:
    """Apply block-level styling to a paragraph.

    Args:
        document_path: Location of the DOCX file to edit.
        locator: Locator that ``_canonical_docx_locator`` resolves to the
            three components ``("docx", "para", <index>)``.
        style: Block style values to apply.
        clear_fields: Style field names to clear; normalised against
            ``_BLOCK_STYLE_FIELDS``.
        output_path: Where to save; the source file is overwritten when omitted.

    Returns:
        ``(saved_path, canonical_locator, details)`` where ``details`` holds
        the cleared fields and whatever ``_apply_docx_block_style`` returned
        as skipped fields.

    Raises:
        InvalidArgumentsError: If the locator is not a paragraph locator.
    """
    document = _open_document(document_path)
    canonical, components = _canonical_docx_locator(locator)
    is_paragraph_locator = len(components) == 3 and components[:2] == ("docx", "para")
    if not is_paragraph_locator:
        raise InvalidArgumentsError("DOCX block styling requires a paragraph locator.")

    paragraph = _resolve_paragraph(document, f"para:{components[2]}")
    cleared_fields = _normalize_clear_fields(clear_fields, _BLOCK_STYLE_FIELDS)
    skipped_fields = _apply_docx_block_style(paragraph, style, cleared_fields)

    destination = _target_path(document_path, output_path)
    document.save(destination)
    details = {"cleared_fields": cleared_fields, "skipped_fields": skipped_fields}
    return destination, canonical, details
556
+
557
+
558
def set_structural_role(
    document_path: Path,
    locator: str,
    role: str,
    level: int | None,
    output_path: Path | None = None,
) -> tuple[Path, str, dict[str, object]]:
    """Assign the paragraph style that corresponds to a structural role.

    Args:
        document_path: Location of the DOCX file to edit.
        locator: Locator that ``_canonical_docx_locator`` resolves to the
            three components ``("docx", "para", <index>)``.
        role: Role name, mapped to a style by ``_docx_structural_style_name``.
        level: Optional level passed to the same mapping.
        output_path: Where to save; the source file is overwritten when omitted.

    Returns:
        ``(saved_path, canonical_locator, details)`` where ``details`` echoes
        the role, the level and the style name that was applied.

    Raises:
        InvalidArgumentsError: If the locator is not a paragraph locator.
        TargetNotEditableError: If the document has no style with the
            required name.
    """
    document = _open_document(document_path)
    canonical, components = _canonical_docx_locator(locator)
    is_paragraph_locator = len(components) == 3 and components[:2] == ("docx", "para")
    if not is_paragraph_locator:
        raise InvalidArgumentsError(
            "set_structural_role requires a DOCX paragraph locator."
        )

    style_name = _docx_structural_style_name(role, level)
    # The style must already exist in the document before it can be assigned.
    style_available = False
    for candidate in document.styles:
        if getattr(candidate, "name", None) == style_name:
            style_available = True
            break
    if not style_available:
        raise TargetNotEditableError(
            f"DOCX style {style_name!r} is not available in the document."
        )

    paragraph = _resolve_paragraph(document, f"para:{components[2]}")
    paragraph.style = style_name

    destination = _target_path(document_path, output_path)
    document.save(destination)
    details = {"role": role, "level": level, "style_name": style_name}
    return destination, canonical, details
587
+
588
+
589
def get_blocks(document_path: Path) -> tuple[DocumentBlock, ...]:
    """Summarise every body-level paragraph and table of a document.

    Args:
        document_path: Location of the DOCX file to read.

    Returns:
        One ``DocumentBlock`` per block, in document order.
    """
    document = _open_document(document_path)
    summaries: list[DocumentBlock] = []

    # Paragraphs and tables are numbered independently of the block position.
    paragraphs_seen = 0
    tables_seen = 0
    for position, (kind, node) in enumerate(_iter_blocks(document)):
        if kind != "paragraph":
            table_info = _table_model(node, position, tables_seen)
            block_type = "table"
            preview = table_info.preview
            metadata = {
                "table_index": table_info.table_index,
                "row_count": len(table_info.rows),
                "column_count": max(
                    (len(row) for row in table_info.rows), default=0
                ),
            }
            tables_seen += 1
        else:
            paragraph_info = _paragraph_model(node, position, paragraphs_seen)
            block_type = "paragraph"
            preview = paragraph_info.preview
            metadata = {
                "paragraph_index": paragraph_info.paragraph_index,
                "style_name": paragraph_info.style_name,
                "is_heading": paragraph_info.is_heading,
            }
            paragraphs_seen += 1

        summaries.append(
            DocumentBlock(
                block_index=position,
                block_type=block_type,
                preview=preview,
                metadata=metadata,
            )
        )

    return tuple(summaries)
630
+
631
+
632
def get_paragraphs(document_path: Path) -> tuple[DocxParagraph, ...]:
    """Return a model for every body-level paragraph, in document order.

    Args:
        document_path: Location of the DOCX file to read.

    Returns:
        ``DocxParagraph`` models carrying both the block position and the
        paragraph-only position of each paragraph.
    """
    document = _open_document(document_path)
    # Keep each paragraph's block position, then number the paragraphs.
    paragraph_blocks = [
        (position, node)
        for position, (kind, node) in enumerate(_iter_blocks(document))
        if kind == "paragraph"
    ]
    return tuple(
        _paragraph_model(node, position, paragraph_index)
        for paragraph_index, (position, node) in enumerate(paragraph_blocks)
    )
644
+
645
+
646
def get_tables(document_path: Path) -> tuple[DocxTable, ...]:
    """Return a model for every body-level table, in document order.

    Args:
        document_path: Location of the DOCX file to read.

    Returns:
        ``DocxTable`` models carrying both the block position and the
        table-only position of each table.
    """
    document = _open_document(document_path)
    # Keep each table's block position, then number the tables.
    table_blocks = [
        (position, node)
        for position, (kind, node) in enumerate(_iter_blocks(document))
        if kind == "table"
    ]
    return tuple(
        _table_model(node, position, table_index)
        for table_index, (position, node) in enumerate(table_blocks)
    )
658
+
659
+
660
def get_block_bundle(document_path: Path, block_index: int) -> BlockBundle:
    """Return the summary and full model of the block at ``block_index``.

    Args:
        document_path: Location of the DOCX file to read.
        block_index: Position among all body-level blocks.

    Returns:
        A ``BlockBundle`` with either ``paragraph`` or ``table`` populated.

    Raises:
        TargetNotFoundError: If no block has that position.
    """
    document = _open_document(document_path)

    paragraphs_seen = 0
    tables_seen = 0
    for position, (kind, node) in enumerate(_iter_blocks(document)):
        if position == block_index:
            if kind == "paragraph":
                paragraph_info = _paragraph_model(node, position, paragraphs_seen)
                return BlockBundle(
                    document=_document_ref(document_path),
                    block=DocumentBlock(
                        block_index=position,
                        block_type="paragraph",
                        preview=paragraph_info.preview,
                        metadata={
                            "paragraph_index": paragraph_info.paragraph_index,
                            "style_name": paragraph_info.style_name,
                            "is_heading": paragraph_info.is_heading,
                        },
                    ),
                    paragraph=paragraph_info,
                )

            table_info = _table_model(node, position, tables_seen)
            return BlockBundle(
                document=_document_ref(document_path),
                block=DocumentBlock(
                    block_index=position,
                    block_type="table",
                    preview=table_info.preview,
                    metadata={
                        "table_index": table_info.table_index,
                        "row_count": len(table_info.rows),
                        "column_count": max(
                            (len(row) for row in table_info.rows), default=0
                        ),
                    },
                ),
                table=table_info,
            )

        # Not the requested block yet: advance the matching counter.
        if kind == "paragraph":
            paragraphs_seen += 1
        else:
            tables_seen += 1

    raise TargetNotFoundError(f"Block {block_index} does not exist in the document.")
711
+
712
+
713
def append_paragraph_block(
    document_path: Path,
    text: str,
    *,
    style_name: str | None = None,
    output_path: Path | None = None,
) -> tuple[Path, int]:
    """Append a paragraph to the end of the document.

    Args:
        document_path: Location of the DOCX file to edit.
        text: Text of the new paragraph.
        style_name: Optional paragraph style to apply.
        output_path: Where to save; the source file is overwritten when omitted.

    Returns:
        ``(saved_path, block_index)`` where ``block_index`` is the number of
        body-level blocks that existed before the paragraph was added.

    Raises:
        InvalidArgumentsError: If ``style_name`` is not accepted by the document.
    """
    document = _open_document(document_path)
    # Counted before the append, so this is the new paragraph's block position.
    block_index = sum(1 for _ in _iter_blocks(document))
    new_paragraph = document.add_paragraph(text)
    if style_name is not None:
        try:
            new_paragraph.style = style_name
        except (KeyError, ValueError) as exc:
            raise InvalidArgumentsError(
                f"Unknown DOCX paragraph style: {style_name}"
            ) from exc

    destination = _target_path(document_path, output_path)
    document.save(destination)
    return destination, block_index
733
+
734
+
735
def replace_block(
    document_path: Path, block_index: int, text: str, output_path: Path | None = None
) -> Path:
    """Replace the text of the paragraph at a given block position.

    Args:
        document_path: Location of the DOCX file to edit.
        block_index: Position among all body-level blocks.
        text: New paragraph text.
        output_path: Where to save; the source file is overwritten when omitted.

    Returns:
        The path the document was saved to.

    Raises:
        TargetNotEditableError: If the block at that position is a table.
        TargetNotFoundError: If no block has that position.
    """
    document = _open_document(document_path)

    paragraphs_seen = 0
    for position, (kind, _node) in enumerate(_iter_blocks(document)):
        if position == block_index:
            if kind == "table":
                raise TargetNotEditableError(
                    "DOCX table block replacement is not supported."
                )
            # Translate the block position into a paragraph id and delegate.
            return replace_paragraph(
                document_path, f"para:{paragraphs_seen}", text, output_path
            )
        if kind == "paragraph":
            paragraphs_seen += 1

    raise TargetNotFoundError(f"Block {block_index} does not exist in the document.")
756
+
757
+
758
def get_tables_result(document_path: Path) -> tuple[DocxTable, ...]:
    """Return the same table models as ``get_tables`` for ``document_path``."""
    tables = get_tables(document_path)
    return tables
760
+
761
+
762
def _open_document(document_path: Path):
    """Open a DOCX file with python-docx.

    Raises:
        RuntimeError: If python-docx could not be imported.
    """
    if Document is not None:
        return Document(str(document_path))
    raise RuntimeError("python-docx is required for DOCX operations.")
766
+
767
+
768
def _open_document_from_default_template():
    """Create a new python-docx document without loading a file.

    Raises:
        RuntimeError: If python-docx could not be imported.
    """
    if Document is not None:
        return Document()
    raise RuntimeError("python-docx is required for DOCX operations.")
772
+
773
+
774
def _document_ref(document_path: Path):
    """Build a ``DocumentRef`` for a DOCX file from its resolved path.

    The resolved path's POSIX form is used as the document id and the file's
    ``st_mtime`` as the modification time.
    """
    absolute = document_path.resolve()
    modified = absolute.stat().st_mtime
    return DocumentRef(
        document_id=absolute.as_posix(),
        path=absolute,
        file_type="docx",
        display_name=absolute.name,
        modified_time=modified,
    )
784
+
785
+
786
+ def _resolve_paragraph(document, item_id: str):
787
+ if not item_id.startswith("para:"):
788
+ raise InvalidArgumentsError(f"Unsupported DOCX paragraph item id: {item_id}")
789
+
790
+ try:
791
+ paragraph_index = int(item_id.split(":", maxsplit=1)[1])
792
+ except ValueError as exc:
793
+ raise InvalidArgumentsError(
794
+ f"Invalid DOCX paragraph item id: {item_id}"
795
+ ) from exc
796
+
797
+ try:
798
+ return document.paragraphs[paragraph_index]
799
+ except IndexError as exc:
800
+ raise TargetNotFoundError(
801
+ f"Paragraph {paragraph_index} does not exist in the document."
802
+ ) from exc
803
+
804
+
805
def _resolve_locator(document, locator: str) -> ResolvedTarget:
    """Resolve a paragraph or table-cell locator against the document body.

    Args:
        document: Opened python-docx document.
        locator: ``para:<index>`` or ``table:<t>:cell:<r>:<c>``; surrounding
            whitespace is ignored.

    Returns:
        A ``ResolvedParagraphTarget`` or ``ResolvedTableCellTarget``.

    Raises:
        InvalidArgumentsError: If the locator cannot be parsed.
        TargetNotFoundError: If the paragraph, table or cell does not exist,
            including when a row or column index is negative.
    """
    normalized = locator.strip()
    if normalized.startswith("para:"):
        paragraph_index = _parse_paragraph_locator(normalized)
        current_paragraph_index = 0
        for block_index, (block_type, block) in enumerate(_iter_blocks(document)):
            if block_type != "paragraph":
                continue
            if current_paragraph_index == paragraph_index:
                return ResolvedParagraphTarget(
                    block_index=block_index,
                    paragraph_index=paragraph_index,
                    paragraph=block,
                )
            current_paragraph_index += 1
        raise TargetNotFoundError(
            f"Paragraph {paragraph_index} does not exist in the document."
        )

    table_index, row_index, column_index = parse_table_cell_locator(normalized)
    current_table_index = 0
    for block_index, (block_type, block) in enumerate(_iter_blocks(document)):
        if block_type != "table":
            continue
        if current_table_index == table_index:
            # Negative indices would wrap around via sequence indexing and
            # silently address a different cell; report them as missing.
            if row_index < 0 or column_index < 0:
                raise TargetNotFoundError(
                    f"Table cell {table_index}:{row_index}:{column_index} does not exist."
                )
            try:
                # Probe the cell so an out-of-range index fails here.
                block.rows[row_index].cells[column_index]
            except IndexError as exc:
                raise TargetNotFoundError(
                    f"Table cell {table_index}:{row_index}:{column_index} does not exist."
                ) from exc
            return ResolvedTableCellTarget(
                block_index=block_index,
                table_index=table_index,
                row_index=row_index,
                column_index=column_index,
                table=block,
            )
        current_table_index += 1

    raise TargetNotFoundError(f"Table {table_index} does not exist in the document.")
846
+
847
+
848
+ def _parse_paragraph_locator(locator: str) -> int:
849
+ try:
850
+ return int(locator.split(":", maxsplit=1)[1])
851
+ except ValueError as exc:
852
+ raise InvalidArgumentsError(
853
+ f"Invalid DOCX paragraph item id: {locator}"
854
+ ) from exc
855
+
856
+
857
+ def _capture_run_formatting(run: Run | None) -> RunFormatting | None:
858
+ if run is None:
859
+ return None
860
+
861
+ return RunFormatting(
862
+ bold=run.bold,
863
+ italic=run.italic,
864
+ underline=run.underline,
865
+ strike=run.font.strike,
866
+ font_name=run.font.name,
867
+ font_size=None if run.font.size is None else run.font.size.pt,
868
+ font_color=None if run.font.color.rgb is None else str(run.font.color.rgb),
869
+ highlight=_docx_highlight_name(run.font.highlight_color),
870
+ )
871
+
872
+
873
+ def _apply_run_formatting(run: Run, formatting: RunFormatting | None) -> None:
874
+ if formatting is None:
875
+ return
876
+
877
+ _apply_docx_inline_style(run, formatting, ())
878
+
879
+
880
+ def _clear_paragraph(paragraph) -> None:
881
+ paragraph_element = paragraph._element
882
+ for child in list(paragraph_element):
883
+ if child.tag.endswith("}pPr"):
884
+ continue
885
+ paragraph_element.remove(child)
886
+
887
+
888
+ def _ensure_rewritable_docx_paragraph(paragraph) -> None:
889
+ for child in list(paragraph._element):
890
+ if child.tag.endswith("}pPr"):
891
+ continue
892
+ if not child.tag.endswith("}r"):
893
+ raise TargetNotEditableError(
894
+ "DOCX paragraph contains inline content that cannot be safely reconstructed."
895
+ )
896
+
897
+
898
def _read_docx_paragraph_fragments(paragraph) -> tuple[InlineFragment, ...]:
    """Convert a paragraph's runs into normalised inline fragments.

    Returns:
        An empty tuple for a paragraph without runs, otherwise the result of
        ``normalize_fragments`` over one fragment per run.

    Raises:
        TargetNotEditableError: If the paragraph fails the rewritable check.
    """
    _ensure_rewritable_docx_paragraph(paragraph)
    runs = paragraph.runs
    if not runs:
        return ()

    collected = []
    for run in runs:
        # Fall back to a default style if no formatting was captured.
        run_style = _capture_run_formatting(run) or InlineStyle()
        collected.append(InlineFragment(text=run.text, style=run_style))
    return normalize_fragments(collected)
911
+
912
+
913
def _rewrite_docx_paragraph(
    paragraph,
    fragments: list[InlineFragment] | tuple[InlineFragment, ...],
) -> None:
    """Rebuild a paragraph's runs from inline fragments.

    The paragraph is checked for rewritability, emptied, and then given one
    styled run per normalised fragment. With no fragments it receives a
    single empty run.

    Raises:
        TargetNotEditableError: If the paragraph fails the rewritable check.
    """
    _ensure_rewritable_docx_paragraph(paragraph)
    _clear_paragraph(paragraph)

    pieces = normalize_fragments(fragments)
    if pieces:
        for piece in pieces:
            new_run = paragraph.add_run(piece.text)
            _apply_docx_inline_style(new_run, piece.style, ())
    else:
        paragraph.add_run("")
926
+
927
+
928
def _target_path(document_path: Path, output_path: Path | None) -> Path:
    """Return ``output_path`` when one is given, else ``document_path``."""
    if output_path is not None:
        return output_path
    return document_path
930
+
931
+
932
def _iter_blocks(document) -> list[tuple[str, Paragraph | Table]]:
    """List the document body's paragraphs and tables in document order.

    Each entry is ``("paragraph", Paragraph)`` or ``("table", Table)``; body
    children of any other element type are skipped.

    Raises:
        RuntimeError: if any of ``CT_P``, ``CT_Tbl``, ``Paragraph`` or
            ``Table`` is ``None``.
    """
    if any(dependency is None for dependency in (CT_P, CT_Tbl, Paragraph, Table)):
        raise RuntimeError("python-docx is required for DOCX operations.")

    body = document.element.body
    collected: list[tuple[str, Paragraph | Table]] = []
    for node in body.iterchildren():
        if isinstance(node, CT_P):
            collected.append(("paragraph", Paragraph(node, document)))
            continue
        if isinstance(node, CT_Tbl):
            collected.append(("table", Table(node, document)))
    return collected
944
+
945
+
946
def _paragraph_model(
    paragraph, block_index: int, paragraph_index: int
) -> DocxParagraph:
    """Build the ``DocxParagraph`` record for one paragraph.

    ``is_heading`` is true when the paragraph's style name starts with
    ``"Heading"``; ``preview`` is the first 120 characters of the text.
    """
    style = paragraph.style
    style_name = None if style is None else style.name
    heading = bool(style_name) and style_name.startswith("Heading")
    body_text = paragraph.text
    return DocxParagraph(
        block_index=block_index,
        paragraph_index=paragraph_index,
        text=body_text,
        style_name=style_name,
        is_heading=heading,
        preview=body_text[:120],
        metadata={},
    )
961
+
962
+
963
def _table_model(table, block_index: int, table_index: int) -> DocxTable:
    """Build the ``DocxTable`` record for one table.

    ``rows`` holds the text of every cell, row by row.  ``preview`` joins the
    non-empty cell texts with ``" | "`` and keeps the first 120 characters.
    """
    cell_rows = tuple(
        tuple(cell.text for cell in row.cells) for row in table.rows
    )
    non_empty_cells = []
    for cell_row in cell_rows:
        non_empty_cells.extend(text for text in cell_row if text)
    summary = " | ".join(non_empty_cells)
    return DocxTable(
        block_index=block_index,
        table_index=table_index,
        rows=cell_rows,
        preview=summary[:120],
        metadata={},
    )
973
+
974
+
975
def _run_model(run) -> DocxRun:
    """Build the ``DocxRun`` record describing one run's text and formatting.

    The colour is the ``str()`` of the run's RGB value and the size is the
    ``int()`` of the font size object; both are ``None`` when unset.
    """
    font = run.font
    rgb = font.color.rgb
    size = font.size
    # NOTE(review): int(size) keeps the size object's raw integer value, while
    # _capture_run_formatting reads ``.pt`` -- confirm which unit
    # DocxRun.font_size is expected to carry.
    return DocxRun(
        text=run.text,
        bold=run.bold,
        italic=run.italic,
        underline=run.underline,
        strike=font.strike,
        font_name=font.name,
        font_size=None if size is None else int(size),
        color_rgb=None if rgb is None else str(rgb),
    )
994
+
995
+
996
def _paragraphs_before_block(document, block_index: int) -> int:
    """Count the paragraph blocks located strictly before ``block_index``."""
    preceding = 0
    for position, (kind, _block) in enumerate(_iter_blocks(document)):
        # Stop as soon as the requested block (or anything past it) is reached.
        if position >= block_index:
            break
        if kind == "paragraph":
            preceding += 1
    return preceding
1006
+
1007
+
1008
# Names of the style fields handled at run (inline) level.
_INLINE_STYLE_FIELDS = frozenset(
    (
        "bold",
        "italic",
        "underline",
        "strike",
        "font_name",
        "font_size",
        "font_color",
        "highlight",
    )
)
# Names of the style fields handled at block level.
_BLOCK_STYLE_FIELDS = frozenset(
    (
        "alignment",
        "indent_level",
        "left_indent",
        "right_indent",
        "spacing_before",
        "spacing_after",
        "line_spacing",
        "wrap_text",
        "vertical_alignment",
        "fill_color",
        "number_format",
    )
)
# Alignment keyword -> WD_ALIGN_PARAGRAPH member.  Every value is None when
# WD_ALIGN_PARAGRAPH itself is None.
_DOCX_ALIGNMENT_MAP = {
    keyword: (
        None
        if WD_ALIGN_PARAGRAPH is None
        else getattr(WD_ALIGN_PARAGRAPH, member)
    )
    for keyword, member in (
        ("left", "LEFT"),
        ("center", "CENTER"),
        ("right", "RIGHT"),
        ("justify", "JUSTIFY"),
    )
}
# Highlight keyword -> WD_COLOR_INDEX member.  Every value is None when
# WD_COLOR_INDEX itself is None.  Note that "green" maps to BRIGHT_GREEN and
# "green_dark" maps to GREEN.
_DOCX_HIGHLIGHT_MAP = {
    keyword: (
        None if WD_COLOR_INDEX is None else getattr(WD_COLOR_INDEX, member)
    )
    for keyword, member in (
        ("yellow", "YELLOW"),
        ("green", "BRIGHT_GREEN"),
        ("turquoise", "TURQUOISE"),
        ("pink", "PINK"),
        ("blue", "BLUE"),
        ("red", "RED"),
        ("dark_blue", "DARK_BLUE"),
        ("teal", "TEAL"),
        ("green_dark", "GREEN"),
        ("violet", "VIOLET"),
        ("dark_red", "DARK_RED"),
        ("dark_yellow", "DARK_YELLOW"),
        ("gray_50", "GRAY_50"),
        ("gray_25", "GRAY_25"),
        ("black", "BLACK"),
    )
}
# Reverse lookup (enum member -> keyword); entries whose value is None are
# left out, so this is empty when WD_COLOR_INDEX is None.
_DOCX_HIGHLIGHT_NAMES = {
    member: keyword
    for keyword, member in _DOCX_HIGHLIGHT_MAP.items()
    if member is not None
}
1061
+
1062
+
1063
def _canonical_docx_locator(locator: str) -> tuple[str, tuple[str, ...]]:
    """Convert ``locator`` with ``to_v2_locator`` and parse the result.

    Returns the converted locator string together with the ``components`` of
    its parsed form.
    """
    v2_locator = to_v2_locator(locator, file_type="docx")
    return v2_locator, parse_locator(v2_locator).components
1067
+
1068
+
1069
def _normalize_clear_fields(
    clear_fields: list[str] | tuple[str, ...],
    allowed: frozenset[str],
) -> tuple[str, ...]:
    """Validate ``clear_fields`` and drop duplicates, keeping first-seen order.

    Raises:
        InvalidArgumentsError: on the first name that is not in ``allowed``.
    """
    # A dict keeps insertion order, so it doubles as an ordered set.
    unique: dict[str, None] = {}
    for name in clear_fields:
        if name not in allowed:
            raise InvalidArgumentsError(
                f"Unknown style field in clear_fields: {name}"
            )
        unique.setdefault(name, None)
    return tuple(unique)
1084
+
1085
+
1086
def _apply_docx_inline_style(
    run: Run, style: InlineStyle, clear_fields: tuple[str, ...]
) -> None:
    """Apply inline (character) styling to ``run``.

    For every supported field: if its name is in ``clear_fields`` the run
    attribute is reset to ``None``; otherwise it is written only when
    ``style`` carries a non-``None`` value.  Font size goes through ``Pt``,
    font colour through ``_normalize_hex_color`` and
    ``RGBColor.from_string``, and highlight through ``_docx_highlight_value``.

    Raises:
        RuntimeError: if a font size must be set while ``Pt`` is ``None``.
    """
    to_clear = frozenset(clear_fields)

    # Fields copied verbatim: (style field, lives on run.font?, attribute).
    passthrough = (
        ("bold", False, "bold"),
        ("italic", False, "italic"),
        ("underline", False, "underline"),
        ("strike", True, "strike"),
        ("font_name", True, "name"),
    )
    for field_name, on_font, attribute in passthrough:
        if field_name in to_clear:
            new_value = None
        else:
            new_value = getattr(style, field_name)
            if new_value is None:
                continue
        # run.font is only resolved when a font attribute is actually written.
        setattr(run.font if on_font else run, attribute, new_value)

    if "font_size" in to_clear:
        run.font.size = None
    elif style.font_size is not None:
        if Pt is None:
            raise RuntimeError("python-docx is required for DOCX operations.")
        run.font.size = Pt(style.font_size)

    if "font_color" in to_clear:
        run.font.color.rgb = None
    elif style.font_color is not None:
        hex_value = _normalize_hex_color(style.font_color)
        run.font.color.rgb = RGBColor.from_string(hex_value)

    if "highlight" in to_clear:
        run.font.highlight_color = None
    elif style.highlight is not None:
        run.font.highlight_color = _docx_highlight_value(style.highlight)
1133
+
1134
+
1135
def _apply_docx_block_style(
    paragraph,
    style: BlockStyle,
    clear_fields: tuple[str, ...],
) -> list[str]:
    """Apply block (paragraph) styling and report the fields left untouched.

    For alignment, indents, spacing and line spacing: a field named in
    ``clear_fields`` is reset to ``None``; otherwise it is written only when
    ``style`` carries a non-``None`` value.  Indents and spacing are converted
    with ``Pt``; alignment goes through ``_docx_alignment_value``.

    Returns:
        The names among ``indent_level``, ``wrap_text``,
        ``vertical_alignment``, ``fill_color`` and ``number_format`` that were
        requested (set on ``style`` or listed in ``clear_fields``) but are not
        applied by this function.

    Raises:
        RuntimeError: if a length-valued field must be set while ``Pt`` is
            ``None`` (same guard as ``_apply_docx_inline_style``).
    """
    paragraph_format = paragraph.paragraph_format
    clear_set = set(clear_fields)

    if "alignment" in clear_set:
        paragraph.alignment = None
    elif style.alignment is not None:
        paragraph.alignment = _docx_alignment_value(style.alignment)

    # Length-valued settings: (style field, paragraph_format attribute).
    length_fields = (
        ("left_indent", "left_indent"),
        ("right_indent", "right_indent"),
        ("spacing_before", "space_before"),
        ("spacing_after", "space_after"),
    )
    for field_name, attribute in length_fields:
        if field_name in clear_set:
            setattr(paragraph_format, attribute, None)
            continue
        points = getattr(style, field_name)
        if points is None:
            continue
        # Fail with the same explicit error the inline path uses instead of
        # an opaque "'NoneType' object is not callable".
        if Pt is None:
            raise RuntimeError("python-docx is required for DOCX operations.")
        setattr(paragraph_format, attribute, Pt(points))

    if "line_spacing" in clear_set:
        paragraph_format.line_spacing = None
    elif style.line_spacing is not None:
        paragraph_format.line_spacing = style.line_spacing

    # These fields are not applied here; report them when requested.
    unsupported = (
        "indent_level",
        "wrap_text",
        "vertical_alignment",
        "fill_color",
        "number_format",
    )
    return [
        field_name
        for field_name in unsupported
        if getattr(style, field_name) is not None or field_name in clear_set
    ]
1185
+
1186
+
1187
def _docx_alignment_value(raw: str):
    """Look up the alignment map entry for ``raw`` (trimmed, case-insensitive).

    Raises:
        InvalidArgumentsError: if the keyword is not in ``_DOCX_ALIGNMENT_MAP``.
    """
    keyword = raw.strip().lower()
    if keyword in _DOCX_ALIGNMENT_MAP:
        return _DOCX_ALIGNMENT_MAP[keyword]
    raise InvalidArgumentsError(f"Unsupported DOCX alignment: {raw}")
1192
+
1193
+
1194
def _docx_highlight_value(raw: str):
    """Look up the highlight map entry for ``raw`` (trimmed, case-insensitive).

    Raises:
        InvalidArgumentsError: if the keyword is not in ``_DOCX_HIGHLIGHT_MAP``.
    """
    keyword = raw.strip().lower()
    if keyword in _DOCX_HIGHLIGHT_MAP:
        return _DOCX_HIGHLIGHT_MAP[keyword]
    raise InvalidArgumentsError(f"Unsupported DOCX highlight color: {raw}")
1199
+
1200
+
1201
def _docx_highlight_name(value) -> str | None:
    """Return the keyword registered for a highlight value, else ``None``."""
    if value in _DOCX_HIGHLIGHT_NAMES:
        return _DOCX_HIGHLIGHT_NAMES[value]
    return None
1203
+
1204
+
1205
def _docx_structural_style_name(role: str, level: int | None) -> str:
    """Map a structural role (and heading level) to a DOCX style name.

    ``role`` is trimmed and lower-cased before matching.  ``heading`` needs a
    ``level`` from 1 to 9 and yields ``"Heading <level>"``; the other roles map
    to fixed style names and ignore ``level``.

    Raises:
        InvalidArgumentsError: for a heading without a level in 1..9, or for
            a role that is not recognised.
    """
    role_key = role.strip().lower()

    if role_key != "heading":
        fixed_styles = {
            "title": "Title",
            "body": "Normal",
            "table_header": "Table Heading",
            "caption": "Caption",
        }
        if role_key in fixed_styles:
            return fixed_styles[role_key]
        raise InvalidArgumentsError(f"Unsupported structural role: {role}")

    if level is None or level < 1 or level > 9:
        raise InvalidArgumentsError(
            "Heading structural role requires level between 1 and 9."
        )
    return f"Heading {level}"
1222
+
1223
+
1224
def _normalize_hex_color(value: str) -> str:
    """Return ``value`` as six upper-case hex digits.

    Surrounding whitespace and any leading ``#`` characters are stripped
    before validation.

    Raises:
        InvalidArgumentsError: if the remainder is not exactly six hex digits.
    """
    candidate = value.strip().lstrip("#").upper()
    well_formed = len(candidate) == 6 and all(
        digit in "0123456789ABCDEF" for digit in candidate
    )
    if well_formed:
        return candidate
    raise InvalidArgumentsError(f"Invalid RGB hex color: {value}")
1231
+
1232
+
1233
def _parse_int_component(raw: str, locator: str) -> int:
    """Convert one locator component to ``int``.

    Raises:
        InvalidArgumentsError: if ``int(raw)`` raises ``ValueError``; the
            message names the full ``locator`` and the original error is
            chained as the cause.
    """
    try:
        number = int(raw)
    except ValueError as error:
        raise InvalidArgumentsError(f"Invalid DOCX locator: {locator}") from error
    return number