offagent 0.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. offagent/__init__.py +3 -0
  2. offagent/__main__.py +5 -0
  3. offagent/adapters/__init__.py +1 -0
  4. offagent/adapters/docx_adapter.py +1237 -0
  5. offagent/adapters/embedding_provider.py +132 -0
  6. offagent/adapters/pptx_adapter.py +940 -0
  7. offagent/adapters/xlsx_adapter.py +1266 -0
  8. offagent/app/__init__.py +1 -0
  9. offagent/app/progress.py +52 -0
  10. offagent/app/services.py +4267 -0
  11. offagent/config.py +287 -0
  12. offagent/domain/__init__.py +1 -0
  13. offagent/domain/locators.py +444 -0
  14. offagent/domain/models.py +477 -0
  15. offagent/domain/text_fragments.py +136 -0
  16. offagent/errors.py +29 -0
  17. offagent/indexing/__init__.py +1 -0
  18. offagent/indexing/store.py +795 -0
  19. offagent/interfaces/__init__.py +1 -0
  20. offagent/interfaces/cli.py +438 -0
  21. offagent/interfaces/cli_output.py +139 -0
  22. offagent/interfaces/cli_progress.py +120 -0
  23. offagent/interfaces/mcp.py +1145 -0
  24. offagent/interfaces/mcp_converters.py +80 -0
  25. offagent/interfaces/mcp_models.py +923 -0
  26. offagent/objects/__init__.py +3 -0
  27. offagent/objects/base.py +26 -0
  28. offagent/objects/docx_objects.py +951 -0
  29. offagent/objects/pptx_objects.py +895 -0
  30. offagent/objects/xlsx_objects.py +962 -0
  31. offagent/path_policy.py +42 -0
  32. offagent/storage/__init__.py +1 -0
  33. offagent/storage/versioning.py +31 -0
  34. offagent-0.10.0.dist-info/METADATA +546 -0
  35. offagent-0.10.0.dist-info/RECORD +39 -0
  36. offagent-0.10.0.dist-info/WHEEL +5 -0
  37. offagent-0.10.0.dist-info/entry_points.txt +2 -0
  38. offagent-0.10.0.dist-info/licenses/LICENSE +21 -0
  39. offagent-0.10.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,951 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+ from pathlib import Path
5
+ from typing import Any
6
+
7
+ from offagent.adapters import docx_adapter
8
+ from offagent.domain.locators import parse_locator, to_v2_locator
9
+ from offagent.domain.models import Capability, ChildSummary, ObjectPayload
10
+ from offagent.errors import InvalidArgumentsError, TargetNotFoundError
11
+
12
+ try:
13
+ from docx.enum.text import WD_BREAK
14
+ from docx.oxml.ns import qn
15
+ from docx.text.paragraph import Paragraph as DocxParagraph
16
+ except ModuleNotFoundError: # pragma: no cover - exercised through dependency checks
17
+ WD_BREAK = None
18
+ qn = None
19
+ DocxParagraph = None
20
+
21
+
22
+ @dataclass(frozen=True)
23
+ class _DocxTarget:
24
+ canonical_locator: str
25
+ object_type: str
26
+ paragraph_index: int | None = None
27
+ run_index: int | None = None
28
+ table_index: int | None = None
29
+ row_index: int | None = None
30
+ column_index: int | None = None
31
+ section_index: int | None = None
32
+ image_index: int | None = None
33
+ page_break_index: int | None = None
34
+
35
+
36
class DocxObjectResolver:
    """Resolve DOCX locators into payloads, child listings and capabilities."""

    def get_object(self, document_path: Path, locator: str) -> ObjectPayload:
        """Return the payload describing the object addressed by ``locator``.

        The locator is canonicalised with ``to_v2_locator`` and the document
        is opened through ``docx_adapter._open_document`` before parsing.

        Raises:
            InvalidArgumentsError: if the locator's object type has no
                payload builder (or the locator cannot be parsed).
        """
        canonical = to_v2_locator(locator, file_type="docx")
        document = docx_adapter._open_document(document_path)
        target = _parse_docx_target(canonical)

        # The document payload is the only builder that takes no target.
        if target.object_type == "document":
            return _build_document_payload(document_path, document)

        builders = {
            "section": _build_section_payload,
            "paragraph": _build_paragraph_payload,
            "run": _build_run_payload,
            "table": _build_table_payload,
            "table_row": _build_table_row_payload,
            "table_cell": _build_table_cell_payload,
            "image": _build_image_payload,
            "page_break": _build_page_break_payload,
        }
        builder = builders.get(target.object_type)
        if builder is None:
            raise InvalidArgumentsError(f"Unsupported DOCX object locator: {locator}")
        return builder(document_path, document, target)

    def list_children(
        self,
        document_path: Path,
        locator: str,
        *,
        child_type: str | None = None,
        limit: int | None = None,
    ) -> list[ChildSummary]:
        """List the direct children of the object addressed by ``locator``.

        Args:
            child_type: optional filter forwarded to the per-type listers.
            limit: maximum number of children to return; ``None`` means all.

        Object types without a child lister yield an empty list.

        Raises:
            InvalidArgumentsError: if ``limit`` is negative. A negative value
                would otherwise be treated as a slice bound and silently
                drop children from the end of the listing.
        """
        if limit is not None and limit < 0:
            raise InvalidArgumentsError("limit must be a non-negative integer.")

        canonical = to_v2_locator(locator, file_type="docx")
        document = docx_adapter._open_document(document_path)
        target = _parse_docx_target(canonical)

        listers = {
            "section": _section_children,
            "paragraph": _paragraph_children,
            "table": _table_children,
            "table_row": _table_row_children,
        }
        if target.object_type == "document":
            children = _document_children(document, child_type=child_type)
        elif target.object_type in listers:
            children = listers[target.object_type](
                document, target, child_type=child_type
            )
        else:
            children = ()

        if limit is not None:
            return list(children[:limit])
        return list(children)

    def resolve_capabilities(
        self, document_path: Path, locator: str
    ) -> frozenset[Capability]:
        """Return the capability set for the locator's object type.

        The document itself is never opened; capabilities depend only on the
        parsed object type.
        """
        del document_path
        canonical = to_v2_locator(locator, file_type="docx")
        target = _parse_docx_target(canonical)
        return _capabilities_for(target.object_type)
98
+
99
+
100
def set_paragraph_style(
    document_path: Path,
    locator: str,
    style_name: str,
    *,
    output_path: Path,
) -> tuple[str, str, dict[str, Any]]:
    """Apply a named style to one paragraph and save to ``output_path``.

    Returns a ``(canonical_locator, message, details)`` tuple.

    Raises:
        InvalidArgumentsError: if the locator is not a paragraph locator or
            the style name is not present in the document's styles.
    """
    canonical_locator = to_v2_locator(locator, file_type="docx")
    doc = docx_adapter._open_document(document_path)
    parsed = _parse_docx_target(canonical_locator)
    if parsed.object_type != "paragraph":
        raise InvalidArgumentsError(
            "docx_set_paragraph_style requires a paragraph locator."
        )

    # Validate the style first so an unknown name never reaches python-docx.
    _require_style_name(doc, style_name)
    paragraph = _resolve_paragraph_target(doc, parsed)["paragraph"]
    paragraph.style = style_name
    doc.save(output_path)

    message = f"Applied paragraph style {style_name!r} to {canonical_locator}."
    details = {"style_name": style_name}
    return canonical_locator, message, details
124
+
125
+
126
def insert_page_break(
    document_path: Path,
    locator: str,
    *,
    output_path: Path,
) -> tuple[str, str, dict[str, Any]]:
    """Insert a page-break paragraph after a paragraph and save the result.

    Returns ``(page_break_locator, message, details)`` where ``details``
    carries the locator of the paragraph that now holds the break.

    Raises:
        RuntimeError: if python-docx could not be imported, or the inserted
            break cannot be found again afterwards.
        InvalidArgumentsError: if the locator is not a paragraph locator.
    """
    docx_missing = DocxParagraph is None or WD_BREAK is None
    if docx_missing:
        raise RuntimeError("python-docx is required for DOCX operations.")

    canonical_locator = to_v2_locator(locator, file_type="docx")
    doc = docx_adapter._open_document(document_path)
    parsed = _parse_docx_target(canonical_locator)
    if parsed.object_type != "paragraph":
        raise InvalidArgumentsError(
            "docx_insert_page_break requires a paragraph locator."
        )

    anchor = _resolve_paragraph_target(doc, parsed)
    # Create the paragraph element via the body, then reposition it so it
    # directly follows the anchor paragraph.
    break_element = doc.element.body.add_p()
    anchor["paragraph"]._element.addnext(break_element)
    break_paragraph = DocxParagraph(break_element, doc)
    break_paragraph.add_run().add_break(WD_BREAK.PAGE)

    # The new paragraph sits immediately after the anchor, hence index + 1.
    inserted = _page_break_for_paragraph(doc, anchor["paragraph_index"] + 1)
    doc.save(output_path)

    new_locator = f"docx:page_break:{inserted['page_break_index']}"
    message = f"Inserted page break after {canonical_locator}."
    details = {"paragraph_locator": f"docx:para:{inserted['paragraph_index']}"}
    return new_locator, message, details
156
+
157
+
158
def add_table(
    document_path: Path,
    row_count: int,
    column_count: int,
    *,
    position: object | None = None,
    column_widths: list[int] | None = None,
    style_name: str | None = None,
    output_path: Path,
) -> tuple[str, str, dict[str, Any]]:
    """Add a table to the document and save the result to ``output_path``.

    Args:
        position: optional "after" anchor (see ``_resolve_insert_anchor``);
            when ``None`` the table stays where ``document.add_table`` put it.
        column_widths: optional per-column widths; must have exactly
            ``column_count`` entries.
        style_name: optional table style; must exist in the document.

    Returns ``(table_locator, message, details)``.

    Raises:
        InvalidArgumentsError: for non-positive counts, an unknown style, a
            width list of the wrong length, or an unusable position.
    """
    if row_count < 1 or column_count < 1:
        raise InvalidArgumentsError(
            "docx_add_table requires positive row and column counts."
        )

    doc = docx_adapter._open_document(document_path)
    new_table = doc.add_table(rows=row_count, cols=column_count)

    if style_name is not None:
        _require_style_name(doc, style_name)
        new_table.style = style_name

    if column_widths is not None:
        if len(column_widths) != column_count:
            raise InvalidArgumentsError(
                "column_widths must match the DOCX table column count."
            )
        # Widths are applied cell by cell on every row.
        for table_row in new_table.rows:
            for width, cell in zip(column_widths, table_row.cells, strict=True):
                cell.width = int(width)

    insert_after = _resolve_insert_anchor(doc, position)
    if insert_after is not None:
        insert_after.addnext(new_table._element)

    # Resolve the index only after any repositioning has happened.
    locator = f"docx:table:{_table_index_for_element(doc, new_table._element)}"
    doc.save(output_path)

    message = (
        f"Inserted DOCX table {locator} with {row_count} rows and {column_count} columns."
    )
    details = {
        "row_count": row_count,
        "column_count": column_count,
        "style_name": style_name,
        "position": None if position is None else str(position),
    }
    return locator, message, details
206
+
207
+
208
def merge_table_cells(
    document_path: Path,
    start_locator: str,
    end_locator: str,
    *,
    output_path: Path,
) -> tuple[str, str, dict[str, Any]]:
    """Merge the rectangle spanned by two cells of the same table and save.

    The two locators may be given in any order; the merged range runs from
    the smaller to the larger row and column index.

    Returns ``(locator_of_top_left_cell, message, details)``.

    Raises:
        InvalidArgumentsError: if either locator is not a table-cell locator
            or the two cells belong to different tables.
        TargetNotFoundError: if a referenced table, row or cell is missing.
    """
    canonical_start = to_v2_locator(start_locator, file_type="docx")
    canonical_end = to_v2_locator(end_locator, file_type="docx")
    start_target = _parse_docx_target(canonical_start)
    end_target = _parse_docx_target(canonical_end)
    if (
        start_target.object_type != "table_cell"
        or end_target.object_type != "table_cell"
    ):
        raise InvalidArgumentsError(
            "docx_merge_table_cells requires DOCX table-cell locators."
        )
    if start_target.table_index != end_target.table_index:
        raise InvalidArgumentsError(
            "docx_merge_table_cells requires both cells to be in the same table."
        )

    document = docx_adapter._open_document(document_path)
    start = _resolve_table_cell_target(document, start_target)
    end = _resolve_table_cell_target(document, end_target)
    # sorted() guarantees min <= max, so no further range check is needed.
    min_row, max_row = sorted((start["row_index"], end["row_index"]))
    min_col, max_col = sorted((start["column_index"], end["column_index"]))

    table = start["table"]
    table.cell(min_row, min_col).merge(table.cell(max_row, max_col))
    document.save(output_path)
    locator = f"docx:table:{start['table_index']}:row:{min_row}:cell:{min_col}"
    return (
        locator,
        f"Merged DOCX table cells from {canonical_start} to {canonical_end}.",
        {"start_locator": canonical_start, "end_locator": canonical_end},
    )
249
+
250
+
251
def _build_document_payload(document_path: Path, document) -> ObjectPayload:
    """Build the payload for the whole document (``docx:document``).

    Properties report section, paragraph, table, inline-image and page-break
    counts; the preview is the first 120 characters of the first paragraph
    whose text is not blank, or ``""`` when there is none.
    """
    # Materialise the blocks once: they are traversed several times below,
    # which would silently yield nothing on later passes if _iter_blocks
    # returns a one-shot iterator.
    blocks = list(docx_adapter._iter_blocks(document))
    paragraphs = [block for block_type, block in blocks if block_type == "paragraph"]
    table_count = sum(1 for block_type, _ in blocks if block_type == "table")
    preview = next(
        (paragraph.text[:120] for paragraph in paragraphs if paragraph.text.strip()),
        "",
    )
    return ObjectPayload(
        document=docx_adapter._document_ref(document_path),
        locator="docx:document",
        object_type="document",
        preview=preview,
        properties={
            "section_count": len(document.sections),
            "paragraph_count": len(paragraphs),
            "table_count": table_count,
            "image_count": len(document.inline_shapes),
            "page_break_count": len(_page_breaks(document)),
        },
        capabilities=_capability_tuple("document"),
        child_summary=_document_children(document),
    )
278
+
279
+
280
def _build_section_payload(
    document_path: Path, document, target: _DocxTarget
) -> ObjectPayload:
    """Build the payload for one document section.

    Page size and margins are reported as integers, or ``None`` when the
    section does not define the value (python-docx returns ``None`` for
    absent page-size/margin attributes, which ``int()`` cannot convert).

    Raises:
        TargetNotFoundError: if the section index is out of range.
    """
    assert target.section_index is not None
    try:
        section = document.sections[target.section_index]
    except IndexError as exc:
        raise TargetNotFoundError(
            f"Section {target.section_index} does not exist in the document."
        ) from exc

    def _length(value) -> int | None:
        # Keep "not specified" distinguishable instead of crashing on None.
        return None if value is None else int(value)

    return ObjectPayload(
        document=docx_adapter._document_ref(document_path),
        locator=target.canonical_locator,
        object_type="section",
        preview=f"Section {target.section_index}",
        properties={
            "section_index": target.section_index,
            "start_type": str(section.start_type),
            "page_width": _length(section.page_width),
            "page_height": _length(section.page_height),
            "left_margin": _length(section.left_margin),
            "right_margin": _length(section.right_margin),
        },
        capabilities=_capability_tuple("section"),
        parent_locator="docx:document",
        child_summary=_section_children(document, target),
    )
308
+
309
+
310
def _build_paragraph_payload(
    document_path: Path, document, target: _DocxTarget
) -> ObjectPayload:
    """Build the payload for one paragraph, including per-run formatting.

    Raises:
        TargetNotFoundError: if the paragraph does not exist.
    """
    found = _resolve_paragraph_target(document, target)
    paragraph = found["paragraph"]
    block_index = found["block_index"]
    model = docx_adapter._paragraph_model(
        paragraph, block_index, found["paragraph_index"]
    )

    # One plain dict per run, in document order.
    run_entries = []
    for docx_run in paragraph.runs:
        run_model = docx_adapter._run_model(docx_run)
        run_entries.append(
            {
                "text": run_model.text,
                "bold": run_model.bold,
                "italic": run_model.italic,
                "underline": run_model.underline,
                "strike": run_model.strike,
                "font_name": run_model.font_name,
                "font_size": run_model.font_size,
                "color_rgb": run_model.color_rgb,
            }
        )

    properties = {
        "block_index": block_index,
        "paragraph_index": model.paragraph_index,
        "text": model.text,
        "style_name": model.style_name,
        "is_heading": model.is_heading,
        "runs": run_entries,
    }
    return ObjectPayload(
        document=docx_adapter._document_ref(document_path),
        locator=target.canonical_locator,
        object_type="paragraph",
        preview=model.preview,
        properties=properties,
        capabilities=_capability_tuple("paragraph"),
        parent_locator="docx:document",
        child_summary=_paragraph_children(document, target),
    )
349
+
350
+
351
def _build_run_payload(
    document_path: Path, document, target: _DocxTarget
) -> ObjectPayload:
    """Build the payload for a single run; its parent is the paragraph.

    Raises:
        TargetNotFoundError: if the paragraph or the run does not exist.
    """
    found = _resolve_run_target(document, target)
    model = docx_adapter._run_model(found["run"])
    paragraph_index = found["paragraph_index"]

    properties = {
        "paragraph_index": paragraph_index,
        "run_index": found["run_index"],
        "text": model.text,
        "bold": model.bold,
        "italic": model.italic,
        "underline": model.underline,
        "strike": model.strike,
        "font_name": model.font_name,
        "font_size": model.font_size,
        "color_rgb": model.color_rgb,
    }
    return ObjectPayload(
        document=docx_adapter._document_ref(document_path),
        locator=target.canonical_locator,
        object_type="run",
        preview=model.text[:120],
        properties=properties,
        capabilities=_capability_tuple("run"),
        parent_locator=f"docx:para:{paragraph_index}",
    )
377
+
378
+
379
def _build_table_payload(
    document_path: Path, document, target: _DocxTarget
) -> ObjectPayload:
    """Build the payload for a table, including its rows as nested lists.

    ``column_count`` is the length of the longest row (0 for no rows).

    Raises:
        TargetNotFoundError: if the table does not exist.
    """
    found = _resolve_table_target(document, target)
    block_index = found["block_index"]
    model = docx_adapter._table_model(
        found["table"], block_index, found["table_index"]
    )

    row_lengths = (len(row) for row in model.rows)
    properties = {
        "block_index": block_index,
        "table_index": model.table_index,
        "row_count": len(model.rows),
        "column_count": max(row_lengths, default=0),
        "rows": [list(row) for row in model.rows],
    }
    return ObjectPayload(
        document=docx_adapter._document_ref(document_path),
        locator=target.canonical_locator,
        object_type="table",
        preview=model.preview,
        properties=properties,
        capabilities=_capability_tuple("table"),
        parent_locator="docx:document",
        child_summary=_table_children(document, target),
    )
404
+
405
+
406
def _build_table_row_payload(
    document_path: Path, document, target: _DocxTarget
) -> ObjectPayload:
    """Build the payload for one table row; the preview joins cell texts.

    Raises:
        TargetNotFoundError: if the table or the row does not exist.
    """
    found = _resolve_table_row_target(document, target)
    table_index = found["table_index"]
    cell_texts = [cell.text for cell in found["row"].cells]

    return ObjectPayload(
        document=docx_adapter._document_ref(document_path),
        locator=target.canonical_locator,
        object_type="table_row",
        preview=" | ".join(cell_texts)[:120],
        properties={
            "table_index": table_index,
            "row_index": found["row_index"],
            "cell_count": len(cell_texts),
            "cells": cell_texts,
        },
        capabilities=_capability_tuple("table_row"),
        parent_locator=f"docx:table:{table_index}",
        child_summary=_table_row_children(document, target),
    )
425
+
426
+
427
def _build_table_cell_payload(
    document_path: Path, document, target: _DocxTarget
) -> ObjectPayload:
    """Build the payload for one table cell; its parent is the row.

    Raises:
        TargetNotFoundError: if the table, row or cell does not exist.
    """
    found = _resolve_table_cell_target(document, target)
    table_cell = found["cell"]
    table_index = found["table_index"]
    row_index = found["row_index"]

    properties = {
        "table_index": table_index,
        "row_index": row_index,
        "column_index": found["column_index"],
        "text": table_cell.text,
        "paragraph_count": len(table_cell.paragraphs),
    }
    return ObjectPayload(
        document=docx_adapter._document_ref(document_path),
        locator=target.canonical_locator,
        object_type="table_cell",
        preview=table_cell.text[:120],
        properties=properties,
        capabilities=_capability_tuple("table_cell"),
        parent_locator=f"docx:table:{table_index}:row:{row_index}",
    )
447
+
448
+
449
def _build_image_payload(
    document_path: Path, document, target: _DocxTarget
) -> ObjectPayload:
    """Build the payload for one inline shape (image) of the document.

    Raises:
        TargetNotFoundError: if the image index is out of range.
    """
    image_index = target.image_index
    assert image_index is not None
    try:
        inline_shape = document.inline_shapes[image_index]
    except IndexError as exc:
        raise TargetNotFoundError(
            f"Image {target.image_index} does not exist in the document."
        ) from exc

    properties = {
        "image_index": image_index,
        "width": int(inline_shape.width),
        "height": int(inline_shape.height),
        "shape_type": str(inline_shape.type),
    }
    return ObjectPayload(
        document=docx_adapter._document_ref(document_path),
        locator=target.canonical_locator,
        object_type="image",
        preview=f"Image {image_index}",
        properties=properties,
        capabilities=_capability_tuple("image"),
        parent_locator="docx:document",
    )
474
+
475
+
476
def _build_page_break_payload(
    document_path: Path, document, target: _DocxTarget
) -> ObjectPayload:
    """Build the payload for one page break; its parent is the paragraph.

    Raises:
        TargetNotFoundError: if the page-break index is out of range.
    """
    break_index = target.page_break_index
    assert break_index is not None
    located = _resolve_page_break(document, break_index)
    owning_paragraph = located["paragraph_index"]

    return ObjectPayload(
        document=docx_adapter._document_ref(document_path),
        locator=target.canonical_locator,
        object_type="page_break",
        preview="Page break",
        properties={
            "page_break_index": break_index,
            "paragraph_index": owning_paragraph,
            "run_index": located["run_index"],
        },
        capabilities=_capability_tuple("page_break"),
        parent_locator=f"docx:para:{owning_paragraph}",
    )
494
+
495
+
496
def _document_children(
    document, *, child_type: str | None = None
) -> tuple[ChildSummary, ...]:
    """Summarise the top-level children of the document.

    Output order: sections, then paragraphs and tables in block order, then
    inline images, then page breaks. ``child_type`` (when not ``None``/``""``)
    restricts the result to that one kind; indices still count every block
    so locators stay stable under filtering.
    """
    wanted = _normalize_child_type(child_type)

    def include(kind: str) -> bool:
        return wanted in {None, kind}

    summaries: list[ChildSummary] = []

    if include("section"):
        summaries.extend(
            ChildSummary(
                locator=f"docx:section:{section_no}",
                object_type="section",
                preview=f"Section {section_no}",
                capabilities=_capability_tuple("section"),
            )
            for section_no, _ in enumerate(document.sections)
        )

    paragraph_no = 0
    table_no = 0
    for block_no, (kind, block) in enumerate(docx_adapter._iter_blocks(document)):
        if kind == "paragraph":
            if include("paragraph"):
                model = docx_adapter._paragraph_model(block, block_no, paragraph_no)
                summaries.append(
                    ChildSummary(
                        locator=f"docx:para:{paragraph_no}",
                        object_type="paragraph",
                        preview=model.preview,
                        capabilities=_capability_tuple("paragraph"),
                    )
                )
            paragraph_no += 1
        else:
            # Every non-paragraph block is handled as a table.
            if include("table"):
                model = docx_adapter._table_model(block, block_no, table_no)
                summaries.append(
                    ChildSummary(
                        locator=f"docx:table:{table_no}",
                        object_type="table",
                        preview=model.preview,
                        capabilities=_capability_tuple("table"),
                    )
                )
            table_no += 1

    if include("image"):
        summaries.extend(
            ChildSummary(
                locator=f"docx:image:{image_no}",
                object_type="image",
                preview=f"Image {image_no}",
                capabilities=_capability_tuple("image"),
            )
            for image_no, _ in enumerate(document.inline_shapes)
        )

    if include("page_break"):
        summaries.extend(
            ChildSummary(
                locator=f"docx:page_break:{break_no}",
                object_type="page_break",
                preview="Page break",
                capabilities=_capability_tuple("page_break"),
            )
            for break_no, _ in enumerate(_page_breaks(document))
        )

    return tuple(summaries)
572
+
573
+
574
def _section_children(
    document, target: _DocxTarget, *, child_type: str | None = None
) -> tuple[ChildSummary, ...]:
    """List a section's children.

    Only the single-section case is supported: when the document has exactly
    one section and it is section 0, the section's children are the
    document's children. Every other case yields an empty tuple.
    """
    sole_section = len(document.sections) == 1 and target.section_index == 0
    if not sole_section:
        return ()
    return _document_children(document, child_type=child_type)
580
+
581
+
582
def _paragraph_children(
    document, target: _DocxTarget, *, child_type: str | None = None
) -> tuple[ChildSummary, ...]:
    """List a paragraph's runs followed by the page breaks it contains.

    Raises:
        TargetNotFoundError: if the paragraph does not exist.
    """
    found = _resolve_paragraph_target(document, target)
    paragraph = found["paragraph"]
    paragraph_index = found["paragraph_index"]
    wanted = _normalize_child_type(child_type)

    run_summaries: list[ChildSummary] = []
    if wanted in {None, "run"}:
        run_summaries = [
            ChildSummary(
                locator=f"docx:para:{paragraph_index}:run:{run_no}",
                object_type="run",
                preview=docx_run.text[:120],
                capabilities=_capability_tuple("run"),
            )
            for run_no, docx_run in enumerate(paragraph.runs)
        ]

    break_summaries: list[ChildSummary] = []
    if wanted in {None, "page_break"}:
        break_summaries = [
            ChildSummary(
                locator=f"docx:page_break:{entry['page_break_index']}",
                object_type="page_break",
                preview="Page break",
                capabilities=_capability_tuple("page_break"),
            )
            for entry in _page_breaks_in_paragraph(paragraph, paragraph_index)
        ]

    return tuple(run_summaries + break_summaries)
614
+
615
+
616
def _table_children(
    document, target: _DocxTarget, *, child_type: str | None = None
) -> tuple[ChildSummary, ...]:
    """List a table's rows; any filter other than ``table_row`` yields ``()``.

    The table is resolved before the filter is applied, so a missing table
    raises ``TargetNotFoundError`` regardless of ``child_type``.
    """
    found = _resolve_table_target(document, target)
    if _normalize_child_type(child_type) not in {None, "table_row"}:
        return ()

    table_index = found["table_index"]
    row_summaries: list[ChildSummary] = []
    for row_no, table_row in enumerate(found["table"].rows):
        joined = " | ".join(cell.text for cell in table_row.cells)
        row_summaries.append(
            ChildSummary(
                locator=f"docx:table:{table_index}:row:{row_no}",
                object_type="table_row",
                preview=joined[:120],
                capabilities=_capability_tuple("table_row"),
            )
        )
    return tuple(row_summaries)
632
+
633
+
634
def _table_row_children(
    document, target: _DocxTarget, *, child_type: str | None = None
) -> tuple[ChildSummary, ...]:
    """List a row's cells; any filter other than ``table_cell`` yields ``()``.

    The row is resolved before the filter is applied, so a missing table or
    row raises ``TargetNotFoundError`` regardless of ``child_type``.
    """
    found = _resolve_table_row_target(document, target)
    if _normalize_child_type(child_type) not in {None, "table_cell"}:
        return ()

    table_index = found["table_index"]
    row_index = found["row_index"]
    cell_summaries: list[ChildSummary] = []
    for column_no, table_cell in enumerate(found["row"].cells):
        cell_summaries.append(
            ChildSummary(
                locator=f"docx:table:{table_index}:row:{row_index}:cell:{column_no}",
                object_type="table_cell",
                preview=table_cell.text[:120],
                capabilities=_capability_tuple("table_cell"),
            )
        )
    return tuple(cell_summaries)
650
+
651
+
652
def _resolve_paragraph_target(document, target: _DocxTarget) -> dict[str, Any]:
    """Find the paragraph block whose paragraph ordinal matches the target.

    Returns a dict with ``block_index`` (position among all blocks),
    ``paragraph_index`` (position among paragraph blocks) and ``paragraph``.

    Raises:
        TargetNotFoundError: if no paragraph has the requested index.
    """
    wanted = target.paragraph_index
    assert wanted is not None

    paragraph_blocks = (
        (block_no, block)
        for block_no, (kind, block) in enumerate(docx_adapter._iter_blocks(document))
        if kind == "paragraph"
    )
    for ordinal, (block_no, paragraph) in enumerate(paragraph_blocks):
        if ordinal == wanted:
            return {
                "block_index": block_no,
                "paragraph_index": ordinal,
                "paragraph": paragraph,
            }
    raise TargetNotFoundError(
        f"Paragraph {target.paragraph_index} does not exist in the document."
    )
670
+
671
+
672
def _resolve_run_target(document, target: _DocxTarget) -> dict[str, Any]:
    """Resolve a run: the paragraph resolution plus ``run_index`` and ``run``.

    Raises:
        TargetNotFoundError: if the paragraph or the run does not exist.
    """
    paragraph_info = _resolve_paragraph_target(document, target)
    assert target.run_index is not None
    runs = paragraph_info["paragraph"].runs
    try:
        selected_run = runs[target.run_index]
    except IndexError as exc:
        raise TargetNotFoundError(
            f"Run {target.run_index} does not exist in paragraph {paragraph_info['paragraph_index']}."
        ) from exc
    return dict(paragraph_info, run_index=target.run_index, run=selected_run)
682
+
683
+
684
def _resolve_table_target(document, target: _DocxTarget) -> dict[str, Any]:
    """Find the table block whose table ordinal matches the target.

    Returns a dict with ``block_index`` (position among all blocks),
    ``table_index`` (position among table blocks) and ``table``.

    Raises:
        TargetNotFoundError: if no table has the requested index.
    """
    wanted = target.table_index
    assert wanted is not None

    table_blocks = (
        (block_no, block)
        for block_no, (kind, block) in enumerate(docx_adapter._iter_blocks(document))
        if kind == "table"
    )
    for ordinal, (block_no, table) in enumerate(table_blocks):
        if ordinal == wanted:
            return {
                "block_index": block_no,
                "table_index": ordinal,
                "table": table,
            }
    raise TargetNotFoundError(
        f"Table {target.table_index} does not exist in the document."
    )
702
+
703
+
704
def _resolve_table_row_target(document, target: _DocxTarget) -> dict[str, Any]:
    """Resolve a row: the table resolution plus ``row_index`` and ``row``.

    Raises:
        TargetNotFoundError: if the table or the row does not exist.
    """
    table_info = _resolve_table_target(document, target)
    assert target.row_index is not None
    rows = table_info["table"].rows
    try:
        selected_row = rows[target.row_index]
    except IndexError as exc:
        raise TargetNotFoundError(
            f"Row {target.row_index} does not exist in table {table_info['table_index']}."
        ) from exc
    return dict(table_info, row_index=target.row_index, row=selected_row)
714
+
715
+
716
def _resolve_table_cell_target(document, target: _DocxTarget) -> dict[str, Any]:
    """Resolve a cell: the row resolution plus ``column_index`` and ``cell``.

    Raises:
        TargetNotFoundError: if the table, row or cell does not exist.
    """
    row_info = _resolve_table_row_target(document, target)
    assert target.column_index is not None
    cells = row_info["row"].cells
    try:
        selected_cell = cells[target.column_index]
    except IndexError as exc:
        raise TargetNotFoundError(
            f"Cell {target.column_index} does not exist in row {row_info['row_index']}."
        ) from exc
    return dict(row_info, column_index=target.column_index, cell=selected_cell)
726
+
727
+
728
def _resolve_page_break(document, page_break_index: int) -> dict[str, int]:
    """Return the page-break record at ``page_break_index``.

    Raises:
        TargetNotFoundError: if the index is out of range.
    """
    try:
        all_breaks = _page_breaks(document)
        entry = all_breaks[page_break_index]
    except IndexError as exc:
        raise TargetNotFoundError(
            f"Page break {page_break_index} does not exist in the document."
        ) from exc
    return entry
735
+
736
+
737
def _page_breaks(document) -> list[dict[str, int]]:
    """Collect every page break in the document's paragraph blocks.

    Records are numbered consecutively across paragraphs: each paragraph's
    breaks continue from the number already collected.
    """
    collected: list[dict[str, int]] = []
    paragraphs = (
        block
        for kind, block in docx_adapter._iter_blocks(document)
        if kind == "paragraph"
    )
    for paragraph_no, paragraph in enumerate(paragraphs):
        collected += _page_breaks_in_paragraph(
            paragraph, paragraph_no, base_index=len(collected)
        )
    return collected
750
+
751
+
752
def _page_breaks_in_paragraph(
    paragraph, paragraph_index: int, *, base_index: int = 0
) -> list[dict[str, int]]:
    """Return one record per page break found in the paragraph's runs.

    Each record holds ``page_break_index`` (numbered from ``base_index``),
    ``paragraph_index`` and the ``run_index`` of the run containing the
    break. Returns an empty list when python-docx (``qn``) is unavailable.
    """
    if qn is None:
        return []

    # Match on the fully qualified tag instead of a "w:"-prefixed path: a
    # prefixed path only works when the element's own prefix map happens to
    # bind the prefix "w", and fails outright otherwise.
    break_tag = qn("w:br")
    type_attribute = qn("w:type")

    page_breaks: list[dict[str, int]] = []
    for run_index, run in enumerate(paragraph.runs):
        for br in run._element.iter(break_tag):
            # Line and column breaks share the element; only "page" counts.
            if br.get(type_attribute) != "page":
                continue
            page_breaks.append(
                {
                    "page_break_index": base_index + len(page_breaks),
                    "paragraph_index": paragraph_index,
                    "run_index": run_index,
                }
            )
    return page_breaks
771
+
772
+
773
def _page_break_for_paragraph(document, paragraph_index: int) -> dict[str, int]:
    """Return the first page-break record located in the given paragraph.

    Raises:
        RuntimeError: if that paragraph contains no page break.
    """
    matches = (
        entry
        for entry in _page_breaks(document)
        if entry["paragraph_index"] == paragraph_index
    )
    first_match = next(matches, None)
    if first_match is None:
        raise RuntimeError(
            f"Failed to resolve inserted page break for paragraph {paragraph_index}."
        )
    return first_match
780
+
781
+
782
def _parse_docx_target(locator: str) -> _DocxTarget:
    """Translate a canonical locator into a ``_DocxTarget``.

    Recognised shapes (by component count):
      2: ``docx:document``
      3: ``docx:{section|para|table|image|page_break}:N``
      5: ``docx:para:N:run:M`` and ``docx:table:N:row:M``
      7: ``docx:table:N:row:M:cell:K``

    Raises:
        InvalidArgumentsError: for any other shape or a non-integer index.
    """
    parts = parse_locator(locator).components
    if parts == ("docx", "document"):
        return _DocxTarget(locator, "document")

    size = len(parts)
    head = parts[:2]

    def index_at(position: int) -> int:
        return _require_index(parts[position], locator)

    if size == 3:
        if head == ("docx", "section"):
            return _DocxTarget(locator, "section", section_index=index_at(2))
        if head == ("docx", "para"):
            return _DocxTarget(locator, "paragraph", paragraph_index=index_at(2))
        if head == ("docx", "table"):
            return _DocxTarget(locator, "table", table_index=index_at(2))
        if head == ("docx", "image"):
            return _DocxTarget(locator, "image", image_index=index_at(2))
        if head == ("docx", "page_break"):
            return _DocxTarget(locator, "page_break", page_break_index=index_at(2))
    elif size == 5:
        if head == ("docx", "para") and parts[3] == "run":
            return _DocxTarget(
                locator,
                "run",
                paragraph_index=index_at(2),
                run_index=index_at(4),
            )
        if head == ("docx", "table") and parts[3] == "row":
            return _DocxTarget(
                locator,
                "table_row",
                table_index=index_at(2),
                row_index=index_at(4),
            )
    elif size == 7:
        if head == ("docx", "table") and parts[3] == "row" and parts[5] == "cell":
            return _DocxTarget(
                locator,
                "table_cell",
                table_index=index_at(2),
                row_index=index_at(4),
                column_index=index_at(6),
            )

    raise InvalidArgumentsError(f"Unsupported DOCX locator: {locator}")
845
+
846
+
847
+ def _capabilities_for(object_type: str) -> frozenset[Capability]:
848
+ if object_type == "document":
849
+ return frozenset({Capability.READ, Capability.ADD_CHILD})
850
+ if object_type == "section":
851
+ return frozenset({Capability.READ, Capability.ADD_CHILD})
852
+ if object_type == "paragraph":
853
+ return frozenset(
854
+ {
855
+ Capability.READ,
856
+ Capability.UPDATE,
857
+ Capability.DELETE,
858
+ Capability.MOVE,
859
+ Capability.COPY,
860
+ Capability.STYLE,
861
+ }
862
+ )
863
+ if object_type == "run":
864
+ return frozenset(
865
+ {Capability.READ, Capability.UPDATE, Capability.DELETE, Capability.STYLE}
866
+ )
867
+ if object_type == "table":
868
+ return frozenset(
869
+ {Capability.READ, Capability.DELETE, Capability.MOVE, Capability.COPY}
870
+ )
871
+ if object_type == "table_row":
872
+ return frozenset(
873
+ {
874
+ Capability.READ,
875
+ Capability.UPDATE,
876
+ Capability.DELETE,
877
+ Capability.ADD_CHILD,
878
+ Capability.MOVE,
879
+ Capability.COPY,
880
+ }
881
+ )
882
+ if object_type == "table_cell":
883
+ return frozenset({Capability.READ, Capability.UPDATE, Capability.STYLE})
884
+ if object_type in {"image", "page_break"}:
885
+ return frozenset(
886
+ {Capability.READ, Capability.DELETE, Capability.MOVE, Capability.COPY}
887
+ )
888
+ return frozenset({Capability.READ})
889
+
890
+
891
def _capability_tuple(object_type: str) -> tuple[Capability, ...]:
    """Return the object type's capabilities as a tuple ordered by ``value``."""
    ordered = list(_capabilities_for(object_type))
    ordered.sort(key=lambda item: item.value)
    return tuple(ordered)
895
+
896
+
897
+ def _normalize_child_type(child_type: str | None) -> str | None:
898
+ if child_type in {None, ""}:
899
+ return None
900
+ return child_type
901
+
902
+
903
def _require_style_name(document, style_name: str) -> None:
    """Ensure ``style_name`` matches the ``name`` of a style in the document.

    Styles without a ``name`` attribute are skipped.

    Raises:
        InvalidArgumentsError: if no style carries that name.
    """
    for candidate in document.styles:
        if getattr(candidate, "name", None) == style_name:
            return
    raise InvalidArgumentsError(f"Unknown DOCX style: {style_name}")
907
+
908
+
909
def _resolve_insert_anchor(document, position: object | None):
    """Return the XML element after which new content should be inserted.

    ``position`` may be ``None`` (no anchor; returns ``None``), a locator
    string, or a dict whose ``"after"`` or ``"after_locator"`` entry holds
    the locator (the first non-``None`` one wins, in that order).

    Raises:
        InvalidArgumentsError: if no locator can be extracted, or it does
            not address a paragraph or a table.
    """
    if position is None:
        return None

    after_locator: str | None = None
    if isinstance(position, str):
        after_locator = position
    elif isinstance(position, dict):
        candidates = (position.get(key) for key in ("after", "after_locator"))
        chosen = next((value for value in candidates if value is not None), None)
        if chosen is not None:
            after_locator = str(chosen)
    if after_locator is None:
        raise InvalidArgumentsError("DOCX insert position must be an after locator.")

    anchor_target = _parse_docx_target(to_v2_locator(after_locator, file_type="docx"))
    kind = anchor_target.object_type
    if kind == "paragraph":
        paragraph = _resolve_paragraph_target(document, anchor_target)["paragraph"]
        return paragraph._element
    if kind == "table":
        table = _resolve_table_target(document, anchor_target)["table"]
        return table._element
    raise InvalidArgumentsError(
        "DOCX insert position must reference a paragraph or table."
    )
934
+
935
+
936
def _table_index_for_element(document, table_element) -> int:
    """Return the table ordinal of the block wrapping ``table_element``.

    Raises:
        RuntimeError: if no table block wraps that element.
    """
    tables = (
        block
        for kind, block in docx_adapter._iter_blocks(document)
        if kind == "table"
    )
    for ordinal, table in enumerate(tables):
        if table._element == table_element:
            return ordinal
    raise RuntimeError("Failed to resolve inserted DOCX table index.")
945
+
946
+
947
def _require_index(raw: str, locator: str) -> int:
    """Parse a locator component as a non-negative integer index.

    Negative values are rejected: they would otherwise be used directly as
    sequence indices (sections, runs, rows, cells, images, page breaks) and
    silently address elements counted from the end.

    Raises:
        InvalidArgumentsError: if ``raw`` is not an integer or is negative.
    """
    try:
        index = int(raw)
    except ValueError as exc:
        raise InvalidArgumentsError(f"Invalid DOCX locator: {locator}") from exc
    if index < 0:
        raise InvalidArgumentsError(f"Invalid DOCX locator: {locator}")
    return index