offagent 0.10.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- offagent/__init__.py +3 -0
- offagent/__main__.py +5 -0
- offagent/adapters/__init__.py +1 -0
- offagent/adapters/docx_adapter.py +1237 -0
- offagent/adapters/embedding_provider.py +132 -0
- offagent/adapters/pptx_adapter.py +940 -0
- offagent/adapters/xlsx_adapter.py +1266 -0
- offagent/app/__init__.py +1 -0
- offagent/app/progress.py +52 -0
- offagent/app/services.py +4267 -0
- offagent/config.py +287 -0
- offagent/domain/__init__.py +1 -0
- offagent/domain/locators.py +444 -0
- offagent/domain/models.py +477 -0
- offagent/domain/text_fragments.py +136 -0
- offagent/errors.py +29 -0
- offagent/indexing/__init__.py +1 -0
- offagent/indexing/store.py +795 -0
- offagent/interfaces/__init__.py +1 -0
- offagent/interfaces/cli.py +438 -0
- offagent/interfaces/cli_output.py +139 -0
- offagent/interfaces/cli_progress.py +120 -0
- offagent/interfaces/mcp.py +1145 -0
- offagent/interfaces/mcp_converters.py +80 -0
- offagent/interfaces/mcp_models.py +923 -0
- offagent/objects/__init__.py +3 -0
- offagent/objects/base.py +26 -0
- offagent/objects/docx_objects.py +951 -0
- offagent/objects/pptx_objects.py +895 -0
- offagent/objects/xlsx_objects.py +962 -0
- offagent/path_policy.py +42 -0
- offagent/storage/__init__.py +1 -0
- offagent/storage/versioning.py +31 -0
- offagent-0.10.0.dist-info/METADATA +546 -0
- offagent-0.10.0.dist-info/RECORD +39 -0
- offagent-0.10.0.dist-info/WHEEL +5 -0
- offagent-0.10.0.dist-info/entry_points.txt +2 -0
- offagent-0.10.0.dist-info/licenses/LICENSE +21 -0
- offagent-0.10.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,951 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
from offagent.adapters import docx_adapter
|
|
8
|
+
from offagent.domain.locators import parse_locator, to_v2_locator
|
|
9
|
+
from offagent.domain.models import Capability, ChildSummary, ObjectPayload
|
|
10
|
+
from offagent.errors import InvalidArgumentsError, TargetNotFoundError
|
|
11
|
+
|
|
12
|
+
try:
|
|
13
|
+
from docx.enum.text import WD_BREAK
|
|
14
|
+
from docx.oxml.ns import qn
|
|
15
|
+
from docx.text.paragraph import Paragraph as DocxParagraph
|
|
16
|
+
except ModuleNotFoundError: # pragma: no cover - exercised through dependency checks
|
|
17
|
+
WD_BREAK = None
|
|
18
|
+
qn = None
|
|
19
|
+
DocxParagraph = None
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@dataclass(frozen=True)
class _DocxTarget:
    """Immutable, parsed description of the object a DOCX locator addresses.

    Every index field defaults to ``None``. The resolver helpers in this
    module assert that the indices they rely on are set before using them.
    """

    # Locator in canonical form; echoed back as the payload locator.
    canonical_locator: str
    # Kind of object addressed. This module dispatches on "document",
    # "section", "paragraph", "run", "table", "table_row", "table_cell",
    # "image" and "page_break".
    object_type: str
    # Position among the document's paragraph blocks.
    paragraph_index: int | None = None
    # Position within the paragraph's runs.
    run_index: int | None = None
    # Position among the document's table blocks.
    table_index: int | None = None
    # Position within the table's rows.
    row_index: int | None = None
    # Position within the row's cells.
    column_index: int | None = None
    # Index into ``document.sections``.
    section_index: int | None = None
    # Index into ``document.inline_shapes``.
    image_index: int | None = None
    # Index into the document-wide list of page breaks.
    page_break_index: int | None = None
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class DocxObjectResolver:
    """Resolve DOCX locators into payloads, child listings and capabilities."""

    def get_object(self, document_path: Path, locator: str) -> ObjectPayload:
        """Return the payload describing the object addressed by ``locator``.

        Raises:
            InvalidArgumentsError: the parsed object type has no payload builder.
        """
        canonical = to_v2_locator(locator, file_type="docx")
        document = docx_adapter._open_document(document_path)
        target = _parse_docx_target(canonical)
        kind = target.object_type

        # The document payload is the only one built without a target.
        if kind == "document":
            return _build_document_payload(document_path, document)

        builders = {
            "section": _build_section_payload,
            "paragraph": _build_paragraph_payload,
            "run": _build_run_payload,
            "table": _build_table_payload,
            "table_row": _build_table_row_payload,
            "table_cell": _build_table_cell_payload,
            "image": _build_image_payload,
            "page_break": _build_page_break_payload,
        }
        builder = builders.get(kind)
        if builder is None:
            raise InvalidArgumentsError(f"Unsupported DOCX object locator: {locator}")
        return builder(document_path, document, target)

    def list_children(
        self,
        document_path: Path,
        locator: str,
        *,
        child_type: str | None = None,
        limit: int | None = None,
    ) -> list[ChildSummary]:
        """List the children of the addressed object.

        ``child_type`` is forwarded to the per-type child helpers as a filter.
        ``limit`` is applied as a plain slice on the result. Object types
        without a child helper yield an empty list.
        """
        canonical = to_v2_locator(locator, file_type="docx")
        document = docx_adapter._open_document(document_path)
        target = _parse_docx_target(canonical)
        kind = target.object_type

        targeted_listers = {
            "section": _section_children,
            "paragraph": _paragraph_children,
            "table": _table_children,
            "table_row": _table_row_children,
        }
        if kind == "document":
            children = _document_children(document, child_type=child_type)
        elif kind in targeted_listers:
            children = targeted_listers[kind](document, target, child_type=child_type)
        else:
            children = ()

        return list(children if limit is None else children[:limit])

    def resolve_capabilities(
        self, document_path: Path, locator: str
    ) -> frozenset[Capability]:
        """Return the capabilities for the locator's object type.

        The document itself is not opened; only the locator is parsed.
        """
        del document_path
        target = _parse_docx_target(to_v2_locator(locator, file_type="docx"))
        return _capabilities_for(target.object_type)
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def set_paragraph_style(
    document_path: Path,
    locator: str,
    style_name: str,
    *,
    output_path: Path,
) -> tuple[str, str, dict[str, Any]]:
    """Apply a named style to one paragraph and save the result.

    Args:
        document_path: DOCX file to read.
        locator: Locator that must resolve to a paragraph.
        style_name: Style to assign; checked with ``_require_style_name``.
        output_path: Where the modified document is saved.

    Returns:
        ``(canonical_locator, summary_message, details)``.

    Raises:
        InvalidArgumentsError: the locator does not address a paragraph.
    """
    canonical = to_v2_locator(locator, file_type="docx")
    document = docx_adapter._open_document(document_path)
    target = _parse_docx_target(canonical)
    if target.object_type != "paragraph":
        raise InvalidArgumentsError(
            "docx_set_paragraph_style requires a paragraph locator."
        )

    # Check the style before resolving the paragraph, so a style problem is
    # reported ahead of a missing paragraph.
    _require_style_name(document, style_name)
    paragraph = _resolve_paragraph_target(document, target)["paragraph"]
    paragraph.style = style_name
    document.save(output_path)

    summary = f"Applied paragraph style {style_name!r} to {canonical}."
    details: dict[str, Any] = {"style_name": style_name}
    return canonical, summary, details
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def insert_page_break(
    document_path: Path,
    locator: str,
    *,
    output_path: Path,
) -> tuple[str, str, dict[str, Any]]:
    """Insert a new paragraph holding a page break right after a paragraph.

    Args:
        document_path: DOCX file to read.
        locator: Locator that must resolve to a paragraph.
        output_path: Where the modified document is saved.

    Returns:
        ``(page_break_locator, summary_message, details)``, where ``details``
        carries the locator of the paragraph that holds the break.

    Raises:
        RuntimeError: python-docx could not be imported.
        InvalidArgumentsError: the locator does not address a paragraph.
    """
    python_docx_missing = DocxParagraph is None or WD_BREAK is None
    if python_docx_missing:
        raise RuntimeError("python-docx is required for DOCX operations.")

    canonical = to_v2_locator(locator, file_type="docx")
    document = docx_adapter._open_document(document_path)
    target = _parse_docx_target(canonical)
    if target.object_type != "paragraph":
        raise InvalidArgumentsError(
            "docx_insert_page_break requires a paragraph locator."
        )

    anchor = _resolve_paragraph_target(document, target)

    # Create the paragraph element on the body, then relocate it so that it
    # directly follows the anchor paragraph.
    break_element = document.element.body.add_p()
    anchor["paragraph"]._element.addnext(break_element)
    DocxParagraph(break_element, document).add_run().add_break(WD_BREAK.PAGE)

    # The new paragraph sits one position after the anchor. Look the break
    # up again to learn its document-wide index.
    inserted = _page_break_for_paragraph(document, anchor["paragraph_index"] + 1)
    document.save(output_path)

    break_locator = f"docx:page_break:{inserted['page_break_index']}"
    summary = f"Inserted page break after {canonical}."
    details: dict[str, Any] = {
        "paragraph_locator": f"docx:para:{inserted['paragraph_index']}"
    }
    return break_locator, summary, details
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def add_table(
    document_path: Path,
    row_count: int,
    column_count: int,
    *,
    position: object | None = None,
    column_widths: list[int] | None = None,
    style_name: str | None = None,
    output_path: Path,
) -> tuple[str, str, dict[str, Any]]:
    """Create a table in the document and save the result.

    Args:
        document_path: DOCX file to read.
        row_count: Number of rows; must be at least 1.
        column_count: Number of columns; must be at least 1.
        position: Passed to ``_resolve_insert_anchor``. When that yields an
            anchor element, the table is moved to directly follow it.
        column_widths: Optional per-column widths, one per column, applied to
            every row. NOTE(review): values are passed through ``int()`` to
            ``cell.width``; presumably EMU, confirm against callers.
        style_name: Optional table style, checked with ``_require_style_name``.
        output_path: Where the modified document is saved.

    Returns:
        ``(table_locator, summary_message, details)``.

    Raises:
        InvalidArgumentsError: a count is below 1, or ``column_widths`` does
            not have exactly ``column_count`` entries.
    """
    counts_are_positive = row_count >= 1 and column_count >= 1
    if not counts_are_positive:
        raise InvalidArgumentsError(
            "docx_add_table requires positive row and column counts."
        )

    document = docx_adapter._open_document(document_path)
    new_table = document.add_table(rows=row_count, cols=column_count)

    if style_name is not None:
        _require_style_name(document, style_name)
        new_table.style = style_name

    if column_widths is not None:
        if len(column_widths) != column_count:
            raise InvalidArgumentsError(
                "column_widths must match the DOCX table column count."
            )
        for table_row in new_table.rows:
            for column_width, table_cell in zip(
                column_widths, table_row.cells, strict=True
            ):
                table_cell.width = int(column_width)

    # With no anchor the table stays where add_table() placed it.
    insert_after = _resolve_insert_anchor(document, position)
    if insert_after is not None:
        insert_after.addnext(new_table._element)

    # Compute the index only after any move, so it reflects the final position.
    locator = f"docx:table:{_table_index_for_element(document, new_table._element)}"
    document.save(output_path)

    summary = (
        f"Inserted DOCX table {locator} with {row_count} rows and {column_count} columns."
    )
    details: dict[str, Any] = {
        "row_count": row_count,
        "column_count": column_count,
        "style_name": style_name,
        "position": None if position is None else str(position),
    }
    return locator, summary, details
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
def merge_table_cells(
    document_path: Path,
    start_locator: str,
    end_locator: str,
    *,
    output_path: Path,
) -> tuple[str, str, dict[str, Any]]:
    """Merge the rectangular cell range spanned by two table-cell locators.

    The two locators name opposite corners of the range and may be given in
    either order.

    Args:
        document_path: DOCX file to read.
        start_locator: Locator of one corner cell.
        end_locator: Locator of the opposite corner cell, in the same table.
        output_path: Where the modified document is saved.

    Returns:
        ``(merged_cell_locator, summary_message, details)``. The locator
        addresses the top-left cell of the merged range.

    Raises:
        InvalidArgumentsError: either locator is not a table cell, or the two
            cells belong to different tables.
    """
    canonical_start = to_v2_locator(start_locator, file_type="docx")
    canonical_end = to_v2_locator(end_locator, file_type="docx")
    start_target = _parse_docx_target(canonical_start)
    end_target = _parse_docx_target(canonical_end)
    if (
        start_target.object_type != "table_cell"
        or end_target.object_type != "table_cell"
    ):
        raise InvalidArgumentsError(
            "docx_merge_table_cells requires DOCX table-cell locators."
        )
    if start_target.table_index != end_target.table_index:
        raise InvalidArgumentsError(
            "docx_merge_table_cells requires both cells to be in the same table."
        )

    document = docx_adapter._open_document(document_path)
    start = _resolve_table_cell_target(document, start_target)
    end = _resolve_table_cell_target(document, end_target)

    # sorted() already orders each pair, so min <= max always holds. The
    # former "valid rectangular range" check after it could never fire and
    # has been removed.
    min_row, max_row = sorted((start["row_index"], end["row_index"]))
    min_col, max_col = sorted((start["column_index"], end["column_index"]))

    table = start["table"]
    table.cell(min_row, min_col).merge(table.cell(max_row, max_col))
    document.save(output_path)

    locator = f"docx:table:{start['table_index']}:row:{min_row}:cell:{min_col}"
    return (
        locator,
        f"Merged DOCX table cells from {canonical_start} to {canonical_end}.",
        {"start_locator": canonical_start, "end_locator": canonical_end},
    )
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
def _build_document_payload(document_path: Path, document) -> ObjectPayload:
    """Build the payload for the whole document.

    Reports section, paragraph, table, image and page-break counts. The
    preview is the first 120 characters of the first paragraph whose text is
    not blank, or ``""`` when there is none.
    """
    # The block sequence is walked three times below (two counts and the
    # preview). Materialise it once so a lazily yielding _iter_blocks cannot
    # be exhausted by the first pass and leave the later passes empty.
    blocks = tuple(docx_adapter._iter_blocks(document))
    paragraph_count = sum(1 for block_type, _ in blocks if block_type == "paragraph")
    table_count = sum(1 for block_type, _ in blocks if block_type == "table")
    preview = next(
        (
            block.text[:120]
            for block_type, block in blocks
            if block_type == "paragraph" and block.text.strip()
        ),
        "",
    )
    return ObjectPayload(
        document=docx_adapter._document_ref(document_path),
        locator="docx:document",
        object_type="document",
        preview=preview,
        properties={
            "section_count": len(document.sections),
            "paragraph_count": paragraph_count,
            "table_count": table_count,
            "image_count": len(document.inline_shapes),
            "page_break_count": len(_page_breaks(document)),
        },
        capabilities=_capability_tuple("document"),
        child_summary=_document_children(document),
    )
|
|
278
|
+
|
|
279
|
+
|
|
280
|
+
def _build_section_payload(
    document_path: Path, document, target: _DocxTarget
) -> ObjectPayload:
    """Build the payload for one document section.

    Page dimensions and margins are reported as plain integers, or ``None``
    when the section does not define the value.

    Raises:
        TargetNotFoundError: ``target.section_index`` is out of range.
    """
    assert target.section_index is not None
    try:
        section = document.sections[target.section_index]
    except IndexError as exc:
        raise TargetNotFoundError(
            f"Section {target.section_index} does not exist in the document."
        ) from exc

    def _length_or_none(value):
        # python-docx returns None for a page size or margin that is absent
        # from the section properties; int(None) would raise TypeError.
        return None if value is None else int(value)

    return ObjectPayload(
        document=docx_adapter._document_ref(document_path),
        locator=target.canonical_locator,
        object_type="section",
        preview=f"Section {target.section_index}",
        properties={
            "section_index": target.section_index,
            "start_type": str(section.start_type),
            "page_width": _length_or_none(section.page_width),
            "page_height": _length_or_none(section.page_height),
            "left_margin": _length_or_none(section.left_margin),
            "right_margin": _length_or_none(section.right_margin),
        },
        capabilities=_capability_tuple("section"),
        parent_locator="docx:document",
        child_summary=_section_children(document, target),
    )
|
|
308
|
+
|
|
309
|
+
|
|
310
|
+
def _build_paragraph_payload(
    document_path: Path, document, target: _DocxTarget
) -> ObjectPayload:
    """Build the payload for one paragraph, including per-run formatting."""
    located = _resolve_paragraph_target(document, target)
    paragraph = located["paragraph"]
    model = docx_adapter._paragraph_model(
        paragraph, located["block_index"], located["paragraph_index"]
    )
    run_models = tuple(docx_adapter._run_model(run) for run in paragraph.runs)

    # Attributes copied from each run model, in the order they are reported.
    run_fields = (
        "text",
        "bold",
        "italic",
        "underline",
        "strike",
        "font_name",
        "font_size",
        "color_rgb",
    )

    document_ref = docx_adapter._document_ref(document_path)
    properties = {
        "block_index": located["block_index"],
        "paragraph_index": model.paragraph_index,
        "text": model.text,
        "style_name": model.style_name,
        "is_heading": model.is_heading,
        "runs": [
            {field: getattr(run_model, field) for field in run_fields}
            for run_model in run_models
        ],
    }
    return ObjectPayload(
        document=document_ref,
        locator=target.canonical_locator,
        object_type="paragraph",
        preview=model.preview,
        properties=properties,
        capabilities=_capability_tuple("paragraph"),
        parent_locator="docx:document",
        child_summary=_paragraph_children(document, target),
    )
|
|
349
|
+
|
|
350
|
+
|
|
351
|
+
def _build_run_payload(
    document_path: Path, document, target: _DocxTarget
) -> ObjectPayload:
    """Build the payload for one run; its parent is the containing paragraph."""
    located = _resolve_run_target(document, target)
    model = docx_adapter._run_model(located["run"])
    parent = f"docx:para:{located['paragraph_index']}"

    # Attributes copied from the run model, in the order they are reported.
    run_fields = (
        "text",
        "bold",
        "italic",
        "underline",
        "strike",
        "font_name",
        "font_size",
        "color_rgb",
    )

    document_ref = docx_adapter._document_ref(document_path)
    preview = model.text[:120]
    properties = {
        "paragraph_index": located["paragraph_index"],
        "run_index": located["run_index"],
        **{field: getattr(model, field) for field in run_fields},
    }
    return ObjectPayload(
        document=document_ref,
        locator=target.canonical_locator,
        object_type="run",
        preview=preview,
        properties=properties,
        capabilities=_capability_tuple("run"),
        parent_locator=parent,
    )
|
|
377
|
+
|
|
378
|
+
|
|
379
|
+
def _build_table_payload(
    document_path: Path, document, target: _DocxTarget
) -> ObjectPayload:
    """Build the payload for one table, including its rows.

    ``column_count`` is the length of the longest row, or 0 when the table
    has no rows.
    """
    located = _resolve_table_target(document, target)
    model = docx_adapter._table_model(
        located["table"], located["block_index"], located["table_index"]
    )

    document_ref = docx_adapter._document_ref(document_path)
    properties = {
        "block_index": located["block_index"],
        "table_index": model.table_index,
        "row_count": len(model.rows),
        "column_count": max(map(len, model.rows), default=0),
        "rows": [list(row) for row in model.rows],
    }
    return ObjectPayload(
        document=document_ref,
        locator=target.canonical_locator,
        object_type="table",
        preview=model.preview,
        properties=properties,
        capabilities=_capability_tuple("table"),
        parent_locator="docx:document",
        child_summary=_table_children(document, target),
    )
|
|
404
|
+
|
|
405
|
+
|
|
406
|
+
def _build_table_row_payload(
    document_path: Path, document, target: _DocxTarget
) -> ObjectPayload:
    """Build the payload for one table row.

    The preview joins the cell texts with ``" | "`` and keeps the first 120
    characters.
    """
    located = _resolve_table_row_target(document, target)
    document_ref = docx_adapter._document_ref(document_path)

    row_cells = located["row"].cells
    cell_texts = [cell.text for cell in row_cells]
    preview = " | ".join(cell_texts)[:120]
    properties = {
        "table_index": located["table_index"],
        "row_index": located["row_index"],
        "cell_count": len(row_cells),
        "cells": cell_texts,
    }
    return ObjectPayload(
        document=document_ref,
        locator=target.canonical_locator,
        object_type="table_row",
        preview=preview,
        properties=properties,
        capabilities=_capability_tuple("table_row"),
        parent_locator=f"docx:table:{located['table_index']}",
        child_summary=_table_row_children(document, target),
    )
|
|
425
|
+
|
|
426
|
+
|
|
427
|
+
def _build_table_cell_payload(
    document_path: Path, document, target: _DocxTarget
) -> ObjectPayload:
    """Build the payload for one table cell; its parent is the containing row."""
    located = _resolve_table_cell_target(document, target)
    cell = located["cell"]
    table_index = located["table_index"]
    row_index = located["row_index"]

    document_ref = docx_adapter._document_ref(document_path)
    preview = cell.text[:120]
    properties = {
        "table_index": table_index,
        "row_index": row_index,
        "column_index": located["column_index"],
        "text": cell.text,
        "paragraph_count": len(cell.paragraphs),
    }
    return ObjectPayload(
        document=document_ref,
        locator=target.canonical_locator,
        object_type="table_cell",
        preview=preview,
        properties=properties,
        capabilities=_capability_tuple("table_cell"),
        parent_locator=f"docx:table:{table_index}:row:{row_index}",
    )
|
|
447
|
+
|
|
448
|
+
|
|
449
|
+
def _build_image_payload(
    document_path: Path, document, target: _DocxTarget
) -> ObjectPayload:
    """Build the payload for one inline image.

    Raises:
        TargetNotFoundError: ``target.image_index`` is out of range for
            ``document.inline_shapes``.
    """
    image_index = target.image_index
    assert image_index is not None
    try:
        shape = document.inline_shapes[image_index]
    except IndexError as exc:
        raise TargetNotFoundError(
            f"Image {target.image_index} does not exist in the document."
        ) from exc

    document_ref = docx_adapter._document_ref(document_path)
    properties = {
        "image_index": image_index,
        "width": int(shape.width),
        "height": int(shape.height),
        "shape_type": str(shape.type),
    }
    return ObjectPayload(
        document=document_ref,
        locator=target.canonical_locator,
        object_type="image",
        preview=f"Image {image_index}",
        properties=properties,
        capabilities=_capability_tuple("image"),
        parent_locator="docx:document",
    )
|
|
474
|
+
|
|
475
|
+
|
|
476
|
+
def _build_page_break_payload(
    document_path: Path, document, target: _DocxTarget
) -> ObjectPayload:
    """Build the payload for one page break; its parent is the holding paragraph."""
    break_index = target.page_break_index
    assert break_index is not None
    found = _resolve_page_break(document, break_index)

    document_ref = docx_adapter._document_ref(document_path)
    properties = {
        "page_break_index": break_index,
        "paragraph_index": found["paragraph_index"],
        "run_index": found["run_index"],
    }
    return ObjectPayload(
        document=document_ref,
        locator=target.canonical_locator,
        object_type="page_break",
        preview="Page break",
        properties=properties,
        capabilities=_capability_tuple("page_break"),
        parent_locator=f"docx:para:{found['paragraph_index']}",
    )
|
|
494
|
+
|
|
495
|
+
|
|
496
|
+
def _document_children(
    document, *, child_type: str | None = None
) -> tuple[ChildSummary, ...]:
    """Summarise the document's direct children.

    Children are emitted in a fixed order: sections, then body blocks
    (paragraphs and tables, in document order), then inline images, then page
    breaks. When ``child_type`` normalises to something other than ``None``,
    only that kind is emitted. Paragraph and table indices still count the
    filtered-out blocks.
    """
    wanted_type = _normalize_child_type(child_type)

    def include(kind: str) -> bool:
        return wanted_type in {None, kind}

    summaries: list[ChildSummary] = []

    if include("section"):
        for section_no, _ in enumerate(document.sections):
            summaries.append(
                ChildSummary(
                    locator=f"docx:section:{section_no}",
                    object_type="section",
                    preview=f"Section {section_no}",
                    capabilities=_capability_tuple("section"),
                )
            )

    paragraph_no = 0
    table_no = 0
    for block_no, (block_kind, block) in enumerate(
        docx_adapter._iter_blocks(document)
    ):
        if block_kind == "paragraph":
            if include("paragraph"):
                model = docx_adapter._paragraph_model(block, block_no, paragraph_no)
                summaries.append(
                    ChildSummary(
                        locator=f"docx:para:{paragraph_no}",
                        object_type="paragraph",
                        preview=model.preview,
                        capabilities=_capability_tuple("paragraph"),
                    )
                )
            paragraph_no += 1
        else:
            # Every block that is not a paragraph is handled as a table.
            if include("table"):
                model = docx_adapter._table_model(block, block_no, table_no)
                summaries.append(
                    ChildSummary(
                        locator=f"docx:table:{table_no}",
                        object_type="table",
                        preview=model.preview,
                        capabilities=_capability_tuple("table"),
                    )
                )
            table_no += 1

    if include("image"):
        for image_no, _ in enumerate(document.inline_shapes):
            summaries.append(
                ChildSummary(
                    locator=f"docx:image:{image_no}",
                    object_type="image",
                    preview=f"Image {image_no}",
                    capabilities=_capability_tuple("image"),
                )
            )

    if include("page_break"):
        for break_no, _ in enumerate(_page_breaks(document)):
            summaries.append(
                ChildSummary(
                    locator=f"docx:page_break:{break_no}",
                    object_type="page_break",
                    preview="Page break",
                    capabilities=_capability_tuple("page_break"),
                )
            )

    return tuple(summaries)
|
|
572
|
+
|
|
573
|
+
|
|
574
|
+
def _section_children(
    document, target: _DocxTarget, *, child_type: str | None = None
) -> tuple[ChildSummary, ...]:
    """Summarise a section's children.

    Only the single-section case is supported: when the document has exactly
    one section and it is the one addressed, the document's children are
    returned. Every other case yields an empty tuple.
    """
    is_sole_section = len(document.sections) == 1 and target.section_index == 0
    if not is_sole_section:
        return ()
    return _document_children(document, child_type=child_type)
|
|
580
|
+
|
|
581
|
+
|
|
582
|
+
def _paragraph_children(
    document, target: _DocxTarget, *, child_type: str | None = None
) -> tuple[ChildSummary, ...]:
    """Summarise a paragraph's children: its runs, then its page breaks.

    When ``child_type`` normalises to something other than ``None``, only that
    kind is emitted.

    Raises:
        TargetNotFoundError: the paragraph does not exist (raised by
            ``_resolve_paragraph_target``).
    """
    resolved = _resolve_paragraph_target(document, target)
    normalized_child_type = _normalize_child_type(child_type)
    paragraph_index = resolved["paragraph_index"]
    children: list[ChildSummary] = []

    if normalized_child_type in {None, "run"}:
        for run_index, run in enumerate(resolved["paragraph"].runs):
            children.append(
                ChildSummary(
                    locator=f"docx:para:{paragraph_index}:run:{run_index}",
                    object_type="run",
                    preview=run.text[:120],
                    capabilities=_capability_tuple("run"),
                )
            )

    if normalized_child_type in {None, "page_break"}:
        # Page-break locators are resolved against the document-wide list of
        # page breaks. Take the indices from that list rather than numbering
        # this paragraph's breaks from 0, which would point the locator at a
        # page break elsewhere in the document.
        for page_break in _page_breaks(document):
            if page_break["paragraph_index"] != paragraph_index:
                continue
            children.append(
                ChildSummary(
                    locator=f"docx:page_break:{page_break['page_break_index']}",
                    object_type="page_break",
                    preview="Page break",
                    capabilities=_capability_tuple("page_break"),
                )
            )

    return tuple(children)
|
|
614
|
+
|
|
615
|
+
|
|
616
|
+
def _table_children(
    document, target: _DocxTarget, *, child_type: str | None = None
) -> tuple[ChildSummary, ...]:
    """Summarise a table's rows.

    The table is resolved first, so a missing table is reported even when the
    ``child_type`` filter would exclude rows.
    """
    located = _resolve_table_target(document, target)
    if _normalize_child_type(child_type) not in {None, "table_row"}:
        return ()

    row_summaries: list[ChildSummary] = []
    for row_no, table_row in enumerate(located["table"].rows):
        row_summaries.append(
            ChildSummary(
                locator=f"docx:table:{located['table_index']}:row:{row_no}",
                object_type="table_row",
                preview=" | ".join(cell.text for cell in table_row.cells)[:120],
                capabilities=_capability_tuple("table_row"),
            )
        )
    return tuple(row_summaries)
|
|
632
|
+
|
|
633
|
+
|
|
634
|
+
def _table_row_children(
    document, target: _DocxTarget, *, child_type: str | None = None
) -> tuple[ChildSummary, ...]:
    """Summarise a table row's cells.

    The row is resolved first, so a missing table or row is reported even when
    the ``child_type`` filter would exclude cells.
    """
    located = _resolve_table_row_target(document, target)
    if _normalize_child_type(child_type) not in {None, "table_cell"}:
        return ()

    cell_summaries: list[ChildSummary] = []
    for column_no, row_cell in enumerate(located["row"].cells):
        cell_summaries.append(
            ChildSummary(
                locator=f"docx:table:{located['table_index']}:row:{located['row_index']}:cell:{column_no}",
                object_type="table_cell",
                preview=row_cell.text[:120],
                capabilities=_capability_tuple("table_cell"),
            )
        )
    return tuple(cell_summaries)
|
|
650
|
+
|
|
651
|
+
|
|
652
|
+
def _resolve_paragraph_target(document, target: _DocxTarget) -> dict[str, Any]:
    """Find the paragraph block at ``target.paragraph_index``.

    Returns:
        A dict with ``block_index`` (position among all blocks),
        ``paragraph_index`` (position among paragraph blocks) and
        ``paragraph``.

    Raises:
        TargetNotFoundError: the document has no paragraph at that index.
    """
    assert target.paragraph_index is not None

    # Keep each paragraph's overall block position while counting paragraphs.
    paragraph_blocks = (
        (block_no, block)
        for block_no, (block_kind, block) in enumerate(
            docx_adapter._iter_blocks(document)
        )
        if block_kind == "paragraph"
    )
    for paragraph_no, (block_no, paragraph) in enumerate(paragraph_blocks):
        if paragraph_no == target.paragraph_index:
            return {
                "block_index": block_no,
                "paragraph_index": paragraph_no,
                "paragraph": paragraph,
            }

    raise TargetNotFoundError(
        f"Paragraph {target.paragraph_index} does not exist in the document."
    )
|
|
670
|
+
|
|
671
|
+
|
|
672
|
+
def _resolve_run_target(document, target: _DocxTarget) -> dict[str, Any]:
    """Resolve the paragraph, then the run at ``target.run_index`` inside it.

    Returns:
        The paragraph resolution dict extended with ``run_index`` and ``run``.

    Raises:
        TargetNotFoundError: the paragraph or the run does not exist.
    """
    located = _resolve_paragraph_target(document, target)
    assert target.run_index is not None
    try:
        found_run = located["paragraph"].runs[target.run_index]
    except IndexError as exc:
        raise TargetNotFoundError(
            f"Run {target.run_index} does not exist in paragraph {located['paragraph_index']}."
        ) from exc
    return dict(located, run_index=target.run_index, run=found_run)
|
|
682
|
+
|
|
683
|
+
|
|
684
|
+
def _resolve_table_target(document, target: _DocxTarget) -> dict[str, Any]:
    """Find the table block at ``target.table_index``.

    Returns:
        A dict with ``block_index`` (position among all blocks),
        ``table_index`` (position among table blocks) and ``table``.

    Raises:
        TargetNotFoundError: the document has no table at that index.
    """
    assert target.table_index is not None

    # Keep each table's overall block position while counting tables.
    table_blocks = (
        (block_no, block)
        for block_no, (block_kind, block) in enumerate(
            docx_adapter._iter_blocks(document)
        )
        if block_kind == "table"
    )
    for table_no, (block_no, table) in enumerate(table_blocks):
        if table_no == target.table_index:
            return {
                "block_index": block_no,
                "table_index": table_no,
                "table": table,
            }

    raise TargetNotFoundError(
        f"Table {target.table_index} does not exist in the document."
    )
|
|
702
|
+
|
|
703
|
+
|
|
704
|
+
def _resolve_table_row_target(document, target: _DocxTarget) -> dict[str, Any]:
    """Resolve the table, then the row at ``target.row_index`` inside it.

    Returns:
        The table resolution dict extended with ``row_index`` and ``row``.

    Raises:
        TargetNotFoundError: the table or the row does not exist.
    """
    located = _resolve_table_target(document, target)
    assert target.row_index is not None
    try:
        found_row = located["table"].rows[target.row_index]
    except IndexError as exc:
        raise TargetNotFoundError(
            f"Row {target.row_index} does not exist in table {located['table_index']}."
        ) from exc
    return dict(located, row_index=target.row_index, row=found_row)
|
|
714
|
+
|
|
715
|
+
|
|
716
|
+
def _resolve_table_cell_target(document, target: _DocxTarget) -> dict[str, Any]:
    """Resolve the row, then the cell at ``target.column_index`` inside it.

    Returns:
        The row resolution dict extended with ``column_index`` and ``cell``.

    Raises:
        TargetNotFoundError: the table, the row or the cell does not exist.
    """
    located = _resolve_table_row_target(document, target)
    assert target.column_index is not None
    try:
        found_cell = located["row"].cells[target.column_index]
    except IndexError as exc:
        raise TargetNotFoundError(
            f"Cell {target.column_index} does not exist in row {located['row_index']}."
        ) from exc
    return dict(located, column_index=target.column_index, cell=found_cell)
|
|
726
|
+
|
|
727
|
+
|
|
728
|
+
def _resolve_page_break(document, page_break_index: int) -> dict[str, int]:
    """Return the page-break record at ``page_break_index``.

    The records come from ``_page_breaks(document)``; the index is applied
    to that list directly.

    Raises:
        TargetNotFoundError: Collecting or indexing the records raised
            ``IndexError``.
    """
    try:
        # The collection call stays inside the ``try`` so that any
        # IndexError it raises is translated exactly as before.
        entry = _page_breaks(document)[page_break_index]
    except IndexError as exc:
        raise TargetNotFoundError(
            f"Page break {page_break_index} does not exist in the document."
        ) from exc
    else:
        return entry
|
|
735
|
+
|
|
736
|
+
|
|
737
|
+
def _page_breaks(document) -> list[dict[str, int]]:
    """Collect the page-break records of every paragraph block, in order.

    Only blocks tagged ``"paragraph"`` by ``docx_adapter._iter_blocks`` are
    inspected, and ``paragraph_index`` counts those blocks only. Each
    paragraph's records are numbered starting from the count collected so
    far, so ``page_break_index`` runs continuously across the document.
    """
    collected: list[dict[str, int]] = []
    paragraphs = (
        block
        for block_type, block in docx_adapter._iter_blocks(document)
        if block_type == "paragraph"
    )
    for paragraph_index, paragraph in enumerate(paragraphs):
        found = _page_breaks_in_paragraph(
            paragraph, paragraph_index, base_index=len(collected)
        )
        collected.extend(found)
    return collected
|
|
750
|
+
|
|
751
|
+
|
|
752
|
+
def _page_breaks_in_paragraph(
    paragraph, paragraph_index: int, *, base_index: int = 0
) -> list[dict[str, int]]:
    """List the page breaks found in the runs of one paragraph.

    Args:
        paragraph: Object exposing ``runs``; each run must expose the
            underlying XML element as ``_element``.
        paragraph_index: Index recorded verbatim in every returned record.
        base_index: Value of ``page_break_index`` for the first record
            found; later records count up from it.

    Returns:
        One dict per ``w:br`` element whose ``w:type`` attribute equals
        ``"page"``, in document order, with keys ``page_break_index``,
        ``paragraph_index`` and ``run_index``. Returns an empty list when
        ``qn`` is unavailable.
    """
    # NOTE(review): ``qn`` is presumably None when the DOCX backend could not
    # be imported -- confirm against the import block at the top of the file.
    if qn is None:
        return []

    # Match on fully-qualified names from qn() rather than resolving the
    # literal "w:" prefix through each run's own nsmap. The search then does
    # not depend on which prefix the document happened to declare, and the
    # tag is matched the same way as the attribute below.
    break_tag = qn("w:br")
    type_attribute = qn("w:type")

    page_breaks: list[dict[str, int]] = []
    for run_index, run in enumerate(paragraph.runs):
        # iter() walks the run element and its descendants in document order.
        for br in run._element.iter(break_tag):
            # Breaks without type "page" are skipped.
            if br.get(type_attribute) != "page":
                continue
            page_breaks.append(
                {
                    "page_break_index": base_index + len(page_breaks),
                    "paragraph_index": paragraph_index,
                    "run_index": run_index,
                }
            )
    return page_breaks
|
|
771
|
+
|
|
772
|
+
|
|
773
|
+
def _page_break_for_paragraph(document, paragraph_index: int) -> dict[str, int]:
    """Return the first page-break record that belongs to ``paragraph_index``.

    Raises:
        RuntimeError: No record from ``_page_breaks(document)`` carries that
            paragraph index.
    """
    matches = (
        candidate
        for candidate in _page_breaks(document)
        if candidate["paragraph_index"] == paragraph_index
    )
    first_match = next(matches, None)
    if first_match is not None:
        return first_match
    raise RuntimeError(
        f"Failed to resolve inserted page break for paragraph {paragraph_index}."
    )
|
|
780
|
+
|
|
781
|
+
|
|
782
|
+
def _parse_docx_target(locator: str) -> _DocxTarget:
    """Turn a DOCX locator string into a ``_DocxTarget``.

    Recognised component layouts (after ``parse_locator``):

    * ``docx / document``
    * ``docx / <section|para|table|image|page_break> / <index>``
    * ``docx / para / <index> / run / <index>``
    * ``docx / table / <index> / row / <index>``
    * ``docx / table / <index> / row / <index> / cell / <index>``

    Raises:
        InvalidArgumentsError: The layout is not one of the above, or an
            index component is rejected by ``_require_index``.
    """
    components = parse_locator(locator).components
    if components == ("docx", "document"):
        return _DocxTarget(locator, "document")

    def index_at(position: int) -> int:
        return _require_index(components[position], locator)

    size = len(components)
    prefix = components[:2]

    if size == 3:
        # (locator keyword, object type, _DocxTarget field holding the index)
        single_index_forms = (
            ("section", "section", "section_index"),
            ("para", "paragraph", "paragraph_index"),
            ("table", "table", "table_index"),
            ("image", "image", "image_index"),
            ("page_break", "page_break", "page_break_index"),
        )
        for keyword, object_type, field_name in single_index_forms:
            if prefix == ("docx", keyword):
                return _DocxTarget(
                    locator, object_type, **{field_name: index_at(2)}
                )
    elif size == 5:
        if prefix == ("docx", "para") and components[3] == "run":
            return _DocxTarget(
                locator,
                "run",
                paragraph_index=index_at(2),
                run_index=index_at(4),
            )
        if prefix == ("docx", "table") and components[3] == "row":
            return _DocxTarget(
                locator,
                "table_row",
                table_index=index_at(2),
                row_index=index_at(4),
            )
    elif size == 7:
        if (
            prefix == ("docx", "table")
            and components[3] == "row"
            and components[5] == "cell"
        ):
            return _DocxTarget(
                locator,
                "table_cell",
                table_index=index_at(2),
                row_index=index_at(4),
                column_index=index_at(6),
            )

    raise InvalidArgumentsError(f"Unsupported DOCX locator: {locator}")
|
|
845
|
+
|
|
846
|
+
|
|
847
|
+
def _capabilities_for(object_type: str) -> frozenset[Capability]:
    """Return the set of capabilities granted to a DOCX object type.

    Object types that share a capability set are grouped together; any
    unrecognised type is granted ``Capability.READ`` only.
    """
    if object_type in ("document", "section"):
        granted = {Capability.READ, Capability.ADD_CHILD}
    elif object_type == "paragraph":
        granted = {
            Capability.READ,
            Capability.UPDATE,
            Capability.DELETE,
            Capability.MOVE,
            Capability.COPY,
            Capability.STYLE,
        }
    elif object_type == "run":
        granted = {
            Capability.READ,
            Capability.UPDATE,
            Capability.DELETE,
            Capability.STYLE,
        }
    elif object_type in ("table", "image", "page_break"):
        granted = {
            Capability.READ,
            Capability.DELETE,
            Capability.MOVE,
            Capability.COPY,
        }
    elif object_type == "table_row":
        granted = {
            Capability.READ,
            Capability.UPDATE,
            Capability.DELETE,
            Capability.ADD_CHILD,
            Capability.MOVE,
            Capability.COPY,
        }
    elif object_type == "table_cell":
        granted = {Capability.READ, Capability.UPDATE, Capability.STYLE}
    else:
        granted = {Capability.READ}
    return frozenset(granted)
|
|
889
|
+
|
|
890
|
+
|
|
891
|
+
def _capability_tuple(object_type: str) -> tuple[Capability, ...]:
    """Return the capabilities of ``object_type`` as a tuple ordered by ``.value``."""
    ordered = list(_capabilities_for(object_type))
    ordered.sort(key=lambda item: item.value)
    return tuple(ordered)
|
|
895
|
+
|
|
896
|
+
|
|
897
|
+
def _normalize_child_type(child_type: str | None) -> str | None:
    """Collapse a missing or empty child type to ``None``.

    Any other value is returned unchanged (no stripping or case folding).
    """
    return None if child_type in (None, "") else child_type
|
|
901
|
+
|
|
902
|
+
|
|
903
|
+
def _require_style_name(document, style_name: str) -> None:
    """Check that ``document.styles`` contains a style named ``style_name``.

    Entries without a ``name`` attribute are treated as non-matching.

    Raises:
        InvalidArgumentsError: No style has exactly that name.
    """
    for candidate in document.styles:
        if getattr(candidate, "name", None) == style_name:
            return
    raise InvalidArgumentsError(f"Unknown DOCX style: {style_name}")
|
|
907
|
+
|
|
908
|
+
|
|
909
|
+
def _resolve_insert_anchor(document, position: object | None):
    """Resolve an insert position to the XML element to insert after.

    Args:
        document: The document the locator is resolved against.
        position: ``None`` (no anchor), a locator string, or a dict carrying
            the locator under ``"after"`` or ``"after_locator"`` (checked in
            that order; the first non-``None`` value wins and is converted
            with ``str``).

    Returns:
        ``None`` when ``position`` is ``None``; otherwise the ``_element`` of
        the referenced paragraph or table.

    Raises:
        InvalidArgumentsError: No locator could be extracted from
            ``position``, or it references something other than a paragraph
            or table.
    """
    if position is None:
        return None

    if isinstance(position, str):
        after_locator = position
    else:
        after_locator = None
        if isinstance(position, dict):
            # Lazy lookup: the second key is only read if the first is None.
            values = (position.get(key) for key in ("after", "after_locator"))
            chosen = next((value for value in values if value is not None), None)
            if chosen is not None:
                after_locator = str(chosen)

    if after_locator is None:
        raise InvalidArgumentsError("DOCX insert position must be an after locator.")

    target = _parse_docx_target(to_v2_locator(after_locator, file_type="docx"))
    kind = target.object_type
    if kind == "paragraph":
        return _resolve_paragraph_target(document, target)["paragraph"]._element
    if kind == "table":
        return _resolve_table_target(document, target)["table"]._element
    raise InvalidArgumentsError(
        "DOCX insert position must reference a paragraph or table."
    )
|
|
934
|
+
|
|
935
|
+
|
|
936
|
+
def _table_index_for_element(document, table_element) -> int:
    """Return the table index of the table whose ``_element`` equals ``table_element``.

    Tables are counted in the order ``docx_adapter._iter_blocks`` yields
    blocks tagged ``"table"``.

    Raises:
        RuntimeError: No yielded table wraps ``table_element``.
    """
    tables = (
        block
        for block_type, block in docx_adapter._iter_blocks(document)
        if block_type == "table"
    )
    for table_index, table in enumerate(tables):
        if table._element == table_element:
            return table_index
    raise RuntimeError("Failed to resolve inserted DOCX table index.")
|
|
945
|
+
|
|
946
|
+
|
|
947
|
+
def _require_index(raw: str, locator: str) -> int:
    """Convert one locator component to an integer index.

    Args:
        raw: The component text; converted with ``int``.
        locator: The full locator, used only in the error message.

    Raises:
        InvalidArgumentsError: ``int(raw)`` raised ``ValueError``.
    """
    try:
        index = int(raw)
    except ValueError as exc:
        raise InvalidArgumentsError(f"Invalid DOCX locator: {locator}") from exc
    return index
|