offagent 0.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. offagent/__init__.py +3 -0
  2. offagent/__main__.py +5 -0
  3. offagent/adapters/__init__.py +1 -0
  4. offagent/adapters/docx_adapter.py +1237 -0
  5. offagent/adapters/embedding_provider.py +132 -0
  6. offagent/adapters/pptx_adapter.py +940 -0
  7. offagent/adapters/xlsx_adapter.py +1266 -0
  8. offagent/app/__init__.py +1 -0
  9. offagent/app/progress.py +52 -0
  10. offagent/app/services.py +4267 -0
  11. offagent/config.py +287 -0
  12. offagent/domain/__init__.py +1 -0
  13. offagent/domain/locators.py +444 -0
  14. offagent/domain/models.py +477 -0
  15. offagent/domain/text_fragments.py +136 -0
  16. offagent/errors.py +29 -0
  17. offagent/indexing/__init__.py +1 -0
  18. offagent/indexing/store.py +795 -0
  19. offagent/interfaces/__init__.py +1 -0
  20. offagent/interfaces/cli.py +438 -0
  21. offagent/interfaces/cli_output.py +139 -0
  22. offagent/interfaces/cli_progress.py +120 -0
  23. offagent/interfaces/mcp.py +1145 -0
  24. offagent/interfaces/mcp_converters.py +80 -0
  25. offagent/interfaces/mcp_models.py +923 -0
  26. offagent/objects/__init__.py +3 -0
  27. offagent/objects/base.py +26 -0
  28. offagent/objects/docx_objects.py +951 -0
  29. offagent/objects/pptx_objects.py +895 -0
  30. offagent/objects/xlsx_objects.py +962 -0
  31. offagent/path_policy.py +42 -0
  32. offagent/storage/__init__.py +1 -0
  33. offagent/storage/versioning.py +31 -0
  34. offagent-0.10.0.dist-info/METADATA +546 -0
  35. offagent-0.10.0.dist-info/RECORD +39 -0
  36. offagent-0.10.0.dist-info/WHEEL +5 -0
  37. offagent-0.10.0.dist-info/entry_points.txt +2 -0
  38. offagent-0.10.0.dist-info/licenses/LICENSE +21 -0
  39. offagent-0.10.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,940 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+ from pathlib import Path
5
+
6
+ from offagent.domain.locators import parse_locator, to_v2_locator
7
+ from offagent.domain.models import (
8
+ BlockStyle,
9
+ DocumentRef,
10
+ InlineFragment,
11
+ InlineStyle,
12
+ IndexedItem,
13
+ PresentationSlideSummary,
14
+ PptxTextBlockNode,
15
+ SectionPayload,
16
+ SlideBundle,
17
+ SlideTextBlock,
18
+ StructureSection,
19
+ TextContainerSnapshot,
20
+ VisibleTextRange,
21
+ )
22
+ from offagent.domain.text_fragments import (
23
+ apply_style_to_range,
24
+ fragment_text,
25
+ normalize_fragments,
26
+ )
27
+ from offagent.errors import (
28
+ InvalidArgumentsError,
29
+ TargetNotEditableError as BaseTargetNotEditableError,
30
+ )
31
+ from offagent.errors import TargetNotFoundError
32
+
33
+ try:
34
+ from pptx import Presentation
35
+ from pptx.dml.color import RGBColor
36
+ from pptx.enum.text import MSO_ANCHOR, PP_ALIGN
37
+ from pptx.util import Pt
38
+ except ModuleNotFoundError: # pragma: no cover - exercised through dependency checks
39
+ Presentation = None
40
+ RGBColor = None
41
+ MSO_ANCHOR = None
42
+ PP_ALIGN = None
43
+ Pt = None
44
+
45
+
46
@dataclass(frozen=True)
class ResolvedShape:
    """Immutable snapshot describing one text shape located on a slide."""

    # 1-based slide number taken from the item id.
    slide_number: int
    # Shape id taken from the item id.
    shape_id: int
    # Position reported by ``_shape_index`` for the shape.
    shape_index: int
    # Shape name, or ``None`` when the shape exposes no ``name`` attribute.
    shape_name: str | None
    # Whether the shape reports itself as a placeholder.
    is_placeholder: bool
    # Paragraph texts of the shape's text frame joined with newlines.
    text: str
54
+
55
+
56
class TargetNotEditableError(BaseTargetNotEditableError):
    """Signal that a PPTX target was found but cannot be edited as text.

    Specializes the package-level ``TargetNotEditableError`` for this adapter.
    """
58
+
59
+
60
def extract_document(document_path: Path) -> list[IndexedItem]:
    """Index every text-bearing shape of a presentation.

    Slides are numbered from 1 and shapes are visited in ``slide.shapes``
    order; shapes without a text frame are skipped.

    Args:
        document_path: Location of the PPTX file to open.

    Returns:
        One ``IndexedItem`` of type ``"slide_text_shape"`` per text shape. The
        item id is also used as the locator, and ``preview`` holds the first
        120 characters of the shape text.
    """
    deck = _open_presentation(document_path)
    collected: list[IndexedItem] = []

    for slide_number, slide in enumerate(deck.slides, start=1):
        for shape_index, shape in enumerate(slide.shapes):
            if getattr(shape, "has_text_frame", False):
                identifier = make_item_id(slide_number, shape.shape_id)
                body = _text_frame_text(shape.text_frame)
                details = {
                    "slide_number": slide_number,
                    "shape_id": shape.shape_id,
                    "shape_index": shape_index,
                    "shape_name": getattr(shape, "name", None),
                    "text_frame_text": body,
                    "is_placeholder": bool(getattr(shape, "is_placeholder", False)),
                }
                collected.append(
                    IndexedItem(
                        item_id=identifier,
                        item_type="slide_text_shape",
                        locator=identifier,
                        preview=body[:120],
                        content_text=body,
                        metadata=details,
                    )
                )

    return collected
90
+
91
+
92
def build_embedding_text(item: IndexedItem, document_path: Path) -> str:
    """Return the text to embed for *item*.

    Args:
        item: Indexed item whose ``content_text`` is returned unchanged.
        document_path: Accepted but not used by this adapter.
    """
    _ = document_path  # intentionally unused
    return item.content_text
95
+
96
+
97
def read_text_shape(document_path: Path, item_id: str) -> str:
    """Return the text of the shape addressed by *item_id*.

    Resolution, and any error it raises, is delegated to ``resolve_shape``.
    """
    return resolve_shape(document_path, item_id).text
100
+
101
+
102
def replace_text_shape(
    document_path: Path, item_id: str, text: str, output_path: Path | None = None
) -> Path:
    """Replace the whole text of a shape and save the presentation.

    Args:
        document_path: Presentation to open.
        item_id: Shape item id in ``slide:<n>:shape:<id>`` form.
        text: New text assigned to the first paragraph after the frame is cleared.
        output_path: Where to save; the source file is overwritten when ``None``.

    Returns:
        The path the presentation was saved to.
    """
    deck = _open_presentation(document_path)
    frame = _require_text_frame(_resolve_shape(deck, item_id))
    frame.clear()
    first_paragraph = frame.paragraphs[0]
    first_paragraph.text = text
    destination = _target_path(document_path, output_path)
    deck.save(destination)
    return destination
113
+
114
+
115
def append_text_shape(
    document_path: Path, item_id: str, text: str, output_path: Path | None = None
) -> Path:
    """Append *text* to a shape's existing text and save the presentation.

    The current text (paragraphs joined by newlines) is concatenated with
    *text*, without a separator, and assigned back to the text frame.

    Args:
        document_path: Presentation to open.
        item_id: Shape item id in ``slide:<n>:shape:<id>`` form.
        text: Text to add after the existing content.
        output_path: Where to save; the source file is overwritten when ``None``.

    Returns:
        The path the presentation was saved to.
    """
    deck = _open_presentation(document_path)
    frame = _require_text_frame(_resolve_shape(deck, item_id))
    existing = _text_frame_text(frame)
    frame.text = f"{existing}{text}"
    destination = _target_path(document_path, output_path)
    deck.save(destination)
    return destination
125
+
126
+
127
def make_slide_locator(slide_number: int) -> str:
    """Build the ``slide:<n>`` locator for a slide number."""
    return "slide:{}".format(slide_number)
129
+
130
+
131
def resolve_shape(document_path: Path, item_id: str) -> ResolvedShape:
    """Open a presentation and describe the text shape addressed by *item_id*.

    Args:
        document_path: Presentation to open.
        item_id: Shape item id in ``slide:<n>:shape:<id>`` form.

    Returns:
        A ``ResolvedShape`` carrying the shape's ids, position, name,
        placeholder flag and text.

    Raises:
        TargetNotEditableError: If the shape has no text frame.
    """
    deck = _open_presentation(document_path)
    shape = _resolve_shape(deck, item_id)
    frame = _require_text_frame(shape)
    slide_number, shape_id = parse_item_id(item_id)
    placeholder_flag = bool(getattr(shape, "is_placeholder", False))
    return ResolvedShape(
        slide_number=slide_number,
        shape_id=shape_id,
        shape_index=_shape_index(shape),
        shape_name=getattr(shape, "name", None),
        is_placeholder=placeholder_flag,
        text=_text_frame_text(frame),
    )
144
+
145
+
146
def resolve_structure(document_path: Path) -> tuple[StructureSection, ...]:
    """Summarize each slide of a presentation as a ``StructureSection``.

    A slide's locator is the item id of its first text block, or the plain
    ``slide:<n>`` locator when the slide has no text blocks. The preview is
    the first non-empty block text, cut to 120 characters.
    """
    deck = _open_presentation(document_path)
    collected: list[StructureSection] = []

    for slide_number, slide in enumerate(deck.slides, start=1):
        blocks = _slide_text_blocks(slide)
        first_text = next((block.text for block in blocks if block.text), "")
        if blocks:
            locator = make_item_id(slide_number, blocks[0].shape_id)
        else:
            locator = make_slide_locator(slide_number)
        summary = {
            "slide_number": slide_number,
            "shape_count": len(slide.shapes),
            "text_block_count": len(blocks),
        }
        collected.append(
            StructureSection(
                locator=locator,
                section_type="slide",
                preview=first_text[:120],
                metadata=summary,
            )
        )

    return tuple(collected)
172
+
173
+
174
def get_section(document_path: Path, locator: str) -> SectionPayload:
    """Return the slide addressed by *locator* as a ``SectionPayload``.

    The incoming locator is echoed back when it starts with ``slide:``;
    otherwise the canonical ``slide:<n>`` form is used. Each text block of the
    slide becomes a ``PptxTextBlockNode`` addressed by its shape item id.
    """
    slide_number = _slide_number_from_locator(locator)
    bundle = get_slide_bundle(document_path, slide_number)

    if locator.startswith("slide:"):
        section_locator = locator
    else:
        section_locator = make_slide_locator(slide_number)

    nodes = [
        PptxTextBlockNode(
            locator=make_item_id(slide_number, block.shape_id),
            position=block.position,
            shape_id=block.shape_id,
            shape_name=block.shape_name,
            preview=block.preview,
            text=block.text,
            metadata=block.metadata,
        )
        for block in bundle.text_blocks
    ]
    return SectionPayload(
        document=bundle.document,
        locator=section_locator,
        section_type="slide",
        preview=bundle.preview,
        metadata=bundle.metadata,
        slide_number=bundle.slide_number,
        notes_text=bundle.notes_text,
        text_blocks=tuple(nodes),
    )
200
+
201
+
202
def read_node(document_path: Path, locator: str) -> tuple[str, str, dict[str, object]]:
    """Read a slide or a single text shape.

    Args:
        document_path: Presentation to open.
        locator: ``slide:<n>`` for a whole slide, otherwise a shape item id.
            Surrounding whitespace is ignored.

    Returns:
        ``(node_type, text, metadata)``. For a slide the text is every
        non-empty block text joined by blank lines; for a shape it is the
        shape's own text.
    """
    target = locator.strip()
    addresses_slide = target.startswith("slide:") and ":shape:" not in target

    if not addresses_slide:
        resolved = resolve_shape(document_path, target)
        shape_details = {
            "slide_number": resolved.slide_number,
            "shape_id": resolved.shape_id,
            "shape_index": resolved.shape_index,
            "shape_name": resolved.shape_name,
            "is_placeholder": resolved.is_placeholder,
        }
        return ("slide_text_shape", resolved.text, shape_details)

    slide_number = _slide_number_from_locator(target)
    bundle = get_slide_bundle(document_path, slide_number)
    non_empty = [block.text for block in bundle.text_blocks if block.text]
    slide_details = {
        "slide_number": slide_number,
        "notes_text": bundle.notes_text,
        "text_block_count": len(bundle.text_blocks),
    }
    return ("slide", "\n\n".join(non_empty), slide_details)
230
+
231
+
232
def write_node(
    document_path: Path, locator: str, text: str, output_path: Path | None = None
) -> Path:
    """Replace the text of a shape, or of a slide's first text shape.

    Args:
        document_path: Presentation to open.
        locator: Shape item id, or ``slide:<n>`` to target the first text
            shape on that slide. Surrounding whitespace is ignored.
        text: Replacement text.
        output_path: Where to save; the source file is overwritten when ``None``.

    Returns:
        The path the presentation was saved to.

    Raises:
        TargetNotEditableError: If a slide locator is given and the slide has
            no text shapes.
    """
    target = locator.strip()
    shape_locator = target
    if target.startswith("slide:") and ":shape:" not in target:
        shape_locator = _first_text_shape_locator(
            document_path, _slide_number_from_locator(target)
        )
        if shape_locator is None:
            raise TargetNotEditableError("slide has no editable text shapes")
    return replace_text_shape(document_path, shape_locator, text, output_path)
244
+
245
+
246
def get_presentation_structure(
    document_path: Path,
) -> tuple[PresentationSlideSummary, ...]:
    """Return one ``PresentationSlideSummary`` per slide, in slide order.

    Each summary carries the 1-based slide number, a preview (first non-empty
    block text, cut to 120 characters) and shape/text-block counts.
    """
    deck = _open_presentation(document_path)
    summaries: list[PresentationSlideSummary] = []

    for slide_number, slide in enumerate(deck.slides, start=1):
        blocks = _slide_text_blocks(slide)
        first_text = next((block.text for block in blocks if block.text), "")
        counts = {
            "slide_number": slide_number,
            "shape_count": len(slide.shapes),
            "text_block_count": len(blocks),
        }
        summaries.append(
            PresentationSlideSummary(
                slide_number=slide_number,
                preview=first_text[:120],
                metadata=counts,
            )
        )

    return tuple(summaries)
268
+
269
+
270
def get_slide_bundle(document_path: Path, slide_number: int) -> SlideBundle:
    """Collect the text blocks, notes and counts of one slide.

    Args:
        document_path: Presentation to open.
        slide_number: 1-based slide number.

    Returns:
        A ``SlideBundle`` whose preview is the first non-empty block text, cut
        to 120 characters.
    """
    deck = _open_presentation(document_path)
    slide = _resolve_slide(deck, slide_number)
    blocks = _slide_text_blocks(slide)
    first_text = next((block.text for block in blocks if block.text), "")
    return SlideBundle(
        document=_document_ref(document_path),
        slide_number=slide_number,
        preview=first_text[:120],
        notes_text=_notes_text(slide),
        metadata={
            "slide_number": slide_number,
            "shape_count": len(slide.shapes),
            "text_block_count": len(blocks),
        },
        text_blocks=tuple(blocks),
    )
287
+
288
+
289
def get_slide_notes(document_path: Path, slide_number: int) -> str:
    """Return the notes text of the slide with the given 1-based number."""
    deck = _open_presentation(document_path)
    return _notes_text(_resolve_slide(deck, slide_number))
293
+
294
+
295
def create_pptx(output_path: Path) -> Path:
    """Save a newly created presentation to *output_path* and return that path."""
    _open_empty_presentation().save(output_path)
    return output_path
299
+
300
+
301
def add_slide(document_path: Path, output_path: Path | None = None) -> tuple[Path, str]:
    """Append a slide and save the presentation.

    The layout is chosen by ``_default_slide_layout``.

    Returns:
        ``(saved_path, locator)`` where the locator is ``pptx:slide:<n>`` and
        ``n`` is the slide count after the addition.
    """
    deck = _open_presentation(document_path)
    deck.slides.add_slide(_default_slide_layout(deck))
    new_slide_number = len(deck.slides)
    destination = _target_path(document_path, output_path)
    deck.save(destination)
    return destination, f"pptx:slide:{new_slide_number}"
309
+
310
+
311
def add_textbox(
    document_path: Path,
    slide_locator: str,
    text: str,
    left: int | None = None,
    top: int | None = None,
    width: int | None = None,
    height: int | None = None,
    output_path: Path | None = None,
) -> tuple[Path, str]:
    """Add a text box to a slide and save the presentation.

    Args:
        document_path: Presentation to open.
        slide_locator: Locator identifying the target slide.
        text: Text assigned to the new box's text frame.
        left, top, width, height: Geometry of the box; any value left as
            ``None`` is filled in by ``_default_textbox_geometry``.
        output_path: Where to save; the source file is overwritten when ``None``.

    Returns:
        ``(saved_path, locator)`` with the locator in
        ``pptx:slide:<n>:shape:<id>`` form.
    """
    deck = _open_presentation(document_path)
    slide_number = _slide_number_from_any_locator(slide_locator)
    slide = _resolve_slide(deck, slide_number)
    geometry = _default_textbox_geometry(
        deck,
        left=left,
        top=top,
        width=width,
        height=height,
    )
    box_left, box_top, box_width, box_height = geometry
    textbox = slide.shapes.add_textbox(box_left, box_top, box_width, box_height)
    textbox.text_frame.text = text
    new_locator = f"pptx:slide:{slide_number}:shape:{textbox.shape_id}"
    destination = _target_path(document_path, output_path)
    deck.save(destination)
    return destination, new_locator
341
+
342
+
343
def read_paragraph_fragments(
    document_path: Path, locator: str
) -> TextContainerSnapshot:
    """Read one paragraph as a sequence of styled inline fragments.

    Args:
        document_path: Presentation to open.
        locator: Locator accepted by ``_resolve_paragraph_container``.

    Returns:
        A ``TextContainerSnapshot`` of object type ``"paragraph"`` whose text
        is derived from the fragments.
    """
    deck = _open_presentation(document_path)
    target = _resolve_paragraph_container(deck, locator)
    pieces = _read_pptx_paragraph_fragments(target["paragraph"])
    location = {
        "slide_number": target["slide_number"],
        "shape_id": target["shape_id"],
        "paragraph_index": target["paragraph_index"],
    }
    return TextContainerSnapshot(
        locator=target["paragraph_locator"],
        object_type="paragraph",
        text=fragment_text(pieces),
        fragments=pieces,
        metadata=location,
    )
360
+
361
+
362
def rewrite_paragraph_fragments(
    document_path: Path,
    locator: str,
    fragments: list[InlineFragment] | tuple[InlineFragment, ...],
    output_path: Path | None = None,
) -> tuple[Path, str, TextContainerSnapshot]:
    """Replace a paragraph's runs with *fragments* and save the presentation.

    Args:
        document_path: Presentation to open.
        locator: Locator accepted by ``_resolve_paragraph_container``.
        fragments: Replacement content; normalized before being written.
        output_path: Where to save; the source file is overwritten when ``None``.

    Returns:
        ``(saved_path, paragraph_locator, snapshot)`` where the snapshot
        describes the normalized fragments that were written.
    """
    deck = _open_presentation(document_path)
    target = _resolve_paragraph_container(deck, locator)
    pieces = normalize_fragments(fragments)
    _rewrite_pptx_paragraph(target["paragraph"], pieces)

    destination = _target_path(document_path, output_path)
    deck.save(destination)

    paragraph_locator = target["paragraph_locator"]
    written = TextContainerSnapshot(
        locator=paragraph_locator,
        object_type="paragraph",
        text=fragment_text(pieces),
        fragments=pieces,
        metadata={
            "slide_number": target["slide_number"],
            "shape_id": target["shape_id"],
            "paragraph_index": target["paragraph_index"],
        },
    )
    return destination, paragraph_locator, written
386
+
387
+
388
def style_run(
    document_path: Path,
    locator: str,
    style: InlineStyle,
    clear_fields: list[str] | tuple[str, ...],
    output_path: Path | None = None,
) -> tuple[Path, str, dict[str, object]]:
    """Apply inline styling to a single run and save the presentation.

    Args:
        document_path: Presentation to open.
        locator: Text locator; the target is resolved with ``require_run=True``.
        style: Inline style values to set.
        clear_fields: Inline style field names to reset; validated against
            ``_INLINE_STYLE_FIELDS``.
        output_path: Where to save; the source file is overwritten when ``None``.

    Returns:
        ``(saved_path, shape_locator, report)`` where the report lists the
        cleared and skipped fields.
    """
    deck = _open_presentation(document_path)
    target = _resolve_text_target(deck, locator, require_run=True)
    cleared = _normalize_clear_fields(clear_fields, _INLINE_STYLE_FIELDS)
    skipped = _apply_pptx_inline_style(target["run"], style, cleared)
    destination = _target_path(document_path, output_path)
    deck.save(destination)
    report = {"cleared_fields": cleared, "skipped_fields": skipped}
    return destination, target["shape_locator"], report
406
+
407
+
408
def style_paragraph_range(
    document_path: Path,
    locator: str,
    text_range: VisibleTextRange,
    style: InlineStyle,
    clear_fields: list[str] | tuple[str, ...],
    output_path: Path | None = None,
) -> tuple[Path, str, dict[str, object]]:
    """Style a character range inside one paragraph.

    The paragraph is read as fragments, the style is applied to the requested
    range, and the paragraph is rewritten with the resulting fragments.

    Args:
        document_path: Presentation to read and update.
        locator: Locator accepted by ``read_paragraph_fragments``.
        text_range: Range to style; its ``start`` and ``end`` are echoed back.
        style: Inline style values to set.
        clear_fields: Inline style field names to reset; validated against
            ``_INLINE_STYLE_FIELDS``.
        output_path: Where to save; the source file is overwritten when ``None``.

    Returns:
        ``(saved_path, paragraph_locator, report)``.
    """
    current = read_paragraph_fragments(document_path, locator)
    cleared = _normalize_clear_fields(clear_fields, _INLINE_STYLE_FIELDS)
    restyled = apply_style_to_range(
        current.fragments, text_range, style=style, clear_fields=cleared
    )
    destination, paragraph_locator, rewritten = rewrite_paragraph_fragments(
        document_path,
        locator,
        restyled,
        output_path=output_path,
    )
    report = {
        "cleared_fields": cleared,
        "range": {"start": text_range.start, "end": text_range.end},
        "text": rewritten.text,
    }
    return destination, paragraph_locator, report
436
+
437
+
438
def style_paragraph(
    document_path: Path,
    locator: str,
    style: BlockStyle,
    clear_fields: list[str] | tuple[str, ...],
    output_path: Path | None = None,
) -> tuple[Path, str, dict[str, object]]:
    """Apply block-level styling to a paragraph and save the presentation.

    Args:
        document_path: Presentation to open.
        locator: Text locator; the target is resolved with ``require_run=False``.
        style: Block style values to set.
        clear_fields: Block style field names to reset; validated against
            ``_BLOCK_STYLE_FIELDS``.
        output_path: Where to save; the source file is overwritten when ``None``.

    Returns:
        ``(saved_path, shape_locator, report)`` where the report lists the
        cleared and skipped fields.
    """
    deck = _open_presentation(document_path)
    target = _resolve_text_target(deck, locator, require_run=False)
    cleared = _normalize_clear_fields(clear_fields, _BLOCK_STYLE_FIELDS)
    skipped = _apply_pptx_block_style(target["paragraph"], style, cleared)
    destination = _target_path(document_path, output_path)
    deck.save(destination)
    report = {"cleared_fields": cleared, "skipped_fields": skipped}
    return destination, target["shape_locator"], report
456
+
457
+
458
def parse_item_id(item_id: str) -> tuple[int, int]:
    """Split a ``slide:<n>:shape:<id>`` item id into its two integers.

    Returns:
        ``(slide_number, shape_id)``.

    Raises:
        InvalidArgumentsError: If the id does not have the expected four-part
            shape, a number is not an integer, or the slide number is below 1.
    """
    pieces = item_id.split(":")
    well_formed = len(pieces) == 4 and pieces[0] == "slide" and pieces[2] == "shape"
    if not well_formed:
        raise InvalidArgumentsError(f"Unsupported PPTX item id: {item_id}")

    _, raw_slide, _, raw_shape = pieces
    try:
        slide_number = int(raw_slide)
        shape_id = int(raw_shape)
    except ValueError as exc:
        raise InvalidArgumentsError(f"Invalid PPTX item id: {item_id}") from exc

    if slide_number < 1:
        raise InvalidArgumentsError(f"Invalid PPTX slide number: {slide_number}")

    return slide_number, shape_id
473
+
474
+
475
def make_item_id(slide_number: int, shape_id: int) -> str:
    """Build the ``slide:<n>:shape:<id>`` item id for a shape on a slide."""
    return "slide:{}:shape:{}".format(slide_number, shape_id)
477
+
478
+
479
def _open_presentation(document_path: Path):
    """Open the presentation stored at *document_path*.

    Raises:
        RuntimeError: If python-pptx could not be imported.
    """
    if Presentation is not None:
        return Presentation(str(document_path))
    raise RuntimeError("python-pptx is required for PPTX operations.")
483
+
484
+
485
def _open_empty_presentation():
    """Create a presentation by calling ``Presentation()`` with no arguments.

    Raises:
        RuntimeError: If python-pptx could not be imported.
    """
    if Presentation is not None:
        return Presentation()
    raise RuntimeError("python-pptx is required for PPTX operations.")
489
+
490
+
491
def _document_ref(document_path: Path) -> DocumentRef:
    """Build a ``DocumentRef`` for a PPTX file on disk.

    The resolved path in POSIX form serves as the document id, and the file's
    modification time is read with ``stat``.
    """
    absolute = document_path.resolve()
    modified = absolute.stat().st_mtime
    return DocumentRef(
        document_id=absolute.as_posix(),
        path=absolute,
        file_type="pptx",
        display_name=absolute.name,
        modified_time=modified,
    )
501
+
502
+
503
def _resolve_shape(presentation, item_id: str):
    """Return the shape addressed by *item_id* within an open presentation.

    Raises:
        TargetNotFoundError: If no shape on the slide has the requested id.
    """
    slide_number, shape_id = parse_item_id(item_id)
    slide = _resolve_slide(presentation, slide_number)

    found = next(
        (candidate for candidate in slide.shapes if candidate.shape_id == shape_id),
        None,
    )
    if found is None:
        raise TargetNotFoundError(
            f"Shape {shape_id} does not exist on slide {slide_number}."
        )
    return found
514
+
515
+
516
def _resolve_slide(presentation, slide_number: int):
    """Return the slide with the given 1-based number.

    Raises:
        InvalidArgumentsError: If *slide_number* is below 1.
        TargetNotFoundError: If the presentation has fewer slides.
    """
    if slide_number < 1:
        raise InvalidArgumentsError(f"Invalid PPTX slide number: {slide_number}")

    slides = presentation.slides
    try:
        slide = slides[slide_number - 1]
    except IndexError as exc:
        raise TargetNotFoundError(
            f"Slide {slide_number} does not exist in the presentation."
        ) from exc
    return slide
526
+
527
+
528
def _require_text_frame(shape):
    """Return the shape's text frame.

    Raises:
        TargetNotEditableError: If the shape does not report a text frame.
    """
    if getattr(shape, "has_text_frame", False):
        return shape.text_frame
    raise TargetNotEditableError("target not editable")
532
+
533
+
534
def _text_frame_text(text_frame) -> str:
    """Join the text of every paragraph in the frame with newlines."""
    lines = [paragraph.text for paragraph in text_frame.paragraphs]
    return "\n".join(lines)
536
+
537
+
538
def _shape_index(shape) -> int:
    """Return the shape's 0-based position among its sibling shapes.

    Only sibling elements that are shapes are counted, so the result matches
    the ``enumerate(slide.shapes)`` position that ``extract_document`` and
    ``_slide_text_blocks`` report as ``shape_index``. The raw child index of
    the XML element would also count the non-shape children that precede the
    shapes in the parent (``nvGrpSpPr`` and ``grpSpPr``).

    Raises:
        ValueError: If the shape's element is not a child of its parent.
    """
    # Local names of the elements python-pptx treats as shapes.
    shape_tags = {"sp", "grpSp", "graphicFrame", "cxnSp", "pic", "contentPart"}
    element = shape.element
    parent = element.getparent()

    position = 0
    for sibling in parent:
        if sibling is element:
            return position
        tag = getattr(sibling, "tag", None)
        # Comments and processing instructions have a non-string tag; skip them.
        if isinstance(tag, str) and tag.rsplit("}", 1)[-1] in shape_tags:
            position += 1
    raise ValueError("shape element is not attached to its parent")
540
+
541
+
542
def _target_path(document_path: Path, output_path: Path | None) -> Path:
    """Pick the save destination: *output_path* if given, else the source path."""
    if output_path is None:
        return document_path
    return output_path
544
+
545
+
546
def _default_slide_layout(presentation):
    """Choose the layout used for newly added slides.

    Preference order: the first layout whose name is ``"blank"``
    (case-insensitive), then the layout at index 6 when more than six layouts
    exist, then the last layout.

    Raises:
        RuntimeError: If the presentation has no slide layouts.
    """
    layouts = presentation.slide_layouts
    if not layouts:
        raise RuntimeError("Presentation has no slide layouts.")
    for layout in layouts:
        # A layout may expose ``name`` as None; treat that like a missing name
        # instead of failing on ``None.lower()``.
        layout_name = getattr(layout, "name", "") or ""
        if layout_name.lower() == "blank":
            return layout
    if len(layouts) > 6:
        return layouts[6]
    return layouts[-1]
555
+
556
+
557
def _slide_number_from_any_locator(locator: str) -> int:
    """Extract the slide number from a locator after converting it to v2 form.

    Raises:
        InvalidArgumentsError: If the canonical locator does not begin with
            the ``pptx``/``slide`` components followed by a slide index.
    """
    canonical = to_v2_locator(locator, file_type="pptx")
    components = parse_locator(canonical).components
    addresses_slide = len(components) >= 3 and components[:2] == ("pptx", "slide")
    if not addresses_slide:
        raise InvalidArgumentsError(f"Unsupported PPTX slide locator: {locator}")
    return _parse_index(components[2], locator, label="slide")
563
+
564
+
565
def _default_textbox_geometry(
    presentation,
    *,
    left: int | None,
    top: int | None,
    width: int | None,
    height: int | None,
) -> tuple[int, int, int, int]:
    """Fill in missing text box geometry from the slide dimensions.

    A missing width defaults to 55% of the slide width and a missing height to
    20% of the slide height. A missing left/top centers the box along that
    axis using the resolved width/height.

    Returns:
        ``(left, top, width, height)``.
    """
    page_width = int(presentation.slide_width)
    page_height = int(presentation.slide_height)

    box_width = int(page_width * 0.55) if width is None else width
    box_height = int(page_height * 0.2) if height is None else height
    box_left = int((page_width - box_width) / 2) if left is None else left
    box_top = int((page_height - box_height) / 2) if top is None else top

    return box_left, box_top, box_width, box_height
582
+
583
+
584
def _resolve_text_target(
    presentation, locator: str, *, require_run: bool
) -> dict[str, object]:
    """Resolve a text locator to its shape, paragraph and (optionally) run.

    The locator is converted to v2 form and must read
    ``pptx:slide:<n>:shape|text_shape:<id>[:para:<i>[:run:<j>]]``. Paragraph
    and run indices default to 0 when omitted.

    Args:
        presentation: Open presentation to search.
        locator: Locator in any form ``to_v2_locator`` accepts.
        require_run: When true, a run is resolved as well; a paragraph without
            runs gets a new empty run.

    Returns:
        A dict with ``canonical_locator``, ``shape_locator``, ``slide_number``,
        ``shape_id``, ``paragraph_index``, ``run_index``, ``paragraph`` and
        ``run`` (``None`` unless *require_run* is true).

    Raises:
        InvalidArgumentsError: If the locator is malformed or carries a
            negative paragraph/run index.
        TargetNotEditableError: If the locator does not address a shape.
        TargetNotFoundError: If the paragraph or run does not exist.
    """
    canonical = to_v2_locator(locator, file_type="pptx")
    parts = parse_locator(canonical).components
    if len(parts) < 5 or parts[:2] != ("pptx", "slide"):
        raise InvalidArgumentsError(f"Unsupported PPTX text locator: {locator}")
    if parts[3] not in {"shape", "text_shape"}:
        raise TargetNotEditableError("target not editable")

    slide_number = _parse_index(parts[2], locator, label="slide")
    shape_id = _parse_index(parts[4], locator, label="shape")
    shape = _resolve_shape(presentation, make_item_id(slide_number, shape_id))
    text_frame = _require_text_frame(shape)

    paragraph_index = 0
    if len(parts) >= 7:
        if parts[5] != "para":
            raise InvalidArgumentsError(f"Unsupported PPTX text locator: {locator}")
        paragraph_index = _parse_index(parts[6], locator, label="paragraph")
        # A negative index would silently count from the end of the sequence.
        if paragraph_index < 0:
            raise InvalidArgumentsError(
                f"Invalid PPTX paragraph index in locator: {locator}"
            )
    try:
        paragraph = text_frame.paragraphs[paragraph_index]
    except IndexError as exc:
        raise TargetNotFoundError(
            f"Paragraph {paragraph_index} does not exist in PPTX shape {shape_id} on slide {slide_number}."
        ) from exc

    run_index = 0
    if len(parts) >= 9:
        if parts[7] != "run":
            raise InvalidArgumentsError(f"Unsupported PPTX text locator: {locator}")
        run_index = _parse_index(parts[8], locator, label="run")
        # Same wrap-around guard as for the paragraph index.
        if run_index < 0:
            raise InvalidArgumentsError(
                f"Invalid PPTX run index in locator: {locator}"
            )

    run = None
    if require_run:
        if not paragraph.runs:
            # Nothing to style yet: give the paragraph a run to carry the style.
            run = paragraph.add_run()
        else:
            try:
                run = paragraph.runs[run_index]
            except IndexError as exc:
                raise TargetNotFoundError(
                    f"Run {run_index} does not exist in PPTX paragraph {paragraph_index} on slide {slide_number}."
                ) from exc

    return {
        "canonical_locator": canonical,
        "shape_locator": f"pptx:slide:{slide_number}:shape:{shape_id}",
        "slide_number": slide_number,
        "shape_id": shape_id,
        "paragraph_index": paragraph_index,
        "run_index": run_index,
        "paragraph": paragraph,
        "run": run,
    }
637
+
638
+
639
def _resolve_paragraph_container(presentation, locator: str) -> dict[str, object]:
    """Resolve a locator to a paragraph and attach its paragraph-level locator.

    A run locator is reduced to its paragraph; a paragraph locator is kept as
    is; a shape locator is accepted only when the shape has exactly one
    paragraph.

    Returns:
        The dict from ``_resolve_text_target`` plus a ``paragraph_locator`` key.

    Raises:
        TargetNotEditableError: If a shape-level locator addresses a text
            frame that does not hold exactly one paragraph.
    """
    target = _resolve_text_target(presentation, locator, require_run=False)
    canonical = str(target["canonical_locator"])
    names_paragraph = ":para:" in canonical

    if ":run:" in canonical:
        # Drop the trailing run component to address the enclosing paragraph.
        paragraph_locator = canonical.rsplit(":run:", maxsplit=1)[0]
    elif names_paragraph:
        paragraph_locator = canonical
    else:
        paragraph_locator = f"pptx:slide:{target['slide_number']}:text_shape:{target['shape_id']}:para:{target['paragraph_index']}"

    if not names_paragraph:
        shape = _resolve_shape(
            presentation, make_item_id(target["slide_number"], target["shape_id"])
        )
        frame = _require_text_frame(shape)
        if len(frame.paragraphs) != 1:
            raise TargetNotEditableError(
                "PPTX range-based partial formatting requires a paragraph locator or a single-paragraph text shape."
            )

    return {**target, "paragraph_locator": paragraph_locator}
660
+
661
+
662
def _slide_text_blocks(slide) -> list[SlideTextBlock]:
    """Describe every text-bearing shape on a slide, in ``slide.shapes`` order.

    ``position`` and the ``shape_index`` metadata entry both hold the shape's
    index among all shapes of the slide, including those without text.
    """
    collected: list[SlideTextBlock] = []
    for position, shape in enumerate(slide.shapes):
        if getattr(shape, "has_text_frame", False):
            body = _text_frame_text(shape.text_frame)
            details = {
                "shape_index": position,
                "is_placeholder": bool(getattr(shape, "is_placeholder", False)),
            }
            collected.append(
                SlideTextBlock(
                    position=position,
                    shape_id=shape.shape_id,
                    shape_name=getattr(shape, "name", None),
                    preview=body[:120],
                    text=body,
                    metadata=details,
                )
            )
    return collected
682
+
683
+
684
def _slide_number_from_locator(locator: str) -> int:
    """Extract the slide number from a ``slide:<n>`` locator or a shape item id.

    Surrounding whitespace is ignored.

    Raises:
        InvalidArgumentsError: If the locator has another shape, the number is
            not an integer, or the number is below 1.
    """
    target = locator.strip()
    if target.startswith("slide:") and ":shape:" in target:
        return parse_item_id(target)[0]

    pieces = target.split(":")
    if not (len(pieces) == 2 and pieces[0] == "slide"):
        raise InvalidArgumentsError(f"Unsupported PPTX locator: {locator}")

    try:
        slide_number = int(pieces[1])
    except ValueError as exc:
        raise InvalidArgumentsError(f"Invalid slide locator: {locator}") from exc
    if slide_number < 1:
        raise InvalidArgumentsError(f"Invalid PPTX slide number: {slide_number}")
    return slide_number
699
+
700
+
701
def _first_text_shape_locator(document_path: Path, slide_number: int) -> str | None:
    """Return the item id of the slide's first text block, or ``None`` if it has none."""
    blocks = get_slide_bundle(document_path, slide_number).text_blocks
    if blocks:
        return make_item_id(slide_number, blocks[0].shape_id)
    return None
706
+
707
+
708
def _notes_text(slide) -> str:
    """Return the slide's speaker notes, one non-blank paragraph per line.

    Returns an empty string when the slide has no notes slide or the notes
    slide has no notes text frame.
    """
    # python-pptx creates a notes slide the first time ``notes_slide`` is read.
    # Check ``has_notes_slide`` first so a plain read never adds one to the
    # deck. Objects without that attribute keep the previous lookup behavior.
    if not getattr(slide, "has_notes_slide", True):
        return ""
    notes_slide = getattr(slide, "notes_slide", None)
    if notes_slide is None:
        return ""
    text_frame = getattr(notes_slide, "notes_text_frame", None)
    if text_frame is None:
        return ""
    # Whitespace-only paragraphs are dropped; kept paragraphs are not trimmed.
    lines = [
        paragraph.text for paragraph in text_frame.paragraphs if paragraph.text.strip()
    ]
    return "\n".join(lines)
719
+
720
+
721
def _read_pptx_paragraph_fragments(paragraph) -> tuple[InlineFragment, ...]:
    """Convert a paragraph's runs into normalized inline fragments.

    Returns an empty tuple when the paragraph has no runs.
    """
    runs = paragraph.runs
    if not runs:
        return ()
    pieces = []
    for run in runs:
        pieces.append(
            InlineFragment(text=run.text, style=_capture_pptx_inline_style(run))
        )
    return normalize_fragments(pieces)
733
+
734
+
735
def _rewrite_pptx_paragraph(
    paragraph,
    fragments: list[InlineFragment] | tuple[InlineFragment, ...],
) -> None:
    """Replace the paragraph's content with one run per normalized fragment.

    When normalization leaves no fragments, a single run with empty text is
    added instead.
    """
    paragraph.clear()
    pieces = normalize_fragments(fragments)
    if pieces:
        for piece in pieces:
            new_run = paragraph.add_run()
            new_run.text = piece.text
            _apply_pptx_inline_style(new_run, piece.style, ())
    else:
        paragraph.add_run().text = ""
748
+
749
+
750
def _capture_pptx_inline_style(run) -> InlineStyle:
    """Read a run's font settings into an ``InlineStyle``.

    Font size is reported in points and font color as the string form of the
    font's RGB value; both are ``None`` when not set. Other fields are read
    from the font with a ``None`` default for missing attributes.
    """
    font = run.font

    size = font.size
    size_in_points = None if size is None else size.pt

    rgb = getattr(font.color, "rgb", None)
    color_text = None if rgb is None else str(rgb)

    return InlineStyle(
        bold=getattr(font, "bold", None),
        italic=getattr(font, "italic", None),
        underline=getattr(font, "underline", None),
        strike=getattr(font, "strike", None),
        font_name=getattr(font, "name", None),
        font_size=size_in_points,
        font_color=color_text,
    )
767
+
768
+
769
# Inline (run-level) style field names accepted in ``clear_fields``.
_INLINE_STYLE_FIELDS = frozenset(
    (
        "bold",
        "italic",
        "underline",
        "strike",
        "font_name",
        "font_size",
        "font_color",
        "highlight",
    )
)
# Block (paragraph-level) style field names accepted in ``clear_fields``.
_BLOCK_STYLE_FIELDS = frozenset(
    (
        "alignment",
        "indent_level",
        "left_indent",
        "right_indent",
        "spacing_before",
        "spacing_after",
        "line_spacing",
        "wrap_text",
        "vertical_alignment",
        "fill_color",
        "number_format",
    )
)
796
# Alignment keyword -> python-pptx ``PP_ALIGN`` member. Every value is None
# when python-pptx could not be imported.
_PPTX_ALIGNMENT_MAP = {
    keyword: None if PP_ALIGN is None else getattr(PP_ALIGN, member_name)
    for keyword, member_name in (
        ("left", "LEFT"),
        ("center", "CENTER"),
        ("right", "RIGHT"),
        ("justify", "JUSTIFY"),
    )
}
# Vertical alignment keyword -> python-pptx ``MSO_ANCHOR`` member, with the
# same None fallback.
_PPTX_VERTICAL_ALIGNMENT_MAP = {
    keyword: None if MSO_ANCHOR is None else getattr(MSO_ANCHOR, member_name)
    for keyword, member_name in (
        ("top", "TOP"),
        ("center", "MIDDLE"),
        ("bottom", "BOTTOM"),
    )
}
807
+
808
+
809
def _normalize_clear_fields(
    clear_fields: list[str] | tuple[str, ...],
    allowed: frozenset[str],
) -> tuple[str, ...]:
    """Validate and de-duplicate field names, keeping first-seen order.

    Raises:
        InvalidArgumentsError: At the first name that is not in *allowed*.
    """
    # A dict keeps insertion order, giving ordered de-duplication in one pass.
    ordered: dict[str, None] = {}
    for name in clear_fields:
        if name not in allowed:
            raise InvalidArgumentsError(
                f"Unknown style field in clear_fields: {name}"
            )
        ordered.setdefault(name, None)
    return tuple(ordered)
824
+
825
+
826
def _clear_pptx_font_color(run) -> None:
    """Remove any explicit fill from the run's properties."""
    run_properties = getattr(getattr(run, "_r", None), "rPr", None)
    if run_properties is None:
        return  # no run properties, so there is no explicit color to remove
    fill_tags = {"noFill", "solidFill", "gradFill", "blipFill", "pattFill", "grpFill"}
    # Copy the children first: the element is modified while it is scanned.
    for child in list(run_properties):
        tag = child.tag
        if isinstance(tag, str) and tag.rsplit("}", 1)[-1] in fill_tags:
            run_properties.remove(child)


def _apply_pptx_inline_style(
    run, style: InlineStyle, clear_fields: tuple[str, ...]
) -> list[str]:
    """Apply or clear inline style fields on a run's font.

    For each supported field, a name listed in *clear_fields* resets the font
    attribute; otherwise a non-``None`` value in *style* is assigned. Fields
    that are neither cleared nor set are left untouched.

    Args:
        run: Run whose font is updated.
        style: Requested inline style values.
        clear_fields: Names of fields to reset.

    Returns:
        Names of requested fields that were not applied; currently only
        ``"highlight"``.

    Raises:
        InvalidArgumentsError: If ``style.font_color`` is not a 6-digit hex color.
    """
    clear_set = set(clear_fields)
    font = run.font
    skipped_fields: list[str] = []

    # (style field, font attribute) pairs that map one-to-one.
    direct_fields = (
        ("bold", "bold"),
        ("italic", "italic"),
        ("underline", "underline"),
        ("strike", "strike"),
        ("font_name", "name"),
    )
    for field_name, font_attribute in direct_fields:
        if field_name in clear_set:
            setattr(font, font_attribute, None)
            continue
        requested = getattr(style, field_name)
        if requested is not None:
            setattr(font, font_attribute, requested)

    if "font_size" in clear_set:
        font.size = None
    elif style.font_size is not None:
        font.size = Pt(style.font_size)

    if "font_color" in clear_set:
        # ``font.color.rgb`` only accepts RGBColor values, so assigning None
        # raises; drop the explicit fill element instead.
        _clear_pptx_font_color(run)
    elif style.font_color is not None:
        font.color.rgb = RGBColor.from_string(_normalize_hex_color(style.font_color))

    if style.highlight is not None or "highlight" in clear_set:
        skipped_fields.append("highlight")

    return skipped_fields
872
+
873
+
874
def _apply_pptx_block_style(
    paragraph, style: BlockStyle, clear_fields: tuple[str, ...]
) -> list[str]:
    """Apply or clear block style fields on a paragraph.

    Alignment, indent level, spacing before/after and line spacing are
    applied. Clearing resets each of them to ``None``, except the indent
    level, which is reset to 0.

    Returns:
        Names of requested fields that were not applied: ``left_indent``,
        ``right_indent``, ``fill_color``, ``number_format``, ``wrap_text`` and
        ``vertical_alignment``, each listed when it was set or cleared.
    """
    pending_clear = set(clear_fields)

    def _assign(field_name, attribute, cleared_value, convert):
        # Clearing takes precedence over a value supplied in ``style``.
        if field_name in pending_clear:
            setattr(paragraph, attribute, cleared_value)
            return
        requested = getattr(style, field_name)
        if requested is not None:
            setattr(paragraph, attribute, convert(requested))

    _assign("alignment", "alignment", None, lambda raw: _pptx_alignment_value(raw))
    _assign("indent_level", "level", 0, lambda level: level)
    _assign("spacing_before", "space_before", None, lambda points: Pt(points))
    _assign("spacing_after", "space_after", None, lambda points: Pt(points))
    _assign("line_spacing", "line_spacing", None, lambda spacing: spacing)

    unsupported = (
        "left_indent",
        "right_indent",
        "fill_color",
        "number_format",
        "wrap_text",
        "vertical_alignment",
    )
    return [
        field_name
        for field_name in unsupported
        if getattr(style, field_name) is not None or field_name in pending_clear
    ]
916
+
917
+
918
def _pptx_alignment_value(raw: str):
    """Translate an alignment keyword into its ``_PPTX_ALIGNMENT_MAP`` value.

    Matching ignores case and surrounding whitespace.

    Raises:
        InvalidArgumentsError: If the keyword is not in the map.
    """
    keyword = raw.strip().lower()
    if keyword in _PPTX_ALIGNMENT_MAP:
        return _PPTX_ALIGNMENT_MAP[keyword]
    raise InvalidArgumentsError(f"Unsupported PPTX alignment: {raw}")
923
+
924
+
925
def _normalize_hex_color(value: str) -> str:
    """Return *value* as six uppercase hex digits.

    Surrounding whitespace and leading ``#`` characters are removed first.

    Raises:
        InvalidArgumentsError: If the remainder is not exactly six hex digits.
    """
    candidate = value.strip().lstrip("#").upper()
    only_hex_digits = all(character in "0123456789ABCDEF" for character in candidate)
    if len(candidate) == 6 and only_hex_digits:
        return candidate
    raise InvalidArgumentsError(f"Invalid RGB hex color: {value}")
932
+
933
+
934
def _parse_index(raw: str, locator: str, *, label: str) -> int:
    """Convert one locator component to an integer.

    Args:
        raw: Component text to convert.
        locator: Full locator, used only in the error message.
        label: Name of the component (for example ``"slide"``), used only in
            the error message.

    Raises:
        InvalidArgumentsError: If *raw* is not an integer.
    """
    try:
        value = int(raw)
    except ValueError as exc:
        raise InvalidArgumentsError(
            f"Invalid PPTX {label} index in locator: {locator}"
        ) from exc
    return value