offagent 0.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. offagent/__init__.py +3 -0
  2. offagent/__main__.py +5 -0
  3. offagent/adapters/__init__.py +1 -0
  4. offagent/adapters/docx_adapter.py +1237 -0
  5. offagent/adapters/embedding_provider.py +132 -0
  6. offagent/adapters/pptx_adapter.py +940 -0
  7. offagent/adapters/xlsx_adapter.py +1266 -0
  8. offagent/app/__init__.py +1 -0
  9. offagent/app/progress.py +52 -0
  10. offagent/app/services.py +4267 -0
  11. offagent/config.py +287 -0
  12. offagent/domain/__init__.py +1 -0
  13. offagent/domain/locators.py +444 -0
  14. offagent/domain/models.py +477 -0
  15. offagent/domain/text_fragments.py +136 -0
  16. offagent/errors.py +29 -0
  17. offagent/indexing/__init__.py +1 -0
  18. offagent/indexing/store.py +795 -0
  19. offagent/interfaces/__init__.py +1 -0
  20. offagent/interfaces/cli.py +438 -0
  21. offagent/interfaces/cli_output.py +139 -0
  22. offagent/interfaces/cli_progress.py +120 -0
  23. offagent/interfaces/mcp.py +1145 -0
  24. offagent/interfaces/mcp_converters.py +80 -0
  25. offagent/interfaces/mcp_models.py +923 -0
  26. offagent/objects/__init__.py +3 -0
  27. offagent/objects/base.py +26 -0
  28. offagent/objects/docx_objects.py +951 -0
  29. offagent/objects/pptx_objects.py +895 -0
  30. offagent/objects/xlsx_objects.py +962 -0
  31. offagent/path_policy.py +42 -0
  32. offagent/storage/__init__.py +1 -0
  33. offagent/storage/versioning.py +31 -0
  34. offagent-0.10.0.dist-info/METADATA +546 -0
  35. offagent-0.10.0.dist-info/RECORD +39 -0
  36. offagent-0.10.0.dist-info/WHEEL +5 -0
  37. offagent-0.10.0.dist-info/entry_points.txt +2 -0
  38. offagent-0.10.0.dist-info/licenses/LICENSE +21 -0
  39. offagent-0.10.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,477 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass, field
4
+ from enum import StrEnum
5
+ from pathlib import Path
6
+ from typing import Any, Literal
7
+
8
# File-type tags; used as the type of DocumentRef.file_type.
FileType = Literal["docx", "pptx", "xlsx"]
# Operation names; used as the type of PatchOperation.operation_type.
OperationType = Literal["replace_text", "append_text", "write_value"]
# Retrieval mode names (presumably what a search request selects — TODO confirm).
SearchMode = Literal["keyword", "semantic", "hybrid"]
# Same three values as SearchMode; used as the type of SearchHit.match_mode.
MatchMode = Literal["keyword", "semantic", "hybrid"]
12
+
13
+
14
class Capability(StrEnum):
    """String-valued enum of capability names.

    Used as the element type of the ``capabilities`` tuples on
    ChildSummary, ObjectPayload and MutationResult.
    """

    READ = "read"
    UPDATE = "update"
    DELETE = "delete"
    ADD_CHILD = "add_child"
    MOVE = "move"
    COPY = "copy"
    STYLE = "style"
22
+
23
+
24
@dataclass(frozen=True)
class DocumentRef:
    """Frozen record identifying a document by id, path and file type.

    ``content_hash`` and ``item_count`` are optional and default to None.
    """

    document_id: str
    path: Path
    file_type: FileType
    display_name: str
    # presumably a filesystem modification timestamp — TODO confirm
    modified_time: float
    content_hash: str | None = None
    item_count: int | None = None
33
+
34
+
35
@dataclass(frozen=True)
class ItemRef:
    """Frozen reference to one item of a document: ids, type, locator and preview.

    ``content_text`` is optional and defaults to None.
    """

    document_id: str
    item_id: str
    item_type: str
    locator: str
    preview: str
    # The dict itself stays mutable even though the dataclass is frozen;
    # because of this field, hash() of an instance raises TypeError.
    metadata: dict[str, Any] = field(default_factory=dict)
    content_text: str | None = None
44
+
45
+
46
@dataclass(frozen=True)
class SearchHit:
    """Frozen record for a single search result.

    The first seven fields are required; document path, display name,
    match mode and per-mode scores are optional and default to None.
    """

    document_id: str
    item_id: str
    score: float
    matched_text: str
    locator: str
    item_type: str
    preview: str
    document_path: Path | None = None
    display_name: str | None = None
    match_mode: MatchMode | None = None
    # presumably keyed by scoring mode name — TODO confirm against producer
    scores: dict[str, float] | None = None
    metadata: dict[str, Any] = field(default_factory=dict)
60
+
61
+
62
@dataclass(frozen=True)
class IndexedItem:
    """Frozen record for an item with its locator, preview and full content text.

    Unlike ItemRef it carries no ``document_id`` and ``content_text`` is required.
    """

    item_id: str
    item_type: str
    locator: str
    preview: str
    content_text: str
    metadata: dict[str, Any] = field(default_factory=dict)
70
+
71
+
72
@dataclass(frozen=True)
class XlsxRowEmbeddingCell:
    """Frozen record for one cell; element type of XlsxRowEmbedding.contributing_cells."""

    item_id: str
    # presumably an A1-style cell reference — TODO confirm
    coordinate: str
    display_text: str
    preview: str
78
+
79
+
80
@dataclass(frozen=True)
class XlsxRowEmbedding:
    """Frozen record holding the text for one worksheet row and its contributing cells.

    NOTE(review): presumably the unit that is embedded for semantic search —
    confirm against the indexing code.
    """

    sheet_name: str
    row_number: int
    text: str
    preview: str
    representative_item_id: str
    contributing_cells: tuple[XlsxRowEmbeddingCell, ...]
88
+
89
+
90
@dataclass(frozen=True)
class InlineStyle:
    """Frozen set of optional inline text style attributes.

    Every field defaults to None, so ``InlineStyle()`` is an all-None style.
    """

    bold: bool | None = None
    italic: bool | None = None
    underline: bool | None = None
    strike: bool | None = None
    font_name: str | None = None
    # units not visible here (presumably points) — TODO confirm
    font_size: float | None = None
    font_color: str | None = None
    highlight: str | None = None
100
+
101
+
102
@dataclass(frozen=True)
class VisibleTextRange:
    """Frozen pair of integer offsets ``start`` and ``end``.

    No validation happens here; the dataclass accepts any integers.
    """

    start: int
    end: int
106
+
107
+
108
@dataclass(frozen=True)
class InlineFragment:
    """Frozen run of text paired with an InlineStyle."""

    text: str
    # A fresh all-None InlineStyle is created when no style is supplied.
    style: InlineStyle = field(default_factory=InlineStyle)
112
+
113
+
114
@dataclass(frozen=True)
class TextContainerSnapshot:
    """Frozen snapshot of a text container: locator, object type, text and styled fragments."""

    locator: str
    object_type: str
    text: str
    # presumably the fragments concatenate to ``text`` — TODO confirm
    fragments: tuple[InlineFragment, ...]
    metadata: dict[str, Any] = field(default_factory=dict)
121
+
122
+
123
@dataclass(frozen=True)
class BlockStyle:
    """Frozen set of optional block-level formatting attributes.

    Every field defaults to None. Units of the numeric fields are not
    visible in this module.
    """

    alignment: str | None = None
    indent_level: int | None = None
    left_indent: float | None = None
    right_indent: float | None = None
    spacing_before: float | None = None
    spacing_after: float | None = None
    line_spacing: float | None = None
    wrap_text: bool | None = None
    vertical_alignment: str | None = None
    fill_color: str | None = None
    number_format: str | None = None
136
+
137
+
138
@dataclass(frozen=True)
class PatchOperation:
    """Frozen description of one patch: target ids, operation type and payload.

    ``dry_run`` defaults to False and ``output_path`` to None.
    """

    patch_id: str
    document_id: str
    item_id: str
    operation_type: OperationType
    # schema depends on operation_type and is not defined here — verify against consumer
    payload: dict[str, Any] = field(default_factory=dict)
    dry_run: bool = False
    output_path: Path | None = None
147
+
148
+
149
@dataclass(frozen=True)
class StructureUnit:
    """Frozen record for one structural unit: position, unit type, preview and metadata."""

    position: int
    unit_type: str
    preview: str
    metadata: dict[str, Any] = field(default_factory=dict)
155
+
156
+
157
@dataclass(frozen=True)
class DocumentStructure:
    """Frozen pairing of a DocumentRef with a tuple of StructureUnit entries."""

    document: DocumentRef
    units: tuple[StructureUnit, ...]
161
+
162
+
163
@dataclass(frozen=True)
class PresentationSlideSummary:
    """Frozen summary of one slide: slide number, preview and metadata."""

    # numbering base (0 or 1) is not visible here — TODO confirm
    slide_number: int
    preview: str
    metadata: dict[str, Any] = field(default_factory=dict)
168
+
169
+
170
@dataclass(frozen=True)
class PresentationStructure:
    """Frozen pairing of a DocumentRef with a tuple of PresentationSlideSummary entries."""

    document: DocumentRef
    slides: tuple[PresentationSlideSummary, ...]
174
+
175
+
176
@dataclass(frozen=True)
class SlideTextBlock:
    """Frozen record for a text block on a slide: position, shape id/name, preview and text."""

    position: int
    shape_id: int
    # Required field, but None is an accepted value.
    shape_name: str | None
    preview: str
    text: str
    metadata: dict[str, Any] = field(default_factory=dict)
184
+
185
+
186
@dataclass(frozen=True)
class SlideBundle:
    """Frozen record for one slide: owning document, slide number, preview, notes and text blocks.

    ``text_blocks`` defaults to an empty tuple.
    """

    document: DocumentRef
    slide_number: int
    preview: str
    notes_text: str
    metadata: dict[str, Any] = field(default_factory=dict)
    text_blocks: tuple[SlideTextBlock, ...] = ()
194
+
195
+
196
@dataclass(frozen=True)
class SlideNotes:
    """Frozen record carrying the notes text for one slide, keyed by document id and slide number."""

    document_id: str
    slide_number: int
    notes_text: str
201
+
202
+
203
@dataclass(frozen=True)
class DocxParagraph:
    """Frozen record for a paragraph: block/paragraph indices, text, style name and heading flag."""

    block_index: int
    paragraph_index: int
    text: str
    # Required field, but None is an accepted value.
    style_name: str | None
    is_heading: bool
    preview: str
    metadata: dict[str, Any] = field(default_factory=dict)
212
+
213
+
214
@dataclass(frozen=True)
class DocxTable:
    """Frozen record for a table: block/table indices, rows of strings, preview and metadata."""

    block_index: int
    table_index: int
    # One inner tuple per row; presumably one string per cell — TODO confirm.
    rows: tuple[tuple[str, ...], ...]
    preview: str
    metadata: dict[str, Any] = field(default_factory=dict)
221
+
222
+
223
@dataclass(frozen=True)
class DocumentBlock:
    """Frozen record for one block: index, block type, preview and metadata."""

    block_index: int
    block_type: str
    preview: str
    metadata: dict[str, Any] = field(default_factory=dict)
229
+
230
+
231
@dataclass(frozen=True)
class DocumentBlocks:
    """Frozen pairing of a DocumentRef with a tuple of DocumentBlock entries."""

    document: DocumentRef
    blocks: tuple[DocumentBlock, ...]
235
+
236
+
237
@dataclass(frozen=True)
class ParagraphCollection:
    """Frozen pairing of a DocumentRef with a tuple of DocxParagraph entries."""

    document: DocumentRef
    paragraphs: tuple[DocxParagraph, ...]
241
+
242
+
243
@dataclass(frozen=True)
class TableCollection:
    """Frozen pairing of a DocumentRef with a tuple of DocxTable entries."""

    document: DocumentRef
    tables: tuple[DocxTable, ...]
247
+
248
+
249
@dataclass(frozen=True)
class BlockBundle:
    """Frozen record for one DocumentBlock plus an optional DocxParagraph and DocxTable.

    NOTE(review): presumably at most one of ``paragraph``/``table`` is set,
    depending on the block type — nothing here enforces it.
    """

    document: DocumentRef
    block: DocumentBlock
    paragraph: DocxParagraph | None = None
    table: DocxTable | None = None
255
+
256
+
257
@dataclass(frozen=True)
class WorksheetSummary:
    """Frozen summary of one worksheet: position, sheet name, preview and metadata."""

    position: int
    sheet_name: str
    preview: str
    metadata: dict[str, Any] = field(default_factory=dict)
263
+
264
+
265
@dataclass(frozen=True)
class WorkbookStructure:
    """Frozen pairing of a DocumentRef with a tuple of WorksheetSummary entries."""

    document: DocumentRef
    sheets: tuple[WorksheetSummary, ...]
269
+
270
+
271
@dataclass(frozen=True)
class SheetCell:
    """Frozen record for one cell: coordinate, row/column numbers, display value and metadata."""

    # presumably an A1-style reference matching row/column — TODO confirm
    coordinate: str
    row: int
    column: int
    display_value: str
    metadata: dict[str, Any] = field(default_factory=dict)
278
+
279
+
280
@dataclass(frozen=True)
class SheetSnapshot:
    """Frozen snapshot of one worksheet: owning document, sheet name, cells and metadata."""

    document: DocumentRef
    sheet_name: str
    cells: tuple[SheetCell, ...]
    metadata: dict[str, Any] = field(default_factory=dict)
286
+
287
+
288
@dataclass(frozen=True)
class StructuredTarget:
    """Frozen record naming a target by type and identifier, with a preview and metadata."""

    target_type: str
    identifier: str
    preview: str
    metadata: dict[str, Any] = field(default_factory=dict)
294
+
295
+
296
@dataclass(frozen=True)
class StructuredWriteResult:
    """Frozen result record: source and output paths, the StructuredTarget involved, and a summary."""

    document_path: Path
    output_path: Path
    target: StructuredTarget
    summary: str
302
+
303
+
304
@dataclass(frozen=True)
class StructureSection:
    """Frozen record for one section: locator, section type, preview and metadata."""

    locator: str
    section_type: str
    preview: str
    metadata: dict[str, Any] = field(default_factory=dict)
310
+
311
+
312
@dataclass(frozen=True)
class StructureCollection:
    """Frozen pairing of a DocumentRef with a tuple of StructureSection entries."""

    document: DocumentRef
    sections: tuple[StructureSection, ...]
316
+
317
+
318
@dataclass(frozen=True)
class DocxRun:
    """Frozen record for one run of text with its formatting attributes.

    All fields are required; every formatting field accepts None.
    """

    text: str
    bold: bool | None
    italic: bool | None
    underline: bool | None
    strike: bool | None
    font_name: str | None
    # NOTE(review): InlineStyle.font_size is ``float | None`` while this is
    # ``int | None`` — confirm the difference is intended.
    font_size: int | None
    color_rgb: str | None
328
+
329
+
330
@dataclass(frozen=True)
class DocxTableCell:
    """Frozen record for one table cell: locator, row/column indices, text and metadata."""

    locator: str
    row_index: int
    column_index: int
    text: str
    metadata: dict[str, Any] = field(default_factory=dict)
337
+
338
+
339
@dataclass(frozen=True)
class PptxTextBlockNode:
    """Frozen record for a slide text block addressed by locator.

    Carries the same fields as SlideTextBlock plus a leading ``locator``.
    """

    locator: str
    position: int
    shape_id: int
    # Required field, but None is an accepted value.
    shape_name: str | None
    preview: str
    text: str
    metadata: dict[str, Any] = field(default_factory=dict)
348
+
349
+
350
@dataclass(frozen=True)
class XlsxSectionCell:
    """Frozen record for one worksheet cell addressed by locator.

    Carries the SheetCell fields plus ``locator`` and a ``formula`` value.
    """

    locator: str
    coordinate: str
    row: int
    column: int
    display_value: str
    # Required field; presumably None when the cell holds no formula — TODO confirm.
    formula: str | None
    metadata: dict[str, Any] = field(default_factory=dict)
359
+
360
+
361
@dataclass(frozen=True)
class SectionPayload:
    """Frozen payload for one section of a document.

    ``document``, ``locator``, ``section_type`` and ``preview`` are required.
    All remaining fields are optional and default to None or an empty tuple.
    """

    document: DocumentRef
    locator: str
    section_type: str
    preview: str
    metadata: dict[str, Any] = field(default_factory=dict)
    # Fields typed with Docx* records (presumably populated for docx sections).
    block_type: str | None = None
    text: str | None = None
    style_name: str | None = None
    is_heading: bool | None = None
    runs: tuple[DocxRun, ...] = ()
    rows: tuple[tuple[str, ...], ...] = ()
    table_cells: tuple[DocxTableCell, ...] = ()
    # Fields typed with Pptx* records (presumably populated for pptx sections).
    slide_number: int | None = None
    notes_text: str | None = None
    text_blocks: tuple[PptxTextBlockNode, ...] = ()
    # Fields typed with Xlsx* records (presumably populated for xlsx sections).
    sheet_name: str | None = None
    cells: tuple[XlsxSectionCell, ...] = ()
380
+
381
+
382
@dataclass(frozen=True)
class NodePayload:
    """Frozen payload for one node: document id, node id, item type, text and metadata."""

    document_id: str
    node_id: str
    item_type: str
    text: str
    metadata: dict[str, Any] = field(default_factory=dict)
389
+
390
+
391
@dataclass(frozen=True)
class NodeWriteResult:
    """Frozen result record for a node write: paths, ids, and the new and previous text."""

    document_path: Path
    output_path: Path
    document_id: str
    node_id: str
    new_text: str
    previous_text: str
399
+
400
+
401
@dataclass(frozen=True)
class InsertContentResult:
    """Frozen result record for a content insert: paths, document id, new node id and preview."""

    document_path: Path
    output_path: Path
    document_id: str
    new_node_id: str
    preview: str
408
+
409
+
410
@dataclass(frozen=True)
class XlsxInsertRowsResult:
    """Frozen result record for a row insert: paths, document id, row count and first-row locator."""

    document_path: Path
    output_path: Path
    document_id: str
    rows_inserted: int
    first_row_locator: str
417
+
418
+
419
@dataclass(frozen=True)
class DocxTableEntry:
    """Frozen record for a table addressed by locator: table index, rows of strings, preview, metadata."""

    locator: str
    table_index: int
    # One inner tuple per row; presumably one string per cell — TODO confirm.
    rows: tuple[tuple[str, ...], ...]
    preview: str
    metadata: dict[str, Any] = field(default_factory=dict)
426
+
427
+
428
@dataclass(frozen=True)
class DocxTablesResult:
    """Frozen pairing of a DocumentRef with a tuple of DocxTableEntry entries."""

    document: DocumentRef
    tables: tuple[DocxTableEntry, ...]
432
+
433
+
434
@dataclass(frozen=True)
class ChildSummary:
    """Frozen summary of a child object: locator, object type, preview and capabilities.

    ``capabilities`` defaults to an empty tuple.
    """

    locator: str
    object_type: str
    preview: str
    capabilities: tuple[Capability, ...] = ()
    metadata: dict[str, Any] = field(default_factory=dict)
441
+
442
+
443
@dataclass(frozen=True)
class ObjectPayload:
    """Frozen payload describing one object of a document.

    Required: document, locator, object type and preview. Properties,
    capabilities, parent locator, child summaries and metadata are optional.
    """

    document: DocumentRef
    locator: str
    object_type: str
    preview: str
    # keys depend on object_type and are not defined here — verify against producers
    properties: dict[str, Any] = field(default_factory=dict)
    capabilities: tuple[Capability, ...] = ()
    parent_locator: str | None = None
    child_summary: tuple[ChildSummary, ...] = ()
    metadata: dict[str, Any] = field(default_factory=dict)
454
+
455
+
456
@dataclass(frozen=True)
class MutationResult:
    """Frozen result record for a single mutation.

    ``output_path`` and ``locator`` are required fields that accept None.
    """

    document_path: Path
    # presumably None when nothing was written (e.g. a dry run) — TODO confirm
    output_path: Path | None
    document_id: str
    locator: str | None
    object_type: str
    summary: str
    capabilities: tuple[Capability, ...] = ()
    parent_locator: str | None = None
    metadata: dict[str, Any] = field(default_factory=dict)
467
+
468
+
469
@dataclass(frozen=True)
class BatchResult:
    """Frozen result record for a batch: paths, document id, summary and per-operation results.

    ``dry_run`` defaults to False and ``operations`` to an empty tuple.
    """

    document_path: Path
    # Required field, but None is an accepted value.
    output_path: Path | None
    document_id: str
    summary: str
    dry_run: bool = False
    operations: tuple[MutationResult, ...] = ()
    metadata: dict[str, Any] = field(default_factory=dict)
@@ -0,0 +1,136 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import replace
4
+
5
+ from offagent.domain.models import InlineFragment, InlineStyle, VisibleTextRange
6
+ from offagent.errors import InvalidArgumentsError
7
+
8
+
9
def normalize_fragments(
    fragments: list[InlineFragment] | tuple[InlineFragment, ...],
) -> tuple[InlineFragment, ...]:
    """Drop empty fragments and coalesce neighbours that share a style.

    Args:
        fragments: Fragments in reading order.

    Returns:
        A tuple in which no fragment has empty text and no two adjacent
        fragments have equal styles. Fragments that needed no merging are
        passed through as the same objects.
    """
    result: list[InlineFragment] = []
    for piece in fragments:
        if not piece.text:
            # Empty text contributes nothing visible; skip it entirely.
            continue
        tail = result[-1] if result else None
        if tail is None or tail.style != piece.style:
            result.append(piece)
        else:
            # Same style as the previous fragment: extend it instead of appending.
            result[-1] = InlineFragment(
                text=f"{tail.text}{piece.text}", style=tail.style
            )
    return tuple(result)
24
+
25
+
26
def split_fragments_at_offsets(
    fragments: list[InlineFragment] | tuple[InlineFragment, ...],
    offsets: list[int] | tuple[int, ...],
) -> tuple[InlineFragment, ...]:
    """Cut normalized fragments at the given absolute text offsets.

    Args:
        fragments: Fragments in reading order; they are normalized first.
        offsets: Absolute character offsets into the concatenated text.
            Non-positive values are discarded and duplicates collapse.

    Returns:
        A tuple of fragments with a fragment boundary at every offset that
        falls strictly inside the text. When no positive offset is given,
        the normalized fragments are returned unchanged.
    """
    cut_points = sorted({point for point in offsets if point > 0})
    if not cut_points:
        return normalize_fragments(fragments)

    pieces: list[InlineFragment] = []
    total_cuts = len(cut_points)
    next_cut = 0  # index of the first cut point not yet consumed
    consumed = 0  # absolute offset at which the current fragment begins
    for fragment in normalize_fragments(fragments):
        text = fragment.text
        fragment_end = consumed + len(text)
        local_start = 0
        # Consume every cut point that lies before the end of this fragment.
        while next_cut < total_cuts and cut_points[next_cut] < fragment_end:
            local_cut = cut_points[next_cut] - consumed
            next_cut += 1
            # A cut exactly on the fragment's left edge needs no split.
            if local_cut > local_start:
                pieces.append(
                    InlineFragment(
                        text=text[local_start:local_cut], style=fragment.style
                    )
                )
                local_start = local_cut
        if local_start < len(text):
            # Remainder after the last cut (or the whole fragment if uncut).
            pieces.append(
                InlineFragment(text=text[local_start:], style=fragment.style)
            )
        consumed = fragment_end
    return tuple(pieces)
62
+
63
+
64
def validate_visible_text_range(
    text_range: VisibleTextRange, *, text_length: int
) -> None:
    """Check that a visible-text range is well-formed for a container.

    Args:
        text_range: Range whose ``start`` and ``end`` offsets are checked.
        text_length: Length of the container text the range refers to.

    Raises:
        InvalidArgumentsError: If either offset is negative, if ``end`` is
            not greater than ``start``, or if ``end`` exceeds ``text_length``.
    """
    start, end = text_range.start, text_range.end
    if min(start, end) < 0:
        raise InvalidArgumentsError(
            "Visible-text ranges must use non-negative offsets."
        )
    if not end > start:
        raise InvalidArgumentsError("Visible-text ranges must have end > start.")
    if end > text_length:
        raise InvalidArgumentsError(
            f"Visible-text range {text_range.start}:{text_range.end} exceeds container length {text_length}."
        )
77
+
78
+
79
def apply_style_to_range(
    fragments: list[InlineFragment] | tuple[InlineFragment, ...],
    text_range: VisibleTextRange,
    *,
    style: InlineStyle,
    clear_fields: list[str] | tuple[str, ...],
) -> tuple[InlineFragment, ...]:
    """Apply a style patch to the characters covered by ``text_range``.

    Args:
        fragments: Fragments in reading order.
        text_range: Offsets into the concatenated fragment text; characters
            from ``start`` up to (not including) ``end`` are restyled.
        style: Patch style merged into each covered fragment.
        clear_fields: Style field names to reset to None on covered fragments.

    Returns:
        Normalized fragments with the patch applied inside the range and
        the original styles kept outside it.

    Raises:
        InvalidArgumentsError: If ``text_range`` fails validation against
            the total text length.
    """
    base = normalize_fragments(fragments)
    validate_visible_text_range(
        text_range, text_length=sum(len(item.text) for item in base)
    )
    # Force fragment boundaries at both ends so each piece is wholly in or out.
    pieces = split_fragments_at_offsets(base, (text_range.start, text_range.end))

    restyled: list[InlineFragment] = []
    position = 0
    for piece in pieces:
        piece_end = position + len(piece.text)
        inside = text_range.start <= position and piece_end <= text_range.end
        if inside:
            piece = InlineFragment(
                text=piece.text,
                style=merge_inline_style(piece.style, style, clear_fields),
            )
        restyled.append(piece)
        position = piece_end
    # Re-merge neighbours that ended up with equal styles.
    return normalize_fragments(restyled)
106
+
107
+
108
def merge_inline_style(
    base: InlineStyle,
    patch: InlineStyle,
    clear_fields: list[str] | tuple[str, ...],
) -> InlineStyle:
    """Combine ``base`` with ``patch`` and return a new InlineStyle.

    Args:
        base: Style providing the starting values.
        patch: Style whose non-None values override ``base``.
        clear_fields: Field names forced to None; a cleared field ignores
            any value for it in ``patch``.

    Returns:
        A new InlineStyle built from the merged values.
    """
    cleared = set(clear_fields)
    merged = dict(vars(base))
    # Cleared fields win over both base and patch.
    merged.update(dict.fromkeys(cleared))
    merged.update(
        {
            name: value
            for name, value in vars(patch).items()
            if value is not None and name not in cleared
        }
    )
    # NOTE(review): a name in clear_fields that is not an InlineStyle field
    # reaches the constructor as an unexpected keyword argument.
    return InlineStyle(**merged)
123
+
124
+
125
def style_is_empty(style: InlineStyle) -> bool:
    """Return True when every attribute stored on ``style`` is None."""
    return not any(value is not None for value in vars(style).values())
127
+
128
+
129
def fragment_text(
    fragments: list[InlineFragment] | tuple[InlineFragment, ...],
) -> str:
    """Return the text of all fragments concatenated in order."""
    texts = [piece.text for piece in fragments]
    return "".join(texts)
133
+
134
+
135
def clone_fragment(fragment: InlineFragment) -> InlineFragment:
    """Return a new InlineFragment with the same text and a copied style."""
    # dataclasses.replace with no overrides builds a fresh, equal style object.
    style_copy = replace(fragment.style)
    return InlineFragment(text=fragment.text, style=style_copy)
offagent/errors.py ADDED
@@ -0,0 +1,29 @@
1
+ from __future__ import annotations
2
+
3
+
4
class OffagentError(RuntimeError):
    """Root of the package's RuntimeError-based exceptions.

    TargetNotEditableError and PolicyRefusedError derive from this class.
    """
6
+
7
+
8
class InvalidArgumentsError(ValueError):
    """Signals user input that is malformed or semantically invalid.

    NOTE(review): this derives from ValueError, not OffagentError, so an
    ``except OffagentError`` handler does not catch it — confirm intended.
    """
10
+
11
+
12
class TargetNotFoundError(LookupError):
    """Signals that a document, item, or indexed target could not be resolved.

    NOTE(review): this derives from LookupError, not OffagentError, so an
    ``except OffagentError`` handler does not catch it — confirm intended.
    """
14
+
15
+
16
class TargetNotEditableError(OffagentError):
    """Signals that the target exists but does not support the requested edit."""
18
+
19
+
20
class PolicyRefusedError(OffagentError):
    """Signals that a configured path policy refused the operation."""
22
+
23
+
24
class StaleLocatorError(TargetNotFoundError):
    """Signals that a previously indexed target can no longer be resolved safely.

    Caught by handlers for TargetNotFoundError and LookupError.
    """
26
+
27
+
28
class NoEmbeddingsError(TargetNotFoundError):
    """Signals a semantic retrieval request made when no embeddings are indexed.

    Caught by handlers for TargetNotFoundError and LookupError.
    """
@@ -0,0 +1 @@
1
+ """Index storage primitives."""