offagent 0.10.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81) hide show
  1. offagent-0.10.0/LICENSE +21 -0
  2. offagent-0.10.0/PKG-INFO +546 -0
  3. offagent-0.10.0/README.md +526 -0
  4. offagent-0.10.0/pyproject.toml +40 -0
  5. offagent-0.10.0/setup.cfg +4 -0
  6. offagent-0.10.0/src/offagent/__init__.py +3 -0
  7. offagent-0.10.0/src/offagent/__main__.py +5 -0
  8. offagent-0.10.0/src/offagent/adapters/__init__.py +1 -0
  9. offagent-0.10.0/src/offagent/adapters/docx_adapter.py +1237 -0
  10. offagent-0.10.0/src/offagent/adapters/embedding_provider.py +132 -0
  11. offagent-0.10.0/src/offagent/adapters/pptx_adapter.py +940 -0
  12. offagent-0.10.0/src/offagent/adapters/xlsx_adapter.py +1266 -0
  13. offagent-0.10.0/src/offagent/app/__init__.py +1 -0
  14. offagent-0.10.0/src/offagent/app/progress.py +52 -0
  15. offagent-0.10.0/src/offagent/app/services.py +4267 -0
  16. offagent-0.10.0/src/offagent/config.py +287 -0
  17. offagent-0.10.0/src/offagent/domain/__init__.py +1 -0
  18. offagent-0.10.0/src/offagent/domain/locators.py +444 -0
  19. offagent-0.10.0/src/offagent/domain/models.py +477 -0
  20. offagent-0.10.0/src/offagent/domain/text_fragments.py +136 -0
  21. offagent-0.10.0/src/offagent/errors.py +29 -0
  22. offagent-0.10.0/src/offagent/indexing/__init__.py +1 -0
  23. offagent-0.10.0/src/offagent/indexing/store.py +795 -0
  24. offagent-0.10.0/src/offagent/interfaces/__init__.py +1 -0
  25. offagent-0.10.0/src/offagent/interfaces/cli.py +438 -0
  26. offagent-0.10.0/src/offagent/interfaces/cli_output.py +139 -0
  27. offagent-0.10.0/src/offagent/interfaces/cli_progress.py +120 -0
  28. offagent-0.10.0/src/offagent/interfaces/mcp.py +1145 -0
  29. offagent-0.10.0/src/offagent/interfaces/mcp_converters.py +80 -0
  30. offagent-0.10.0/src/offagent/interfaces/mcp_models.py +923 -0
  31. offagent-0.10.0/src/offagent/objects/__init__.py +3 -0
  32. offagent-0.10.0/src/offagent/objects/base.py +26 -0
  33. offagent-0.10.0/src/offagent/objects/docx_objects.py +951 -0
  34. offagent-0.10.0/src/offagent/objects/pptx_objects.py +895 -0
  35. offagent-0.10.0/src/offagent/objects/xlsx_objects.py +962 -0
  36. offagent-0.10.0/src/offagent/path_policy.py +42 -0
  37. offagent-0.10.0/src/offagent/storage/__init__.py +1 -0
  38. offagent-0.10.0/src/offagent/storage/versioning.py +31 -0
  39. offagent-0.10.0/src/offagent.egg-info/PKG-INFO +546 -0
  40. offagent-0.10.0/src/offagent.egg-info/SOURCES.txt +79 -0
  41. offagent-0.10.0/src/offagent.egg-info/dependency_links.txt +1 -0
  42. offagent-0.10.0/src/offagent.egg-info/entry_points.txt +2 -0
  43. offagent-0.10.0/src/offagent.egg-info/requires.txt +9 -0
  44. offagent-0.10.0/src/offagent.egg-info/top_level.txt +1 -0
  45. offagent-0.10.0/tests/test_config.py +94 -0
  46. offagent-0.10.0/tests/test_discovery.py +25 -0
  47. offagent-0.10.0/tests/test_doctor.py +60 -0
  48. offagent-0.10.0/tests/test_document_authoring_services.py +294 -0
  49. offagent-0.10.0/tests/test_docx_adapter.py +37 -0
  50. offagent-0.10.0/tests/test_docx_cli.py +105 -0
  51. offagent-0.10.0/tests/test_docx_objects.py +204 -0
  52. offagent-0.10.0/tests/test_docx_services.py +83 -0
  53. offagent-0.10.0/tests/test_embedding_provider.py +17 -0
  54. offagent-0.10.0/tests/test_interface_parity.py +147 -0
  55. offagent-0.10.0/tests/test_locators.py +125 -0
  56. offagent-0.10.0/tests/test_mcp.py +412 -0
  57. offagent-0.10.0/tests/test_mcp_acceptance.py +389 -0
  58. offagent-0.10.0/tests/test_mcp_v2.py +480 -0
  59. offagent-0.10.0/tests/test_object_models.py +82 -0
  60. offagent-0.10.0/tests/test_object_services.py +57 -0
  61. offagent-0.10.0/tests/test_object_services_write.py +255 -0
  62. offagent-0.10.0/tests/test_partial_formatting_services.py +271 -0
  63. offagent-0.10.0/tests/test_policy.py +71 -0
  64. offagent-0.10.0/tests/test_pptx_adapter.py +36 -0
  65. offagent-0.10.0/tests/test_pptx_cli.py +101 -0
  66. offagent-0.10.0/tests/test_pptx_objects.py +171 -0
  67. offagent-0.10.0/tests/test_pptx_services.py +71 -0
  68. offagent-0.10.0/tests/test_progress_reporting.py +141 -0
  69. offagent-0.10.0/tests/test_semantic_adapters.py +111 -0
  70. offagent-0.10.0/tests/test_semantic_mcp.py +269 -0
  71. offagent-0.10.0/tests/test_semantic_services.py +142 -0
  72. offagent-0.10.0/tests/test_store.py +136 -0
  73. offagent-0.10.0/tests/test_summaries.py +21 -0
  74. offagent-0.10.0/tests/test_text_fragments.py +61 -0
  75. offagent-0.10.0/tests/test_vector_mcp.py +104 -0
  76. offagent-0.10.0/tests/test_vector_services.py +156 -0
  77. offagent-0.10.0/tests/test_versioning.py +19 -0
  78. offagent-0.10.0/tests/test_xlsx_adapter.py +86 -0
  79. offagent-0.10.0/tests/test_xlsx_cli.py +133 -0
  80. offagent-0.10.0/tests/test_xlsx_objects.py +185 -0
  81. offagent-0.10.0/tests/test_xlsx_services.py +106 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Iwan van der Kleijn
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,546 @@
1
+ Metadata-Version: 2.4
2
+ Name: offagent
3
+ Version: 0.10.0
4
+ Summary: Local-first Office document tooling with a shared CLI and MCP-ready core
5
+ Author: Iwan van der Kleijn
6
+ License: MIT
7
+ Requires-Python: >=3.13
8
+ Description-Content-Type: text/markdown
9
+ License-File: LICENSE
10
+ Requires-Dist: fastembed<1,>=0.3
11
+ Requires-Dist: mcp<2,>=1
12
+ Requires-Dist: python-docx<2,>=1.1
13
+ Requires-Dist: python-pptx<2,>=1.0
14
+ Requires-Dist: openpyxl<4,>=3.1
15
+ Requires-Dist: pydantic<3,>=2.0
16
+ Requires-Dist: python-dotenv<2,>=1.0
17
+ Requires-Dist: rich<14,>=13
18
+ Requires-Dist: typer<1,>=0.12
19
+ Dynamic: license-file
20
+
21
+ # Office-Agent
22
+ [![Python](https://img.shields.io/badge/python-3.13+-blue.svg)](https://www.python.org/)
23
+ [![MCP](https://img.shields.io/badge/MCP-Server-orange.svg)](https://gofastmcp.com/getting-started/welcome)
24
+ [![License](https://img.shields.io/badge/license-MIT-green.svg)](LICENSE)
25
+ [![FOSS Pluralism](https://img.shields.io/badge/FOSS-Pluralism-purple.svg)](FOSS_PLURALISM_MANIFESTO.md)
26
+ [![OpenSpec](https://img.shields.io/badge/OpenSpec-Used-black.svg)](https://openspec.dev/)
27
+
28
+ Office-Agent (`offagent`) is a local-first Office document tool with a shared application core and a CLI entrypoint, `office-agent`.
29
+
30
+ Detailed technical implementation notes are available in [technical-implementation.md](./technical-implementation.md).
31
+
32
+ ![Office Agent](./images/office-agent-small.png)
33
+
34
+ Office-Agent supports local-first workflows for `.docx`, `.pptx`, and `.xlsx` documents through both a CLI and an MCP server. The app indexes document structure into a local SQLite + FTS5 store, can optionally persist embeddings for semantic and hybrid retrieval, and routes all reads and writes through one shared service layer with path-policy enforcement, stale-locator checks, versioned outputs, and automatic reindexing.
35
+
36
+ ## Current Features
37
+
38
+ ### Core App
39
+
40
+ - TOML configuration with environment variable overrides
41
+ - `office-agent doctor` runtime checks for imports, SQLite, FTS5, index writability, configured roots, and vector-search readiness
42
+ - document discovery and indexing for `.docx`, `.pptx`, and `.xlsx`
43
+ - allowed-root guards for indexed reads and output-root guards for writes
44
+ - human-readable, JSON, and quiet CLI output modes
45
+ - default versioned write outputs, with optional in-place overwrite when explicitly enabled
46
+ - keyword, semantic, and hybrid search modes over indexed content
47
+
48
+ ### Structured Document Model
49
+
50
+ - typed locators for DOCX, PPTX, and XLSX objects, with legacy locator compatibility where needed
51
+ - object traversal with `get_object` and `list_children`
52
+ - per-object capability reporting for `read`, `update`, `delete`, `add_child`, `move`, `copy`, and `style`
53
+ - generic object lifecycle operations: `create_object`, `update_object`, `move_object`, `copy_object`, `batch_edit`, and `delete_object`
54
+ - stale-locator detection across object and node workflows
55
+
56
+ ### DOCX Support
57
+
58
+ - paragraph, run, section, table, table row, table cell, image, and page-break object resolution
59
+ - paragraph-level indexing and search with stable `para:<n>` compatibility
60
+ - node reads and writes for paragraphs and table cells
61
+ - paragraph insertion through the existing content-insert workflow
62
+ - DOCX escape hatches for paragraph style changes, page-break insertion, table creation, and table-cell merging
63
+
64
+ ### PPTX Support
65
+
66
+ - presentation, slide, notes, shape, text shape, image shape, table, table row, table cell, and group-shape object resolution
67
+ - text-frame indexing and search with `slide:<n>:shape:<id>` compatibility
68
+ - node reads and writes for editable text-bearing targets
69
+ - slide move and copy operations through the generic object layer
70
+ - PPTX escape hatches for adding slides, duplicating slides, changing slide layouts, and inserting text shapes
71
+
72
+ ### XLSX Support
73
+
74
+ - workbook, worksheet, row, column, cell, range, table, merged range, formula cell, and named-range object resolution
75
+ - cell-level indexing and search with `sheet:<name>!<coordinate>` compatibility
76
+ - direct cell writes plus guarded append semantics for string-compatible cells
77
+ - generic object deletion for worksheets, rows, columns, cells, and ranges where supported
78
+ - XLSX escape hatches for range writes, row insertion, column insertion, formula writes, and cell merging
79
+
80
+ ### MCP Server
81
+
82
+ - stdio MCP server implemented with FastMCP
83
+ - document indexing and refresh tools
84
+ - canonical V2 search via `search_objects`, with deprecated `search_documents` alias compatibility
85
+ - structure and section tools, node read/write tools, object traversal tools, generic object mutation tools, and format-specific escape hatches
86
+ - structured MCP responses backed by the same application services used by the CLI
87
+
88
+ ### Vector Search
89
+
90
+ - optional embedding generation during indexing and reindexing
91
+ - SQLite-backed embedding side tables keyed to indexed document items
92
+ - semantic retrieval over stored vectors
93
+ - hybrid retrieval that combines keyword and semantic scores
94
+ - `match_mode` and per-source score metadata in JSON and MCP search results
95
+
96
+ ## Installation
97
+
98
+ The project uses `uv` in development.
99
+
100
+ ```bash
101
+ uv sync
102
+ ```
103
+
104
+ Run the CLI with:
105
+
106
+ ```bash
107
+ uv run office-agent --help
108
+ ```
109
+
110
+ ## Configuration
111
+
112
+ The CLI accepts an optional config file via `--config <path>`.
113
+
114
+ Example `office-agent.toml`:
115
+
116
+ ```toml
117
+ [offagent]
118
+ index_path = ".offagent/index.sqlite3"
119
+ document_roots = ["./docs"]
120
+ allowed_roots = ["./docs", "./edited"]
121
+ output_directory = "./edited"
122
+ output_roots = ["./edited", "./docs"]
123
+ allow_inplace_overwrite = true
124
+ embedding_model = "BAAI/bge-small-en-v1.5"
125
+ embedding_dimensions = 384
126
+ vector_search_top_k = 20
127
+ hybrid_keyword_weight = 0.4
128
+ hybrid_semantic_weight = 0.6
129
+ ```
130
+
131
+ Environment variables override file settings:
132
+
133
+ - `OFFAGENT_CONFIG`
134
+ - `OFFAGENT_INDEX_PATH`
135
+ - `OFFAGENT_DOCUMENT_ROOTS`
136
+ - `OFFAGENT_ALLOWED_ROOTS`
137
+ - `OFFAGENT_OUTPUT_DIRECTORY`
138
+ - `OFFAGENT_OUTPUT_ROOTS`
139
+ - `OFFAGENT_ALLOW_INPLACE_OVERWRITE`
140
+ - `OFFAGENT_EMBEDDING_MODEL`
141
+ - `OFFAGENT_EMBEDDING_DIMENSIONS`
142
+ - `OFFAGENT_VECTOR_SEARCH_TOP_K`
143
+ - `OFFAGENT_HYBRID_KEYWORD_WEIGHT`
144
+ - `OFFAGENT_HYBRID_SEMANTIC_WEIGHT`
145
+
146
+ `OFFAGENT_DOCUMENT_ROOTS` takes multiple roots joined by the platform's path-list separator, for example `:` on macOS/Linux.
147
+
148
+ Configuration fields:
149
+
150
+ - `index_path`: SQLite index file location
151
+ - `document_roots`: directories scanned by discovery and directory-based indexing
152
+ - `allowed_roots`: canonical roots allowed for indexed read operations; empty means no read guard
153
+ - `output_directory`: optional directory for versioned write outputs; defaults to the source document directory
154
+ - `output_roots`: canonical roots allowed for write outputs; if omitted and `output_directory` is set, this defaults to that directory
155
+ - `allow_inplace_overwrite`: when `true`, write commands may use `--output-mode inplace`; default is `true`
156
+ - `embedding_model`: embedding model name used when generating or querying vectors
157
+ - `embedding_dimensions`: expected vector dimensionality for the configured model
158
+ - `vector_search_top_k`: candidate pool size used by semantic and hybrid retrieval
159
+ - `hybrid_keyword_weight`: keyword contribution to hybrid ranking
160
+ - `hybrid_semantic_weight`: semantic contribution to hybrid ranking
161
+
162
+ ## CLI
163
+
164
+ All commands accept `--config <path>` after the command name. Example:
165
+
166
+ ```bash
167
+ uv run office-agent search "supplier shall" --config office-agent.toml
168
+ ```
169
+
170
+ Structured commands also accept:
171
+
172
+ - `--json`: emit machine-readable JSON with no extra prose
173
+ - `--quiet`: suppress successful stdout output
174
+
175
+ `--json` and `--quiet` are mutually exclusive.
176
+
177
+ Write commands also accept `--output-mode <mode>` where `<mode>` is:
178
+
179
+ - `versioned`: default; writes `<name>.edited.<timestamp>.<ext>` and reindexes the new file
180
+ - `inplace`: overwrites the source file, but only if `allow_inplace_overwrite = true`
181
+
182
+ Common exit codes:
183
+
184
+ - `0`: success
185
+ - `1`: other runtime failure
186
+ - `2`: invalid arguments or incompatible flag combinations
187
+ - `3`: not-found target, missing indexed item/document, or stale-locator write failure
188
+ - `4`: not-editable target
189
+ - `5`: policy-refused operation
190
+
191
+ ### Doctor
192
+
193
+ Checks runtime health, including vector-search readiness.
194
+
195
+ Options:
196
+
197
+ - `--config <path>`: optional config file path
198
+
199
+ ```bash
200
+ uv run office-agent doctor
201
+ uv run office-agent doctor --config office-agent.toml
202
+ ```
203
+
204
+ ### Index And Reindex
205
+
206
+ `index` indexes one file or all supported Office files under a directory. `reindex` currently re-runs the same indexing flow. Add `--with-embeddings` to generate and persist embeddings during the run.
207
+
208
+ Arguments:
209
+
210
+ - `path`: file or directory to index
211
+
212
+ Options:
213
+
214
+ - `--with-embeddings`: generate embeddings for indexed items during this run
215
+ - `--config <path>`: optional config file path
216
+
217
+ ```bash
218
+ uv run office-agent index ./docs
219
+ uv run office-agent index ./docs --with-embeddings
220
+ uv run office-agent index ./docs/sample.docx
221
+ uv run office-agent index ./docs/sample.pptx
222
+ uv run office-agent index ./docs/sample.xlsx
223
+ uv run office-agent reindex ./docs/sample.docx
224
+ uv run office-agent reindex ./docs/sample.docx --with-embeddings
225
+ uv run office-agent reindex ./docs/sample.pptx
226
+ uv run office-agent reindex ./docs/sample.xlsx
227
+ ```
228
+
229
+ ### Search
230
+
231
+ Searches indexed content through SQLite FTS5, stored embeddings, or both.
232
+
233
+ Arguments:
234
+
235
+ - `query`: search text
236
+
237
+ Options:
238
+
239
+ - `--type <docx|pptx|xlsx>`: restrict search to one document type
240
+ - `--doc <path>`: restrict search to one indexed document path
241
+ - `--mode <keyword|semantic|hybrid>`: retrieval mode; default `keyword`
242
+ - `--config <path>`: optional config file path
243
+
244
+ ```bash
245
+ uv run office-agent search "supplier shall"
246
+ uv run office-agent search "supplier shall" --mode semantic
247
+ uv run office-agent search "supplier shall" --mode hybrid --json
248
+ uv run office-agent search "supplier shall" --type docx
249
+ uv run office-agent search "supplier shall" --type pptx
250
+ uv run office-agent search "supplier shall" --type xlsx
251
+ uv run office-agent search "supplier shall" --type docx --doc ./docs/sample.docx
252
+ uv run office-agent search "supplier shall" --type pptx --doc ./docs/sample.pptx
253
+ uv run office-agent search "supplier shall" --type xlsx --doc ./docs/sample.xlsx
254
+ ```
255
+
256
+ Notes:
257
+
258
+ - `keyword` behaves like the original FTS-only search path
259
+ - `semantic` requires prior indexing with `--with-embeddings`
260
+ - `hybrid` combines keyword and semantic candidates and returns merged ranking metadata
261
+ - JSON output includes `match_mode` and `scores`; semantic and hybrid human-readable output also displays that metadata
262
+
263
+ ### List
264
+
265
+ Lists indexed documents with document id, path, type, modified time, and indexed item count.
266
+
267
+ ```bash
268
+ uv run office-agent list
269
+ uv run office-agent list --json
270
+ ```
271
+
272
+ ### Show
273
+
274
+ Shows one indexed document summary, or one indexed item within that document.
275
+
276
+ ```bash
277
+ uv run office-agent show --doc ./docs/sample.docx
278
+ uv run office-agent show --doc ./docs/sample.docx --item para:3
279
+ uv run office-agent show --doc ./docs/sample.xlsx --item sheet:Budget2026!B12 --json
280
+ ```
281
+
282
+ ### Locate
283
+
284
+ Resolves a direct item reference for one document.
285
+
286
+ Options:
287
+
288
+ - `--doc <path>`: required document path
289
+ - `--paragraph <n>`: DOCX-only paragraph lookup
290
+ - `--slide <n>`: PPTX-only slide lookup
291
+ - `--shape <id>`: optional PPTX shape narrowing; only valid with `--slide`
292
+ - `--sheet <name>`: XLSX-only worksheet lookup
293
+ - `--cell <coordinate>`: XLSX-only cell coordinate; only valid with `--sheet`
294
+ - `--config <path>`: optional config file path
295
+
296
+ Valid option combinations:
297
+
298
+ - DOCX: `--doc` + `--paragraph`
299
+ - PPTX: `--doc` + `--slide` with optional `--shape`
300
+ - XLSX: `--doc` + `--sheet` + `--cell`
301
+
302
+ ```bash
303
+ uv run office-agent locate --doc ./docs/sample.docx --paragraph 3
304
+ uv run office-agent locate --doc ./docs/sample.pptx --slide 1
305
+ uv run office-agent locate --doc ./docs/sample.pptx --slide 1 --shape 7
306
+ uv run office-agent locate --doc ./docs/sample.xlsx --sheet Budget2026 --cell B12
307
+ ```
308
+
309
+ ### Read
310
+
311
+ Reads the current source content for one indexed item.
312
+
313
+ Options:
314
+
315
+ - `--doc <path>`: required document path
316
+ - `--item <item-id>`: required item id
317
+ - `--config <path>`: optional config file path
318
+
319
+ ```bash
320
+ uv run office-agent read --doc ./docs/sample.docx --item para:3
321
+ uv run office-agent read --doc ./docs/sample.pptx --item slide:1:shape:7
322
+ uv run office-agent read --doc ./docs/sample.xlsx --item sheet:Budget2026!B12
323
+ ```
324
+
325
+ ### Replace
326
+
327
+ Replaces the full text of one DOCX paragraph or PPTX text shape.
328
+
329
+ Options:
330
+
331
+ - `--doc <path>`: required document path
332
+ - `--item <item-id>`: required item id
333
+ - `--text <value>`: replacement text
334
+ - `--output-mode <versioned|inplace>`: write mode; default `versioned`
335
+ - `--config <path>`: optional config file path
336
+
337
+ Notes:
338
+
339
+ - `replace` is supported for DOCX and PPTX only
340
+ - XLSX uses `write-cell` instead of `replace`
341
+
342
+ ```bash
343
+ uv run office-agent replace --doc ./docs/sample.docx --item para:3 --text "Updated paragraph text."
344
+ uv run office-agent replace --doc ./docs/sample.pptx --item slide:1:shape:7 --text "Updated slide text."
345
+ uv run office-agent replace --doc ./docs/sample.docx --item para:3 --text "Updated paragraph text." --output-mode versioned
346
+ ```
347
+
348
+ ### Append
349
+
350
+ Appends text to one supported target.
351
+
352
+ Options:
353
+
354
+ - `--doc <path>`: required document path
355
+ - `--item <item-id>`: required item id
356
+ - `--text <value>`: appended text
357
+ - `--output-mode <versioned|inplace>`: write mode; default `versioned`
358
+ - `--config <path>`: optional config file path
359
+
360
+ Notes:
361
+
362
+ - DOCX appends to the target paragraph
363
+ - PPTX appends to the target text frame
364
+ - XLSX appends only to empty or string-compatible cells
365
+ - stale-locator failures exit with code `3`
366
+
367
+ ```bash
368
+ uv run office-agent append --doc ./docs/sample.docx --item para:3 --text " Additional text."
369
+ uv run office-agent append --doc ./docs/sample.pptx --item slide:1:shape:7 --text "\nAdditional slide text."
370
+ uv run office-agent append --doc ./docs/sample.xlsx --item sheet:Notes!A1 --text " Additional note."
371
+ ```
372
+
373
+ ### Write Cell
374
+
375
+ Writes one XLSX cell value directly.
376
+
377
+ Options:
378
+
379
+ - `--doc <path>`: required `.xlsx` document path
380
+ - `--sheet <name>`: required worksheet name
381
+ - `--cell <coordinate>`: required cell coordinate
382
+ - `--value <value>`: new cell value
383
+ - `--output-mode <versioned|inplace>`: write mode; default `versioned`
384
+ - `--config <path>`: optional config file path
385
+
386
+ ```bash
387
+ uv run office-agent write-cell --doc ./docs/sample.xlsx --sheet Budget2026 --cell B12 --value "125000"
388
+ uv run office-agent write-cell --doc ./docs/sample.xlsx --sheet Budget2026 --cell B12 --value "125000" --output-mode inplace
389
+ ```
390
+
391
+ ### MCP
392
+
393
+ Starts the FastMCP server on stdio.
394
+
395
+ Options:
396
+
397
+ - `--config <path>`: optional config file path
398
+
399
+ ```bash
400
+ uv run office-agent mcp
401
+ uv run office-agent mcp --config office-agent.toml
402
+ ```
403
+
404
+ The MCP server exposes these tools:
405
+
406
+ - `index_documents(paths)`
407
+ - `refresh_document(document_id)`
408
+ - `list_documents()`
409
+ - `create_document(format, output_path, initial_sheet_name=None, output_mode="versioned")`
410
+ - `add_content_block(document_id, block_type, properties, output_mode="versioned")`
411
+ - `search_objects(query, file_type=None, document_id=None, mode="keyword", limit=20)`
412
+ - `search_documents(...)` as a deprecated alias for pre-V2 consumers
413
+ - `get_structure(document_id)`
414
+ - `get_section(document_id, section_id, cell_range=None)`
415
+ - `get_node(document_id, node_id)`
416
+ - `write_node(document_id, node_id, content, output_mode="versioned")`
417
+ - `insert_content(document_id, content, style_name=None, after_node_id=None, output_mode="versioned")`
418
+ - `style_inline(document_id, locator, style, range=None, clear_fields=None, output_mode="versioned")`
419
+ - `style_block(document_id, locator, style, clear_fields=None, output_mode="versioned")`
420
+ - `set_structural_role(document_id, locator, role, level=None, output_mode="versioned")`
421
+ - `docx_get_tables(document_id)`
422
+ - `get_object(document_id, locator)`
423
+ - `list_children(document_id, locator, child_type=None, limit=None)`
424
+ - `create_object(document_id, parent_locator, object_type, properties, segments=None, range=None, position=None, output_mode="versioned")`
425
+ - `update_object(document_id, locator, properties, segments=None, range=None, output_mode="versioned")`
426
+ - `move_object(document_id, locator, new_parent_locator, position=None, output_mode="versioned")`
427
+ - `copy_object(document_id, locator, target_parent_locator, position=None, output_mode="versioned")`
428
+ - `batch_edit(document_id, operations, output_mode="versioned", dry_run=False)`
429
+ - `delete_object(document_id, locator, output_mode="versioned")`
430
+ - `docx_set_paragraph_style(document_id, locator, style_name, output_mode="versioned")`
431
+ - `docx_insert_page_break(document_id, locator, output_mode="versioned")`
432
+ - `docx_add_table(document_id, row_count, column_count, position=None, column_widths=None, style_name=None, output_mode="versioned")`
433
+ - `docx_merge_table_cells(document_id, start_locator, end_locator, output_mode="versioned")`
434
+ - `pptx_add_slide(document_id, layout_index=None, layout_name=None, output_mode="versioned")`
435
+ - `pptx_duplicate_slide(document_id, locator, position=None, output_mode="versioned")`
436
+ - `pptx_set_slide_layout(document_id, locator, layout_index=None, layout_name=None, output_mode="versioned")`
437
+ - `pptx_add_text_shape(document_id, locator, text, left, top, width, height, output_mode="versioned")`
438
+ - `xlsx_write_range(document_id, locator, values, output_mode="versioned")`
439
+ - `xlsx_insert_rows(...)` for both append-rows compatibility mode and worksheet row insertion mode
440
+ - `xlsx_insert_columns(document_id, locator, column_index, count, output_mode="versioned")`
441
+ - `xlsx_set_formula(document_id, locator, formula, output_mode="versioned")`
442
+ - `xlsx_merge_cells(document_id, locator, output_mode="versioned")`
443
+
444
+ Notes:
445
+
446
+ - `search_objects` is the canonical MCP search entrypoint; `search_documents` remains for compatibility.
447
+ - `create_object` and `update_object` support additive `segments` inputs for partial-formatting workflows.
448
+ - `style_inline` supports visible-text `range` inputs for partial inline styling across supported DOCX, PPTX, and XLSX targets.
449
+
450
+ ## Example query session
451
+
452
+ ![Query session](./images/mcp-query.png)
453
+
454
+ ## Item Id Reference
455
+
456
+ Use these item-id formats with `read`, `replace`, and `append`:
457
+
458
+ - DOCX: `para:<n>`
459
+ - PPTX: `slide:<slide-number>:shape:<shape-id>`
460
+ - XLSX: `sheet:<worksheet-name>!<cell-coordinate>`
461
+
462
+ ## Write Behavior
463
+
464
+ By default, all write commands create a versioned output file:
465
+
466
+ - `sample.docx` -> `sample.edited.20260318-214512345678.docx`
467
+ - `deck.pptx` -> `deck.edited.20260318-214512345678.pptx`
468
+ - `budget.xlsx` -> `budget.edited.20260318-214512345678.xlsx`
469
+
470
+ After a successful write:
471
+
472
+ - the output file is automatically reindexed
473
+ - CLI output reports the written output path
474
+ - search results can resolve against the new version immediately
475
+ - managed output roots are treated as readable workflow roots so versioned-write follow-up commands continue to work
476
+
477
+ If the source file changed after indexing:
478
+
479
+ - the service compares the current file hash to the indexed `content_hash`
480
+ - it tries to re-resolve the same item id against the current file
481
+ - if that fails, the command exits with stale-locator error code `3`
482
+
483
+ ## Development
484
+
485
+ Run the test suite with:
486
+
487
+ ```bash
488
+ uv run pytest
489
+ ```
490
+
491
+ This repository also includes an example skill for Office-document workflows at [`.github/skills/office-agent`](./.github/skills/office-agent/). The location is VS Code-specific, but it should work for Claude Code and Codex as well.
492
+
493
+ Acceptance coverage uses checked-in golden Office fixtures under [`tests/fixtures`](./tests/fixtures/), including DOCX, PPTX, and XLSX documents exercised by the CLI and FastMCP integration suites.
494
+
495
+ The test suite is written in `pytest` and includes:
496
+
497
+ - configuration and diagnostics coverage
498
+ - path-policy coverage
499
+ - SQLite store coverage
500
+ - DOCX adapter tests
501
+ - DOCX service workflow tests
502
+ - PPTX adapter tests
503
+ - PPTX service workflow tests
504
+ - XLSX adapter tests
505
+ - XLSX service workflow tests
506
+ - CLI round-trip and acceptance tests for DOCX, PPTX, and XLSX
507
+ - FastMCP stdio integration and acceptance tests for the MCP tool surface
508
+ - CLI/MCP parity tests for the core index → search → locate → read → replace workflow
509
+
510
+ ## Project Status
511
+
512
+ Implemented:
513
+
514
+ - shared CLI/application-core structure
515
+ - configuration and doctor checks
516
+ - allowed-root and output-root policy enforcement
517
+ - SQLite index bootstrap and FTS5-backed search
518
+ - embedding-backed semantic and hybrid retrieval with mode-aware CLI/MCP output
519
+ - DOCX paragraph extraction and editing workflow
520
+ - PPTX text-shape extraction and editing workflow
521
+ - XLSX cell extraction and editing workflow
522
+ - indexed document summary commands (`list` and `show`)
523
+ - JSON and quiet CLI output modes
524
+ - versioned write outputs with automatic reindex
525
+ - stale-locator protection for write commands
526
+ - MCP interface over stdio with FastMCP
527
+
528
+ Not implemented yet:
529
+
530
+ - SSE or streamable HTTP MCP transports
531
+ - MCP resources and prompts
532
+ - richer locator resolution beyond the current direct DOCX, PPTX, and XLSX flows
533
+
534
+
535
+ ## Principles of Participation
536
+
537
+ Everyone is invited and welcome to contribute: open issues, propose pull requests, share ideas, or help improve documentation. Participation is open to all, regardless of background or viewpoint.
538
+
539
+ This project follows the [FOSS Pluralism Manifesto](./FOSS_PLURALISM_MANIFESTO.md),
540
+ which affirms respect for people, freedom to critique ideas, and space for diverse perspectives.
541
+
542
+ ## License and Copyright
543
+
544
+ Copyright (c) 2026, Iwan van der Kleijn
545
+
546
+ This project is licensed under the MIT License. See the [LICENSE](LICENSE) file for details.