offagent 0.10.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- offagent-0.10.0/LICENSE +21 -0
- offagent-0.10.0/PKG-INFO +546 -0
- offagent-0.10.0/README.md +526 -0
- offagent-0.10.0/pyproject.toml +40 -0
- offagent-0.10.0/setup.cfg +4 -0
- offagent-0.10.0/src/offagent/__init__.py +3 -0
- offagent-0.10.0/src/offagent/__main__.py +5 -0
- offagent-0.10.0/src/offagent/adapters/__init__.py +1 -0
- offagent-0.10.0/src/offagent/adapters/docx_adapter.py +1237 -0
- offagent-0.10.0/src/offagent/adapters/embedding_provider.py +132 -0
- offagent-0.10.0/src/offagent/adapters/pptx_adapter.py +940 -0
- offagent-0.10.0/src/offagent/adapters/xlsx_adapter.py +1266 -0
- offagent-0.10.0/src/offagent/app/__init__.py +1 -0
- offagent-0.10.0/src/offagent/app/progress.py +52 -0
- offagent-0.10.0/src/offagent/app/services.py +4267 -0
- offagent-0.10.0/src/offagent/config.py +287 -0
- offagent-0.10.0/src/offagent/domain/__init__.py +1 -0
- offagent-0.10.0/src/offagent/domain/locators.py +444 -0
- offagent-0.10.0/src/offagent/domain/models.py +477 -0
- offagent-0.10.0/src/offagent/domain/text_fragments.py +136 -0
- offagent-0.10.0/src/offagent/errors.py +29 -0
- offagent-0.10.0/src/offagent/indexing/__init__.py +1 -0
- offagent-0.10.0/src/offagent/indexing/store.py +795 -0
- offagent-0.10.0/src/offagent/interfaces/__init__.py +1 -0
- offagent-0.10.0/src/offagent/interfaces/cli.py +438 -0
- offagent-0.10.0/src/offagent/interfaces/cli_output.py +139 -0
- offagent-0.10.0/src/offagent/interfaces/cli_progress.py +120 -0
- offagent-0.10.0/src/offagent/interfaces/mcp.py +1145 -0
- offagent-0.10.0/src/offagent/interfaces/mcp_converters.py +80 -0
- offagent-0.10.0/src/offagent/interfaces/mcp_models.py +923 -0
- offagent-0.10.0/src/offagent/objects/__init__.py +3 -0
- offagent-0.10.0/src/offagent/objects/base.py +26 -0
- offagent-0.10.0/src/offagent/objects/docx_objects.py +951 -0
- offagent-0.10.0/src/offagent/objects/pptx_objects.py +895 -0
- offagent-0.10.0/src/offagent/objects/xlsx_objects.py +962 -0
- offagent-0.10.0/src/offagent/path_policy.py +42 -0
- offagent-0.10.0/src/offagent/storage/__init__.py +1 -0
- offagent-0.10.0/src/offagent/storage/versioning.py +31 -0
- offagent-0.10.0/src/offagent.egg-info/PKG-INFO +546 -0
- offagent-0.10.0/src/offagent.egg-info/SOURCES.txt +79 -0
- offagent-0.10.0/src/offagent.egg-info/dependency_links.txt +1 -0
- offagent-0.10.0/src/offagent.egg-info/entry_points.txt +2 -0
- offagent-0.10.0/src/offagent.egg-info/requires.txt +9 -0
- offagent-0.10.0/src/offagent.egg-info/top_level.txt +1 -0
- offagent-0.10.0/tests/test_config.py +94 -0
- offagent-0.10.0/tests/test_discovery.py +25 -0
- offagent-0.10.0/tests/test_doctor.py +60 -0
- offagent-0.10.0/tests/test_document_authoring_services.py +294 -0
- offagent-0.10.0/tests/test_docx_adapter.py +37 -0
- offagent-0.10.0/tests/test_docx_cli.py +105 -0
- offagent-0.10.0/tests/test_docx_objects.py +204 -0
- offagent-0.10.0/tests/test_docx_services.py +83 -0
- offagent-0.10.0/tests/test_embedding_provider.py +17 -0
- offagent-0.10.0/tests/test_interface_parity.py +147 -0
- offagent-0.10.0/tests/test_locators.py +125 -0
- offagent-0.10.0/tests/test_mcp.py +412 -0
- offagent-0.10.0/tests/test_mcp_acceptance.py +389 -0
- offagent-0.10.0/tests/test_mcp_v2.py +480 -0
- offagent-0.10.0/tests/test_object_models.py +82 -0
- offagent-0.10.0/tests/test_object_services.py +57 -0
- offagent-0.10.0/tests/test_object_services_write.py +255 -0
- offagent-0.10.0/tests/test_partial_formatting_services.py +271 -0
- offagent-0.10.0/tests/test_policy.py +71 -0
- offagent-0.10.0/tests/test_pptx_adapter.py +36 -0
- offagent-0.10.0/tests/test_pptx_cli.py +101 -0
- offagent-0.10.0/tests/test_pptx_objects.py +171 -0
- offagent-0.10.0/tests/test_pptx_services.py +71 -0
- offagent-0.10.0/tests/test_progress_reporting.py +141 -0
- offagent-0.10.0/tests/test_semantic_adapters.py +111 -0
- offagent-0.10.0/tests/test_semantic_mcp.py +269 -0
- offagent-0.10.0/tests/test_semantic_services.py +142 -0
- offagent-0.10.0/tests/test_store.py +136 -0
- offagent-0.10.0/tests/test_summaries.py +21 -0
- offagent-0.10.0/tests/test_text_fragments.py +61 -0
- offagent-0.10.0/tests/test_vector_mcp.py +104 -0
- offagent-0.10.0/tests/test_vector_services.py +156 -0
- offagent-0.10.0/tests/test_versioning.py +19 -0
- offagent-0.10.0/tests/test_xlsx_adapter.py +86 -0
- offagent-0.10.0/tests/test_xlsx_cli.py +133 -0
- offagent-0.10.0/tests/test_xlsx_objects.py +185 -0
- offagent-0.10.0/tests/test_xlsx_services.py +106 -0
offagent-0.10.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Iwan van der Kleijn
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
offagent-0.10.0/PKG-INFO
ADDED
|
@@ -0,0 +1,546 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: offagent
|
|
3
|
+
Version: 0.10.0
|
|
4
|
+
Summary: Local-first Office document tooling with a shared CLI and MCP-ready core
|
|
5
|
+
Author: Iwan van der Kleijn
|
|
6
|
+
License: MIT
|
|
7
|
+
Requires-Python: >=3.13
|
|
8
|
+
Description-Content-Type: text/markdown
|
|
9
|
+
License-File: LICENSE
|
|
10
|
+
Requires-Dist: fastembed<1,>=0.3
|
|
11
|
+
Requires-Dist: mcp<2,>=1
|
|
12
|
+
Requires-Dist: python-docx<2,>=1.1
|
|
13
|
+
Requires-Dist: python-pptx<2,>=1.0
|
|
14
|
+
Requires-Dist: openpyxl<4,>=3.1
|
|
15
|
+
Requires-Dist: pydantic<3,>=2.0
|
|
16
|
+
Requires-Dist: python-dotenv<2,>=1.0
|
|
17
|
+
Requires-Dist: rich<14,>=13
|
|
18
|
+
Requires-Dist: typer<1,>=0.12
|
|
19
|
+
Dynamic: license-file
|
|
20
|
+
|
|
21
|
+
# Office-Agent
|
|
22
|
+
[Python](https://www.python.org/)
|
|
23
|
+
[FastMCP](https://gofastmcp.com/getting-started/welcome)
|
|
24
|
+
[License: MIT](LICENSE)
|
|
25
|
+
[FOSS Pluralism Manifesto](FOSS_PLURALISM_MANIFESTO.md)
|
|
26
|
+
[OpenSpec](https://openspec.dev/)
|
|
27
|
+
|
|
28
|
+
Office-Agent (`offagent`) is a local-first Office document tool with a shared application core and a CLI entrypoint, `office-agent`.
|
|
29
|
+
|
|
30
|
+
Detailed technical implementation notes are available in [technical-implementation.md](./technical-implementation.md).
|
|
31
|
+
|
|
32
|
+

|
|
33
|
+
|
|
34
|
+
Office-Agent supports local-first workflows for `.docx`, `.pptx`, and `.xlsx` documents through both a CLI and an MCP server. The app indexes document structure into a local SQLite + FTS5 store, can optionally persist embeddings for semantic and hybrid retrieval, and routes all reads and writes through one shared service layer with path-policy enforcement, stale-locator checks, versioned outputs, and automatic reindexing.
|
|
35
|
+
|
|
36
|
+
## Current Features
|
|
37
|
+
|
|
38
|
+
### Core App
|
|
39
|
+
|
|
40
|
+
- TOML configuration with environment variable overrides
|
|
41
|
+
- `office-agent doctor` runtime checks for imports, SQLite, FTS5, index writability, configured roots, and vector-search readiness
|
|
42
|
+
- document discovery and indexing for `.docx`, `.pptx`, and `.xlsx`
|
|
43
|
+
- allowed-root guards for indexed reads and output-root guards for writes
|
|
44
|
+
- human-readable, JSON, and quiet CLI output modes
|
|
45
|
+
- versioned write outputs by default, with in-place overwrite available via `--output-mode inplace` when `allow_inplace_overwrite` permits it
|
|
46
|
+
- keyword, semantic, and hybrid search modes over indexed content
|
|
47
|
+
|
|
48
|
+
### Structured Document Model
|
|
49
|
+
|
|
50
|
+
- typed locators for DOCX, PPTX, and XLSX objects, with legacy locator compatibility where needed
|
|
51
|
+
- object traversal with `get_object` and `list_children`
|
|
52
|
+
- per-object capability reporting for `read`, `update`, `delete`, `add_child`, `move`, `copy`, and `style`
|
|
53
|
+
- generic object lifecycle operations: `create_object`, `update_object`, `move_object`, `copy_object`, `batch_edit`, and `delete_object`
|
|
54
|
+
- stale-locator detection across object and node workflows
|
|
55
|
+
|
|
56
|
+
### DOCX Support
|
|
57
|
+
|
|
58
|
+
- paragraph, run, section, table, table row, table cell, image, and page-break object resolution
|
|
59
|
+
- paragraph-level indexing and search with stable `para:<n>` compatibility
|
|
60
|
+
- node reads and writes for paragraphs and table cells
|
|
61
|
+
- paragraph insertion through the existing content-insert workflow
|
|
62
|
+
- DOCX escape hatches for paragraph style changes, page-break insertion, table creation, and table-cell merging
|
|
63
|
+
|
|
64
|
+
### PPTX Support
|
|
65
|
+
|
|
66
|
+
- presentation, slide, notes, shape, text shape, image shape, table, table row, table cell, and group-shape object resolution
|
|
67
|
+
- text-frame indexing and search with `slide:<n>:shape:<id>` compatibility
|
|
68
|
+
- node reads and writes for editable text-bearing targets
|
|
69
|
+
- slide move and copy operations through the generic object layer
|
|
70
|
+
- PPTX escape hatches for adding slides, duplicating slides, changing slide layouts, and inserting text shapes
|
|
71
|
+
|
|
72
|
+
### XLSX Support
|
|
73
|
+
|
|
74
|
+
- workbook, worksheet, row, column, cell, range, table, merged range, formula cell, and named-range object resolution
|
|
75
|
+
- cell-level indexing and search with `sheet:<name>!<coordinate>` compatibility
|
|
76
|
+
- direct cell writes plus guarded append semantics for string-compatible cells
|
|
77
|
+
- generic object deletion for worksheets, rows, columns, cells, and ranges where supported
|
|
78
|
+
- XLSX escape hatches for range writes, row insertion, column insertion, formula writes, and cell merging
|
|
79
|
+
|
|
80
|
+
### MCP Server
|
|
81
|
+
|
|
82
|
+
- stdio MCP server implemented with FastMCP
|
|
83
|
+
- document indexing and refresh tools
|
|
84
|
+
- canonical V2 search via `search_objects`, with deprecated `search_documents` alias compatibility
|
|
85
|
+
- structure and section tools, node read/write tools, object traversal tools, generic object mutation tools, and format-specific escape hatches
|
|
86
|
+
- structured MCP responses backed by the same application services used by the CLI
|
|
87
|
+
|
|
88
|
+
### Vector Search
|
|
89
|
+
|
|
90
|
+
- optional embedding generation during indexing and reindexing
|
|
91
|
+
- SQLite-backed embedding side tables keyed to indexed document items
|
|
92
|
+
- semantic retrieval over stored vectors
|
|
93
|
+
- hybrid retrieval that combines keyword and semantic scores
|
|
94
|
+
- `match_mode` and per-source score metadata in JSON and MCP search results
|
|
95
|
+
|
|
96
|
+
## Installation
|
|
97
|
+
|
|
98
|
+
The project uses `uv` in development.
|
|
99
|
+
|
|
100
|
+
```bash
|
|
101
|
+
uv sync
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
Run the CLI with:
|
|
105
|
+
|
|
106
|
+
```bash
|
|
107
|
+
uv run office-agent --help
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
## Configuration
|
|
111
|
+
|
|
112
|
+
The CLI accepts an optional config file via `--config <path>`.
|
|
113
|
+
|
|
114
|
+
Example `office-agent.toml`:
|
|
115
|
+
|
|
116
|
+
```toml
|
|
117
|
+
[offagent]
|
|
118
|
+
index_path = ".offagent/index.sqlite3"
|
|
119
|
+
document_roots = ["./docs"]
|
|
120
|
+
allowed_roots = ["./docs", "./edited"]
|
|
121
|
+
output_directory = "./edited"
|
|
122
|
+
output_roots = ["./edited", "./docs"]
|
|
123
|
+
allow_inplace_overwrite = true
|
|
124
|
+
embedding_model = "BAAI/bge-small-en-v1.5"
|
|
125
|
+
embedding_dimensions = 384
|
|
126
|
+
vector_search_top_k = 20
|
|
127
|
+
hybrid_keyword_weight = 0.4
|
|
128
|
+
hybrid_semantic_weight = 0.6
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
Environment variables override file settings:
|
|
132
|
+
|
|
133
|
+
- `OFFAGENT_CONFIG`
|
|
134
|
+
- `OFFAGENT_INDEX_PATH`
|
|
135
|
+
- `OFFAGENT_DOCUMENT_ROOTS`
|
|
136
|
+
- `OFFAGENT_ALLOWED_ROOTS`
|
|
137
|
+
- `OFFAGENT_OUTPUT_DIRECTORY`
|
|
138
|
+
- `OFFAGENT_OUTPUT_ROOTS`
|
|
139
|
+
- `OFFAGENT_ALLOW_INPLACE_OVERWRITE`
|
|
140
|
+
- `OFFAGENT_EMBEDDING_MODEL`
|
|
141
|
+
- `OFFAGENT_EMBEDDING_DIMENSIONS`
|
|
142
|
+
- `OFFAGENT_VECTOR_SEARCH_TOP_K`
|
|
143
|
+
- `OFFAGENT_HYBRID_KEYWORD_WEIGHT`
|
|
144
|
+
- `OFFAGENT_HYBRID_SEMANTIC_WEIGHT`
|
|
145
|
+
|
|
146
|
+
`OFFAGENT_DOCUMENT_ROOTS` uses the platform path separator, for example `:` on macOS/Linux and `;` on Windows.
|
|
147
|
+
|
|
148
|
+
Configuration fields:
|
|
149
|
+
|
|
150
|
+
- `index_path`: SQLite index file location
|
|
151
|
+
- `document_roots`: directories scanned by discovery and directory-based indexing
|
|
152
|
+
- `allowed_roots`: canonical roots allowed for indexed read operations; empty means no read guard
|
|
153
|
+
- `output_directory`: optional directory for versioned write outputs; defaults to the source document directory
|
|
154
|
+
- `output_roots`: canonical roots allowed for write outputs; if omitted and `output_directory` is set, this defaults to that directory
|
|
155
|
+
- `allow_inplace_overwrite`: when `true`, write commands may use `--output-mode inplace`; default is `true`
|
|
156
|
+
- `embedding_model`: embedding model name used when generating or querying vectors
|
|
157
|
+
- `embedding_dimensions`: expected vector dimensionality for the configured model
|
|
158
|
+
- `vector_search_top_k`: candidate pool size used by semantic and hybrid retrieval
|
|
159
|
+
- `hybrid_keyword_weight`: keyword contribution to hybrid ranking
|
|
160
|
+
- `hybrid_semantic_weight`: semantic contribution to hybrid ranking
|
|
161
|
+
|
|
162
|
+
## CLI
|
|
163
|
+
|
|
164
|
+
All commands accept `--config <path>` after the command name. Example:
|
|
165
|
+
|
|
166
|
+
```bash
|
|
167
|
+
uv run office-agent search "supplier shall" --config office-agent.toml
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
Structured commands also accept:
|
|
171
|
+
|
|
172
|
+
- `--json`: emit machine-readable JSON with no extra prose
|
|
173
|
+
- `--quiet`: suppress successful stdout output
|
|
174
|
+
|
|
175
|
+
`--json` and `--quiet` are mutually exclusive.
|
|
176
|
+
|
|
177
|
+
Write commands also accept `--output-mode <mode>` where `<mode>` is:
|
|
178
|
+
|
|
179
|
+
- `versioned`: default; writes `<name>.edited.<timestamp>.<ext>` and reindexes the new file
|
|
180
|
+
- `inplace`: overwrites the source file, but only if `allow_inplace_overwrite = true`
|
|
181
|
+
|
|
182
|
+
Common exit codes:
|
|
183
|
+
|
|
184
|
+
- `0`: success
|
|
185
|
+
- `1`: other runtime failure
|
|
186
|
+
- `2`: invalid arguments or incompatible flag combinations
|
|
187
|
+
- `3`: not-found target, missing indexed item/document, or stale-locator write failure
|
|
188
|
+
- `4`: not-editable target
|
|
189
|
+
- `5`: policy-refused operation
|
|
190
|
+
|
|
191
|
+
### Doctor
|
|
192
|
+
|
|
193
|
+
Checks runtime health, including vector-search readiness.
|
|
194
|
+
|
|
195
|
+
Options:
|
|
196
|
+
|
|
197
|
+
- `--config <path>`: optional config file path
|
|
198
|
+
|
|
199
|
+
```bash
|
|
200
|
+
uv run office-agent doctor
|
|
201
|
+
uv run office-agent doctor --config office-agent.toml
|
|
202
|
+
```
|
|
203
|
+
|
|
204
|
+
### Index And Reindex
|
|
205
|
+
|
|
206
|
+
`index` indexes one file or all supported Office files under a directory. `reindex` currently re-runs the same indexing flow. Add `--with-embeddings` to generate and persist embeddings during the run.
|
|
207
|
+
|
|
208
|
+
Arguments:
|
|
209
|
+
|
|
210
|
+
- `path`: file or directory to index
|
|
211
|
+
|
|
212
|
+
Options:
|
|
213
|
+
|
|
214
|
+
- `--with-embeddings`: generate embeddings for indexed items during this run
|
|
215
|
+
- `--config <path>`: optional config file path
|
|
216
|
+
|
|
217
|
+
```bash
|
|
218
|
+
uv run office-agent index ./docs
|
|
219
|
+
uv run office-agent index ./docs --with-embeddings
|
|
220
|
+
uv run office-agent index ./docs/sample.docx
|
|
221
|
+
uv run office-agent index ./docs/sample.pptx
|
|
222
|
+
uv run office-agent index ./docs/sample.xlsx
|
|
223
|
+
uv run office-agent reindex ./docs/sample.docx
|
|
224
|
+
uv run office-agent reindex ./docs/sample.docx --with-embeddings
|
|
225
|
+
uv run office-agent reindex ./docs/sample.pptx
|
|
226
|
+
uv run office-agent reindex ./docs/sample.xlsx
|
|
227
|
+
```
|
|
228
|
+
|
|
229
|
+
### Search
|
|
230
|
+
|
|
231
|
+
Searches indexed content through SQLite FTS5, stored embeddings, or both.
|
|
232
|
+
|
|
233
|
+
Arguments:
|
|
234
|
+
|
|
235
|
+
- `query`: search text
|
|
236
|
+
|
|
237
|
+
Options:
|
|
238
|
+
|
|
239
|
+
- `--type <docx|pptx|xlsx>`: restrict search to one document type
|
|
240
|
+
- `--doc <path>`: restrict search to one indexed document path
|
|
241
|
+
- `--mode <keyword|semantic|hybrid>`: retrieval mode; default `keyword`
|
|
242
|
+
- `--config <path>`: optional config file path
|
|
243
|
+
|
|
244
|
+
```bash
|
|
245
|
+
uv run office-agent search "supplier shall"
|
|
246
|
+
uv run office-agent search "supplier shall" --mode semantic
|
|
247
|
+
uv run office-agent search "supplier shall" --mode hybrid --json
|
|
248
|
+
uv run office-agent search "supplier shall" --type docx
|
|
249
|
+
uv run office-agent search "supplier shall" --type pptx
|
|
250
|
+
uv run office-agent search "supplier shall" --type xlsx
|
|
251
|
+
uv run office-agent search "supplier shall" --type docx --doc ./docs/sample.docx
|
|
252
|
+
uv run office-agent search "supplier shall" --type pptx --doc ./docs/sample.pptx
|
|
253
|
+
uv run office-agent search "supplier shall" --type xlsx --doc ./docs/sample.xlsx
|
|
254
|
+
```
|
|
255
|
+
|
|
256
|
+
Notes:
|
|
257
|
+
|
|
258
|
+
- `keyword` behaves like the original FTS-only search path
|
|
259
|
+
- `semantic` requires prior indexing with `--with-embeddings`
|
|
260
|
+
- `hybrid` combines keyword and semantic candidates and returns merged ranking metadata
|
|
261
|
+
- JSON output includes `match_mode` and `scores`; semantic and hybrid human-readable output also displays that metadata
|
|
262
|
+
|
|
263
|
+
### List
|
|
264
|
+
|
|
265
|
+
Lists indexed documents with document id, path, type, modified time, and indexed item count.
|
|
266
|
+
|
|
267
|
+
```bash
|
|
268
|
+
uv run office-agent list
|
|
269
|
+
uv run office-agent list --json
|
|
270
|
+
```
|
|
271
|
+
|
|
272
|
+
### Show
|
|
273
|
+
|
|
274
|
+
Shows one indexed document summary, or one indexed item within that document.
|
|
275
|
+
|
|
276
|
+
```bash
|
|
277
|
+
uv run office-agent show --doc ./docs/sample.docx
|
|
278
|
+
uv run office-agent show --doc ./docs/sample.docx --item para:3
|
|
279
|
+
uv run office-agent show --doc ./docs/sample.xlsx --item sheet:Budget2026!B12 --json
|
|
280
|
+
```
|
|
281
|
+
|
|
282
|
+
### Locate
|
|
283
|
+
|
|
284
|
+
Resolves a direct item reference for one document.
|
|
285
|
+
|
|
286
|
+
Options:
|
|
287
|
+
|
|
288
|
+
- `--doc <path>`: required document path
|
|
289
|
+
- `--paragraph <n>`: DOCX-only paragraph lookup
|
|
290
|
+
- `--slide <n>`: PPTX-only slide lookup
|
|
291
|
+
- `--shape <id>`: optional PPTX shape narrowing; only valid with `--slide`
|
|
292
|
+
- `--sheet <name>`: XLSX-only worksheet lookup
|
|
293
|
+
- `--cell <coordinate>`: XLSX-only cell coordinate; only valid with `--sheet`
|
|
294
|
+
- `--config <path>`: optional config file path
|
|
295
|
+
|
|
296
|
+
Valid option combinations:
|
|
297
|
+
|
|
298
|
+
- DOCX: `--doc` + `--paragraph`
|
|
299
|
+
- PPTX: `--doc` + `--slide` with optional `--shape`
|
|
300
|
+
- XLSX: `--doc` + `--sheet` + `--cell`
|
|
301
|
+
|
|
302
|
+
```bash
|
|
303
|
+
uv run office-agent locate --doc ./docs/sample.docx --paragraph 3
|
|
304
|
+
uv run office-agent locate --doc ./docs/sample.pptx --slide 1
|
|
305
|
+
uv run office-agent locate --doc ./docs/sample.pptx --slide 1 --shape 7
|
|
306
|
+
uv run office-agent locate --doc ./docs/sample.xlsx --sheet Budget2026 --cell B12
|
|
307
|
+
```
|
|
308
|
+
|
|
309
|
+
### Read
|
|
310
|
+
|
|
311
|
+
Reads the current source content for one indexed item.
|
|
312
|
+
|
|
313
|
+
Options:
|
|
314
|
+
|
|
315
|
+
- `--doc <path>`: required document path
|
|
316
|
+
- `--item <item-id>`: required item id
|
|
317
|
+
- `--config <path>`: optional config file path
|
|
318
|
+
|
|
319
|
+
```bash
|
|
320
|
+
uv run office-agent read --doc ./docs/sample.docx --item para:3
|
|
321
|
+
uv run office-agent read --doc ./docs/sample.pptx --item slide:1:shape:7
|
|
322
|
+
uv run office-agent read --doc ./docs/sample.xlsx --item sheet:Budget2026!B12
|
|
323
|
+
```
|
|
324
|
+
|
|
325
|
+
### Replace
|
|
326
|
+
|
|
327
|
+
Replaces the full text of one DOCX paragraph or PPTX text shape.
|
|
328
|
+
|
|
329
|
+
Options:
|
|
330
|
+
|
|
331
|
+
- `--doc <path>`: required document path
|
|
332
|
+
- `--item <item-id>`: required item id
|
|
333
|
+
- `--text <value>`: replacement text
|
|
334
|
+
- `--output-mode <versioned|inplace>`: write mode; default `versioned`
|
|
335
|
+
- `--config <path>`: optional config file path
|
|
336
|
+
|
|
337
|
+
Notes:
|
|
338
|
+
|
|
339
|
+
- `replace` is supported for DOCX and PPTX only
|
|
340
|
+
- XLSX uses `write-cell` instead of `replace`
|
|
341
|
+
|
|
342
|
+
```bash
|
|
343
|
+
uv run office-agent replace --doc ./docs/sample.docx --item para:3 --text "Updated paragraph text."
|
|
344
|
+
uv run office-agent replace --doc ./docs/sample.pptx --item slide:1:shape:7 --text "Updated slide text."
|
|
345
|
+
uv run office-agent replace --doc ./docs/sample.docx --item para:3 --text "Updated paragraph text." --output-mode versioned
|
|
346
|
+
```
|
|
347
|
+
|
|
348
|
+
### Append
|
|
349
|
+
|
|
350
|
+
Appends text to one supported target.
|
|
351
|
+
|
|
352
|
+
Options:
|
|
353
|
+
|
|
354
|
+
- `--doc <path>`: required document path
|
|
355
|
+
- `--item <item-id>`: required item id
|
|
356
|
+
- `--text <value>`: appended text
|
|
357
|
+
- `--output-mode <versioned|inplace>`: write mode; default `versioned`
|
|
358
|
+
- `--config <path>`: optional config file path
|
|
359
|
+
|
|
360
|
+
Notes:
|
|
361
|
+
|
|
362
|
+
- DOCX appends to the target paragraph
|
|
363
|
+
- PPTX appends to the target text frame
|
|
364
|
+
- XLSX appends only to empty or string-compatible cells
|
|
365
|
+
- stale-locator failures exit with code `3`
|
|
366
|
+
|
|
367
|
+
```bash
|
|
368
|
+
uv run office-agent append --doc ./docs/sample.docx --item para:3 --text " Additional text."
|
|
369
|
+
uv run office-agent append --doc ./docs/sample.pptx --item slide:1:shape:7 --text "\nAdditional slide text."
|
|
370
|
+
uv run office-agent append --doc ./docs/sample.xlsx --item sheet:Notes!A1 --text " Additional note."
|
|
371
|
+
```
|
|
372
|
+
|
|
373
|
+
### Write Cell
|
|
374
|
+
|
|
375
|
+
Writes one XLSX cell value directly.
|
|
376
|
+
|
|
377
|
+
Options:
|
|
378
|
+
|
|
379
|
+
- `--doc <path>`: required `.xlsx` document path
|
|
380
|
+
- `--sheet <name>`: required worksheet name
|
|
381
|
+
- `--cell <coordinate>`: required cell coordinate
|
|
382
|
+
- `--value <value>`: new cell value
|
|
383
|
+
- `--output-mode <versioned|inplace>`: write mode; default `versioned`
|
|
384
|
+
- `--config <path>`: optional config file path
|
|
385
|
+
|
|
386
|
+
```bash
|
|
387
|
+
uv run office-agent write-cell --doc ./docs/sample.xlsx --sheet Budget2026 --cell B12 --value "125000"
|
|
388
|
+
uv run office-agent write-cell --doc ./docs/sample.xlsx --sheet Budget2026 --cell B12 --value "125000" --output-mode inplace
|
|
389
|
+
```
|
|
390
|
+
|
|
391
|
+
### MCP
|
|
392
|
+
|
|
393
|
+
Starts the FastMCP server on stdio.
|
|
394
|
+
|
|
395
|
+
Options:
|
|
396
|
+
|
|
397
|
+
- `--config <path>`: optional config file path
|
|
398
|
+
|
|
399
|
+
```bash
|
|
400
|
+
uv run office-agent mcp
|
|
401
|
+
uv run office-agent mcp --config office-agent.toml
|
|
402
|
+
```
|
|
403
|
+
|
|
404
|
+
The MCP server exposes these tools:
|
|
405
|
+
|
|
406
|
+
- `index_documents(paths)`
|
|
407
|
+
- `refresh_document(document_id)`
|
|
408
|
+
- `list_documents()`
|
|
409
|
+
- `create_document(format, output_path, initial_sheet_name=None, output_mode="versioned")`
|
|
410
|
+
- `add_content_block(document_id, block_type, properties, output_mode="versioned")`
|
|
411
|
+
- `search_objects(query, file_type=None, document_id=None, mode="keyword", limit=20)`
|
|
412
|
+
- `search_documents(...)` as a deprecated alias for pre-V2 consumers
|
|
413
|
+
- `get_structure(document_id)`
|
|
414
|
+
- `get_section(document_id, section_id, cell_range=None)`
|
|
415
|
+
- `get_node(document_id, node_id)`
|
|
416
|
+
- `write_node(document_id, node_id, content, output_mode="versioned")`
|
|
417
|
+
- `insert_content(document_id, content, style_name=None, after_node_id=None, output_mode="versioned")`
|
|
418
|
+
- `style_inline(document_id, locator, style, range=None, clear_fields=None, output_mode="versioned")`
|
|
419
|
+
- `style_block(document_id, locator, style, clear_fields=None, output_mode="versioned")`
|
|
420
|
+
- `set_structural_role(document_id, locator, role, level=None, output_mode="versioned")`
|
|
421
|
+
- `docx_get_tables(document_id)`
|
|
422
|
+
- `get_object(document_id, locator)`
|
|
423
|
+
- `list_children(document_id, locator, child_type=None, limit=None)`
|
|
424
|
+
- `create_object(document_id, parent_locator, object_type, properties, segments=None, range=None, position=None, output_mode="versioned")`
|
|
425
|
+
- `update_object(document_id, locator, properties, segments=None, range=None, output_mode="versioned")`
|
|
426
|
+
- `move_object(document_id, locator, new_parent_locator, position=None, output_mode="versioned")`
|
|
427
|
+
- `copy_object(document_id, locator, target_parent_locator, position=None, output_mode="versioned")`
|
|
428
|
+
- `batch_edit(document_id, operations, output_mode="versioned", dry_run=False)`
|
|
429
|
+
- `delete_object(document_id, locator, output_mode="versioned")`
|
|
430
|
+
- `docx_set_paragraph_style(document_id, locator, style_name, output_mode="versioned")`
|
|
431
|
+
- `docx_insert_page_break(document_id, locator, output_mode="versioned")`
|
|
432
|
+
- `docx_add_table(document_id, row_count, column_count, position=None, column_widths=None, style_name=None, output_mode="versioned")`
|
|
433
|
+
- `docx_merge_table_cells(document_id, start_locator, end_locator, output_mode="versioned")`
|
|
434
|
+
- `pptx_add_slide(document_id, layout_index=None, layout_name=None, output_mode="versioned")`
|
|
435
|
+
- `pptx_duplicate_slide(document_id, locator, position=None, output_mode="versioned")`
|
|
436
|
+
- `pptx_set_slide_layout(document_id, locator, layout_index=None, layout_name=None, output_mode="versioned")`
|
|
437
|
+
- `pptx_add_text_shape(document_id, locator, text, left, top, width, height, output_mode="versioned")`
|
|
438
|
+
- `xlsx_write_range(document_id, locator, values, output_mode="versioned")`
|
|
439
|
+
- `xlsx_insert_rows(...)` for both append-rows compatibility mode and worksheet row insertion mode
|
|
440
|
+
- `xlsx_insert_columns(document_id, locator, column_index, count, output_mode="versioned")`
|
|
441
|
+
- `xlsx_set_formula(document_id, locator, formula, output_mode="versioned")`
|
|
442
|
+
- `xlsx_merge_cells(document_id, locator, output_mode="versioned")`
|
|
443
|
+
|
|
444
|
+
Notes:
|
|
445
|
+
|
|
446
|
+
- `search_objects` is the canonical MCP search entrypoint; `search_documents` remains for compatibility.
|
|
447
|
+
- `create_object` and `update_object` support additive `segments` inputs for partial-formatting workflows.
|
|
448
|
+
- `style_inline` supports visible-text `range` inputs for partial inline styling across supported DOCX, PPTX, and XLSX targets.
|
|
449
|
+
|
|
450
|
+
## Example query session
|
|
451
|
+
|
|
452
|
+

|
|
453
|
+
|
|
454
|
+
## Item Id Reference
|
|
455
|
+
|
|
456
|
+
Use these item-id formats with `show --item`, `read`, `replace`, and `append` (`replace` supports DOCX and PPTX only; XLSX uses `write-cell`):
|
|
457
|
+
|
|
458
|
+
- DOCX: `para:<n>`
|
|
459
|
+
- PPTX: `slide:<slide-number>:shape:<shape-id>`
|
|
460
|
+
- XLSX: `sheet:<worksheet-name>!<cell-coordinate>`
|
|
461
|
+
|
|
462
|
+
## Write Behavior
|
|
463
|
+
|
|
464
|
+
By default, all write commands create a versioned output file:
|
|
465
|
+
|
|
466
|
+
- `sample.docx` -> `sample.edited.20260318-214512345678.docx`
|
|
467
|
+
- `deck.pptx` -> `deck.edited.20260318-214512345678.pptx`
|
|
468
|
+
- `budget.xlsx` -> `budget.edited.20260318-214512345678.xlsx`
|
|
469
|
+
|
|
470
|
+
After a successful write:
|
|
471
|
+
|
|
472
|
+
- the output file is automatically reindexed
|
|
473
|
+
- CLI output reports the written output path
|
|
474
|
+
- search results can resolve against the new version immediately
|
|
475
|
+
- managed output roots are treated as readable workflow roots so versioned-write follow-up commands continue to work
|
|
476
|
+
|
|
477
|
+
If the source file changed after indexing:
|
|
478
|
+
|
|
479
|
+
- the service compares the current file hash to the indexed `content_hash`
|
|
480
|
+
- it tries to re-resolve the same item id against the current file
|
|
481
|
+
- if that fails, the command exits with stale-locator error code `3`
|
|
482
|
+
|
|
483
|
+
## Development
|
|
484
|
+
|
|
485
|
+
Run the test suite with:
|
|
486
|
+
|
|
487
|
+
```bash
|
|
488
|
+
uv run pytest
|
|
489
|
+
```
|
|
490
|
+
|
|
491
|
+
This repository also includes an example skill for Office-document workflows at [`.github/skills/office-agent`](./.github/skills/office-agent/). The location is VS Code-specific, but the skill should work with Claude Code and Codex as well.
|
|
492
|
+
|
|
493
|
+
Acceptance coverage uses checked-in golden Office fixtures under [`tests/fixtures`](./tests/fixtures/), including DOCX, PPTX, and XLSX documents exercised by the CLI and FastMCP integration suites.
|
|
494
|
+
|
|
495
|
+
The test suite is written in `pytest` and includes:
|
|
496
|
+
|
|
497
|
+
- configuration and diagnostics coverage
|
|
498
|
+
- path-policy coverage
|
|
499
|
+
- SQLite store coverage
|
|
500
|
+
- DOCX adapter tests
|
|
501
|
+
- DOCX service workflow tests
|
|
502
|
+
- PPTX adapter tests
|
|
503
|
+
- PPTX service workflow tests
|
|
504
|
+
- XLSX adapter tests
|
|
505
|
+
- XLSX service workflow tests
|
|
506
|
+
- CLI round-trip and acceptance tests for DOCX, PPTX, and XLSX
|
|
507
|
+
- FastMCP stdio integration and acceptance tests for the MCP tool surface
|
|
508
|
+
- CLI/MCP parity tests for the core index → search → locate → read → replace workflow
|
|
509
|
+
|
|
510
|
+
## Project Status
|
|
511
|
+
|
|
512
|
+
Implemented:
|
|
513
|
+
|
|
514
|
+
- shared CLI/application-core structure
|
|
515
|
+
- configuration and doctor checks
|
|
516
|
+
- allowed-root and output-root policy enforcement
|
|
517
|
+
- SQLite index bootstrap and FTS5-backed search
|
|
518
|
+
- embedding-backed semantic and hybrid retrieval with mode-aware CLI/MCP output
|
|
519
|
+
- DOCX paragraph extraction and editing workflow
|
|
520
|
+
- PPTX text-shape extraction and editing workflow
|
|
521
|
+
- XLSX cell extraction and editing workflow
|
|
522
|
+
- indexed document summary commands (`list` and `show`)
|
|
523
|
+
- JSON and quiet CLI output modes
|
|
524
|
+
- versioned write outputs with automatic reindex
|
|
525
|
+
- stale-locator protection for write commands
|
|
526
|
+
- MCP interface over stdio with FastMCP
|
|
527
|
+
|
|
528
|
+
Not implemented yet:
|
|
529
|
+
|
|
530
|
+
- SSE or streamable HTTP MCP transports
|
|
531
|
+
- MCP resources and prompts
|
|
532
|
+
- richer locator resolution beyond the current direct DOCX, PPTX, and XLSX flows
|
|
533
|
+
|
|
534
|
+
|
|
535
|
+
## Principles of Participation
|
|
536
|
+
|
|
537
|
+
Everyone is invited and welcome to contribute: open issues, propose pull requests, share ideas, or help improve documentation. Participation is open to all, regardless of background or viewpoint.
|
|
538
|
+
|
|
539
|
+
This project follows the [FOSS Pluralism Manifesto](./FOSS_PLURALISM_MANIFESTO.md),
|
|
540
|
+
which affirms respect for people, freedom to critique ideas, and space for diverse perspectives.
|
|
541
|
+
|
|
542
|
+
## License and Copyright
|
|
543
|
+
|
|
544
|
+
Copyright (c) 2026, Iwan van der Kleijn
|
|
545
|
+
|
|
546
|
+
This project is licensed under the MIT License. See the [LICENSE](LICENSE) file for details.
|