offagent 0.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. offagent/__init__.py +3 -0
  2. offagent/__main__.py +5 -0
  3. offagent/adapters/__init__.py +1 -0
  4. offagent/adapters/docx_adapter.py +1237 -0
  5. offagent/adapters/embedding_provider.py +132 -0
  6. offagent/adapters/pptx_adapter.py +940 -0
  7. offagent/adapters/xlsx_adapter.py +1266 -0
  8. offagent/app/__init__.py +1 -0
  9. offagent/app/progress.py +52 -0
  10. offagent/app/services.py +4267 -0
  11. offagent/config.py +287 -0
  12. offagent/domain/__init__.py +1 -0
  13. offagent/domain/locators.py +444 -0
  14. offagent/domain/models.py +477 -0
  15. offagent/domain/text_fragments.py +136 -0
  16. offagent/errors.py +29 -0
  17. offagent/indexing/__init__.py +1 -0
  18. offagent/indexing/store.py +795 -0
  19. offagent/interfaces/__init__.py +1 -0
  20. offagent/interfaces/cli.py +438 -0
  21. offagent/interfaces/cli_output.py +139 -0
  22. offagent/interfaces/cli_progress.py +120 -0
  23. offagent/interfaces/mcp.py +1145 -0
  24. offagent/interfaces/mcp_converters.py +80 -0
  25. offagent/interfaces/mcp_models.py +923 -0
  26. offagent/objects/__init__.py +3 -0
  27. offagent/objects/base.py +26 -0
  28. offagent/objects/docx_objects.py +951 -0
  29. offagent/objects/pptx_objects.py +895 -0
  30. offagent/objects/xlsx_objects.py +962 -0
  31. offagent/path_policy.py +42 -0
  32. offagent/storage/__init__.py +1 -0
  33. offagent/storage/versioning.py +31 -0
  34. offagent-0.10.0.dist-info/METADATA +546 -0
  35. offagent-0.10.0.dist-info/RECORD +39 -0
  36. offagent-0.10.0.dist-info/WHEEL +5 -0
  37. offagent-0.10.0.dist-info/entry_points.txt +2 -0
  38. offagent-0.10.0.dist-info/licenses/LICENSE +21 -0
  39. offagent-0.10.0.dist-info/top_level.txt +1 -0
offagent/config.py ADDED
@@ -0,0 +1,287 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ import tomllib
5
+ from dataclasses import dataclass
6
+ from pathlib import Path
7
+ from typing import Mapping
8
+
9
+ from offagent.errors import InvalidArgumentsError
10
+
11
+ try:
12
+ from dotenv import load_dotenv
13
+ except ModuleNotFoundError: # pragma: no cover - exercised through doctor checks
14
+ load_dotenv = None
15
+
16
# ---------------------------------------------------------------------------
# Built-in defaults, used when neither the config file nor the environment
# provides a value.
# ---------------------------------------------------------------------------

# Relative path, so it is looked up from the current working directory.
DEFAULT_CONFIG_PATH = Path("office-agent.toml")
DEFAULT_INDEX_PATH = Path(".offagent/index.sqlite3")

DEFAULT_EMBEDDING_MODEL = "BAAI/bge-small-en-v1.5"
DEFAULT_EMBEDDING_DIMENSIONS = 384
DEFAULT_VECTOR_SEARCH_TOP_K = 20

DEFAULT_HYBRID_KEYWORD_WEIGHT = 0.4
DEFAULT_HYBRID_SEMANTIC_WEIGHT = 0.6

# ---------------------------------------------------------------------------
# Names of the environment variables that can override individual settings.
# ---------------------------------------------------------------------------

ENV_CONFIG_PATH = "OFFAGENT_CONFIG"
ENV_INDEX_PATH = "OFFAGENT_INDEX_PATH"

ENV_DOCUMENT_ROOTS = "OFFAGENT_DOCUMENT_ROOTS"
ENV_ALLOWED_ROOTS = "OFFAGENT_ALLOWED_ROOTS"

ENV_OUTPUT_DIRECTORY = "OFFAGENT_OUTPUT_DIRECTORY"
ENV_OUTPUT_ROOTS = "OFFAGENT_OUTPUT_ROOTS"
ENV_ALLOW_INPLACE_OVERWRITE = "OFFAGENT_ALLOW_INPLACE_OVERWRITE"

ENV_EMBEDDING_MODEL = "OFFAGENT_EMBEDDING_MODEL"
ENV_EMBEDDING_DIMENSIONS = "OFFAGENT_EMBEDDING_DIMENSIONS"
ENV_VECTOR_SEARCH_TOP_K = "OFFAGENT_VECTOR_SEARCH_TOP_K"

ENV_HYBRID_KEYWORD_WEIGHT = "OFFAGENT_HYBRID_KEYWORD_WEIGHT"
ENV_HYBRID_SEMANTIC_WEIGHT = "OFFAGENT_HYBRID_SEMANTIC_WEIGHT"
35
+
36
+
37
@dataclass(frozen=True)
class AppConfig:
    """Immutable bundle of resolved application settings.

    Every field has a default, so ``AppConfig()`` is a valid configuration.
    Instances are frozen: attributes cannot be reassigned after construction.
    """

    # Where the index is stored.
    index_path: Path = DEFAULT_INDEX_PATH

    # Root directories; how they are enforced is decided outside this module.
    document_roots: tuple[Path, ...] = ()
    allowed_roots: tuple[Path, ...] = ()

    # Output settings. ``output_directory`` may be unset (None).
    output_directory: Path | None = None
    output_roots: tuple[Path, ...] = ()
    allow_inplace_overwrite: bool = True

    # Embedding / vector-search settings.
    embedding_model: str = DEFAULT_EMBEDDING_MODEL
    embedding_dimensions: int = DEFAULT_EMBEDDING_DIMENSIONS
    vector_search_top_k: int = DEFAULT_VECTOR_SEARCH_TOP_K

    # Weights for the keyword and semantic parts of hybrid search.
    hybrid_keyword_weight: float = DEFAULT_HYBRID_KEYWORD_WEIGHT
    hybrid_semantic_weight: float = DEFAULT_HYBRID_SEMANTIC_WEIGHT

    # Config file the settings were read from, or None when no file was used.
    config_path: Path | None = None
51
+
52
+
53
def load_config(
    config_path: Path | None = None,
    env: Mapping[str, str] | None = None,
) -> AppConfig:
    """Build the effective :class:`AppConfig`.

    Settings are layered, lowest to highest precedence:

    1. built-in defaults,
    2. the TOML config file chosen by ``_select_config_path`` (if any),
    3. ``OFFAGENT_*`` environment variables.

    Blank environment values are handled explicitly instead of being turned
    into ``Path("")`` (the current directory):

    * a blank ``OFFAGENT_INDEX_PATH`` is ignored, so the file/default value
      is kept;
    * a blank ``OFFAGENT_OUTPUT_DIRECTORY`` clears the output directory
      (``None``), matching how an empty ``output_directory`` in the config
      file is treated.

    Args:
        config_path: Explicit config file to read; takes priority over the
            environment variable and the default file name.
        env: Mapping to read overrides from. Defaults to ``os.environ``.

    Returns:
        The fully resolved, immutable configuration.

    Raises:
        FileNotFoundError: If an explicitly requested config file is missing.
        InvalidArgumentsError: If a boolean, integer or float setting cannot
            be parsed or is below its minimum.
    """
    if load_dotenv is not None:
        # python-dotenv is optional; when it is installed, load_dotenv() is
        # called first. NOTE(review): this also runs when an explicit ``env``
        # mapping is supplied.
        load_dotenv()

    env_values = dict(os.environ if env is None else env)
    selected_config_path = _select_config_path(config_path, env_values)

    values: dict[str, object] = {
        "index_path": DEFAULT_INDEX_PATH,
        "document_roots": (),
        "allowed_roots": (),
        "output_directory": None,
        "output_roots": (),
        "allow_inplace_overwrite": True,
        "embedding_model": DEFAULT_EMBEDDING_MODEL,
        "embedding_dimensions": DEFAULT_EMBEDDING_DIMENSIONS,
        "vector_search_top_k": DEFAULT_VECTOR_SEARCH_TOP_K,
        "hybrid_keyword_weight": DEFAULT_HYBRID_KEYWORD_WEIGHT,
        "hybrid_semantic_weight": DEFAULT_HYBRID_SEMANTIC_WEIGHT,
        "config_path": selected_config_path,
    }

    if selected_config_path is not None:
        values.update(_load_file_values(selected_config_path))

    # A blank index path cannot name a file; keep the file/default value.
    index_override = env_values.get(ENV_INDEX_PATH, "")
    if index_override.strip():
        values["index_path"] = Path(index_override).expanduser()

    # Path-list overrides share one format (os.pathsep-separated).
    for env_name, key in (
        (ENV_DOCUMENT_ROOTS, "document_roots"),
        (ENV_ALLOWED_ROOTS, "allowed_roots"),
        (ENV_OUTPUT_ROOTS, "output_roots"),
    ):
        if env_name in env_values:
            values[key] = _split_paths(env_values[env_name])

    if ENV_OUTPUT_DIRECTORY in env_values:
        directory_override = env_values[ENV_OUTPUT_DIRECTORY]
        # Blank means "no output directory", not "current directory".
        values["output_directory"] = _optional_path(
            directory_override if directory_override.strip() else None
        )

    if ENV_ALLOW_INPLACE_OVERWRITE in env_values:
        values["allow_inplace_overwrite"] = _parse_bool(
            env_values[ENV_ALLOW_INPLACE_OVERWRITE]
        )

    if ENV_EMBEDDING_MODEL in env_values:
        values["embedding_model"] = env_values[ENV_EMBEDDING_MODEL]

    if ENV_EMBEDDING_DIMENSIONS in env_values:
        values["embedding_dimensions"] = _parse_int(
            env_values[ENV_EMBEDDING_DIMENSIONS],
            ENV_EMBEDDING_DIMENSIONS,
            minimum=1,
        )

    if ENV_VECTOR_SEARCH_TOP_K in env_values:
        values["vector_search_top_k"] = _parse_int(
            env_values[ENV_VECTOR_SEARCH_TOP_K],
            ENV_VECTOR_SEARCH_TOP_K,
            minimum=1,
        )

    if ENV_HYBRID_KEYWORD_WEIGHT in env_values:
        values["hybrid_keyword_weight"] = _parse_float(
            env_values[ENV_HYBRID_KEYWORD_WEIGHT],
            ENV_HYBRID_KEYWORD_WEIGHT,
            minimum=0.0,
        )

    if ENV_HYBRID_SEMANTIC_WEIGHT in env_values:
        values["hybrid_semantic_weight"] = _parse_float(
            env_values[ENV_HYBRID_SEMANTIC_WEIGHT],
            ENV_HYBRID_SEMANTIC_WEIGHT,
            minimum=0.0,
        )

    output_directory = _expand_optional_path(values["output_directory"])
    output_roots = tuple(Path(root).expanduser() for root in values["output_roots"])
    if not output_roots and output_directory is not None:
        # With no explicit output roots, the output directory is the only root.
        output_roots = (output_directory,)

    return AppConfig(
        index_path=Path(values["index_path"]).expanduser(),
        document_roots=tuple(
            Path(root).expanduser() for root in values["document_roots"]
        ),
        allowed_roots=tuple(
            Path(root).expanduser() for root in values["allowed_roots"]
        ),
        output_directory=output_directory,
        output_roots=output_roots,
        allow_inplace_overwrite=bool(values["allow_inplace_overwrite"]),
        embedding_model=str(values["embedding_model"]),
        embedding_dimensions=int(values["embedding_dimensions"]),
        vector_search_top_k=int(values["vector_search_top_k"]),
        hybrid_keyword_weight=float(values["hybrid_keyword_weight"]),
        hybrid_semantic_weight=float(values["hybrid_semantic_weight"]),
        config_path=selected_config_path,
    )
155
+
156
+
157
+ def _select_config_path(
158
+ config_path: Path | None, env: Mapping[str, str]
159
+ ) -> Path | None:
160
+ if config_path is not None:
161
+ selected = config_path.expanduser()
162
+ if not selected.exists():
163
+ raise FileNotFoundError(selected)
164
+ return selected
165
+
166
+ if ENV_CONFIG_PATH in env:
167
+ selected = Path(env[ENV_CONFIG_PATH]).expanduser()
168
+ if not selected.exists():
169
+ raise FileNotFoundError(selected)
170
+ return selected
171
+
172
+ if DEFAULT_CONFIG_PATH.exists():
173
+ return DEFAULT_CONFIG_PATH
174
+
175
+ return None
176
+
177
+
178
def _coerce_bool(value: object) -> bool:
    """Turn a TOML value into a bool without Python truthiness surprises."""
    if isinstance(value, bool):
        return value
    # bool("false") is True, so strings (and 0/1 integers) go through the
    # same strict parser that environment variables use.
    return _parse_bool(str(value))


def _coerce_path_tuple(value: object, name: str) -> tuple[Path, ...]:
    """Turn a TOML string or array into a tuple of expanded paths."""
    if isinstance(value, str):
        # A bare string is one path; iterating it would yield one Path per
        # character. A blank string still means "no paths".
        return (Path(value).expanduser(),) if value.strip() else ()
    if isinstance(value, (list, tuple)):
        return tuple(Path(item).expanduser() for item in value)
    raise InvalidArgumentsError(f"{name} must be a list of paths")


def _load_file_values(config_path: Path) -> dict[str, object]:
    """Read *config_path* as TOML and return normalised setting values.

    Settings are read from the ``[offagent]`` table when present, otherwise
    from the top level of the document. Keys missing from the file are
    filled with the built-in defaults, so the result always contains every
    setting except ``config_path``.

    Args:
        config_path: Path of an existing TOML file.

    Returns:
        A mapping from setting name to its parsed value.

    Raises:
        InvalidArgumentsError: If ``offagent`` is not a table, a ``*_roots``
            value is neither a string nor an array, or a boolean/numeric
            setting is malformed or below its minimum.
        tomllib.TOMLDecodeError: If the file is not valid TOML.
    """
    with config_path.open("rb") as handle:
        raw = tomllib.load(handle)

    payload = raw.get("offagent", raw)
    if not isinstance(payload, dict):
        raise InvalidArgumentsError(
            f"Invalid 'offagent' section in {config_path}: expected a table"
        )

    return {
        "index_path": Path(payload.get("index_path", DEFAULT_INDEX_PATH)).expanduser(),
        "document_roots": _coerce_path_tuple(
            payload.get("document_roots", ()), "document_roots"
        ),
        "allowed_roots": _coerce_path_tuple(
            payload.get("allowed_roots", ()), "allowed_roots"
        ),
        "output_directory": _optional_path(payload.get("output_directory")),
        "output_roots": _coerce_path_tuple(
            payload.get("output_roots", ()), "output_roots"
        ),
        "allow_inplace_overwrite": _coerce_bool(
            payload.get("allow_inplace_overwrite", True)
        ),
        "embedding_model": str(payload.get("embedding_model", DEFAULT_EMBEDDING_MODEL)),
        "embedding_dimensions": _coerce_int(
            payload.get("embedding_dimensions", DEFAULT_EMBEDDING_DIMENSIONS),
            "embedding_dimensions",
            minimum=1,
        ),
        "vector_search_top_k": _coerce_int(
            payload.get("vector_search_top_k", DEFAULT_VECTOR_SEARCH_TOP_K),
            "vector_search_top_k",
            minimum=1,
        ),
        "hybrid_keyword_weight": _coerce_float(
            payload.get("hybrid_keyword_weight", DEFAULT_HYBRID_KEYWORD_WEIGHT),
            "hybrid_keyword_weight",
            minimum=0.0,
        ),
        "hybrid_semantic_weight": _coerce_float(
            payload.get("hybrid_semantic_weight", DEFAULT_HYBRID_SEMANTIC_WEIGHT),
            "hybrid_semantic_weight",
            minimum=0.0,
        ),
    }
215
+
216
+
217
+ def _split_paths(value: str) -> tuple[Path, ...]:
218
+ if not value.strip():
219
+ return ()
220
+ return tuple(Path(part).expanduser() for part in value.split(os.pathsep) if part)
221
+
222
+
223
+ def _optional_path(value: object) -> Path | None:
224
+ if value in (None, ""):
225
+ return None
226
+ return Path(str(value)).expanduser()
227
+
228
+
229
+ def _expand_optional_path(value: object) -> Path | None:
230
+ if value is None:
231
+ return None
232
+ return Path(value).expanduser()
233
+
234
+
235
+ def _parse_bool(value: str) -> bool:
236
+ normalized = value.strip().lower()
237
+ if normalized in {"1", "true", "yes", "on"}:
238
+ return True
239
+ if normalized in {"0", "false", "no", "off"}:
240
+ return False
241
+ raise InvalidArgumentsError(f"Invalid boolean value: {value}")
242
+
243
+
244
+ def _parse_int(value: str, name: str, *, minimum: int | None = None) -> int:
245
+ try:
246
+ parsed = int(value)
247
+ except ValueError as exc:
248
+ raise InvalidArgumentsError(
249
+ f"Invalid integer value for {name}: {value}"
250
+ ) from exc
251
+ if minimum is not None and parsed < minimum:
252
+ raise InvalidArgumentsError(f"{name} must be >= {minimum}")
253
+ return parsed
254
+
255
+
256
+ def _parse_float(value: str, name: str, *, minimum: float | None = None) -> float:
257
+ try:
258
+ parsed = float(value)
259
+ except ValueError as exc:
260
+ raise InvalidArgumentsError(f"Invalid float value for {name}: {value}") from exc
261
+ if minimum is not None and parsed < minimum:
262
+ raise InvalidArgumentsError(f"{name} must be >= {minimum}")
263
+ return parsed
264
+
265
+
266
+ def _coerce_int(value: object, name: str, *, minimum: int | None = None) -> int:
267
+ if isinstance(value, bool):
268
+ raise InvalidArgumentsError(f"Invalid integer value for {name}: {value}")
269
+ if isinstance(value, int):
270
+ parsed = value
271
+ else:
272
+ parsed = _parse_int(str(value), name, minimum=minimum)
273
+ if minimum is not None and parsed < minimum:
274
+ raise InvalidArgumentsError(f"{name} must be >= {minimum}")
275
+ return parsed
276
+
277
+
278
+ def _coerce_float(value: object, name: str, *, minimum: float | None = None) -> float:
279
+ if isinstance(value, bool):
280
+ raise InvalidArgumentsError(f"Invalid float value for {name}: {value}")
281
+ if isinstance(value, (int, float)):
282
+ parsed = float(value)
283
+ else:
284
+ parsed = _parse_float(str(value), name, minimum=minimum)
285
+ if minimum is not None and parsed < minimum:
286
+ raise InvalidArgumentsError(f"{name} must be >= {minimum}")
287
+ return parsed
@@ -0,0 +1 @@
1
+ """Shared domain models."""