oghma 0.0.1-py3-none-any.whl → 0.3.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- oghma/__init__.py +1 -3
- oghma/cli.py +342 -0
- oghma/config.py +262 -0
- oghma/daemon.py +198 -0
- oghma/embedder.py +107 -0
- oghma/exporter.py +177 -0
- oghma/extractor.py +180 -0
- oghma/mcp_server.py +112 -0
- oghma/migration.py +63 -0
- oghma/parsers/__init__.py +26 -0
- oghma/parsers/base.py +24 -0
- oghma/parsers/claude_code.py +62 -0
- oghma/parsers/codex.py +84 -0
- oghma/parsers/openclaw.py +64 -0
- oghma/parsers/opencode.py +90 -0
- oghma/storage.py +753 -0
- oghma/watcher.py +97 -0
- oghma-0.3.0.dist-info/METADATA +26 -0
- oghma-0.3.0.dist-info/RECORD +22 -0
- {oghma-0.0.1.dist-info → oghma-0.3.0.dist-info}/WHEEL +2 -1
- oghma-0.3.0.dist-info/entry_points.txt +3 -0
- oghma-0.3.0.dist-info/top_level.txt +1 -0
- oghma-0.0.1.dist-info/METADATA +0 -33
- oghma-0.0.1.dist-info/RECORD +0 -4
oghma/__init__.py
CHANGED
oghma/cli.py
ADDED
@@ -0,0 +1,342 @@
import os
import signal
import time
from pathlib import Path

import click
from rich.console import Console
from rich.table import Table

from oghma import __version__
from oghma.config import (
    create_default_config,
    get_config_path,
    load_config,
    validate_config,
)
from oghma.daemon import Daemon, get_daemon_pid
from oghma.embedder import EmbedConfig, create_embedder
from oghma.exporter import Exporter, ExportOptions
from oghma.migration import EmbeddingMigration
from oghma.storage import Storage

console = Console()


@click.group()
@click.version_option(version=__version__, prog_name="oghma")
def cli() -> None:
    pass


@cli.command()
def init() -> None:
    config_path = get_config_path()

    if config_path.exists():
        console.print(f"[yellow]Config already exists at {config_path}[/yellow]")
        if not click.confirm("Overwrite existing config?"):
            console.print("[green]Init cancelled[/green]")
            return

    console.print("[blue]Creating Oghma configuration...[/blue]")
    config = create_default_config()
    console.print(f"[green]Config created at {config_path}[/green]")
    console.print(f"[cyan]Database path: {config['storage']['db_path']}[/cyan]")
    console.print("\n[yellow]Run 'oghma status' to verify setup[/yellow]")


@cli.command()
def status() -> None:
    try:
        config_path = get_config_path()
        config = load_config()
        db_path = config["storage"]["db_path"]
        pid_file = config["daemon"]["pid_file"]

        table = Table(title="Oghma Status", show_header=True, header_style="bold magenta")
        table.add_column("Property", style="cyan")
        table.add_column("Value", style="green")

        table.add_row("Config Path", str(config_path))

        pid = get_daemon_pid(pid_file)
        if pid:
            table.add_row("Daemon Status", f"[green]Running (PID: {pid})[/green]")
        else:
            table.add_row("Daemon Status", "[red]Stopped[/red]")

        table.add_row("Database Path", db_path)

        if Path(db_path).exists():
            storage = Storage(db_path, config)
            memory_count = storage.get_memory_count()
            table.add_row("Memory Count", str(memory_count))

            logs = storage.get_recent_extraction_logs(limit=1)
            if logs:
                last_extraction = logs[0]["created_at"]
                table.add_row("Last Extraction", last_extraction)
            else:
                table.add_row("Last Extraction", "Never")

            table.add_row("Database Status", "[green]Exists[/green]")

            from oghma.watcher import Watcher

            watcher = Watcher(config, storage)
            watched_files = watcher.discover_files()
            table.add_row("Watched Files", str(len(watched_files)))
        else:
            table.add_row("Memory Count", "0")
            table.add_row("Last Extraction", "Never")
            table.add_row("Database Status", "[yellow]Not created yet[/yellow]")
            table.add_row("Watched Files", "0")

        console.print(table)

        errors = validate_config(config)
        if errors:
            console.print("\n[red]Configuration errors:[/red]")
            for error in errors:
                console.print(f" [red]- {error}[/red]")

    except FileNotFoundError:
        console.print("[red]Config not found. Run 'oghma init' first.[/red]")
    except Exception as e:
        console.print(f"[red]Error: {e}[/red]")


@cli.command()
@click.option("--foreground", "-f", is_flag=True, help="Run in foreground (don't daemonize)")
def start(foreground: bool) -> None:
    try:
        config = load_config()
        pid_file = config["daemon"]["pid_file"]

        pid = get_daemon_pid(pid_file)
        if pid:
            console.print(f"[red]Daemon already running (PID: {pid})[/red]")
            console.print("Use 'oghma stop' to stop it first.")
            raise SystemExit(1)

        console.print("[blue]Starting Oghma daemon...[/blue]")

        if not foreground:
            try:
                pid = os.fork()
                if pid > 0:
                    console.print(f"[green]Daemon started in background (PID: {pid})[/green]")
                    return
            except OSError as e:
                console.print(f"[yellow]Fork failed: {e}. Running in foreground.[/yellow]")

        daemon = Daemon(config)
        daemon.start()

    except FileNotFoundError:
        console.print("[red]Config not found. Run 'oghma init' first.[/red]")
        raise SystemExit(1) from None
    except Exception as e:
        console.print(f"[red]Error starting daemon: {e}[/red]")
        raise SystemExit(1) from None


@cli.command()
def stop() -> None:
    try:
        config = load_config()
        pid_file = config["daemon"]["pid_file"]

        pid = get_daemon_pid(pid_file)
        if not pid:
            console.print("[yellow]Daemon is not running[/yellow]")
            return

        console.print(f"[blue]Stopping daemon (PID: {pid})...[/blue]")

        try:
            os.kill(pid, signal.SIGTERM)
        except ProcessLookupError:
            console.print("[yellow]Daemon process not found. Cleaning up PID file.[/yellow]")
            Path(pid_file).unlink(missing_ok=True)
            return

        for _ in range(10):
            time.sleep(0.5)
            if not get_daemon_pid(pid_file):
                console.print("[green]Daemon stopped successfully[/green]")
                return

        console.print("[yellow]Daemon did not stop gracefully. Sending SIGKILL...[/yellow]")
        try:
            os.kill(pid, signal.SIGKILL)
        except ProcessLookupError:
            pass

        Path(pid_file).unlink(missing_ok=True)
        console.print("[green]Daemon force stopped[/green]")

    except FileNotFoundError:
        console.print("[red]Config not found. Run 'oghma init' first.[/red]")
        raise SystemExit(1) from None
    except Exception as e:
        console.print(f"[red]Error stopping daemon: {e}[/red]")
        raise SystemExit(1) from None


@cli.command()
@click.argument("query")
@click.option("--limit", "-n", default=10, help="Max results")
@click.option("--category", "-c", help="Filter by category")
@click.option(
    "--mode",
    type=click.Choice(["keyword", "vector", "hybrid"]),
    default="keyword",
    show_default=True,
    help="Search strategy",
)
def search(query: str, limit: int, category: str | None, mode: str) -> None:
    try:
        config = load_config()
        storage = Storage(config=config)
        query_embedding: list[float] | None = None

        if mode in {"vector", "hybrid"}:
            embed_config = config.get("embedding", {})
            embedder = create_embedder(EmbedConfig.from_dict(embed_config))
            query_embedding = embedder.embed(query)

        results = storage.search_memories_hybrid(
            query=query,
            query_embedding=query_embedding,
            limit=limit,
            category=category,
            search_mode=mode,
        )

        if not results:
            console.print(f"[yellow]No memories found matching: {query}[/yellow]")
            return

        console.print(f"[cyan]Found {len(results)} memories matching: {query}[/cyan]\n")

        for idx, memory in enumerate(results, 1):
            table = Table(show_header=False, box=None, padding=(0, 0))
            table.add_column("", style="cyan")
            table.add_column("")

            table.add_row(f"[bold]#{idx}[/bold]", f"[dim]{memory['created_at']}[/dim]")
            table.add_row("Category", f"[green]{memory['category']}[/green]")
            table.add_row("Source", f"{memory['source_tool']} ({Path(memory['source_file']).name})")
            table.add_row("Confidence", f"{memory['confidence']:.0%}")
            table.add_row("Content", memory["content"])

            console.print(table)
            console.print()

    except FileNotFoundError:
        console.print("[red]Config not found. Run 'oghma init' first.[/red]")
        raise SystemExit(1) from None
    except Exception as e:
        console.print(f"[red]Error searching memories: {e}[/red]")
        raise SystemExit(1) from None


@cli.command("migrate-embeddings")
@click.option("--batch-size", default=100, show_default=True, help="Batch size")
@click.option("--dry-run", is_flag=True, help="Preview migration without writing embeddings")
def migrate_embeddings(batch_size: int, dry_run: bool) -> None:
    try:
        config = load_config()
        storage = Storage(config=config)

        done_before, total = storage.get_embedding_progress()
        console.print(
            f"[blue]Embedding progress before migration:[/blue] {done_before}/{total} memories"
        )

        if done_before == total and total > 0:
            console.print("[green]All active memories already have embeddings.[/green]")
            return

        embed_config = config.get("embedding", {})
        embedder = create_embedder(EmbedConfig.from_dict(embed_config, batch_size=batch_size))

        migration = EmbeddingMigration(
            storage=storage,
            embedder=embedder,
            batch_size=batch_size,
        )
        result = migration.run(dry_run=dry_run)

        done_after, total_after = storage.get_embedding_progress()
        if dry_run:
            console.print(
                f"[yellow]Dry run complete.[/yellow] "
                f"Would process {result.processed} memories."
            )
            return

        console.print(
            "[green]Migration complete.[/green] "
            f"Processed={result.processed}, migrated={result.migrated}, "
            f"failed={result.failed}, skipped={result.skipped}"
        )
        console.print(
            f"[cyan]Embedding progress after migration:[/cyan] {done_after}/{total_after} memories"
        )
    except FileNotFoundError:
        console.print("[red]Config not found. Run 'oghma init' first.[/red]")
        raise SystemExit(1) from None
    except Exception as e:
        console.print(f"[red]Error migrating embeddings: {e}[/red]")
        raise SystemExit(1) from None


@cli.command()
@click.option("--output", "-o", type=click.Path(), help="Output directory")
@click.option("--format", "-f", type=click.Choice(["markdown", "json"]), default="markdown")
@click.option(
    "--group-by", "-g", type=click.Choice(["category", "date", "source"]), default="category"
)
@click.option("--category", "-c", help="Export only this category")
def export(output: str | None, format: str, group_by: str, category: str | None) -> None:
    """Export memories to files."""
    try:
        config = load_config()
        storage = Storage(config=config)

        output_dir = Path(output or config["export"]["output_dir"])

        options = ExportOptions(output_dir=output_dir, format=format, group_by=group_by)
        exporter = Exporter(storage, options)

        if category:
            console.print(f"[blue]Exporting memories for category: {category}[/blue]")
            file_path = exporter.export_category(category)
            console.print(f"[green]Exported to: {file_path}[/green]")
        else:
            console.print(f"[blue]Exporting memories (grouped by {group_by})...[/blue]")
            files = exporter.export()

            if not files:
                console.print("[yellow]No memories found to export[/yellow]")
                return

            for file_path in files:
                console.print(f"[green]Exported to: {file_path}[/green]")

    except ValueError as e:
        console.print(f"[red]Error: {e}[/red]")
        raise SystemExit(1) from None
    except FileNotFoundError:
        console.print("[red]Config not found. Run 'oghma init' first.[/red]")
        raise SystemExit(1) from None
    except Exception as e:
        console.print(f"[red]Error exporting memories: {e}[/red]")
        raise SystemExit(1) from None


def main() -> None:
    cli()
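
The new command group can be exercised in-process through Click's test runner instead of the installed `oghma` console script. A minimal sketch, assuming oghma 0.3.0 is importable and that a config at ~/.oghma/config.yaml either exists or may be created with the defaults from config.py below; output depends on local config and database state:

    # Drive the Click group defined in oghma/cli.py without a console script.
    from click.testing import CliRunner

    from oghma.cli import cli

    runner = CliRunner()

    # Equivalent to running `oghma status` on the command line.
    result = runner.invoke(cli, ["status"])
    print(result.exit_code)
    print(result.output)

    # Equivalent to `oghma search "sqlite" --limit 5 --mode keyword`;
    # vector/hybrid modes additionally require a configured embedding provider.
    result = runner.invoke(cli, ["search", "sqlite", "--limit", "5", "--mode", "keyword"])
    print(result.output)
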
oghma/config.py
ADDED
@@ -0,0 +1,262 @@
import os
from pathlib import Path
from typing import TypedDict

import yaml


class StorageConfig(TypedDict):
    db_path: str
    backup_enabled: bool
    backup_dir: str
    backup_retention_days: int


class DaemonConfig(TypedDict):
    poll_interval: int
    log_level: str
    log_file: str
    pid_file: str
    min_messages: int


class ExtractionConfig(TypedDict):
    model: str
    max_content_chars: int
    categories: list[str]
    confidence_threshold: float


class ExportConfig(TypedDict):
    output_dir: str
    format: str


class EmbeddingConfig(TypedDict):
    provider: str
    model: str
    dimensions: int
    batch_size: int
    rate_limit_delay: float
    max_retries: int


class ToolConfig(TypedDict, total=False):
    enabled: bool
    paths: list[str]


class ToolsConfig(TypedDict, total=False):
    claude_code: ToolConfig
    codex: ToolConfig
    openclaw: ToolConfig
    opencode: ToolConfig
    cursor: ToolConfig


class Config(TypedDict):
    storage: StorageConfig
    daemon: DaemonConfig
    extraction: ExtractionConfig
    embedding: EmbeddingConfig
    export: ExportConfig
    tools: ToolsConfig


DEFAULT_CONFIG: Config = {
    "storage": {
        "db_path": "~/.oghma/oghma.db",
        "backup_enabled": True,
        "backup_dir": "~/.oghma/backups",
        "backup_retention_days": 30,
    },
    "daemon": {
        "poll_interval": 300,
        "log_level": "INFO",
        "log_file": "~/.oghma/oghma.log",
        "pid_file": "~/.oghma/oghma.pid",
        "min_messages": 6,
    },
    "extraction": {
        "model": "gpt-4o-mini",
        "max_content_chars": 4000,
        "categories": ["learning", "preference", "project_context", "gotcha", "workflow"],
        "confidence_threshold": 0.5,
    },
    "embedding": {
        "provider": "openai",
        "model": "text-embedding-3-small",
        "dimensions": 1536,
        "batch_size": 100,
        "rate_limit_delay": 0.1,
        "max_retries": 3,
    },
    "export": {
        "output_dir": "~/.oghma/export",
        "format": "markdown",
    },
    "tools": {
        "claude_code": {"enabled": True, "paths": ["~/.claude/projects/-Users-*/*.jsonl"]},
        "codex": {"enabled": True, "paths": ["~/.codex/sessions/**/rollout-*.jsonl"]},
        "openclaw": {"enabled": True, "paths": ["~/.openclaw/agents/*/sessions/*.jsonl"]},
        "opencode": {"enabled": True, "paths": ["~/.local/share/opencode/storage/message/ses_*"]},
        "cursor": {"enabled": False, "paths": []},
    },
}


def expand_path(path: str) -> str:
    return str(Path(path).expanduser())


def get_config_path() -> Path:
    return Path.home() / ".oghma" / "config.yaml"


def load_config() -> Config:
    config_path = get_config_path()

    if not config_path.exists():
        return create_default_config()

    with open(config_path) as f:
        loaded = yaml.safe_load(f)

    if not loaded:
        return create_default_config()

    from typing import cast

    merged = _merge_defaults(cast(dict, DEFAULT_CONFIG), loaded)
    merged = _apply_env_overrides(merged)
    _expand_paths_inplace(merged)

    return merged


def create_default_config() -> Config:
    from typing import cast

    config_path = get_config_path()
    config_path.parent.mkdir(parents=True, exist_ok=True)

    config = _apply_env_overrides(cast(Config, DEFAULT_CONFIG.copy()))
    _expand_paths_inplace(config)

    with open(config_path, "w") as f:
        yaml.dump(DEFAULT_CONFIG, f, default_flow_style=False)

    return config


def _merge_defaults(defaults: dict, loaded: dict) -> Config:
    from typing import cast

    merged = defaults.copy()
    for key, value in loaded.items():
        if key in merged and isinstance(merged[key], dict) and isinstance(value, dict):
            merged[key] = {**merged[key], **value}
        else:
            merged[key] = value
    return cast(Config, merged)


def _apply_env_overrides(config: Config) -> Config:
    overrides = {
        "OGHMA_DB_PATH": ("storage", "db_path"),
        "OGHMA_BACKUP_DIR": ("storage", "backup_dir"),
        "OGHMA_POLL_INTERVAL": ("daemon", "poll_interval"),
        "OGHMA_LOG_LEVEL": ("daemon", "log_level"),
        "OGHMA_LOG_FILE": ("daemon", "log_file"),
        "OGHMA_PID_FILE": ("daemon", "pid_file"),
        "OGHMA_EXPORT_DIR": ("export", "output_dir"),
        "OGHMA_EXPORT_FORMAT": ("export", "format"),
        "OGHMA_EXTRACTION_MODEL": ("extraction", "model"),
        "OGHMA_EMBEDDING_PROVIDER": ("embedding", "provider"),
        "OGHMA_EMBEDDING_MODEL": ("embedding", "model"),
        "OGHMA_EMBEDDING_DIMENSIONS": ("embedding", "dimensions"),
    }

    for env_var, (section, key) in overrides.items():
        value = os.environ.get(env_var)
        if value is not None:
            if key in [
                "poll_interval",
                "backup_retention_days",
                "dimensions",
                "batch_size",
                "max_retries",
            ]:
                config[section][key] = int(value)
            elif key in ["backup_enabled"]:
                config[section][key] = value.lower() in ("true", "1", "yes")
            elif key in ["confidence_threshold", "rate_limit_delay"]:
                config[section][key] = float(value)
            else:
                config[section][key] = value

    return config


def _expand_paths_inplace(config: Config) -> None:
    path_keys = [
        ("storage", "db_path"),
        ("storage", "backup_dir"),
        ("daemon", "log_file"),
        ("daemon", "pid_file"),
        ("export", "output_dir"),
    ]

    for section, key in path_keys:
        if section in config and key in config[section]:
            config[section][key] = expand_path(config[section][key])


def validate_config(config: Config) -> list[str]:
    errors: list[str] = []

    required_sections = ["storage", "daemon", "extraction", "embedding", "export"]
    for section in required_sections:
        if section not in config:
            errors.append(f"Missing required section: {section}")

    if "storage" in config:
        storage = config["storage"]
        if "db_path" not in storage or not storage["db_path"]:
            errors.append("storage.db_path is required")

    if "daemon" in config:
        daemon = config["daemon"]
        if "poll_interval" not in daemon or daemon["poll_interval"] <= 0:
            errors.append("daemon.poll_interval must be positive")
        if "log_level" not in daemon or daemon["log_level"] not in [
            "DEBUG",
            "INFO",
            "WARNING",
            "ERROR",
        ]:
            errors.append("daemon.log_level must be DEBUG, INFO, WARNING, or ERROR")

    if "extraction" in config:
        extraction = config["extraction"]
        if "model" not in extraction or not extraction["model"]:
            errors.append("extraction.model is required")
        if "categories" not in extraction or not extraction["categories"]:
            errors.append("extraction.categories must not be empty")

    if "embedding" in config:
        embedding = config["embedding"]
        if "provider" not in embedding or not embedding["provider"]:
            errors.append("embedding.provider is required")
        if "model" not in embedding or not embedding["model"]:
            errors.append("embedding.model is required")
        if "dimensions" not in embedding or embedding["dimensions"] <= 0:
            errors.append("embedding.dimensions must be positive")

    return errors


def get_db_path(config: Config | None = None) -> str:
    if config is None:
        config = load_config()
    return config["storage"]["db_path"]
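
The resolution order added here is: built-in DEFAULT_CONFIG, then values from ~/.oghma/config.yaml, then OGHMA_* environment overrides, with ~-prefixed path values expanded last. A minimal sketch of that behaviour, assuming the defaults above (note that load_config() writes a default config file as a side effect when none exists, via create_default_config):

    # Demonstrates override and path-expansion order in oghma/config.py.
    import os

    from oghma.config import get_db_path, load_config, validate_config

    os.environ["OGHMA_DB_PATH"] = "~/alt-oghma/oghma.db"  # string override, ~-expanded afterwards
    os.environ["OGHMA_POLL_INTERVAL"] = "60"              # coerced to int by _apply_env_overrides

    config = load_config()
    print(config["storage"]["db_path"])       # absolute path under the user's home directory
    print(config["daemon"]["poll_interval"])  # 60

    print(get_db_path(config))                # same value via the convenience helper

    for problem in validate_config(config):   # empty list when the merged config is well-formed
        print("config error:", problem)
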