oghma 0.0.1__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their public registries.
oghma/__init__.py CHANGED
@@ -1,3 +1 @@
- """Oghma Unified AI memory layer."""
-
- __version__ = "0.0.1"
+ __version__ = "0.3.0"
oghma/cli.py ADDED
@@ -0,0 +1,342 @@
+ import os
+ import signal
+ import time
+ from pathlib import Path
+
+ import click
+ from rich.console import Console
+ from rich.table import Table
+
+ from oghma import __version__
+ from oghma.config import (
+     create_default_config,
+     get_config_path,
+     load_config,
+     validate_config,
+ )
+ from oghma.daemon import Daemon, get_daemon_pid
+ from oghma.embedder import EmbedConfig, create_embedder
+ from oghma.exporter import Exporter, ExportOptions
+ from oghma.migration import EmbeddingMigration
+ from oghma.storage import Storage
+
+ console = Console()
+
+
+ @click.group()
+ @click.version_option(version=__version__, prog_name="oghma")
+ def cli() -> None:
+     pass
+
+
+ @cli.command()
+ def init() -> None:
+     config_path = get_config_path()
+
+     if config_path.exists():
+         console.print(f"[yellow]Config already exists at {config_path}[/yellow]")
+         if not click.confirm("Overwrite existing config?"):
+             console.print("[green]Init cancelled[/green]")
+             return
+
+     console.print("[blue]Creating Oghma configuration...[/blue]")
+     config = create_default_config()
+     console.print(f"[green]Config created at {config_path}[/green]")
+     console.print(f"[cyan]Database path: {config['storage']['db_path']}[/cyan]")
+     console.print("\n[yellow]Run 'oghma status' to verify setup[/yellow]")
+
+
+ @cli.command()
+ def status() -> None:
+     try:
+         config_path = get_config_path()
+         config = load_config()
+         db_path = config["storage"]["db_path"]
+         pid_file = config["daemon"]["pid_file"]
+
+         table = Table(title="Oghma Status", show_header=True, header_style="bold magenta")
+         table.add_column("Property", style="cyan")
+         table.add_column("Value", style="green")
+
+         table.add_row("Config Path", str(config_path))
+
+         pid = get_daemon_pid(pid_file)
+         if pid:
+             table.add_row("Daemon Status", f"[green]Running (PID: {pid})[/green]")
+         else:
+             table.add_row("Daemon Status", "[red]Stopped[/red]")
+
+         table.add_row("Database Path", db_path)
+
+         if Path(db_path).exists():
+             storage = Storage(db_path, config)
+             memory_count = storage.get_memory_count()
+             table.add_row("Memory Count", str(memory_count))
+
+             logs = storage.get_recent_extraction_logs(limit=1)
+             if logs:
+                 last_extraction = logs[0]["created_at"]
+                 table.add_row("Last Extraction", last_extraction)
+             else:
+                 table.add_row("Last Extraction", "Never")
+
+             table.add_row("Database Status", "[green]Exists[/green]")
+
+             from oghma.watcher import Watcher
+
+             watcher = Watcher(config, storage)
+             watched_files = watcher.discover_files()
+             table.add_row("Watched Files", str(len(watched_files)))
+         else:
+             table.add_row("Memory Count", "0")
+             table.add_row("Last Extraction", "Never")
+             table.add_row("Database Status", "[yellow]Not created yet[/yellow]")
+             table.add_row("Watched Files", "0")
+
+         console.print(table)
+
+         errors = validate_config(config)
+         if errors:
+             console.print("\n[red]Configuration errors:[/red]")
+             for error in errors:
+                 console.print(f" [red]- {error}[/red]")
+
+     except FileNotFoundError:
+         console.print("[red]Config not found. Run 'oghma init' first.[/red]")
+     except Exception as e:
+         console.print(f"[red]Error: {e}[/red]")
+
+
+ @cli.command()
+ @click.option("--foreground", "-f", is_flag=True, help="Run in foreground (don't daemonize)")
+ def start(foreground: bool) -> None:
+     try:
+         config = load_config()
+         pid_file = config["daemon"]["pid_file"]
+
+         pid = get_daemon_pid(pid_file)
+         if pid:
+             console.print(f"[red]Daemon already running (PID: {pid})[/red]")
+             console.print("Use 'oghma stop' to stop it first.")
+             raise SystemExit(1)
+
+         console.print("[blue]Starting Oghma daemon...[/blue]")
+
+         if not foreground:
+             try:
+                 pid = os.fork()
+                 if pid > 0:
+                     console.print(f"[green]Daemon started in background (PID: {pid})[/green]")
+                     return
+             except OSError as e:
+                 console.print(f"[yellow]Fork failed: {e}. Running in foreground.[/yellow]")
+
+         daemon = Daemon(config)
+         daemon.start()
+
+     except FileNotFoundError:
+         console.print("[red]Config not found. Run 'oghma init' first.[/red]")
+         raise SystemExit(1) from None
+     except Exception as e:
+         console.print(f"[red]Error starting daemon: {e}[/red]")
+         raise SystemExit(1) from None
+
+
+ @cli.command()
+ def stop() -> None:
+     try:
+         config = load_config()
+         pid_file = config["daemon"]["pid_file"]
+
+         pid = get_daemon_pid(pid_file)
+         if not pid:
+             console.print("[yellow]Daemon is not running[/yellow]")
+             return
+
+         console.print(f"[blue]Stopping daemon (PID: {pid})...[/blue]")
+
+         try:
+             os.kill(pid, signal.SIGTERM)
+         except ProcessLookupError:
+             console.print("[yellow]Daemon process not found. Cleaning up PID file.[/yellow]")
+             Path(pid_file).unlink(missing_ok=True)
+             return
+
+         for _ in range(10):
+             time.sleep(0.5)
+             if not get_daemon_pid(pid_file):
+                 console.print("[green]Daemon stopped successfully[/green]")
+                 return
+
+         console.print("[yellow]Daemon did not stop gracefully. Sending SIGKILL...[/yellow]")
+         try:
+             os.kill(pid, signal.SIGKILL)
+         except ProcessLookupError:
+             pass
+
+         Path(pid_file).unlink(missing_ok=True)
+         console.print("[green]Daemon force stopped[/green]")
+
+     except FileNotFoundError:
+         console.print("[red]Config not found. Run 'oghma init' first.[/red]")
+         raise SystemExit(1) from None
+     except Exception as e:
+         console.print(f"[red]Error stopping daemon: {e}[/red]")
+         raise SystemExit(1) from None
+
+
+ @cli.command()
+ @click.argument("query")
+ @click.option("--limit", "-n", default=10, help="Max results")
+ @click.option("--category", "-c", help="Filter by category")
+ @click.option(
+     "--mode",
+     type=click.Choice(["keyword", "vector", "hybrid"]),
+     default="keyword",
+     show_default=True,
+     help="Search strategy",
+ )
+ def search(query: str, limit: int, category: str | None, mode: str) -> None:
+     try:
+         config = load_config()
+         storage = Storage(config=config)
+         query_embedding: list[float] | None = None
+
+         if mode in {"vector", "hybrid"}:
+             embed_config = config.get("embedding", {})
+             embedder = create_embedder(EmbedConfig.from_dict(embed_config))
+             query_embedding = embedder.embed(query)
+
+         results = storage.search_memories_hybrid(
+             query=query,
+             query_embedding=query_embedding,
+             limit=limit,
+             category=category,
+             search_mode=mode,
+         )
+
+         if not results:
+             console.print(f"[yellow]No memories found matching: {query}[/yellow]")
+             return
+
+         console.print(f"[cyan]Found {len(results)} memories matching: {query}[/cyan]\n")
+
+         for idx, memory in enumerate(results, 1):
+             table = Table(show_header=False, box=None, padding=(0, 0))
+             table.add_column("", style="cyan")
+             table.add_column("")
+
+             table.add_row(f"[bold]#{idx}[/bold]", f"[dim]{memory['created_at']}[/dim]")
+             table.add_row("Category", f"[green]{memory['category']}[/green]")
+             table.add_row("Source", f"{memory['source_tool']} ({Path(memory['source_file']).name})")
+             table.add_row("Confidence", f"{memory['confidence']:.0%}")
+             table.add_row("Content", memory["content"])
+
+             console.print(table)
+             console.print()
+
+     except FileNotFoundError:
+         console.print("[red]Config not found. Run 'oghma init' first.[/red]")
+         raise SystemExit(1) from None
+     except Exception as e:
+         console.print(f"[red]Error searching memories: {e}[/red]")
+         raise SystemExit(1) from None
+
+
+ @cli.command("migrate-embeddings")
+ @click.option("--batch-size", default=100, show_default=True, help="Batch size")
+ @click.option("--dry-run", is_flag=True, help="Preview migration without writing embeddings")
+ def migrate_embeddings(batch_size: int, dry_run: bool) -> None:
+     try:
+         config = load_config()
+         storage = Storage(config=config)
+
+         done_before, total = storage.get_embedding_progress()
+         console.print(
+             f"[blue]Embedding progress before migration:[/blue] {done_before}/{total} memories"
+         )
+
+         if done_before == total and total > 0:
+             console.print("[green]All active memories already have embeddings.[/green]")
+             return
+
+         embed_config = config.get("embedding", {})
+         embedder = create_embedder(EmbedConfig.from_dict(embed_config, batch_size=batch_size))
+
+         migration = EmbeddingMigration(
+             storage=storage,
+             embedder=embedder,
+             batch_size=batch_size,
+         )
+         result = migration.run(dry_run=dry_run)
+
+         done_after, total_after = storage.get_embedding_progress()
+         if dry_run:
+             console.print(
+                 f"[yellow]Dry run complete.[/yellow] "
+                 f"Would process {result.processed} memories."
+             )
+             return
+
+         console.print(
+             "[green]Migration complete.[/green] "
+             f"Processed={result.processed}, migrated={result.migrated}, "
+             f"failed={result.failed}, skipped={result.skipped}"
+         )
+         console.print(
+             f"[cyan]Embedding progress after migration:[/cyan] {done_after}/{total_after} memories"
+         )
+     except FileNotFoundError:
+         console.print("[red]Config not found. Run 'oghma init' first.[/red]")
+         raise SystemExit(1) from None
+     except Exception as e:
+         console.print(f"[red]Error migrating embeddings: {e}[/red]")
+         raise SystemExit(1) from None
+
+
+ @cli.command()
+ @click.option("--output", "-o", type=click.Path(), help="Output directory")
+ @click.option("--format", "-f", type=click.Choice(["markdown", "json"]), default="markdown")
+ @click.option(
+     "--group-by", "-g", type=click.Choice(["category", "date", "source"]), default="category"
+ )
+ @click.option("--category", "-c", help="Export only this category")
+ def export(output: str | None, format: str, group_by: str, category: str | None) -> None:
+     """Export memories to files."""
+     try:
+         config = load_config()
+         storage = Storage(config=config)
+
+         output_dir = Path(output or config["export"]["output_dir"])
+
+         options = ExportOptions(output_dir=output_dir, format=format, group_by=group_by)
+         exporter = Exporter(storage, options)
+
+         if category:
+             console.print(f"[blue]Exporting memories for category: {category}[/blue]")
+             file_path = exporter.export_category(category)
+             console.print(f"[green]Exported to: {file_path}[/green]")
+         else:
+             console.print(f"[blue]Exporting memories (grouped by {group_by})...[/blue]")
+             files = exporter.export()
+
+             if not files:
+                 console.print("[yellow]No memories found to export[/yellow]")
+                 return
+
+             for file_path in files:
+                 console.print(f"[green]Exported to: {file_path}[/green]")
+
+     except ValueError as e:
+         console.print(f"[red]Error: {e}[/red]")
+         raise SystemExit(1) from None
+     except FileNotFoundError:
+         console.print("[red]Config not found. Run 'oghma init' first.[/red]")
+         raise SystemExit(1) from None
+     except Exception as e:
+         console.print(f"[red]Error exporting memories: {e}[/red]")
+         raise SystemExit(1) from None
+
+
+ def main() -> None:
+     cli()
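
The cli.py file added above introduces a `search` command with keyword, vector, and hybrid modes (vector and hybrid embed the query before calling storage.search_memories_hybrid), plus a `migrate-embeddings` command for backfilling embeddings. A minimal sketch of driving the new search command in-process with click's test runner; the snippet is illustrative only, assumes an existing Oghma config and database, and is not part of the package:

from click.testing import CliRunner

from oghma.cli import cli

# Equivalent to running: oghma search "pytest fixtures" --mode hybrid --limit 5
# Hybrid mode embeds the query, then delegates to the hybrid storage search.
runner = CliRunner()
result = runner.invoke(cli, ["search", "pytest fixtures", "--mode", "hybrid", "--limit", "5"])
print(result.output)
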
oghma/config.py ADDED
@@ -0,0 +1,262 @@
+ import os
+ from pathlib import Path
+ from typing import TypedDict
+
+ import yaml
+
+
+ class StorageConfig(TypedDict):
+     db_path: str
+     backup_enabled: bool
+     backup_dir: str
+     backup_retention_days: int
+
+
+ class DaemonConfig(TypedDict):
+     poll_interval: int
+     log_level: str
+     log_file: str
+     pid_file: str
+     min_messages: int
+
+
+ class ExtractionConfig(TypedDict):
+     model: str
+     max_content_chars: int
+     categories: list[str]
+     confidence_threshold: float
+
+
+ class ExportConfig(TypedDict):
+     output_dir: str
+     format: str
+
+
+ class EmbeddingConfig(TypedDict):
+     provider: str
+     model: str
+     dimensions: int
+     batch_size: int
+     rate_limit_delay: float
+     max_retries: int
+
+
+ class ToolConfig(TypedDict, total=False):
+     enabled: bool
+     paths: list[str]
+
+
+ class ToolsConfig(TypedDict, total=False):
+     claude_code: ToolConfig
+     codex: ToolConfig
+     openclaw: ToolConfig
+     opencode: ToolConfig
+     cursor: ToolConfig
+
+
+ class Config(TypedDict):
+     storage: StorageConfig
+     daemon: DaemonConfig
+     extraction: ExtractionConfig
+     embedding: EmbeddingConfig
+     export: ExportConfig
+     tools: ToolsConfig
+
+
+ DEFAULT_CONFIG: Config = {
+     "storage": {
+         "db_path": "~/.oghma/oghma.db",
+         "backup_enabled": True,
+         "backup_dir": "~/.oghma/backups",
+         "backup_retention_days": 30,
+     },
+     "daemon": {
+         "poll_interval": 300,
+         "log_level": "INFO",
+         "log_file": "~/.oghma/oghma.log",
+         "pid_file": "~/.oghma/oghma.pid",
+         "min_messages": 6,
+     },
+     "extraction": {
+         "model": "gpt-4o-mini",
+         "max_content_chars": 4000,
+         "categories": ["learning", "preference", "project_context", "gotcha", "workflow"],
+         "confidence_threshold": 0.5,
+     },
+     "embedding": {
+         "provider": "openai",
+         "model": "text-embedding-3-small",
+         "dimensions": 1536,
+         "batch_size": 100,
+         "rate_limit_delay": 0.1,
+         "max_retries": 3,
+     },
+     "export": {
+         "output_dir": "~/.oghma/export",
+         "format": "markdown",
+     },
+     "tools": {
+         "claude_code": {"enabled": True, "paths": ["~/.claude/projects/-Users-*/*.jsonl"]},
+         "codex": {"enabled": True, "paths": ["~/.codex/sessions/**/rollout-*.jsonl"]},
+         "openclaw": {"enabled": True, "paths": ["~/.openclaw/agents/*/sessions/*.jsonl"]},
+         "opencode": {"enabled": True, "paths": ["~/.local/share/opencode/storage/message/ses_*"]},
+         "cursor": {"enabled": False, "paths": []},
+     },
+ }
+
+
+ def expand_path(path: str) -> str:
+     return str(Path(path).expanduser())
+
+
+ def get_config_path() -> Path:
+     return Path.home() / ".oghma" / "config.yaml"
+
+
+ def load_config() -> Config:
+     config_path = get_config_path()
+
+     if not config_path.exists():
+         return create_default_config()
+
+     with open(config_path) as f:
+         loaded = yaml.safe_load(f)
+
+     if not loaded:
+         return create_default_config()
+
+     from typing import cast
+
+     merged = _merge_defaults(cast(dict, DEFAULT_CONFIG), loaded)
+     merged = _apply_env_overrides(merged)
+     _expand_paths_inplace(merged)
+
+     return merged
+
+
+ def create_default_config() -> Config:
+     from typing import cast
+
+     config_path = get_config_path()
+     config_path.parent.mkdir(parents=True, exist_ok=True)
+
+     config = _apply_env_overrides(cast(Config, DEFAULT_CONFIG.copy()))
+     _expand_paths_inplace(config)
+
+     with open(config_path, "w") as f:
+         yaml.dump(DEFAULT_CONFIG, f, default_flow_style=False)
+
+     return config
+
+
+ def _merge_defaults(defaults: dict, loaded: dict) -> Config:
+     from typing import cast
+
+     merged = defaults.copy()
+     for key, value in loaded.items():
+         if key in merged and isinstance(merged[key], dict) and isinstance(value, dict):
+             merged[key] = {**merged[key], **value}
+         else:
+             merged[key] = value
+     return cast(Config, merged)
+
+
+ def _apply_env_overrides(config: Config) -> Config:
+     overrides = {
+         "OGHMA_DB_PATH": ("storage", "db_path"),
+         "OGHMA_BACKUP_DIR": ("storage", "backup_dir"),
+         "OGHMA_POLL_INTERVAL": ("daemon", "poll_interval"),
+         "OGHMA_LOG_LEVEL": ("daemon", "log_level"),
+         "OGHMA_LOG_FILE": ("daemon", "log_file"),
+         "OGHMA_PID_FILE": ("daemon", "pid_file"),
+         "OGHMA_EXPORT_DIR": ("export", "output_dir"),
+         "OGHMA_EXPORT_FORMAT": ("export", "format"),
+         "OGHMA_EXTRACTION_MODEL": ("extraction", "model"),
+         "OGHMA_EMBEDDING_PROVIDER": ("embedding", "provider"),
+         "OGHMA_EMBEDDING_MODEL": ("embedding", "model"),
+         "OGHMA_EMBEDDING_DIMENSIONS": ("embedding", "dimensions"),
+     }
+
+     for env_var, (section, key) in overrides.items():
+         value = os.environ.get(env_var)
+         if value is not None:
+             if key in [
+                 "poll_interval",
+                 "backup_retention_days",
+                 "dimensions",
+                 "batch_size",
+                 "max_retries",
+             ]:
+                 config[section][key] = int(value)
+             elif key in ["backup_enabled"]:
+                 config[section][key] = value.lower() in ("true", "1", "yes")
+             elif key in ["confidence_threshold", "rate_limit_delay"]:
+                 config[section][key] = float(value)
+             else:
+                 config[section][key] = value
+
+     return config
+
+
+ def _expand_paths_inplace(config: Config) -> None:
+     path_keys = [
+         ("storage", "db_path"),
+         ("storage", "backup_dir"),
+         ("daemon", "log_file"),
+         ("daemon", "pid_file"),
+         ("export", "output_dir"),
+     ]
+
+     for section, key in path_keys:
+         if section in config and key in config[section]:
+             config[section][key] = expand_path(config[section][key])
+
+
+ def validate_config(config: Config) -> list[str]:
+     errors: list[str] = []
+
+     required_sections = ["storage", "daemon", "extraction", "embedding", "export"]
+     for section in required_sections:
+         if section not in config:
+             errors.append(f"Missing required section: {section}")
+
+     if "storage" in config:
+         storage = config["storage"]
+         if "db_path" not in storage or not storage["db_path"]:
+             errors.append("storage.db_path is required")
+
+     if "daemon" in config:
+         daemon = config["daemon"]
+         if "poll_interval" not in daemon or daemon["poll_interval"] <= 0:
+             errors.append("daemon.poll_interval must be positive")
+         if "log_level" not in daemon or daemon["log_level"] not in [
+             "DEBUG",
+             "INFO",
+             "WARNING",
+             "ERROR",
+         ]:
+             errors.append("daemon.log_level must be DEBUG, INFO, WARNING, or ERROR")
+
+     if "extraction" in config:
+         extraction = config["extraction"]
+         if "model" not in extraction or not extraction["model"]:
+             errors.append("extraction.model is required")
+         if "categories" not in extraction or not extraction["categories"]:
+             errors.append("extraction.categories must not be empty")
+
+     if "embedding" in config:
+         embedding = config["embedding"]
+         if "provider" not in embedding or not embedding["provider"]:
+             errors.append("embedding.provider is required")
+         if "model" not in embedding or not embedding["model"]:
+             errors.append("embedding.model is required")
+         if "dimensions" not in embedding or embedding["dimensions"] <= 0:
+             errors.append("embedding.dimensions must be positive")
+
+     return errors
+
+
+ def get_db_path(config: Config | None = None) -> str:
+     if config is None:
+         config = load_config()
+     return config["storage"]["db_path"]
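
In config.py above, load_config merges the user's ~/.oghma/config.yaml over DEFAULT_CONFIG, applies OGHMA_* environment-variable overrides (coercing integer, float, and boolean keys), and expands ~ in path values. A minimal sketch of that override behaviour; it assumes a writable home directory (load_config writes a default ~/.oghma/config.yaml when none exists) and is illustrative, not part of the package:

import os

from oghma.config import load_config

# "dimensions" is in the integer-coercion list, so the override string is cast to int;
# db_path goes through expand_path(), so "~" is expanded to the home directory.
os.environ["OGHMA_EMBEDDING_DIMENSIONS"] = "3072"
os.environ["OGHMA_DB_PATH"] = "~/alt/oghma.db"

config = load_config()
assert config["embedding"]["dimensions"] == 3072
print(config["storage"]["db_path"])  # absolute path, e.g. /home/<user>/alt/oghma.db
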