oghma 0.0.1__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
oghma/mcp_server.py ADDED
@@ -0,0 +1,112 @@
1
+ from collections import Counter
2
+ from contextlib import asynccontextmanager
3
+ from typing import Any
4
+
5
+ from mcp.server.fastmcp import FastMCP
6
+
7
+ from oghma.config import load_config
8
+ from oghma.embedder import EmbedConfig, create_embedder
9
+ from oghma.storage import MemoryRecord, Storage
10
+
11
+
12
@asynccontextmanager
async def lifespan(_: FastMCP):
    """Build the state that the tool functions read for the server's lifetime.

    Loads the configuration with ``load_config()``, opens a ``Storage`` in
    read-only mode, and yields both in a dict under the keys ``"storage"``
    and ``"config"``. The server argument itself is unused.
    """
    loaded_config = load_config()
    shared_state = {
        "storage": Storage(config=loaded_config, read_only=True),
        "config": loaded_config,
    }
    yield shared_state


# Server instance the tool functions below register themselves on.
mcp = FastMCP("Oghma Memory", lifespan=lifespan)
20
+
21
+
22
def _get_storage() -> Storage:
    """Return the ``Storage`` that ``lifespan`` opened for this server.

    ``FastMCP.get_context()`` returns a ``Context`` object, not the dict that
    the lifespan yielded; that dict is exposed on the context as
    ``request_context.lifespan_context``. Only valid while a request is
    being handled.
    """
    lifespan_state = mcp.get_context().request_context.lifespan_context
    return lifespan_state["storage"]
24
+
25
+
26
def _get_config() -> dict[str, Any]:
    """Return the configuration that ``lifespan`` loaded, or ``{}`` if absent.

    ``FastMCP.get_context()`` returns a ``Context`` object, not the dict that
    the lifespan yielded; that dict is exposed on the context as
    ``request_context.lifespan_context``. Only valid while a request is
    being handled.
    """
    lifespan_state = mcp.get_context().request_context.lifespan_context
    return lifespan_state.get("config", {})
28
+
29
+
30
@mcp.tool()
def oghma_search(
    query: str,
    category: str | None = None,
    source_tool: str | None = None,
    limit: int = 10,
    search_mode: str = "keyword",
) -> list[MemoryRecord]:
    """Search memories by keyword, vector, or hybrid mode."""
    # The docstring above is kept short on purpose: FastMCP publishes it as
    # the tool description.
    #
    # Raises ValueError when ``limit`` is below 1 or ``search_mode`` is not
    # one of "keyword", "vector", "hybrid".
    import logging  # local import so the module's import block stays untouched

    if limit < 1:
        raise ValueError("limit must be >= 1")
    if search_mode not in {"keyword", "vector", "hybrid"}:
        raise ValueError("search_mode must be one of: keyword, vector, hybrid")

    storage = _get_storage()
    if search_mode == "keyword":
        return storage.search_memories(
            query=query,
            category=category,
            source_tool=source_tool,
            limit=limit,
        )

    query_embedding: list[float] | None = None
    try:
        embed_config = _get_config().get("embedding", {})
        embedder = create_embedder(EmbedConfig.from_dict(embed_config))
        query_embedding = embedder.embed(query)
    except Exception:
        # Best effort: an unusable embedder must not fail the tool call, but
        # the reason is logged instead of being dropped silently.
        logging.getLogger(__name__).warning(
            "Could not embed the search query (search_mode=%s)",
            search_mode,
            exc_info=True,
        )
        if search_mode == "vector":
            # Vector search is impossible without a query embedding.
            return []
        # Hybrid mode continues with query_embedding=None.

    return storage.search_memories_hybrid(
        query=query,
        query_embedding=query_embedding,
        category=category,
        source_tool=source_tool,
        limit=limit,
        search_mode=search_mode,
    )
70
+
71
+
72
@mcp.tool()
def oghma_get(memory_id: int) -> MemoryRecord | None:
    """Get a memory by ID."""
    # Pure delegation: whatever the storage lookup returns is passed through.
    record = _get_storage().get_memory_by_id(memory_id)
    return record
77
+
78
+
79
@mcp.tool()
def oghma_stats() -> dict[str, Any]:
    """Get memory database statistics."""
    storage = _get_storage()
    active_memories = storage.get_all_memories(status="active")
    latest_logs = storage.get_recent_extraction_logs(limit=1)

    # The total comes from the storage counter; the two breakdowns are
    # tallied over the memories fetched with status="active".
    total = storage.get_memory_count()
    by_category = Counter(entry["category"] for entry in active_memories)
    by_source = Counter(entry["source_tool"] for entry in active_memories)

    if latest_logs:
        last_extraction = latest_logs[0]["created_at"]
    else:
        last_extraction = None

    return {
        "total_memories": total,
        "memories_by_category": dict(by_category),
        "memories_by_source": dict(by_source),
        "last_extraction_time": last_extraction,
    }
92
+
93
+
94
@mcp.tool()
def oghma_categories() -> list[dict[str, Any]]:
    """List categories with memory counts."""
    active_memories = _get_storage().get_all_memories(status="active")
    tally = Counter(entry["category"] for entry in active_memories)

    # Highest count first; ties are ordered by category name.
    ranked = sorted(tally.items(), key=lambda pair: (-pair[1], pair[0]))
    return [{"category": name, "count": total} for name, total in ranked]
105
+
106
+
107
def main() -> None:
    """Start the MCP server by calling ``mcp.run()`` with its defaults."""
    mcp.run()


# Allow running the module directly as a script.
if __name__ == "__main__":
    main()
oghma/migration.py ADDED
@@ -0,0 +1,63 @@
1
+ from dataclasses import dataclass
2
+
3
+ from openai import APIError
4
+
5
+ from oghma.embedder import Embedder
6
+ from oghma.storage import Storage
7
+
8
+
9
@dataclass
class MigrationResult:
    """Counters reported by ``EmbeddingMigration.run``."""

    # Memories fetched for migration.
    processed: int
    # Memories whose embedding was stored successfully.
    migrated: int
    # Memories that were neither migrated nor failed.
    skipped: int
    # Memories whose embedding could not be produced or stored.
    failed: int
15
+
16
+
17
class EmbeddingMigration:
    """Backfill embeddings for stored memories that do not have one yet."""

    def __init__(
        self,
        storage: Storage,
        embedder: Embedder,
        batch_size: int = 100,
    ):
        """Remember the storage, the embedder and the per-call batch size."""
        self.storage = storage
        self.embedder = embedder
        self.batch_size = batch_size

    def run(self, dry_run: bool = False) -> MigrationResult:
        """Embed every memory that lacks an embedding, visiting each one once.

        Args:
            dry_run: When true, only count the memories that would be
                migrated; nothing is embedded or written.

        Returns:
            A ``MigrationResult`` where ``processed`` is the number of
            distinct memories visited, ``migrated`` the number of embeddings
            stored, ``failed`` the number that could not be embedded or
            stored, and ``skipped`` the remainder (everything, in a dry run).
        """
        processed = 0
        migrated = 0
        failed = 0

        # IDs already handled in this run. The storage query has no offset,
        # so a memory that still lacks an embedding (dry run, embedding
        # error, failed upsert) would otherwise come back on every pass and
        # the loop would never end.
        seen_ids: set = set()

        while True:
            # Ask for enough rows that at least ``batch_size`` unseen ones
            # are returned whenever that many exist, whatever order the
            # storage uses.
            fetched = self.storage.get_memories_without_embeddings(
                limit=self.batch_size + len(seen_ids)
            )
            batch = [memory for memory in fetched if memory["id"] not in seen_ids]
            batch = batch[: self.batch_size]
            if not batch:
                break

            processed += len(batch)
            seen_ids.update(memory["id"] for memory in batch)

            if dry_run:
                continue

            contents = [memory["content"] for memory in batch]
            try:
                vectors = list(self.embedder.embed_batch(contents))
            except (APIError, RuntimeError, ValueError):
                # The whole batch is reported as failed; it is not retried
                # within this run.
                failed += len(batch)
                continue

            for memory, vector in zip(batch, vectors, strict=False):
                if self.storage.upsert_memory_embedding(memory["id"], vector):
                    migrated += 1
                else:
                    failed += 1

            # Memories the embedder returned no vector for count as failed
            # instead of disappearing from the totals.
            failed += max(len(batch) - len(vectors), 0)

        skipped = max(processed - migrated - failed, 0)
        return MigrationResult(
            processed=processed,
            migrated=migrated,
            skipped=skipped,
            failed=failed,
        )
@@ -0,0 +1,26 @@
1
+ from pathlib import Path
2
+
3
+ from oghma.parsers.base import BaseParser
4
+ from oghma.parsers.base import Message as _Message
5
+ from oghma.parsers.claude_code import ClaudeCodeParser
6
+ from oghma.parsers.codex import CodexParser
7
+ from oghma.parsers.openclaw import OpenClawParser
8
+ from oghma.parsers.opencode import OpenCodeParser
9
+
10
# Names this package exposes.
__all__ = ["BaseParser", "Message", "get_parser_for_file", "PARSERS"]

# Publish the dataclass imported above as ``_Message`` under its public name.
Message = _Message

# One instance per supported transcript format; ``get_parser_for_file``
# tries them in this order.
PARSERS: list[BaseParser] = [
    ClaudeCodeParser(),
    CodexParser(),
    OpenClawParser(),
    OpenCodeParser(),
]
20
+
21
+
22
def get_parser_for_file(file_path: Path) -> BaseParser | None:
    """Return the first entry of ``PARSERS`` that accepts *file_path*.

    Parsers are asked in list order through ``can_parse``; ``None`` is
    returned when none of them accepts the path.
    """
    return next(
        (candidate for candidate in PARSERS if candidate.can_parse(file_path)),
        None,
    )
oghma/parsers/base.py ADDED
@@ -0,0 +1,24 @@
1
+ from abc import ABC, abstractmethod
2
+ from dataclasses import dataclass
3
+ from pathlib import Path
4
+ from typing import TYPE_CHECKING
5
+
6
+ if TYPE_CHECKING:
7
+ pass
8
+
9
+
10
+ @dataclass
11
+ class Message:
12
+ role: str
13
+ content: str
14
+ timestamp: str | None = None
15
+
16
+
17
class BaseParser(ABC):
    """Interface every transcript parser implements."""

    @abstractmethod
    def parse(self, file_path: Path) -> list[Message]:
        """Return the messages found at *file_path*."""

    @abstractmethod
    def can_parse(self, file_path: Path) -> bool:
        """Return whether this parser handles *file_path*."""
@@ -0,0 +1,62 @@
1
+ import json
2
+ from pathlib import Path
3
+
4
+ from oghma.parsers.base import BaseParser, Message
5
+
6
+
7
class ClaudeCodeParser(BaseParser):
    """Parser for JSONL transcripts found under ``.claude/projects/``."""

    def can_parse(self, file_path: Path) -> bool:
        """Return True for ``*.jsonl`` files whose path contains the marker.

        NOTE(review): the ``-Users-`` part of the marker presumably only
        matches project directories created on macOS - confirm whether
        other home-directory layouts should be accepted too.
        """
        if not file_path.name.endswith(".jsonl"):
            return False
        return ".claude/projects/-Users-" in str(file_path)

    def parse(self, file_path: Path) -> list[Message]:
        """Read *file_path* line by line and return its user/assistant turns.

        Blank lines, lines that are not valid JSON, lines whose JSON value is
        not an object, and entries without a role or without text are
        skipped. Content is truncated to 3000 characters. An unreadable or
        non-UTF-8 file yields an empty list.
        """
        messages: list[Message] = []

        try:
            with open(file_path, encoding="utf-8") as f:
                for line in f:
                    line = line.strip()
                    if not line:
                        continue

                    try:
                        data = json.loads(line)
                        # A valid JSON line may still be a list, string or
                        # number; only objects carry a message.
                        if not isinstance(data, dict):
                            continue
                        role = self._extract_role(data)
                        content = self._extract_content(data)

                        if role and content:
                            messages.append(Message(role=role, content=content[:3000]))
                    except (json.JSONDecodeError, KeyError, TypeError):
                        continue
        except (OSError, UnicodeDecodeError):
            return []

        return messages

    def _extract_role(self, data: dict) -> str | None:
        """Return "user" or "assistant" from the entry's ``type``, else None."""
        msg_type = data.get("type")
        if msg_type in ("user", "assistant"):
            return msg_type
        return None

    def _extract_content(self, data: dict) -> str:
        """Return the entry's text, or "" when it has none.

        String content is returned as is; list content is reduced to its
        ``type == "text"`` blocks joined with newlines; any other non-null
        value is converted with ``str``.
        """
        message = data.get("message")
        if not isinstance(message, dict):
            # Missing, null or malformed message: nothing to extract.
            return ""

        content = message.get("content", "")
        if content is None:
            # Avoid turning a JSON null into the literal text "None".
            return ""

        if isinstance(content, str):
            return content

        if isinstance(content, list):
            parts: list[str] = []
            for block in content:
                if isinstance(block, dict) and block.get("type") == "text":
                    text = block.get("text", "")
                    if text and isinstance(text, str):
                        parts.append(text)
            return "\n".join(parts)

        return str(content)
oghma/parsers/codex.py ADDED
@@ -0,0 +1,84 @@
1
+ import json
2
+ from pathlib import Path
3
+
4
+ from oghma.parsers.base import BaseParser, Message
5
+
6
+
7
class CodexParser(BaseParser):
    """Parser for ``rollout-*.jsonl`` transcripts under ``.codex/sessions/``."""

    def can_parse(self, file_path: Path) -> bool:
        """Return True for ``*.jsonl`` rollout files under ``.codex/sessions/``."""
        if not file_path.name.endswith(".jsonl"):
            return False
        path_str = str(file_path)
        return ".codex/sessions/" in path_str and "rollout-" in file_path.name

    def parse(self, file_path: Path) -> list[Message]:
        """Read *file_path* line by line and return its messages.

        Blank lines, lines that are not valid JSON, lines whose JSON value is
        not an object, and entries without a role or without text are
        skipped. Content is truncated to 3000 characters. An unreadable or
        non-UTF-8 file yields an empty list.
        """
        messages: list[Message] = []

        try:
            with open(file_path, encoding="utf-8") as f:
                for line in f:
                    line = line.strip()
                    if not line:
                        continue

                    try:
                        data = json.loads(line)
                        # A valid JSON line may still be a list, string or
                        # number; only objects carry a payload.
                        if not isinstance(data, dict):
                            continue
                        role = self._extract_role(data)
                        content = self._extract_content(data)

                        if role and content:
                            messages.append(Message(role=role, content=content[:3000]))
                    except (json.JSONDecodeError, KeyError, TypeError):
                        continue
        except (OSError, UnicodeDecodeError):
            return []

        return messages

    def _extract_role(self, data: dict) -> str | None:
        """Return the role of an entry, or None when it is not a message."""
        msg_type = data.get("type")
        # Support both old format (item) and new format (response_item, event_msg)
        if msg_type not in ("item", "response_item", "event_msg"):
            return None

        payload = data.get("payload")
        if not isinstance(payload, dict):
            return None

        # New format: role directly in payload
        if "role" in payload:
            role = payload.get("role")
            # Map developer/assistant to assistant, user to user
            if role in ("developer", "assistant"):
                return "assistant"
            if role == "user":
                return "user"
            return None

        # Old format: nested in payload.item
        item = payload.get("item")
        if not isinstance(item, dict):
            return None
        return item.get("role")

    def _extract_content(self, data: dict) -> str:
        """Return the entry's text, or "" when it has none.

        String content is returned as is; list content is reduced to its
        ``input_text`` / ``output_text`` / ``text`` blocks joined with
        newlines; any other non-null value is converted with ``str``.
        """
        payload = data.get("payload")
        if not isinstance(payload, dict):
            return ""

        # New format: content directly in payload
        if "content" in payload:
            content = payload.get("content", "")
        else:
            # Old format: nested in payload.item
            item = payload.get("item")
            content = item.get("content", "") if isinstance(item, dict) else ""

        if content is None:
            # Avoid turning a JSON null into the literal text "None".
            return ""

        if isinstance(content, str):
            return content

        if isinstance(content, list):
            parts: list[str] = []
            for block in content:
                if not isinstance(block, dict):
                    continue
                text = block.get("text", "")
                if (
                    text
                    and isinstance(text, str)
                    and block.get("type") in ("input_text", "output_text", "text")
                ):
                    parts.append(text)
            return "\n".join(parts)

        return str(content)
@@ -0,0 +1,64 @@
1
+ import json
2
+ from pathlib import Path
3
+
4
+ from oghma.parsers.base import BaseParser, Message
5
+
6
+
7
class OpenClawParser(BaseParser):
    """Parser for JSONL session transcripts under ``.openclaw/agents/``."""

    def can_parse(self, file_path: Path) -> bool:
        """Return True for ``*.jsonl`` files in an OpenClaw agent's sessions."""
        if not file_path.name.endswith(".jsonl"):
            return False
        path_str = str(file_path)
        return ".openclaw/agents/" in path_str and "/sessions/" in path_str

    def parse(self, file_path: Path) -> list[Message]:
        """Read *file_path* line by line and return its user/assistant turns.

        Blank lines, lines that are not valid JSON, lines whose JSON value is
        not an object, and entries without a role or without text are
        skipped. Content is truncated to 3000 characters. An unreadable or
        non-UTF-8 file yields an empty list.
        """
        messages: list[Message] = []

        try:
            with open(file_path, encoding="utf-8") as f:
                for line in f:
                    line = line.strip()
                    if not line:
                        continue

                    try:
                        data = json.loads(line)
                        # A valid JSON line may still be a list, string or
                        # number; only objects carry a message.
                        if not isinstance(data, dict):
                            continue
                        role = self._extract_role(data)
                        content = self._extract_content(data)

                        if role and content:
                            messages.append(Message(role=role, content=content[:3000]))
                    except (json.JSONDecodeError, KeyError, TypeError):
                        continue
        except (OSError, UnicodeDecodeError):
            return []

        return messages

    def _extract_role(self, data: dict) -> str | None:
        """Return "user"/"assistant" for ``type == "message"`` entries, else None."""
        if data.get("type") != "message":
            return None

        message = data.get("message")
        if not isinstance(message, dict):
            return None

        role = message.get("role")
        if role in ("user", "assistant"):
            return role
        return None

    def _extract_content(self, data: dict) -> str:
        """Return the entry's text, or "" when it has none.

        String content is returned as is; list content is reduced to the
        ``text`` values of its dict items joined with newlines; any other
        non-null value is converted with ``str``.
        """
        message = data.get("message")
        if not isinstance(message, dict):
            # Missing, null or malformed message: nothing to extract.
            return ""

        content = message.get("content", "")
        if content is None:
            # Avoid turning a JSON null into the literal text "None".
            return ""

        if isinstance(content, str):
            return content

        if isinstance(content, list):
            parts: list[str] = []
            for item in content:
                if isinstance(item, dict) and "text" in item:
                    text = item.get("text", "")
                    if text and isinstance(text, str):
                        parts.append(text)
            return "\n".join(parts)

        return str(content)
@@ -0,0 +1,90 @@
1
+ import json
2
+ from pathlib import Path
3
+
4
+ from oghma.parsers.base import BaseParser, Message
5
+
6
+
7
class OpenCodeParser(BaseParser):
    """Parser for OpenCode session directories of ``msg_*.json`` files."""

    def can_parse(self, file_path: Path) -> bool:
        """Return True for directories under OpenCode's ``message/ses_`` storage."""
        if not file_path.is_dir():
            return False
        path_str = str(file_path)
        return ".local/share/opencode/storage/message/ses_" in path_str

    def parse(self, file_path: Path) -> list[Message]:
        """Return the user/assistant messages stored in directory *file_path*.

        Message files are read in sorted name order. Text comes from the
        matching part files when there are any, otherwise from the message's
        own ``content``. Unreadable, non-UTF-8, invalid or non-object JSON
        files and messages without text are skipped. Content is truncated to
        3000 characters.
        """
        messages: list[Message] = []

        message_files = sorted(file_path.glob("msg_*.json"))
        # Sorted like the message files so that the parts of one message are
        # joined in a stable order instead of filesystem order.
        part_files = sorted(file_path.glob("part/msg_*/prt_*.json"))

        parts_map = self._build_parts_map(part_files)

        for msg_file in message_files:
            data = self._load_json_object(msg_file)
            if data is None:
                continue

            role = self._extract_role(data)
            if not role:
                continue

            content = "\n".join(self._get_message_content(data, parts_map))
            if content:
                messages.append(Message(role=role, content=content[:3000]))

        return messages

    def _load_json_object(self, path: Path) -> dict | None:
        """Return the JSON object stored in *path*, or None if unusable."""
        try:
            with open(path, encoding="utf-8") as f:
                data = json.load(f)
        except (OSError, UnicodeDecodeError, json.JSONDecodeError):
            return None
        return data if isinstance(data, dict) else None

    def _build_parts_map(self, part_files: list[Path]) -> dict[str, list[str]]:
        """Group the text of *part_files* by the ``message_id`` they carry.

        A message ID gets an entry as soon as one of its part files is
        readable, even when that part has no text.
        """
        parts_map: dict[str, list[str]] = {}

        for part_file in part_files:
            data = self._load_json_object(part_file)
            if data is None:
                continue

            msg_id = data.get("message_id")
            # Only non-empty string IDs can be keys of the map.
            if not msg_id or not isinstance(msg_id, str):
                continue

            texts = parts_map.setdefault(msg_id, [])
            text = data.get("text", "")
            if text and isinstance(text, str):
                texts.append(text)

        return parts_map

    def _extract_role(self, data: dict) -> str | None:
        """Return the message's role if it is "user" or "assistant", else None."""
        role = data.get("role")
        if role in ("user", "assistant"):
            return role
        return None

    def _get_message_content(self, data: dict, parts_map: dict[str, list[str]]) -> list[str]:
        """Return the text pieces of a message, or [] when it has none.

        Parts recorded for the message's ``id`` win over its inline
        ``content``. Inline string content becomes a one-element list; list
        content is reduced to the ``text`` values of its dict items; any
        other truthy value is converted with ``str``.
        """
        msg_id = data.get("id")
        if isinstance(msg_id, str) and msg_id in parts_map:
            return parts_map[msg_id]

        content = data.get("content", "")
        if isinstance(content, str):
            # An empty string is "no content", not a message with empty text.
            return [content] if content else []

        if isinstance(content, list):
            parts: list[str] = []
            for item in content:
                if isinstance(item, dict) and "text" in item:
                    text = item.get("text", "")
                    if text and isinstance(text, str):
                        parts.append(text)
            return parts

        if content:
            return [str(content)]

        return []