oghma 0.0.1__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- oghma/__init__.py +1 -3
- oghma/cli.py +342 -0
- oghma/config.py +262 -0
- oghma/daemon.py +198 -0
- oghma/embedder.py +107 -0
- oghma/exporter.py +177 -0
- oghma/extractor.py +180 -0
- oghma/mcp_server.py +112 -0
- oghma/migration.py +63 -0
- oghma/parsers/__init__.py +26 -0
- oghma/parsers/base.py +24 -0
- oghma/parsers/claude_code.py +62 -0
- oghma/parsers/codex.py +84 -0
- oghma/parsers/openclaw.py +64 -0
- oghma/parsers/opencode.py +90 -0
- oghma/storage.py +753 -0
- oghma/watcher.py +97 -0
- oghma-0.3.0.dist-info/METADATA +26 -0
- oghma-0.3.0.dist-info/RECORD +22 -0
- {oghma-0.0.1.dist-info → oghma-0.3.0.dist-info}/WHEEL +2 -1
- oghma-0.3.0.dist-info/entry_points.txt +3 -0
- oghma-0.3.0.dist-info/top_level.txt +1 -0
- oghma-0.0.1.dist-info/METADATA +0 -33
- oghma-0.0.1.dist-info/RECORD +0 -4
oghma/mcp_server.py
ADDED
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
from collections import Counter
|
|
2
|
+
from contextlib import asynccontextmanager
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
from mcp.server.fastmcp import FastMCP
|
|
6
|
+
|
|
7
|
+
from oghma.config import load_config
|
|
8
|
+
from oghma.embedder import EmbedConfig, create_embedder
|
|
9
|
+
from oghma.storage import MemoryRecord, Storage
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@asynccontextmanager
async def lifespan(_: FastMCP):
    """Open read-only storage for the server's lifetime and expose it to tools.

    Yields a dict with keys "storage" (a read-only Storage) and "config"
    (the loaded configuration) as the server's lifespan context.
    """
    cfg = load_config()
    store = Storage(config=cfg, read_only=True)
    # NOTE(review): Storage is never explicitly closed on shutdown here —
    # confirm it needs no teardown.
    yield {"storage": store, "config": cfg}
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
# Module-level MCP server instance; `lifespan` supplies the shared read-only
# Storage and config that the tool functions below look up per request.
mcp = FastMCP("Oghma Memory", lifespan=lifespan)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def _get_storage() -> Storage:
    """Return the read-only Storage opened by the server lifespan.

    Bug fix: ``mcp.get_context()`` returns a ``Context`` object, not the dict
    yielded by ``lifespan``; subscripting it raised ``TypeError``. The lifespan
    dict lives at ``request_context.lifespan_context`` (per the MCP Python SDK).
    """
    return mcp.get_context().request_context.lifespan_context["storage"]
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def _get_config() -> dict[str, Any]:
    """Return the loaded config dict from the lifespan context ({} if absent).

    Bug fix: ``mcp.get_context()`` returns a ``Context`` object with no
    ``.get`` method; the dict yielded by ``lifespan`` must be reached through
    ``request_context.lifespan_context`` (per the MCP Python SDK).
    """
    return mcp.get_context().request_context.lifespan_context.get("config", {})
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@mcp.tool()
def oghma_search(
    query: str,
    category: str | None = None,
    source_tool: str | None = None,
    limit: int = 10,
    search_mode: str = "keyword",
) -> list[MemoryRecord]:
    """Search memories by keyword, vector, or hybrid mode."""
    # Validate inputs before touching storage.
    if limit < 1:
        raise ValueError("limit must be >= 1")
    if search_mode not in {"keyword", "vector", "hybrid"}:
        raise ValueError("search_mode must be one of: keyword, vector, hybrid")

    store = _get_storage()

    # Pure keyword search never needs an embedding.
    if search_mode == "keyword":
        return store.search_memories(
            query=query,
            category=category,
            source_tool=source_tool,
            limit=limit,
        )

    embedding: list[float] | None = None
    try:
        embed_settings = _get_config().get("embedding", {})
        embedding = create_embedder(EmbedConfig.from_dict(embed_settings)).embed(query)
    except Exception:
        # Best effort: without an embedding, vector-only search cannot
        # proceed; hybrid search falls through with embedding=None.
        if search_mode == "vector":
            return []

    return store.search_memories_hybrid(
        query=query,
        query_embedding=embedding,
        category=category,
        source_tool=source_tool,
        limit=limit,
        search_mode=search_mode,
    )
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
@mcp.tool()
def oghma_get(memory_id: int) -> MemoryRecord | None:
    """Get a memory by ID."""
    return _get_storage().get_memory_by_id(memory_id)
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
@mcp.tool()
def oghma_stats() -> dict[str, Any]:
    """Get memory database statistics."""
    store = _get_storage()
    active = store.get_all_memories(status="active")
    recent_logs = store.get_recent_extraction_logs(limit=1)

    # Per-category and per-source frequency counts over active memories.
    by_category = Counter(item["category"] for item in active)
    by_source = Counter(item["source_tool"] for item in active)
    last_run = recent_logs[0]["created_at"] if recent_logs else None

    return {
        "total_memories": store.get_memory_count(),
        "memories_by_category": dict(by_category),
        "memories_by_source": dict(by_source),
        "last_extraction_time": last_run,
    }
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
@mcp.tool()
def oghma_categories() -> list[dict[str, Any]]:
    """List categories with memory counts."""
    active = _get_storage().get_all_memories(status="active")
    counts = Counter(item["category"] for item in active)

    # Largest count first; ties broken alphabetically by category name.
    ordered = sorted(counts.items(), key=lambda pair: (-pair[1], pair[0]))
    return [{"category": name, "count": total} for name, total in ordered]
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def main() -> None:
    """Start the Oghma MCP server (blocks until the server stops)."""
    mcp.run()


if __name__ == "__main__":
    main()
|
oghma/migration.py
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
|
|
3
|
+
from openai import APIError
|
|
4
|
+
|
|
5
|
+
from oghma.embedder import Embedder
|
|
6
|
+
from oghma.storage import Storage
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dataclass
|
|
10
|
+
class MigrationResult:
|
|
11
|
+
processed: int
|
|
12
|
+
migrated: int
|
|
13
|
+
skipped: int
|
|
14
|
+
failed: int
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class EmbeddingMigration:
|
|
18
|
+
def __init__(
|
|
19
|
+
self,
|
|
20
|
+
storage: Storage,
|
|
21
|
+
embedder: Embedder,
|
|
22
|
+
batch_size: int = 100,
|
|
23
|
+
):
|
|
24
|
+
self.storage = storage
|
|
25
|
+
self.embedder = embedder
|
|
26
|
+
self.batch_size = batch_size
|
|
27
|
+
|
|
28
|
+
def run(self, dry_run: bool = False) -> MigrationResult:
|
|
29
|
+
processed = 0
|
|
30
|
+
migrated = 0
|
|
31
|
+
failed = 0
|
|
32
|
+
|
|
33
|
+
while True:
|
|
34
|
+
batch = self.storage.get_memories_without_embeddings(limit=self.batch_size)
|
|
35
|
+
if not batch:
|
|
36
|
+
break
|
|
37
|
+
|
|
38
|
+
contents = [memory["content"] for memory in batch]
|
|
39
|
+
processed += len(batch)
|
|
40
|
+
|
|
41
|
+
if dry_run:
|
|
42
|
+
continue
|
|
43
|
+
|
|
44
|
+
try:
|
|
45
|
+
vectors = self.embedder.embed_batch(contents)
|
|
46
|
+
except (APIError, RuntimeError, ValueError):
|
|
47
|
+
failed += len(batch)
|
|
48
|
+
continue
|
|
49
|
+
|
|
50
|
+
for memory, vector in zip(batch, vectors, strict=False):
|
|
51
|
+
success = self.storage.upsert_memory_embedding(memory["id"], vector)
|
|
52
|
+
if success:
|
|
53
|
+
migrated += 1
|
|
54
|
+
else:
|
|
55
|
+
failed += 1
|
|
56
|
+
|
|
57
|
+
skipped = max(processed - migrated - failed, 0)
|
|
58
|
+
return MigrationResult(
|
|
59
|
+
processed=processed,
|
|
60
|
+
migrated=migrated,
|
|
61
|
+
skipped=skipped,
|
|
62
|
+
failed=failed,
|
|
63
|
+
)
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
|
|
3
|
+
from oghma.parsers.base import BaseParser
|
|
4
|
+
from oghma.parsers.base import Message as _Message
|
|
5
|
+
from oghma.parsers.claude_code import ClaudeCodeParser
|
|
6
|
+
from oghma.parsers.codex import CodexParser
|
|
7
|
+
from oghma.parsers.openclaw import OpenClawParser
|
|
8
|
+
from oghma.parsers.opencode import OpenCodeParser
|
|
9
|
+
|
|
10
|
+
# Public API of the parsers package.
__all__ = ["BaseParser", "Message", "get_parser_for_file", "PARSERS"]

# Re-export Message under its public name (imported privately above).
Message = _Message

# Registry of all known session-log parsers; get_parser_for_file() probes
# them in this order and returns the first match.
PARSERS: list[BaseParser] = [
    ClaudeCodeParser(),
    CodexParser(),
    OpenClawParser(),
    OpenCodeParser(),
]
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def get_parser_for_file(file_path: Path) -> BaseParser | None:
    """Return the first registered parser that accepts *file_path*, else None."""
    return next((candidate for candidate in PARSERS if candidate.can_parse(file_path)), None)
|
oghma/parsers/base.py
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
from abc import ABC, abstractmethod
|
|
2
|
+
from dataclasses import dataclass
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import TYPE_CHECKING
|
|
5
|
+
|
|
6
|
+
if TYPE_CHECKING:
    # Placeholder for annotation-only imports; none are needed at present.
    pass
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@dataclass
class Message:
    """A single chat message extracted from a session transcript."""

    role: str  # typically "user" or "assistant" (most parsers normalize to these)
    content: str  # plain-text body; parsers truncate to 3000 characters
    timestamp: str | None = None  # optional; the parsers in this package never set it
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class BaseParser(ABC):
    """Interface for session-log parsers; one implementation per agent tool."""

    @abstractmethod
    def parse(self, file_path: Path) -> list[Message]:
        """Extract messages from *file_path* (implementations here return [] on I/O errors)."""
        pass

    @abstractmethod
    def can_parse(self, file_path: Path) -> bool:
        """Return True if this parser recognizes *file_path* by its name/path pattern."""
        pass
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
import json
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
4
|
+
from oghma.parsers.base import BaseParser, Message
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class ClaudeCodeParser(BaseParser):
    """Parse Claude Code session transcripts (.claude/projects/*.jsonl)."""

    def can_parse(self, file_path: Path) -> bool:
        """Recognize Claude Code JSONL transcripts by their path pattern."""
        if not file_path.name.endswith(".jsonl"):
            return False
        # NOTE(review): "-Users-" assumes macOS-style home directories in the
        # encoded project path — confirm against Linux layouts.
        path_str = str(file_path)
        return ".claude/projects/-Users-" in path_str

    def parse(self, file_path: Path) -> list[Message]:
        """Extract user/assistant messages; returns [] if the file is unreadable.

        Malformed lines are skipped individually rather than aborting the file.
        """
        messages: list[Message] = []

        try:
            with open(file_path, encoding="utf-8") as f:
                # Fix: dropped the unused enumerate() line counter.
                for line in f:
                    line = line.strip()
                    if not line:
                        continue

                    try:
                        data = json.loads(line)
                        role = self._extract_role(data)
                        content = self._extract_content(data)

                        if role and content:
                            # Cap content length to bound downstream processing.
                            messages.append(Message(role=role, content=content[:3000]))
                    # Fix: AttributeError added — a record whose "message" value
                    # is not a dict previously escaped both handlers and aborted
                    # the whole parse, contrary to the skip-bad-lines intent.
                    except (json.JSONDecodeError, KeyError, TypeError, AttributeError):
                        continue
        except (OSError, UnicodeDecodeError):
            return []

        return messages

    def _extract_role(self, data: dict) -> str | None:
        """Map the record "type" field to a role; non-message records yield None."""
        msg_type = data.get("type")
        return msg_type if msg_type in ("user", "assistant") else None

    def _extract_content(self, data: dict) -> str:
        """Flatten message content: plain string, or "text" blocks joined by newlines."""
        message = data.get("message", {})
        content = message.get("content", "")

        if isinstance(content, str):
            return content

        if isinstance(content, list):
            parts = [
                block["text"]
                for block in content
                if isinstance(block, dict) and block.get("type") == "text" and block.get("text")
            ]
            return "\n".join(parts)

        return str(content)
|
oghma/parsers/codex.py
ADDED
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
import json
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
4
|
+
from oghma.parsers.base import BaseParser, Message
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class CodexParser(BaseParser):
    """Parse Codex CLI session rollouts (.codex/sessions/rollout-*.jsonl)."""

    def can_parse(self, file_path: Path) -> bool:
        """Recognize Codex rollout files by directory and filename prefix."""
        if not file_path.name.endswith(".jsonl"):
            return False
        path_str = str(file_path)
        return ".codex/sessions/" in path_str and "rollout-" in file_path.name

    def parse(self, file_path: Path) -> list[Message]:
        """Extract messages line by line; returns [] if the file is unreadable.

        Individual malformed JSONL lines are skipped; content is capped at
        3000 characters per message.
        """
        messages: list[Message] = []

        try:
            with open(file_path, encoding="utf-8") as f:
                for line in f:
                    line = line.strip()
                    if not line:
                        continue

                    try:
                        data = json.loads(line)
                        role = self._extract_role(data)
                        content = self._extract_content(data)

                        if role and content:
                            messages.append(Message(role=role, content=content[:3000]))
                    except (json.JSONDecodeError, KeyError, TypeError):
                        continue
        except (OSError, UnicodeDecodeError):
            return []

        return messages

    def _extract_role(self, data: dict) -> str | None:
        """Determine the message role for either Codex record format.

        Returns None for record types that are not conversation items.
        """
        msg_type = data.get("type")
        # Support both old format (item) and new format (response_item, event_msg)
        if msg_type not in ("item", "response_item", "event_msg"):
            return None

        payload = data.get("payload", {})

        # New format: role directly in payload
        if "role" in payload:
            role = payload.get("role")
            # Map developer/assistant to assistant, user to user
            if role in ("developer", "assistant"):
                return "assistant"
            elif role == "user":
                return "user"
            return None

        # Old format: nested in payload.item
        # NOTE(review): unlike the new-format branch, this returns the raw role
        # unvalidated — confirm old-format roles are limited to user/assistant.
        item = payload.get("item", {})
        return item.get("role")

    def _extract_content(self, data: dict) -> str:
        """Flatten payload content: plain string, or known text blocks joined by newlines."""
        payload = data.get("payload", {})

        # New format: content directly in payload
        if "content" in payload:
            content = payload.get("content", "")
        else:
            # Old format: nested in payload.item
            item = payload.get("item", {})
            content = item.get("content", "")

        if isinstance(content, str):
            return content

        if isinstance(content, list):
            parts: list[str] = []
            for block in content:
                if isinstance(block, dict):
                    block_type = block.get("type")
                    text = block.get("text", "")
                    # Only textual block types contribute to the message body.
                    if text and block_type in ("input_text", "output_text", "text"):
                        parts.append(text)
            return "\n".join(parts)

        return str(content)
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
import json
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
4
|
+
from oghma.parsers.base import BaseParser, Message
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class OpenClawParser(BaseParser):
    """Parse OpenClaw agent session logs (.openclaw/agents/*/sessions/*.jsonl)."""

    def can_parse(self, file_path: Path) -> bool:
        """Recognize OpenClaw session files by their path layout."""
        if not file_path.name.endswith(".jsonl"):
            return False
        location = str(file_path)
        return ".openclaw/agents/" in location and "/sessions/" in location

    def parse(self, file_path: Path) -> list[Message]:
        """Extract user/assistant messages; an unreadable file yields [].

        Malformed JSONL lines are skipped individually.
        """
        collected: list[Message] = []

        try:
            with open(file_path, encoding="utf-8") as handle:
                for raw in handle:
                    raw = raw.strip()
                    if not raw:
                        continue

                    try:
                        record = json.loads(raw)
                        who = self._extract_role(record)
                        body = self._extract_content(record)
                        if who and body:
                            # Cap content length for downstream processing.
                            collected.append(Message(role=who, content=body[:3000]))
                    except (json.JSONDecodeError, KeyError, TypeError):
                        continue
        except (OSError, UnicodeDecodeError):
            return []

        return collected

    def _extract_role(self, data: dict) -> str | None:
        """Return "user"/"assistant" for message records, otherwise None."""
        if data.get("type") != "message":
            return None

        role = data.get("message", {}).get("role")
        return role if role in ("user", "assistant") else None

    def _extract_content(self, data: dict) -> str:
        """Flatten the message content into a single newline-joined string."""
        content = data.get("message", {}).get("content", "")

        if isinstance(content, str):
            return content

        if isinstance(content, list):
            texts = [
                entry["text"]
                for entry in content
                if isinstance(entry, dict) and entry.get("text")
            ]
            return "\n".join(texts)

        return str(content)
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
import json
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
4
|
+
from oghma.parsers.base import BaseParser, Message
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class OpenCodeParser(BaseParser):
    """Parse OpenCode session directories (storage/message/ses_*).

    Unlike the JSONL parsers, this one takes a *directory* containing one
    msg_*.json file per message plus per-message part files under part/.
    """

    def can_parse(self, file_path: Path) -> bool:
        """Recognize OpenCode session directories by their storage path."""
        if not file_path.is_dir():
            return False
        path_str = str(file_path)
        return ".local/share/opencode/storage/message/ses_" in path_str

    def parse(self, file_path: Path) -> list[Message]:
        """Assemble messages from msg_*.json files and their part files.

        Unreadable or malformed files are skipped individually; content is
        capped at 3000 characters per message.
        """
        messages: list[Message] = []

        # Sorted so messages come out in filename (msg id) order.
        message_files = sorted(file_path.glob("msg_*.json"))
        part_files = list(file_path.glob("part/msg_*/prt_*.json"))

        parts_map = self._build_parts_map(part_files)

        for msg_file in message_files:
            try:
                with open(msg_file, encoding="utf-8") as f:
                    data = json.load(f)
            except (OSError, json.JSONDecodeError):
                continue

            role = self._extract_role(data)
            if not role:
                continue

            content_parts = self._get_message_content(data, parts_map)
            if content_parts:
                content = "\n".join(content_parts)
                messages.append(Message(role=role, content=content[:3000]))

        return messages

    def _build_parts_map(self, part_files: list[Path]) -> dict[str, list[str]]:
        """Group part-file texts by their owning message_id.

        An entry is created for every part file with a message_id, even when
        its "text" is empty — so a message whose parts are all textless maps
        to an empty list.
        """
        parts_map: dict[str, list[str]] = {}

        for part_file in part_files:
            try:
                with open(part_file, encoding="utf-8") as f:
                    data = json.load(f)
            except (OSError, json.JSONDecodeError):
                continue

            msg_id = data.get("message_id")
            if not msg_id:
                continue

            if msg_id not in parts_map:
                parts_map[msg_id] = []

            text = data.get("text", "")
            if text:
                parts_map[msg_id].append(text)

        return parts_map

    def _extract_role(self, data: dict) -> str | None:
        """Return the role if it is "user" or "assistant", otherwise None."""
        role = data.get("role")
        if role in ("user", "assistant"):
            return role
        return None

    def _get_message_content(self, data: dict, parts_map: dict[str, list[str]]) -> list[str]:
        """Return this message's text fragments, preferring part files.

        NOTE(review): a message whose parts all lacked text hits the first
        branch with an empty list and never falls back to the inline
        "content" field — confirm this shadowing is intended.
        """
        msg_id = data.get("id")
        if msg_id in parts_map:
            return parts_map[msg_id]

        content = data.get("content", "")
        if isinstance(content, str):
            return [content]

        if isinstance(content, list):
            parts: list[str] = []
            for item in content:
                if isinstance(item, dict) and "text" in item:
                    text = item.get("text", "")
                    if text:
                        parts.append(text)
            return parts

        if content:
            return [str(content)]

        return []
|