bot-knows 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bot_knows/__init__.py +70 -0
- bot_knows/config.py +115 -0
- bot_knows/domain/__init__.py +5 -0
- bot_knows/domain/chat.py +62 -0
- bot_knows/domain/message.py +64 -0
- bot_knows/domain/relation.py +56 -0
- bot_knows/domain/topic.py +132 -0
- bot_knows/domain/topic_evidence.py +55 -0
- bot_knows/importers/__init__.py +12 -0
- bot_knows/importers/base.py +116 -0
- bot_knows/importers/chatgpt.py +154 -0
- bot_knows/importers/claude.py +172 -0
- bot_knows/importers/generic_json.py +272 -0
- bot_knows/importers/registry.py +125 -0
- bot_knows/infra/__init__.py +5 -0
- bot_knows/infra/llm/__init__.py +6 -0
- bot_knows/infra/llm/anthropic_provider.py +172 -0
- bot_knows/infra/llm/openai_provider.py +195 -0
- bot_knows/infra/mongo/__init__.py +5 -0
- bot_knows/infra/mongo/client.py +145 -0
- bot_knows/infra/mongo/repositories.py +348 -0
- bot_knows/infra/neo4j/__init__.py +5 -0
- bot_knows/infra/neo4j/client.py +152 -0
- bot_knows/infra/neo4j/graph_repository.py +329 -0
- bot_knows/infra/redis/__init__.py +6 -0
- bot_knows/infra/redis/cache.py +198 -0
- bot_knows/infra/redis/client.py +193 -0
- bot_knows/interfaces/__init__.py +18 -0
- bot_knows/interfaces/embedding.py +55 -0
- bot_knows/interfaces/graph.py +194 -0
- bot_knows/interfaces/llm.py +70 -0
- bot_knows/interfaces/recall.py +92 -0
- bot_knows/interfaces/storage.py +225 -0
- bot_knows/logging.py +101 -0
- bot_knows/models/__init__.py +22 -0
- bot_knows/models/chat.py +55 -0
- bot_knows/models/ingest.py +70 -0
- bot_knows/models/message.py +49 -0
- bot_knows/models/recall.py +58 -0
- bot_knows/models/topic.py +100 -0
- bot_knows/orchestrator.py +398 -0
- bot_knows/py.typed +0 -0
- bot_knows/services/__init__.py +24 -0
- bot_knows/services/chat_processing.py +182 -0
- bot_knows/services/dedup_service.py +161 -0
- bot_knows/services/graph_service.py +217 -0
- bot_knows/services/message_builder.py +135 -0
- bot_knows/services/recall_service.py +296 -0
- bot_knows/services/tasks.py +128 -0
- bot_knows/services/topic_extraction.py +199 -0
- bot_knows/utils/__init__.py +22 -0
- bot_knows/utils/hashing.py +126 -0
- bot_knows-0.1.0.dist-info/METADATA +294 -0
- bot_knows-0.1.0.dist-info/RECORD +56 -0
- bot_knows-0.1.0.dist-info/WHEEL +4 -0
- bot_knows-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,348 @@
|
|
|
1
|
+
"""MongoDB repositories for bot_knows.
|
|
2
|
+
|
|
3
|
+
This module provides repository implementations for MongoDB storage.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from typing import Any, Self
|
|
7
|
+
|
|
8
|
+
import numpy as np
|
|
9
|
+
|
|
10
|
+
from bot_knows.config import MongoSettings
|
|
11
|
+
from bot_knows.infra.mongo.client import MongoClient
|
|
12
|
+
from bot_knows.interfaces.storage import StorageInterface
|
|
13
|
+
from bot_knows.logging import get_logger
|
|
14
|
+
from bot_knows.models.chat import ChatCategory, ChatDTO
|
|
15
|
+
from bot_knows.models.message import MessageDTO
|
|
16
|
+
from bot_knows.models.recall import TopicRecallStateDTO
|
|
17
|
+
from bot_knows.models.topic import TopicDTO, TopicEvidenceDTO
|
|
18
|
+
|
|
19
|
+
__all__ = [
|
|
20
|
+
"MongoStorageRepository",
|
|
21
|
+
]
|
|
22
|
+
|
|
23
|
+
logger = get_logger(__name__)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class MongoStorageRepository(StorageInterface):
    """MongoDB implementation of StorageInterface.

    Provides CRUD operations for chats, messages, topics,
    evidence, and recall states. Every public method is a coroutine that
    talks to one of the collection handles exposed by the wrapped
    ``MongoClient`` (``chats``, ``messages``, ``topics``, ``evidence``,
    ``recall_states``). Documents are converted to and from DTOs by the
    private ``_*_to_doc`` / ``_doc_to_*`` static helpers at the bottom of
    the class.
    """

    config_class = MongoSettings

    def __init__(self, client: MongoClient) -> None:
        """Initialize repository with MongoDB client.

        Args:
            client: Connected MongoClient instance
        """
        self._client = client
        # A client passed in by the caller is never disconnected by close();
        # from_config() flips this flag for the client it creates itself.
        self._owns_client = False

    @classmethod
    async def from_config(cls, config: MongoSettings) -> Self:
        """Factory method for BotKnows instantiation.

        Creates a MongoClient, connects, creates indexes, and returns repository.
        If index creation fails, the freshly connected client is disconnected
        again before the error is re-raised, so no connection is leaked.

        Args:
            config: MongoDB settings

        Returns:
            Connected MongoStorageRepository instance
        """
        client = MongoClient(config)
        await client.connect()
        try:
            await client.create_indexes()
        except BaseException:
            # Nothing else references this client yet, so it must be released
            # here (BaseException also covers task cancellation).
            await client.disconnect()
            raise

        instance = cls(client)
        instance._owns_client = True
        return instance

    @classmethod
    async def from_dict(cls, config: dict[str, Any]) -> Self:
        """Factory method for custom config dict.

        Args:
            config: Dictionary with MongoDB settings

        Returns:
            Connected MongoStorageRepository instance
        """
        settings = MongoSettings(**config)
        return await cls.from_config(settings)

    async def close(self) -> None:
        """Close owned resources.

        Only disconnects the client when this repository created it
        (i.e. it was built through ``from_config`` / ``from_dict``).
        """
        if self._owns_client and self._client:
            await self._client.disconnect()

    # Chat operations
    async def save_chat(self, chat: ChatDTO) -> str:
        """Save or update a chat.

        The document matching ``chat.id`` is replaced, or inserted when absent.

        Returns:
            The ID of the saved chat.
        """
        doc = self._chat_to_doc(chat)
        await self._client.chats.replace_one(
            {"id": chat.id},
            doc,
            upsert=True,
        )
        return chat.id

    async def get_chat(self, chat_id: str) -> ChatDTO | None:
        """Get a chat by ID, or ``None`` when no such chat is stored."""
        doc = await self._client.chats.find_one({"id": chat_id})
        return self._doc_to_chat(doc) if doc else None

    async def chat_exists(self, chat_id: str) -> bool:
        """Check if a chat exists."""
        # limit=1 lets the server stop counting at the first match.
        count = await self._client.chats.count_documents({"id": chat_id}, limit=1)
        return count > 0

    async def find_chats_by_source(self, source: str) -> list[ChatDTO]:
        """Find all chats from a source."""
        cursor = self._client.chats.find({"source": source})
        return [self._doc_to_chat(doc) async for doc in cursor]

    # Message operations
    async def save_message(self, message: MessageDTO) -> str:
        """Save or update a message.

        The document matching ``message.message_id`` is replaced, or inserted
        when absent.

        Returns:
            The ID of the saved message.
        """
        doc = self._message_to_doc(message)
        await self._client.messages.replace_one(
            {"message_id": message.message_id},
            doc,
            upsert=True,
        )
        return message.message_id

    async def get_message(self, message_id: str) -> MessageDTO | None:
        """Get a message by ID, or ``None`` when no such message is stored."""
        doc = await self._client.messages.find_one({"message_id": message_id})
        return self._doc_to_message(doc) if doc else None

    async def get_messages_for_chat(self, chat_id: str) -> list[MessageDTO]:
        """Get all messages for a chat, ordered by ``created_on`` ascending."""
        cursor = self._client.messages.find({"chat_id": chat_id}).sort("created_on", 1)
        return [self._doc_to_message(doc) async for doc in cursor]

    # Topic operations
    async def save_topic(self, topic: TopicDTO) -> str:
        """Save or update a topic.

        The document matching ``topic.topic_id`` is replaced, or inserted
        when absent.

        Returns:
            The ID of the saved topic.
        """
        doc = self._topic_to_doc(topic)
        await self._client.topics.replace_one(
            {"topic_id": topic.topic_id},
            doc,
            upsert=True,
        )
        return topic.topic_id

    async def get_topic(self, topic_id: str) -> TopicDTO | None:
        """Get a topic by ID, or ``None`` when no such topic is stored."""
        doc = await self._client.topics.find_one({"topic_id": topic_id})
        return self._doc_to_topic(doc) if doc else None

    async def update_topic(self, topic: TopicDTO) -> None:
        """Update an existing topic.

        Delegates to ``save_topic``; because that is an upsert, a topic that
        does not exist yet is created rather than rejected.
        """
        await self.save_topic(topic)

    async def find_similar_topics(
        self,
        embedding: list[float],
        threshold: float,
    ) -> list[tuple[TopicDTO, float]]:
        """Find topics with similar embeddings.

        Uses cosine similarity comparison against all topics.
        For production, consider using MongoDB Atlas Vector Search
        or a dedicated vector database.

        Topics are skipped (not treated as errors) when their stored centroid
        is empty, has zero norm, or has a different dimensionality than
        ``embedding``; the last case is logged as a warning.

        Args:
            embedding: Query embedding vector
            threshold: Minimum cosine similarity (inclusive) for a match

        Returns:
            ``(topic, similarity)`` pairs sorted by similarity descending.
            Empty when ``embedding`` is empty or has zero norm.
        """
        results: list[tuple[TopicDTO, float]] = []
        query_vec = np.asarray(embedding, dtype=float)
        query_norm = np.linalg.norm(query_vec)

        # Cosine similarity is undefined for a zero vector.
        if query_norm == 0:
            return results

        # Fetch all topics with embeddings
        cursor = self._client.topics.find({"centroid_embedding": {"$exists": True, "$ne": []}})

        async for doc in cursor:
            topic = self._doc_to_topic(doc)
            if not topic.centroid_embedding:
                continue

            doc_vec = np.asarray(topic.centroid_embedding, dtype=float)

            # np.dot raises on mismatched lengths; one stale centroid must
            # not abort the whole search.
            if doc_vec.shape != query_vec.shape:
                logger.warning(
                    "topic_embedding_dimension_mismatch",
                    topic_id=topic.topic_id,
                    expected=query_vec.shape,
                    actual=doc_vec.shape,
                )
                continue

            doc_norm = np.linalg.norm(doc_vec)
            if doc_norm == 0:
                continue

            # Calculate cosine similarity
            similarity = float(np.dot(query_vec, doc_vec) / (query_norm * doc_norm))

            if similarity >= threshold:
                results.append((topic, similarity))

        # Sort by similarity descending
        results.sort(key=lambda x: x[1], reverse=True)
        return results

    async def get_all_topics(self, limit: int = 1000) -> list[TopicDTO]:
        """Get all topics, returning at most ``limit`` of them."""
        cursor = self._client.topics.find().limit(limit)
        return [self._doc_to_topic(doc) async for doc in cursor]

    # Evidence operations
    async def append_evidence(self, evidence: TopicEvidenceDTO) -> str:
        """Append evidence record (never update).

        Returns:
            The ID of the inserted evidence record.
        """
        doc = self._evidence_to_doc(evidence)
        await self._client.evidence.insert_one(doc)
        return evidence.evidence_id

    async def get_evidence_for_topic(self, topic_id: str) -> list[TopicEvidenceDTO]:
        """Get all evidence for a topic, ordered by ``timestamp`` ascending."""
        cursor = self._client.evidence.find({"topic_id": topic_id}).sort("timestamp", 1)
        return [self._doc_to_evidence(doc) async for doc in cursor]

    # Recall state operations
    async def save_recall_state(self, state: TopicRecallStateDTO) -> None:
        """Save or update recall state (one document per ``topic_id``)."""
        doc = self._recall_state_to_doc(state)
        await self._client.recall_states.replace_one(
            {"topic_id": state.topic_id},
            doc,
            upsert=True,
        )

    async def get_recall_state(self, topic_id: str) -> TopicRecallStateDTO | None:
        """Get recall state for a topic, or ``None`` when none is stored."""
        doc = await self._client.recall_states.find_one({"topic_id": topic_id})
        return self._doc_to_recall_state(doc) if doc else None

    async def get_due_topics(self, threshold: float) -> list[TopicRecallStateDTO]:
        """Get topics due for recall (strength below threshold).

        Results are ordered by strength ascending, weakest first.
        """
        cursor = self._client.recall_states.find({"strength": {"$lt": threshold}}).sort(
            "strength", 1
        )
        return [self._doc_to_recall_state(doc) async for doc in cursor]

    async def get_all_recall_states(self) -> list[TopicRecallStateDTO]:
        """Get all recall states."""
        cursor = self._client.recall_states.find()
        return [self._doc_to_recall_state(doc) async for doc in cursor]

    # Document conversion helpers
    @staticmethod
    def _chat_to_doc(chat: ChatDTO) -> dict[str, Any]:
        """Convert a ChatDTO into its MongoDB document form."""
        return {
            "id": chat.id,
            "title": chat.title,
            "source": chat.source,
            # Stored as the enum's value; _doc_to_chat rebuilds the enum.
            "category": chat.category.value,
            "tags": chat.tags,
            "created_on": chat.created_on,
            "schema_version": chat.schema_version,
        }

    @staticmethod
    def _doc_to_chat(doc: dict[str, Any]) -> ChatDTO:
        """Build a ChatDTO from a MongoDB document.

        ``tags`` and ``schema_version`` fall back to ``[]`` and ``1`` when
        missing; all other fields are required (``KeyError`` otherwise).
        """
        return ChatDTO(
            id=doc["id"],
            title=doc["title"],
            source=doc["source"],
            category=ChatCategory(doc["category"]),
            tags=doc.get("tags", []),
            created_on=doc["created_on"],
            schema_version=doc.get("schema_version", 1),
        )

    @staticmethod
    def _message_to_doc(message: MessageDTO) -> dict[str, Any]:
        """Convert a MessageDTO into its MongoDB document form."""
        return {
            "message_id": message.message_id,
            "chat_id": message.chat_id,
            "user_content": message.user_content,
            "assistant_content": message.assistant_content,
            "created_on": message.created_on,
            "schema_version": message.schema_version,
        }

    @staticmethod
    def _doc_to_message(doc: dict[str, Any]) -> MessageDTO:
        """Build a MessageDTO from a MongoDB document.

        Missing content fields default to ``""`` and ``schema_version`` to
        ``1``; all other fields are required (``KeyError`` otherwise).
        """
        return MessageDTO(
            message_id=doc["message_id"],
            chat_id=doc["chat_id"],
            user_content=doc.get("user_content", ""),
            assistant_content=doc.get("assistant_content", ""),
            created_on=doc["created_on"],
            schema_version=doc.get("schema_version", 1),
        )

    @staticmethod
    def _topic_to_doc(topic: TopicDTO) -> dict[str, Any]:
        """Convert a TopicDTO into its MongoDB document form."""
        return {
            "topic_id": topic.topic_id,
            "canonical_name": topic.canonical_name,
            "centroid_embedding": topic.centroid_embedding,
            "evidence_count": topic.evidence_count,
            "importance": topic.importance,
            "recall_strength": topic.recall_strength,
            "schema_version": topic.schema_version,
        }

    @staticmethod
    def _doc_to_topic(doc: dict[str, Any]) -> TopicDTO:
        """Build a TopicDTO from a MongoDB document.

        Only ``topic_id`` and ``canonical_name`` are required; the remaining
        fields fall back to empty/zero defaults and ``schema_version`` to ``1``.
        """
        return TopicDTO(
            topic_id=doc["topic_id"],
            canonical_name=doc["canonical_name"],
            centroid_embedding=doc.get("centroid_embedding", []),
            evidence_count=doc.get("evidence_count", 0),
            importance=doc.get("importance", 0.0),
            recall_strength=doc.get("recall_strength", 0.0),
            schema_version=doc.get("schema_version", 1),
        )

    @staticmethod
    def _evidence_to_doc(evidence: TopicEvidenceDTO) -> dict[str, Any]:
        """Convert a TopicEvidenceDTO into its MongoDB document form."""
        return {
            "evidence_id": evidence.evidence_id,
            "topic_id": evidence.topic_id,
            "extracted_name": evidence.extracted_name,
            "source_message_id": evidence.source_message_id,
            "confidence": evidence.confidence,
            "timestamp": evidence.timestamp,
            "schema_version": evidence.schema_version,
        }

    @staticmethod
    def _doc_to_evidence(doc: dict[str, Any]) -> TopicEvidenceDTO:
        """Build a TopicEvidenceDTO from a MongoDB document.

        Every field except ``schema_version`` (default ``1``) is required.
        """
        return TopicEvidenceDTO(
            evidence_id=doc["evidence_id"],
            topic_id=doc["topic_id"],
            extracted_name=doc["extracted_name"],
            source_message_id=doc["source_message_id"],
            confidence=doc["confidence"],
            timestamp=doc["timestamp"],
            schema_version=doc.get("schema_version", 1),
        )

    @staticmethod
    def _recall_state_to_doc(state: TopicRecallStateDTO) -> dict[str, Any]:
        """Convert a TopicRecallStateDTO into its MongoDB document form."""
        return {
            "topic_id": state.topic_id,
            "strength": state.strength,
            "last_seen": state.last_seen,
            "last_updated": state.last_updated,
            "stability": state.stability,
            "schema_version": state.schema_version,
        }

    @staticmethod
    def _doc_to_recall_state(doc: dict[str, Any]) -> TopicRecallStateDTO:
        """Build a TopicRecallStateDTO from a MongoDB document.

        ``stability`` defaults to ``1.0`` and ``schema_version`` to ``1``;
        all other fields are required (``KeyError`` otherwise).
        """
        return TopicRecallStateDTO(
            topic_id=doc["topic_id"],
            strength=doc["strength"],
            last_seen=doc["last_seen"],
            last_updated=doc["last_updated"],
            stability=doc.get("stability", 1.0),
            schema_version=doc.get("schema_version", 1),
        )
|
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
"""Neo4j client for bot_knows.
|
|
2
|
+
|
|
3
|
+
This module provides an async Neo4j client wrapper.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from neo4j import AsyncDriver, AsyncGraphDatabase
|
|
9
|
+
|
|
10
|
+
from bot_knows.config import Neo4jSettings
|
|
11
|
+
from bot_knows.logging import get_logger
|
|
12
|
+
|
|
13
|
+
__all__ = [
|
|
14
|
+
"Neo4jClient",
|
|
15
|
+
]
|
|
16
|
+
|
|
17
|
+
logger = get_logger(__name__)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class Neo4jClient:
    """Async Neo4j client wrapper.

    Provides connection management and query execution
    for the bot_knows knowledge graph.

    Example:
        client = Neo4jClient(settings)
        await client.connect()

        result = await client.execute_query(
            "MATCH (n:Chat) RETURN n LIMIT 10"
        )

        await client.disconnect()

    The client can also be used as an async context manager, which calls
    ``connect()`` on entry and ``disconnect()`` on exit.
    """

    def __init__(self, settings: Neo4jSettings) -> None:
        """Initialize client with settings.

        No connection is opened here; call ``connect()`` first.

        Args:
            settings: Neo4j connection settings
        """
        self._settings = settings
        self._driver: AsyncDriver | None = None

    async def connect(self) -> None:
        """Initialize connection to Neo4j.

        Does nothing when a driver is already held. The driver is stored on
        the client only after ``verify_connectivity()`` succeeds; on failure
        it is closed and the error re-raised, so a later ``connect()`` call
        retries instead of returning early with an unverified driver.
        """
        if self._driver is not None:
            return

        driver = AsyncGraphDatabase.driver(
            self._settings.uri,
            auth=(
                self._settings.username,
                self._settings.password.get_secret_value(),
            ),
        )

        # Verify connection
        try:
            await driver.verify_connectivity()
        except BaseException:
            # Release the unverified driver (BaseException also covers
            # task cancellation).
            await driver.close()
            raise

        self._driver = driver
        logger.info("connected_to_neo4j", uri=self._settings.uri)

    async def disconnect(self) -> None:
        """Close connection to Neo4j.

        Safe to call when not connected. The driver reference is cleared
        before closing so the client never keeps a half-closed driver, even
        if ``close()`` raises.
        """
        driver, self._driver = self._driver, None
        if driver is not None:
            await driver.close()
            logger.info("disconnected_from_neo4j")

    @property
    def driver(self) -> AsyncDriver:
        """Get driver instance.

        Raises:
            RuntimeError: If not connected
        """
        if self._driver is None:
            raise RuntimeError("Neo4jClient not connected. Call connect() first.")
        return self._driver

    async def execute_query(
        self,
        query: str,
        parameters: dict[str, Any] | None = None,
    ) -> list[dict[str, Any]]:
        """Execute a Cypher query and return results.

        Args:
            query: Cypher query string
            parameters: Query parameters

        Returns:
            List of result records as dicts

        Raises:
            RuntimeError: If not connected
        """
        async with self.driver.session() as session:
            result = await session.run(query, parameters or {})
            # Read the records while the session is still open.
            records = await result.data()
            return records

    async def execute_write(
        self,
        query: str,
        parameters: dict[str, Any] | None = None,
    ) -> None:
        """Execute a write query (CREATE, MERGE, etc.).

        The result is consumed explicitly before the session closes so that
        any server-side failure is raised to the caller.

        Args:
            query: Cypher query string
            parameters: Query parameters

        Raises:
            RuntimeError: If not connected
        """
        async with self.driver.session() as session:
            result = await session.run(query, parameters or {})
            # Drain the result so a failure surfaces here rather than being
            # left for session teardown to deal with.
            await result.consume()

    async def create_indexes(self) -> None:
        """Create indexes for the knowledge graph.

        Each statement uses ``IF NOT EXISTS``. Unlike ``create_constraints``,
        a failing statement propagates to the caller.
        """
        indexes = [
            "CREATE INDEX chat_id_idx IF NOT EXISTS FOR (c:Chat) ON (c.id)",
            "CREATE INDEX message_id_idx IF NOT EXISTS FOR (m:Message) ON (m.message_id)",
            "CREATE INDEX message_chat_idx IF NOT EXISTS FOR (m:Message) ON (m.chat_id)",
            "CREATE INDEX topic_id_idx IF NOT EXISTS FOR (t:Topic) ON (t.topic_id)",
        ]

        for index_query in indexes:
            await self.execute_write(index_query)

        logger.info("created_neo4j_indexes")

    async def create_constraints(self) -> None:
        """Create uniqueness constraints.

        Best-effort: a failing statement is logged at debug level (with the
        statement text) and the remaining constraints are still attempted.
        """
        constraints = [
            "CREATE CONSTRAINT chat_id_unique IF NOT EXISTS FOR (c:Chat) REQUIRE c.id IS UNIQUE",
            "CREATE CONSTRAINT message_id_unique IF NOT EXISTS FOR (m:Message) REQUIRE m.message_id IS UNIQUE",
            "CREATE CONSTRAINT topic_id_unique IF NOT EXISTS FOR (t:Topic) REQUIRE t.topic_id IS UNIQUE",
        ]

        for constraint_query in constraints:
            try:
                await self.execute_write(constraint_query)
            except Exception as e:
                # Constraint may already exist
                # NOTE(review): presumably this also covers the conflict with
                # the same-property indexes from create_indexes() -- confirm.
                logger.debug(
                    "constraint_creation_skipped",
                    query=constraint_query,
                    error=str(e),
                )

        logger.info("created_neo4j_constraints")

    async def __aenter__(self) -> "Neo4jClient":
        """Async context manager entry: connect and return the client."""
        await self.connect()
        return self

    async def __aexit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        """Async context manager exit: disconnect; exceptions are not suppressed."""
        await self.disconnect()
|