bot_knows-0.1.0-py3-none-any.whl

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their public registries.
Files changed (56)
  1. bot_knows/__init__.py +70 -0
  2. bot_knows/config.py +115 -0
  3. bot_knows/domain/__init__.py +5 -0
  4. bot_knows/domain/chat.py +62 -0
  5. bot_knows/domain/message.py +64 -0
  6. bot_knows/domain/relation.py +56 -0
  7. bot_knows/domain/topic.py +132 -0
  8. bot_knows/domain/topic_evidence.py +55 -0
  9. bot_knows/importers/__init__.py +12 -0
  10. bot_knows/importers/base.py +116 -0
  11. bot_knows/importers/chatgpt.py +154 -0
  12. bot_knows/importers/claude.py +172 -0
  13. bot_knows/importers/generic_json.py +272 -0
  14. bot_knows/importers/registry.py +125 -0
  15. bot_knows/infra/__init__.py +5 -0
  16. bot_knows/infra/llm/__init__.py +6 -0
  17. bot_knows/infra/llm/anthropic_provider.py +172 -0
  18. bot_knows/infra/llm/openai_provider.py +195 -0
  19. bot_knows/infra/mongo/__init__.py +5 -0
  20. bot_knows/infra/mongo/client.py +145 -0
  21. bot_knows/infra/mongo/repositories.py +348 -0
  22. bot_knows/infra/neo4j/__init__.py +5 -0
  23. bot_knows/infra/neo4j/client.py +152 -0
  24. bot_knows/infra/neo4j/graph_repository.py +329 -0
  25. bot_knows/infra/redis/__init__.py +6 -0
  26. bot_knows/infra/redis/cache.py +198 -0
  27. bot_knows/infra/redis/client.py +193 -0
  28. bot_knows/interfaces/__init__.py +18 -0
  29. bot_knows/interfaces/embedding.py +55 -0
  30. bot_knows/interfaces/graph.py +194 -0
  31. bot_knows/interfaces/llm.py +70 -0
  32. bot_knows/interfaces/recall.py +92 -0
  33. bot_knows/interfaces/storage.py +225 -0
  34. bot_knows/logging.py +101 -0
  35. bot_knows/models/__init__.py +22 -0
  36. bot_knows/models/chat.py +55 -0
  37. bot_knows/models/ingest.py +70 -0
  38. bot_knows/models/message.py +49 -0
  39. bot_knows/models/recall.py +58 -0
  40. bot_knows/models/topic.py +100 -0
  41. bot_knows/orchestrator.py +398 -0
  42. bot_knows/py.typed +0 -0
  43. bot_knows/services/__init__.py +24 -0
  44. bot_knows/services/chat_processing.py +182 -0
  45. bot_knows/services/dedup_service.py +161 -0
  46. bot_knows/services/graph_service.py +217 -0
  47. bot_knows/services/message_builder.py +135 -0
  48. bot_knows/services/recall_service.py +296 -0
  49. bot_knows/services/tasks.py +128 -0
  50. bot_knows/services/topic_extraction.py +199 -0
  51. bot_knows/utils/__init__.py +22 -0
  52. bot_knows/utils/hashing.py +126 -0
  53. bot_knows-0.1.0.dist-info/METADATA +294 -0
  54. bot_knows-0.1.0.dist-info/RECORD +56 -0
  55. bot_knows-0.1.0.dist-info/WHEEL +4 -0
  56. bot_knows-0.1.0.dist-info/licenses/LICENSE +21 -0
bot_knows/infra/neo4j/graph_repository.py
@@ -0,0 +1,329 @@
+"""Neo4j graph repository for bot_knows.
+
+This module provides the graph repository implementation for Neo4j.
+"""
+
+from typing import Any, Self
+
+from bot_knows.config import Neo4jSettings
+from bot_knows.infra.neo4j.client import Neo4jClient
+from bot_knows.interfaces.graph import GraphServiceInterface
+from bot_knows.logging import get_logger
+from bot_knows.models.chat import ChatDTO
+from bot_knows.models.message import MessageDTO
+from bot_knows.models.topic import TopicDTO, TopicEvidenceDTO
+
+__all__ = [
+    "Neo4jGraphRepository",
+]
+
+logger = get_logger(__name__)
+
+
+class Neo4jGraphRepository(GraphServiceInterface):
+    """Neo4j implementation of GraphServiceInterface.
+
+    Provides graph operations for the knowledge base including
+    node creation, edge creation, and graph queries.
+    """
+
+    config_class = Neo4jSettings
+
+    def __init__(self, client: Neo4jClient) -> None:
+        """Initialize repository with Neo4j client.
+
+        Args:
+            client: Connected Neo4jClient instance
+        """
+        self._client = client
+        self._owns_client = False
+
+    @classmethod
+    async def from_config(cls, config: Neo4jSettings) -> Self:
+        """Factory method for BotKnows instantiation.
+
+        Creates a Neo4jClient, connects, creates indexes/constraints, and returns repository.
+
+        Args:
+            config: Neo4j settings
+
+        Returns:
+            Connected Neo4jGraphRepository instance
+        """
+        client = Neo4jClient(config)
+        await client.connect()
+        await client.create_indexes()
+        await client.create_constraints()
+        instance = cls(client)
+        instance._owns_client = True
+        return instance
+
+    @classmethod
+    async def from_dict(cls, config: dict[str, Any]) -> Self:
+        """Factory method for custom config dict.
+
+        Args:
+            config: Dictionary with Neo4j settings
+
+        Returns:
+            Connected Neo4jGraphRepository instance
+        """
+        settings = Neo4jSettings(**config)
+        return await cls.from_config(settings)
+
+    async def close(self) -> None:
+        """Close owned resources."""
+        if self._owns_client and self._client:
+            await self._client.disconnect()
+
+    # Node operations
+    async def create_chat_node(self, chat: ChatDTO) -> str:
+        """Create or update a Chat node."""
+        query = """
+        MERGE (c:Chat {id: $id})
+        SET c.title = $title,
+            c.source = $source,
+            c.category = $category,
+            c.tags = $tags,
+            c.created_on = $created_on
+        RETURN c.id as id
+        """
+        await self._client.execute_write(
+            query,
+            {
+                "id": chat.id,
+                "title": chat.title,
+                "source": chat.source,
+                "category": chat.category.value,
+                "tags": chat.tags,
+                "created_on": chat.created_on,
+            },
+        )
+        return chat.id
+
+    async def create_message_node(self, message: MessageDTO) -> str:
+        """Create or update a Message node."""
+        query = """
+        MERGE (m:Message {message_id: $message_id})
+        SET m.chat_id = $chat_id,
+            m.created_on = $created_on,
+            m.user_content = $user_content,
+            m.assistent_content = $assistent_content
+        RETURN m.message_id as id
+        """
+        await self._client.execute_write(
+            query,
+            {
+                "message_id": message.message_id,
+                "chat_id": message.chat_id,
+                "created_on": message.created_on,
+                "user_content": message.user_content,
+                "assistent_content": message.assistant_content,
+            },
+        )
+        return message.message_id
+
+    async def create_topic_node(self, topic: TopicDTO) -> str:
+        """Create or update a Topic node."""
+        query = """
+        MERGE (t:Topic {topic_id: $topic_id})
+        SET t.canonical_name = $canonical_name,
+            t.importance = $importance,
+            t.recall_strength = $recall_strength
+        RETURN t.topic_id as id
+        """
+        await self._client.execute_write(
+            query,
+            {
+                "topic_id": topic.topic_id,
+                "canonical_name": topic.canonical_name,
+                "importance": topic.importance,
+                "recall_strength": topic.recall_strength,
+            },
+        )
+        return topic.topic_id
+
+    async def update_topic_node(self, topic: TopicDTO) -> None:
+        """Update an existing Topic node."""
+        await self.create_topic_node(topic)
+
+    # Edge operations
+    async def create_is_part_of_edge(self, message_id: str, chat_id: str) -> None:
+        """Create IS_PART_OF edge: (Message)-[:IS_PART_OF]->(Chat)."""
+        query = """
+        MATCH (m:Message {message_id: $message_id})
+        MATCH (c:Chat {id: $chat_id})
+        MERGE (m)-[:IS_PART_OF]->(c)
+        """
+        await self._client.execute_write(
+            query,
+            {"message_id": message_id, "chat_id": chat_id},
+        )
+
+    async def create_follows_after_edge(
+        self,
+        message_id: str,
+        previous_message_id: str,
+    ) -> None:
+        """Create FOLLOWS_AFTER edge: (Message)-[:FOLLOWS_AFTER]->(Message)."""
+        query = """
+        MATCH (m1:Message {message_id: $message_id})
+        MATCH (m2:Message {message_id: $previous_message_id})
+        MERGE (m1)-[:FOLLOWS_AFTER]->(m2)
+        """
+        await self._client.execute_write(
+            query,
+            {
+                "message_id": message_id,
+                "previous_message_id": previous_message_id,
+            },
+        )
+
+    async def create_is_supported_by_edge(
+        self,
+        topic_id: str,
+        message_id: str,
+        evidence: TopicEvidenceDTO,
+    ) -> None:
+        """Create IS_SUPPORTED_BY edge with evidence properties.
+
+        (Topic)-[:IS_SUPPORTED_BY {evidence data}]->(Message)
+        """
+        query = """
+        MATCH (t:Topic {topic_id: $topic_id})
+        MATCH (m:Message {message_id: $message_id})
+        MERGE (t)-[r:IS_SUPPORTED_BY {evidence_id: $evidence_id}]->(m)
+        SET r.extracted_name = $extracted_name,
+            r.confidence = $confidence,
+            r.timestamp = $timestamp
+        """
+        await self._client.execute_write(
+            query,
+            {
+                "topic_id": topic_id,
+                "message_id": message_id,
+                "evidence_id": evidence.evidence_id,
+                "extracted_name": evidence.extracted_name,
+                "confidence": evidence.confidence,
+                "timestamp": evidence.timestamp,
+            },
+        )
+
+    async def create_potentially_duplicate_of_edge(
+        self,
+        topic_id: str,
+        existing_topic_id: str,
+        similarity: float,
+    ) -> None:
+        """Create POTENTIALLY_DUPLICATE_OF edge between topics."""
+        query = """
+        MATCH (t1:Topic {topic_id: $topic_id})
+        MATCH (t2:Topic {topic_id: $existing_topic_id})
+        MERGE (t1)-[r:POTENTIALLY_DUPLICATE_OF]->(t2)
+        SET r.similarity = $similarity
+        """
+        await self._client.execute_write(
+            query,
+            {
+                "topic_id": topic_id,
+                "existing_topic_id": existing_topic_id,
+                "similarity": similarity,
+            },
+        )
+
+    async def create_relates_to_edge(
+        self,
+        topic_id: str,
+        related_topic_id: str,
+        relation_type: str,
+        weight: float,
+    ) -> None:
+        """Create RELATES_TO edge between topics."""
+        query = """
+        MATCH (t1:Topic {topic_id: $topic_id})
+        MATCH (t2:Topic {topic_id: $related_topic_id})
+        MERGE (t1)-[r:RELATES_TO]->(t2)
+        SET r.type = $relation_type,
+            r.weight = $weight
+        """
+        await self._client.execute_write(
+            query,
+            {
+                "topic_id": topic_id,
+                "related_topic_id": related_topic_id,
+                "relation_type": relation_type,
+                "weight": weight,
+            },
+        )
+
+    # Query operations
+    async def get_messages_for_chat(self, chat_id: str) -> list[MessageDTO]:
+        """Get all messages in a chat, ordered by FOLLOWS_AFTER."""
+        # Get messages ordered by created_on since FOLLOWS_AFTER may not exist
+        query = """
+        MATCH (m:Message)-[:IS_PART_OF]->(c:Chat {id: $chat_id})
+        RETURN m.message_id as message_id,
+               m.chat_id as chat_id,
+               m.created_on as created_on
+        ORDER BY m.created_on
+        """
+        records = await self._client.execute_query(query, {"chat_id": chat_id})
+        return [
+            MessageDTO(
+                message_id=r["message_id"],
+                chat_id=r["chat_id"],
+                created_on=r["created_on"],
+            )
+            for r in records
+        ]
+
+    async def get_related_topics(
+        self,
+        topic_id: str,
+        limit: int = 10,
+    ) -> list[tuple[str, float]]:
+        """Get topics related to a given topic."""
+        query = """
+        MATCH (t1:Topic {topic_id: $topic_id})-[r:RELATES_TO]->(t2:Topic)
+        RETURN t2.topic_id as topic_id, r.weight as weight
+        ORDER BY r.weight DESC
+        LIMIT $limit
+        """
+        records = await self._client.execute_query(
+            query,
+            {"topic_id": topic_id, "limit": limit},
+        )
+        return [(r["topic_id"], r["weight"]) for r in records]
+
+    async def get_topic_evidence(self, topic_id: str) -> list[dict[str, Any]]:
+        """Get all evidence for a topic from IS_SUPPORTED_BY edges."""
+        query = """
+        MATCH (t:Topic {topic_id: $topic_id})-[r:IS_SUPPORTED_BY]->(m:Message)
+        RETURN r.evidence_id as evidence_id,
+               r.extracted_name as extracted_name,
+               r.confidence as confidence,
+               r.timestamp as timestamp,
+               m.message_id as source_message_id
+        ORDER BY r.timestamp
+        """
+        records = await self._client.execute_query(query, {"topic_id": topic_id})
+        return [
+            {
+                "evidence_id": r["evidence_id"],
+                "topic_id": topic_id,
+                "extracted_name": r["extracted_name"],
+                "confidence": r["confidence"],
+                "timestamp": r["timestamp"],
+                "source_message_id": r["source_message_id"],
+            }
+            for r in records
+        ]
+
+    async def get_chat_topics(self, chat_id: str) -> list[str]:
+        """Get all topic IDs associated with a chat's messages."""
+        query = """
+        MATCH (t:Topic)-[:IS_SUPPORTED_BY]->(m:Message)-[:IS_PART_OF]->(c:Chat {id: $chat_id})
+        RETURN DISTINCT t.topic_id as topic_id
+        """
+        records = await self._client.execute_query(query, {"chat_id": chat_id})
+        return [r["topic_id"] for r in records]
bot_knows/infra/redis/__init__.py
@@ -0,0 +1,6 @@
+"""Redis infrastructure for bot_knows (optional)."""
+
+from bot_knows.infra.redis.cache import EmbeddingCache
+from bot_knows.infra.redis.client import RedisClient
+
+__all__ = ["EmbeddingCache", "RedisClient"]
bot_knows/infra/redis/cache.py
@@ -0,0 +1,198 @@
+"""Redis cache implementations for bot_knows.
+
+This module provides caching utilities for embeddings
+and frequently accessed data.
+"""
+
+import hashlib
+import json
+from typing import Any
+
+from bot_knows.infra.redis.client import RedisClient
+from bot_knows.logging import get_logger
+
+__all__ = [
+    "EmbeddingCache",
+    "TopicCache",
+]
+
+logger = get_logger(__name__)
+
+
+class EmbeddingCache:
+    """Redis cache for text embeddings.
+
+    Caches embeddings to avoid redundant API calls.
+    Falls back gracefully if Redis is unavailable.
+    """
+
+    def __init__(
+        self,
+        redis_client: RedisClient,
+        ttl: int = 86400,  # 24 hours
+        prefix: str = "emb:",
+    ) -> None:
+        """Initialize embedding cache.
+
+        Args:
+            redis_client: Redis client instance
+            ttl: Cache TTL in seconds (default: 24 hours)
+            prefix: Key prefix for embedding cache
+        """
+        self._redis = redis_client
+        self._ttl = ttl
+        self._prefix = prefix
+
+    def _make_key(self, text: str) -> str:
+        """Generate cache key for text."""
+        text_hash = hashlib.sha256(text.encode()).hexdigest()
+        return f"{self._prefix}{text_hash}"
+
+    async def get(self, text: str) -> list[float] | None:
+        """Get cached embedding for text.
+
+        Args:
+            text: Input text
+
+        Returns:
+            Embedding vector if cached, None otherwise
+        """
+        if not self._redis.is_connected:
+            return None
+
+        key = self._make_key(text)
+        cached = await self._redis.get(key)
+
+        if cached:
+            try:
+                return json.loads(cached)
+            except json.JSONDecodeError:
+                return None
+
+        return None
+
+    async def set(self, text: str, embedding: list[float]) -> bool:
+        """Cache embedding for text.
+
+        Args:
+            text: Input text
+            embedding: Embedding vector to cache
+
+        Returns:
+            True if cached successfully
+        """
+        if not self._redis.is_connected:
+            return False
+
+        key = self._make_key(text)
+        return await self._redis.set(key, json.dumps(embedding), ex=self._ttl)
+
+    async def get_or_compute(
+        self,
+        text: str,
+        compute_fn: Any,
+    ) -> list[float]:
+        """Get cached embedding or compute and cache.
+
+        Args:
+            text: Input text
+            compute_fn: Async function to compute embedding if not cached
+
+        Returns:
+            Embedding vector
+        """
+        # Try cache first
+        cached = await self.get(text)
+        if cached is not None:
+            return cached
+
+        # Compute embedding
+        embedding = await compute_fn(text)
+
+        # Cache result
+        await self.set(text, embedding)
+
+        return embedding
+
+
+class TopicCache:
+    """Redis cache for hot topics.
+
+    Caches frequently accessed topic data to reduce
+    database lookups.
+    """
+
+    def __init__(
+        self,
+        redis_client: RedisClient,
+        ttl: int = 3600,  # 1 hour
+        prefix: str = "topic:",
+    ) -> None:
+        """Initialize topic cache.
+
+        Args:
+            redis_client: Redis client instance
+            ttl: Cache TTL in seconds (default: 1 hour)
+            prefix: Key prefix for topic cache
+        """
+        self._redis = redis_client
+        self._ttl = ttl
+        self._prefix = prefix
+
+    def _make_key(self, topic_id: str) -> str:
+        """Generate cache key for topic."""
+        return f"{self._prefix}{topic_id}"
+
+    async def get(self, topic_id: str) -> dict[str, Any] | None:
+        """Get cached topic data.
+
+        Args:
+            topic_id: Topic ID
+
+        Returns:
+            Topic data dict if cached, None otherwise
+        """
+        if not self._redis.is_connected:
+            return None
+
+        key = self._make_key(topic_id)
+        cached = await self._redis.get(key)
+
+        if cached:
+            try:
+                return json.loads(cached)
+            except json.JSONDecodeError:
+                return None
+
+        return None
+
+    async def set(self, topic_id: str, data: dict[str, Any]) -> bool:
+        """Cache topic data.
+
+        Args:
+            topic_id: Topic ID
+            data: Topic data to cache
+
+        Returns:
+            True if cached successfully
+        """
+        if not self._redis.is_connected:
+            return False
+
+        key = self._make_key(topic_id)
+        return await self._redis.set(key, json.dumps(data), ex=self._ttl)
+
+    async def invalidate(self, topic_id: str) -> bool:
+        """Invalidate cached topic.
+
+        Args:
+            topic_id: Topic ID to invalidate
+
+        Returns:
+            True if invalidated successfully
+        """
+        if not self._redis.is_connected:
+            return False
+
+        key = self._make_key(topic_id)
+        return await self._redis.delete(key)
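
As a rough illustration of the caching contract above, the sketch below exercises EmbeddingCache.get_or_compute with an in-memory stand-in for RedisClient. The stand-in is hypothetical and exposes only the surface the caches actually call (is_connected, get, set(..., ex=...), delete); the real client lives in bot_knows/infra/redis/client.py, whose constructor and connection API are not shown in this diff.

import asyncio

from bot_knows.infra.redis.cache import EmbeddingCache


class FakeRedisClient:
    """In-memory stand-in exposing only what the caches call."""

    def __init__(self) -> None:
        self.is_connected = True
        self._store: dict[str, str] = {}

    async def get(self, key: str) -> str | None:
        return self._store.get(key)

    async def set(self, key: str, value: str, ex: int | None = None) -> bool:
        self._store[key] = value  # TTL is ignored by this stand-in
        return True

    async def delete(self, key: str) -> bool:
        return self._store.pop(key, None) is not None


async def embed(text: str) -> list[float]:
    # Placeholder embedding function; in bot_knows this would be an LLM provider call.
    return [float(len(text)), 0.0]


async def main() -> None:
    cache = EmbeddingCache(FakeRedisClient())  # type: ignore[arg-type]
    first = await cache.get_or_compute("hello world", embed)   # computed, then cached
    second = await cache.get_or_compute("hello world", embed)  # served from the cache
    assert first == second


asyncio.run(main())

Because every cache method returns early when is_connected is false, a disconnected client simply turns the cache into a pass-through, which is the graceful-degradation behavior the class docstring describes.
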