bot-knows 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. bot_knows/__init__.py +70 -0
  2. bot_knows/config.py +115 -0
  3. bot_knows/domain/__init__.py +5 -0
  4. bot_knows/domain/chat.py +62 -0
  5. bot_knows/domain/message.py +64 -0
  6. bot_knows/domain/relation.py +56 -0
  7. bot_knows/domain/topic.py +132 -0
  8. bot_knows/domain/topic_evidence.py +55 -0
  9. bot_knows/importers/__init__.py +12 -0
  10. bot_knows/importers/base.py +116 -0
  11. bot_knows/importers/chatgpt.py +154 -0
  12. bot_knows/importers/claude.py +172 -0
  13. bot_knows/importers/generic_json.py +272 -0
  14. bot_knows/importers/registry.py +125 -0
  15. bot_knows/infra/__init__.py +5 -0
  16. bot_knows/infra/llm/__init__.py +6 -0
  17. bot_knows/infra/llm/anthropic_provider.py +172 -0
  18. bot_knows/infra/llm/openai_provider.py +195 -0
  19. bot_knows/infra/mongo/__init__.py +5 -0
  20. bot_knows/infra/mongo/client.py +145 -0
  21. bot_knows/infra/mongo/repositories.py +348 -0
  22. bot_knows/infra/neo4j/__init__.py +5 -0
  23. bot_knows/infra/neo4j/client.py +152 -0
  24. bot_knows/infra/neo4j/graph_repository.py +329 -0
  25. bot_knows/infra/redis/__init__.py +6 -0
  26. bot_knows/infra/redis/cache.py +198 -0
  27. bot_knows/infra/redis/client.py +193 -0
  28. bot_knows/interfaces/__init__.py +18 -0
  29. bot_knows/interfaces/embedding.py +55 -0
  30. bot_knows/interfaces/graph.py +194 -0
  31. bot_knows/interfaces/llm.py +70 -0
  32. bot_knows/interfaces/recall.py +92 -0
  33. bot_knows/interfaces/storage.py +225 -0
  34. bot_knows/logging.py +101 -0
  35. bot_knows/models/__init__.py +22 -0
  36. bot_knows/models/chat.py +55 -0
  37. bot_knows/models/ingest.py +70 -0
  38. bot_knows/models/message.py +49 -0
  39. bot_knows/models/recall.py +58 -0
  40. bot_knows/models/topic.py +100 -0
  41. bot_knows/orchestrator.py +398 -0
  42. bot_knows/py.typed +0 -0
  43. bot_knows/services/__init__.py +24 -0
  44. bot_knows/services/chat_processing.py +182 -0
  45. bot_knows/services/dedup_service.py +161 -0
  46. bot_knows/services/graph_service.py +217 -0
  47. bot_knows/services/message_builder.py +135 -0
  48. bot_knows/services/recall_service.py +296 -0
  49. bot_knows/services/tasks.py +128 -0
  50. bot_knows/services/topic_extraction.py +199 -0
  51. bot_knows/utils/__init__.py +22 -0
  52. bot_knows/utils/hashing.py +126 -0
  53. bot_knows-0.1.0.dist-info/METADATA +294 -0
  54. bot_knows-0.1.0.dist-info/RECORD +56 -0
  55. bot_knows-0.1.0.dist-info/WHEEL +4 -0
  56. bot_knows-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,193 @@
1
+ """Redis client for bot_knows.
2
+
3
+ This module provides an optional async Redis client wrapper.
4
+ Redis is used for caching and is optional - if not configured,
5
+ caching will be disabled gracefully.
6
+ """
7
+
8
+ from typing import Any
9
+
10
+ from redis.asyncio import Redis
11
+
12
+ from bot_knows.config import RedisSettings
13
+ from bot_knows.logging import get_logger
14
+
15
+ __all__ = [
16
+ "RedisClient",
17
+ ]
18
+
19
+ logger = get_logger(__name__)
20
+
21
+
22
+ class RedisClient:
23
+ """Async Redis client wrapper (optional).
24
+
25
+ Provides connection management for Redis caching.
26
+ If Redis is not configured or unavailable, operations
27
+ will fail gracefully.
28
+
29
+ Example:
30
+ client = RedisClient(settings)
31
+ if await client.connect():
32
+ await client.set("key", "value")
33
+ value = await client.get("key")
34
+ await client.disconnect()
35
+ """
36
+
37
+ def __init__(self, settings: RedisSettings):
38
+ """Initialize client with settings.
39
+
40
+ Args:
41
+ settings: Redis connection settings
42
+ """
43
+ self._settings = settings
44
+ self._redis: Redis | None = None # type: ignore[type-arg]
45
+ self._connected = False
46
+
47
+ @property
48
+ def is_enabled(self) -> bool:
49
+ """Check if Redis is enabled in configuration."""
50
+ return self._settings.enabled and self._settings.url is not None
51
+
52
+ @property
53
+ def is_connected(self) -> bool:
54
+ """Check if connected to Redis."""
55
+ return self._connected
56
+
57
+ async def connect(self) -> bool:
58
+ """Initialize connection to Redis.
59
+
60
+ Returns:
61
+ True if connected successfully, False otherwise
62
+ """
63
+ if self._redis is not None:
64
+ return self._connected
65
+
66
+ if not self.is_enabled:
67
+ logger.info("redis_disabled", reason="not configured")
68
+ return False
69
+
70
+ try:
71
+ self._redis = Redis.from_url(
72
+ self._settings.url, # type: ignore[arg-type]
73
+ decode_responses=True,
74
+ )
75
+ # Verify connection
76
+ await self._redis.ping()
77
+ self._connected = True
78
+ logger.info("connected_to_redis", url=self._settings.url)
79
+ return True
80
+ except Exception as e:
81
+ logger.warning(
82
+ "redis_connection_failed",
83
+ error=str(e),
84
+ reason="Redis unavailable, caching disabled",
85
+ )
86
+ self._redis = None
87
+ self._connected = False
88
+ return False
89
+
90
+ async def disconnect(self) -> None:
91
+ """Close connection to Redis."""
92
+ if self._redis:
93
+ await self._redis.close()
94
+ self._redis = None
95
+ self._connected = False
96
+ logger.info("disconnected_from_redis")
97
+
98
+ @property
99
+ def client(self) -> Redis | None: # type: ignore[type-arg]
100
+ """Get Redis client instance.
101
+
102
+ Returns:
103
+ Redis client if connected, None otherwise
104
+ """
105
+ return self._redis if self._connected else None
106
+
107
+ async def get(self, key: str) -> str | None:
108
+ """Get value from Redis.
109
+
110
+ Args:
111
+ key: Cache key
112
+
113
+ Returns:
114
+ Cached value or None if not found/not connected
115
+ """
116
+ if not self._connected or not self._redis:
117
+ return None
118
+ try:
119
+ result = await self._redis.get(key)
120
+ return result # type: ignore[return-value]
121
+ except Exception as e:
122
+ logger.debug("redis_get_error", key=key, error=str(e))
123
+ return None
124
+
125
+ async def set(
126
+ self,
127
+ key: str,
128
+ value: str,
129
+ ex: int | None = None,
130
+ ) -> bool:
131
+ """Set value in Redis.
132
+
133
+ Args:
134
+ key: Cache key
135
+ value: Value to cache
136
+ ex: Expiration time in seconds
137
+
138
+ Returns:
139
+ True if successful, False otherwise
140
+ """
141
+ if not self._connected or not self._redis:
142
+ return False
143
+ try:
144
+ await self._redis.set(key, value, ex=ex)
145
+ return True
146
+ except Exception as e:
147
+ logger.debug("redis_set_error", key=key, error=str(e))
148
+ return False
149
+
150
+ async def delete(self, key: str) -> bool:
151
+ """Delete key from Redis.
152
+
153
+ Args:
154
+ key: Cache key to delete
155
+
156
+ Returns:
157
+ True if successful, False otherwise
158
+ """
159
+ if not self._connected or not self._redis:
160
+ return False
161
+ try:
162
+ await self._redis.delete(key)
163
+ return True
164
+ except Exception as e:
165
+ logger.debug("redis_delete_error", key=key, error=str(e))
166
+ return False
167
+
168
+ async def exists(self, key: str) -> bool:
169
+ """Check if key exists in Redis.
170
+
171
+ Args:
172
+ key: Cache key to check
173
+
174
+ Returns:
175
+ True if exists, False otherwise
176
+ """
177
+ if not self._connected or not self._redis:
178
+ return False
179
+ try:
180
+ result = await self._redis.exists(key)
181
+ return bool(result)
182
+ except Exception as e:
183
+ logger.debug("redis_exists_error", key=key, error=str(e))
184
+ return False
185
+
186
+ async def __aenter__(self) -> "RedisClient":
187
+ """Async context manager entry."""
188
+ await self.connect()
189
+ return self
190
+
191
+ async def __aexit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
192
+ """Async context manager exit."""
193
+ await self.disconnect()
@@ -0,0 +1,18 @@
1
+ """Interface contracts for bot_knows.
2
+
3
+ This module exports all Protocol-based interfaces for dependency injection.
4
+ """
5
+
6
+ from bot_knows.interfaces.embedding import EmbeddingServiceInterface
7
+ from bot_knows.interfaces.graph import GraphServiceInterface
8
+ from bot_knows.interfaces.llm import LLMInterface
9
+ from bot_knows.interfaces.recall import RecallServiceInterface
10
+ from bot_knows.interfaces.storage import StorageInterface
11
+
12
+ __all__ = [
13
+ "EmbeddingServiceInterface",
14
+ "GraphServiceInterface",
15
+ "LLMInterface",
16
+ "RecallServiceInterface",
17
+ "StorageInterface",
18
+ ]
@@ -0,0 +1,55 @@
1
+ """Embedding service interface for bot_knows.
2
+
3
+ This module defines the Protocol for embedding generation services.
4
+ """
5
+
6
+ from typing import ClassVar, Protocol, runtime_checkable
7
+
8
+ __all__ = [
9
+ "EmbeddingServiceInterface",
10
+ ]
11
+
12
+
13
+ @runtime_checkable
14
+ class EmbeddingServiceInterface(Protocol):
15
+ """Contract for embedding generation services.
16
+
17
+ Implementations should provide methods for generating
18
+ embeddings from text and computing similarity scores.
19
+ """
20
+
21
+ config_class: ClassVar[type | None] = None
22
+
23
+ async def embed(self, text: str) -> list[float]:
24
+ """Generate embedding vector for text.
25
+
26
+ Args:
27
+ text: Input text to embed
28
+
29
+ Returns:
30
+ Embedding vector as list of floats
31
+ """
32
+ ...
33
+
34
+ async def embed_batch(self, texts: list[str]) -> list[list[float]]:
35
+ """Generate embeddings for multiple texts.
36
+
37
+ Args:
38
+ texts: List of input texts
39
+
40
+ Returns:
41
+ List of embedding vectors
42
+ """
43
+ ...
44
+
45
+ async def similarity(self, embedding1: list[float], embedding2: list[float]) -> float:
46
+ """Compute cosine similarity between embeddings.
47
+
48
+ Args:
49
+ embedding1: First embedding vector
50
+ embedding2: Second embedding vector
51
+
52
+ Returns:
53
+ Similarity score between 0.0 and 1.0
54
+ """
55
+ ...
@@ -0,0 +1,194 @@
1
+ """Graph service interface for bot_knows.
2
+
3
+ This module defines the Protocol for graph database operations.
4
+ """
5
+
6
+ from typing import Any, ClassVar, Protocol, runtime_checkable
7
+
8
+ from bot_knows.models.chat import ChatDTO
9
+ from bot_knows.models.message import MessageDTO
10
+ from bot_knows.models.topic import TopicDTO, TopicEvidenceDTO
11
+
12
+ __all__ = [
13
+ "GraphServiceInterface",
14
+ ]
15
+
16
+
17
@runtime_checkable
class GraphServiceInterface(Protocol):
    """Structural contract for the knowledge-graph backend.

    A conforming implementation can create Chat, Message and Topic nodes,
    connect them with typed edges, and answer the read queries declared
    at the bottom of this protocol.
    """

    # Presumably the settings class used to configure an implementation;
    # the protocol itself only fixes the default of None — confirm usage.
    config_class: ClassVar[type | None] = None

    # ------------------------------------------------------------------
    # Nodes
    # ------------------------------------------------------------------
    async def create_chat_node(self, chat: ChatDTO) -> str:
        """Add a Chat node to the graph.

        Args:
            chat: The chat to persist.

        Returns:
            Node ID
        """
        ...

    async def create_message_node(self, message: MessageDTO) -> str:
        """Add a Message node to the graph.

        Args:
            message: The message to persist.

        Returns:
            Node ID
        """
        ...

    async def create_topic_node(self, topic: TopicDTO) -> str:
        """Add a Topic node to the graph.

        Args:
            topic: The topic to persist.

        Returns:
            Node ID
        """
        ...

    async def update_topic_node(self, topic: TopicDTO) -> None:
        """Overwrite an existing Topic node with new data.

        Args:
            topic: The updated topic data.
        """
        ...

    # ------------------------------------------------------------------
    # Edges
    # ------------------------------------------------------------------
    async def create_is_part_of_edge(
        self,
        message_id: str,
        chat_id: str,
    ) -> None:
        """Link a message to its chat: (Message)-[:IS_PART_OF]->(Chat).

        Args:
            message_id: ID of the source message.
            chat_id: ID of the target chat.
        """
        ...

    async def create_follows_after_edge(self, message_id: str, previous_message_id: str) -> None:
        """Link a message to its predecessor.

        Shape: (Message)-[:FOLLOWS_AFTER]->(Message).

        Args:
            message_id: ID of the current message.
            previous_message_id: ID of the message that came before it.
        """
        ...

    async def create_is_supported_by_edge(
        self,
        topic_id: str,
        message_id: str,
        evidence: TopicEvidenceDTO,
    ) -> None:
        """Link a topic to a message that supports it.

        Shape: (Topic)-[:IS_SUPPORTED_BY {evidence properties}]->(Message)

        Args:
            topic_id: ID of the topic.
            message_id: ID of the supporting message.
            evidence: Evidence data stored as properties on the edge.
        """
        ...

    async def create_potentially_duplicate_of_edge(
        self,
        topic_id: str,
        existing_topic_id: str,
        similarity: float,
    ) -> None:
        """Flag two topics as possible duplicates (POTENTIALLY_DUPLICATE_OF).

        Args:
            topic_id: ID of the new topic.
            existing_topic_id: ID of the existing, similar topic.
            similarity: Similarity score between the two topics.
        """
        ...

    async def create_relates_to_edge(
        self,
        topic_id: str,
        related_topic_id: str,
        relation_type: str,
        weight: float,
    ) -> None:
        """Connect two topics with a RELATES_TO edge.

        Args:
            topic_id: ID of the source topic.
            related_topic_id: ID of the related topic.
            relation_type: The kind of relationship.
            weight: Relationship weight (0.0 - 1.0).
        """
        ...

    # ------------------------------------------------------------------
    # Queries
    # ------------------------------------------------------------------
    async def get_messages_for_chat(self, chat_id: str) -> list[MessageDTO]:
        """Fetch every message of a chat, ordered by FOLLOWS_AFTER.

        Args:
            chat_id: ID of the chat to read.

        Returns:
            The chat's messages in order.
        """
        ...

    async def get_related_topics(self, topic_id: str, limit: int = 10) -> list[tuple[str, float]]:
        """Fetch the topics related to the given one.

        Args:
            topic_id: ID of the topic whose relations are wanted.
            limit: Upper bound on the number of results.

        Returns:
            List of (topic_id, weight) tuples
        """
        ...

    async def get_topic_evidence(self, topic_id: str) -> list[dict[str, Any]]:
        """Fetch all evidence attached to a topic's IS_SUPPORTED_BY edges.

        Args:
            topic_id: ID of the topic.

        Returns:
            One dict of edge properties per piece of evidence.
        """
        ...

    async def get_chat_topics(self, chat_id: str) -> list[str]:
        """Fetch the IDs of all topics tied to a chat's messages.

        Traverses: (Chat)<-[:IS_PART_OF]-(Message)<-[:IS_SUPPORTED_BY]-(Topic)

        Args:
            chat_id: ID of the chat to read.

        Returns:
            List of unique topic IDs
        """
        ...
@@ -0,0 +1,70 @@
1
+ """LLM interface for bot_knows.
2
+
3
+ This module defines the Protocol for LLM interactions
4
+ including chat classification and topic extraction.
5
+ """
6
+
7
+ from typing import ClassVar, Protocol, runtime_checkable
8
+
9
+ from bot_knows.models.chat import ChatCategory
10
+
11
+ __all__ = [
12
+ "LLMInterface",
13
+ ]
14
+
15
+
16
@runtime_checkable
class LLMInterface(Protocol):
    """Structural contract for the LLM backend.

    A conforming implementation can classify a chat, pull topic candidates
    out of a user/assistant exchange, and canonicalise topic names.
    """

    # Presumably the settings class used to configure an implementation;
    # the protocol itself only fixes the default of None — confirm usage.
    config_class: ClassVar[type | None] = None

    async def classify_chat(
        self, first_pair: tuple[str, str], last_pair: tuple[str, str]
    ) -> tuple[ChatCategory, list[str]]:
        """Assign a category and tags to a chat.

        Only the opening and closing user-assistant exchanges are supplied;
        the category and tags are derived from those two pairs.

        Args:
            first_pair: (user_message, assistant_message) from start of chat
            last_pair: (user_message, assistant_message) from end of chat

        Returns:
            Tuple of (ChatCategory, list of tags)
        """
        ...

    async def extract_topics(self, user_content: str, assistant_content: str) -> list[tuple[str, float]]:
        """Pull topic candidates out of one message pair.

        Args:
            user_content: Text of the user's message.
            assistant_content: Text of the assistant's reply.

        Returns:
            List of (topic_name, confidence) tuples
        """
        ...

    async def normalize_topic_name(self, extracted_name: str) -> str:
        """Map a raw topic name onto its canonical form.

        Args:
            extracted_name: The topic name as extracted, before cleanup.

        Returns:
            The normalized, canonical topic name.
        """
        ...
@@ -0,0 +1,92 @@
1
+ """Recall service interface for bot_knows.
2
+
3
+ This module defines the Protocol for recall/spaced repetition operations.
4
+ """
5
+
6
+ from typing import Literal, Protocol, runtime_checkable
7
+
8
+ from bot_knows.models.recall import RecallItemDTO, TopicRecallStateDTO
9
+
10
+ __all__ = [
11
+ "RecallServiceInterface",
12
+ ]
13
+
14
+
15
@runtime_checkable
class RecallServiceInterface(Protocol):
    """Structural contract for the recall / spaced-repetition backend.

    A conforming implementation can strengthen a topic when it is seen
    again, weaken it as time passes, and list the topics that are due
    for review.
    """

    async def reinforce(
        self,
        topic_id: str,
        confidence: float,
        novelty_factor: float = 1.0,
        context: Literal["passive", "active", "recall"] = "passive",
    ) -> TopicRecallStateDTO:
        """Strengthen the recall state of a topic.

        Formula:
            delta = confidence * novelty_factor * context_weight
            strength = min(1.0, strength + delta)
            stability += k * confidence

        The constant ``k`` is not defined by this interface; it is left to
        the implementation.

        Args:
            topic_id: ID of the topic being reinforced.
            confidence: Evidence confidence (0.0 - 1.0)
            novelty_factor: How novel this reinforcement is
            context: Reinforcement context (passive=0.2, active=0.6, recall=1.0)

        Returns:
            The recall state after the update.
        """
        ...

    async def apply_decay(self, topic_id: str, current_time: int | None = None) -> TopicRecallStateDTO:
        """Weaken a topic according to the time elapsed.

        Formula:
            strength *= exp(-Δt / (stability * 86400))

        Args:
            topic_id: ID of the topic to decay.
            current_time: Current time in epoch seconds (default: now)

        Returns:
            The recall state after the update.
        """
        ...

    async def batch_decay_update(self) -> int:
        """Run decay over every topic (intended as a scheduled task).

        Returns:
            How many topics were updated.
        """
        ...

    async def get_due_topics(self, threshold: float = 0.3, limit: int = 10) -> list[RecallItemDTO]:
        """List the topics that are due for a recall review.

        A topic counts as due when its strength is below ``threshold``.
        Results are sorted by due_score (higher = more urgent).

        Args:
            threshold: Strength threshold for being "due"
            limit: Upper bound on the number of topics returned.

        Returns:
            List of RecallItemDTO sorted by priority
        """
        ...