dataknobs_bots-0.2.4-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. dataknobs_bots/__init__.py +42 -0
  2. dataknobs_bots/api/__init__.py +42 -0
  3. dataknobs_bots/api/dependencies.py +140 -0
  4. dataknobs_bots/api/exceptions.py +289 -0
  5. dataknobs_bots/bot/__init__.py +15 -0
  6. dataknobs_bots/bot/base.py +1091 -0
  7. dataknobs_bots/bot/context.py +102 -0
  8. dataknobs_bots/bot/manager.py +430 -0
  9. dataknobs_bots/bot/registry.py +629 -0
  10. dataknobs_bots/config/__init__.py +39 -0
  11. dataknobs_bots/config/resolution.py +353 -0
  12. dataknobs_bots/knowledge/__init__.py +82 -0
  13. dataknobs_bots/knowledge/query/__init__.py +25 -0
  14. dataknobs_bots/knowledge/query/expander.py +262 -0
  15. dataknobs_bots/knowledge/query/transformer.py +288 -0
  16. dataknobs_bots/knowledge/rag.py +738 -0
  17. dataknobs_bots/knowledge/retrieval/__init__.py +23 -0
  18. dataknobs_bots/knowledge/retrieval/formatter.py +249 -0
  19. dataknobs_bots/knowledge/retrieval/merger.py +279 -0
  20. dataknobs_bots/memory/__init__.py +56 -0
  21. dataknobs_bots/memory/base.py +38 -0
  22. dataknobs_bots/memory/buffer.py +58 -0
  23. dataknobs_bots/memory/vector.py +188 -0
  24. dataknobs_bots/middleware/__init__.py +11 -0
  25. dataknobs_bots/middleware/base.py +92 -0
  26. dataknobs_bots/middleware/cost.py +421 -0
  27. dataknobs_bots/middleware/logging.py +184 -0
  28. dataknobs_bots/reasoning/__init__.py +65 -0
  29. dataknobs_bots/reasoning/base.py +50 -0
  30. dataknobs_bots/reasoning/react.py +299 -0
  31. dataknobs_bots/reasoning/simple.py +51 -0
  32. dataknobs_bots/registry/__init__.py +41 -0
  33. dataknobs_bots/registry/backend.py +181 -0
  34. dataknobs_bots/registry/memory.py +244 -0
  35. dataknobs_bots/registry/models.py +102 -0
  36. dataknobs_bots/registry/portability.py +210 -0
  37. dataknobs_bots/tools/__init__.py +5 -0
  38. dataknobs_bots/tools/knowledge_search.py +113 -0
  39. dataknobs_bots/utils/__init__.py +1 -0
  40. dataknobs_bots-0.2.4.dist-info/METADATA +591 -0
  41. dataknobs_bots-0.2.4.dist-info/RECORD +42 -0
  42. dataknobs_bots-0.2.4.dist-info/WHEEL +4 -0
@@ -0,0 +1,188 @@
+"""Vector-based semantic memory implementation."""
+
+from datetime import datetime
+from typing import Any
+from uuid import uuid4
+
+import numpy as np
+
+from .base import Memory
+
+
+class VectorMemory(Memory):
+    """Vector-based semantic memory using dataknobs-data vector stores.
+
+    This implementation stores messages with vector embeddings and retrieves
+    relevant messages based on semantic similarity.
+
+    Attributes:
+        vector_store: Vector store backend from dataknobs_data.vector.stores
+        embedding_provider: LLM provider for generating embeddings
+        max_results: Maximum number of results to return
+        similarity_threshold: Minimum similarity score for results
+    """
+
+    def __init__(
+        self,
+        vector_store: Any,
+        embedding_provider: Any,
+        max_results: int = 5,
+        similarity_threshold: float = 0.7,
+    ):
+        """Initialize vector memory.
+
+        Args:
+            vector_store: Vector store backend instance
+            embedding_provider: LLM provider with embed() method
+            max_results: Maximum number of similar messages to return
+            similarity_threshold: Minimum similarity score (0-1)
+        """
+        self.vector_store = vector_store
+        self.embedding_provider = embedding_provider
+        self.max_results = max_results
+        self.similarity_threshold = similarity_threshold
+
+    @classmethod
+    async def from_config(cls, config: dict[str, Any]) -> "VectorMemory":
+        """Create VectorMemory from configuration.
+
+        Args:
+            config: Configuration dictionary with:
+                - backend: Vector store backend type
+                - dimension: Vector dimension (optional, depends on backend)
+                - collection: Collection/index name (optional)
+                - embedding_provider: LLM provider name for embeddings
+                - embedding_model: Model to use for embeddings
+                - max_results: Max results to return (default 5)
+                - similarity_threshold: Min similarity score (default 0.7)
+
+        Returns:
+            Configured VectorMemory instance
+        """
+        from dataknobs_data.vector.stores import VectorStoreFactory
+        from dataknobs_llm.llm import LLMProviderFactory
+
+        # Create vector store
+        store_config = {
+            "backend": config.get("backend", "memory"),
+            "dimensions": config.get("dimension", 1536),
+        }
+
+        # Add optional store parameters
+        if "collection" in config:
+            store_config["collection_name"] = config["collection"]
+        if "persist_path" in config:
+            store_config["persist_path"] = config["persist_path"]
+
+        # Merge any additional store_params
+        if "store_params" in config:
+            store_config.update(config["store_params"])
+
+        factory = VectorStoreFactory()
+        vector_store = factory.create(**store_config)
+        await vector_store.initialize()
+
+        # Create embedding provider
+        llm_factory = LLMProviderFactory(is_async=True)
+        embedding_provider = llm_factory.create({
+            "provider": config.get("embedding_provider", "openai"),
+            "model": config.get("embedding_model", "text-embedding-ada-002"),
+        })
+        await embedding_provider.initialize()
+
+        return cls(
+            vector_store=vector_store,
+            embedding_provider=embedding_provider,
+            max_results=config.get("max_results", 5),
+            similarity_threshold=config.get("similarity_threshold", 0.7),
+        )
+
+    async def add_message(
+        self, content: str, role: str, metadata: dict[str, Any] | None = None
+    ) -> None:
+        """Add message with vector embedding.
+
+        Args:
+            content: Message content
+            role: Message role
+            metadata: Optional metadata
+        """
+        # Generate embedding
+        embedding = await self.embedding_provider.embed(content)
+
+        # Convert to numpy array if needed
+        if not isinstance(embedding, np.ndarray):
+            embedding = np.array(embedding, dtype=np.float32)
+
+        # Prepare metadata
+        msg_metadata = {
+            "content": content,
+            "role": role,
+            "timestamp": datetime.now().isoformat(),
+            "id": str(uuid4()),
+        }
+        if metadata:
+            msg_metadata.update(metadata)
+
+        # Store in vector store
+        await self.vector_store.add_vectors(
+            vectors=[embedding], ids=[msg_metadata["id"]], metadata=[msg_metadata]
+        )
+
+    async def get_context(self, current_message: str) -> list[dict[str, Any]]:
+        """Get semantically relevant messages.
+
+        Args:
+            current_message: Current message to find context for
+
+        Returns:
+            List of relevant message dictionaries sorted by similarity
+        """
+        # Generate query embedding
+        query_embedding = await self.embedding_provider.embed(current_message)
+
+        # Convert to numpy array if needed
+        if not isinstance(query_embedding, np.ndarray):
+            query_embedding = np.array(query_embedding, dtype=np.float32)
+
+        # Search for similar vectors
+        results = await self.vector_store.search(
+            query_vector=query_embedding,
+            k=self.max_results,
+            include_metadata=True,
+        )
+
+        # Format results
+        context = []
+        for _vector_id, similarity, msg_metadata in results:
+            if msg_metadata and similarity >= self.similarity_threshold:
+                context.append(
+                    {
+                        "content": msg_metadata.get("content", ""),
+                        "role": msg_metadata.get("role", ""),
+                        "similarity": similarity,
+                        "metadata": msg_metadata,
+                    }
+                )
+
+        return context
+
+    async def clear(self) -> None:
+        """Clear all vectors from memory.
+
+        Note: This deletes all vectors in the store. Use with caution
+        if the store is shared across multiple memory instances.
+        """
+        # Get all vector IDs and delete them
+        # Note: This is a simplified implementation
+        # In production, you might want to track IDs separately
+        # or use collection-level clearing if supported
+        if hasattr(self.vector_store, "clear"):
+            await self.vector_store.clear()
+        else:
+            # Fallback: delete individual vectors if we track them
+            # For now, we'll raise an error suggesting to use a new instance
+            raise NotImplementedError(
+                "Vector store does not support clearing. "
+                "Consider creating a new VectorMemory instance with a fresh collection."
+            )
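
As a usage illustration for the configuration keys documented in from_config() above, here is a minimal, hypothetical sketch. It is not part of the package: it assumes the in-process "memory" backend, the dataknobs_data and dataknobs_llm dependencies being installed, and an OpenAI API key in the environment.

```python
import asyncio

from dataknobs_bots.memory.vector import VectorMemory


async def main() -> None:
    # Hypothetical config; keys mirror the from_config() docstring above.
    memory = await VectorMemory.from_config({
        "backend": "memory",                 # in-process store, no persistence
        "dimension": 1536,                   # must match the embedding model
        "embedding_provider": "openai",
        "embedding_model": "text-embedding-ada-002",
        "max_results": 3,
        "similarity_threshold": 0.6,
    })

    # Store a few turns, then retrieve the semantically closest ones.
    await memory.add_message("My favorite color is teal.", role="user")
    await memory.add_message("Noted! Teal it is.", role="assistant")
    context = await memory.get_context("What color do I like?")
    for msg in context:
        print(f"{msg['similarity']:.2f} [{msg['role']}] {msg['content']}")


asyncio.run(main())
```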
@@ -0,0 +1,11 @@
+"""Middleware components for bot request/response lifecycle."""
+
+from .base import Middleware
+from .cost import CostTrackingMiddleware
+from .logging import LoggingMiddleware
+
+__all__ = [
+    "Middleware",
+    "CostTrackingMiddleware",
+    "LoggingMiddleware",
+]
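
Because the package `__init__` re-exports the concrete classes, callers can import everything from `dataknobs_bots.middleware` directly. A small sketch follows; the constructor signatures of the concrete middlewares live in cost.py and logging.py (not reproduced in this section), so the zero-argument construction below is an assumption.

```python
# Hypothetical usage of the re-exported names; zero-argument constructors
# are an assumption, since cost.py and logging.py are not shown here.
from dataknobs_bots.middleware import (
    CostTrackingMiddleware,
    LoggingMiddleware,
    Middleware,
)

stack: list[Middleware] = [LoggingMiddleware(), CostTrackingMiddleware()]
```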
@@ -0,0 +1,92 @@
+"""Base middleware interface for bot request/response lifecycle."""
+
+from abc import ABC, abstractmethod
+from typing import Any
+
+from dataknobs_bots.bot.context import BotContext
+
+
+class Middleware(ABC):
+    """Abstract base class for bot middleware.
+
+    Middleware provides hooks into the bot request/response lifecycle:
+    - before_message: Called before processing user message
+    - after_message: Called after generating bot response (non-streaming)
+    - post_stream: Called after streaming response completes
+    - on_error: Called when an error occurs
+
+    Example:
+        ```python
+        class MyMiddleware(Middleware):
+            async def before_message(self, message: str, context: BotContext) -> None:
+                print(f"Processing: {message}")
+
+            async def after_message(
+                self, response: str, context: BotContext, **kwargs: Any
+            ) -> None:
+                print(f"Response: {response}")
+
+            async def post_stream(
+                self, message: str, response: str, context: BotContext
+            ) -> None:
+                print(f"Streamed response to '{message}': {response}")
+
+            async def on_error(
+                self, error: Exception, message: str, context: BotContext
+            ) -> None:
+                print(f"Error: {error}")
+        ```
+    """
+
+    @abstractmethod
+    async def before_message(self, message: str, context: BotContext) -> None:
+        """Called before processing user message.
+
+        Args:
+            message: User's input message
+            context: Bot context with conversation and user info
+        """
+        ...
+
+    @abstractmethod
+    async def after_message(
+        self, response: str, context: BotContext, **kwargs: Any
+    ) -> None:
+        """Called after generating bot response (non-streaming).
+
+        Args:
+            response: Bot's generated response
+            context: Bot context
+            **kwargs: Additional data (e.g., tokens_used, response_time_ms, provider, model)
+        """
+        ...
+
+    @abstractmethod
+    async def post_stream(
+        self, message: str, response: str, context: BotContext
+    ) -> None:
+        """Called after streaming response completes.
+
+        This hook is called after stream_chat() finishes streaming all chunks.
+        It provides both the original user message and the complete accumulated
+        response, useful for logging, analytics, or post-processing.
+
+        Args:
+            message: Original user message that triggered the stream
+            response: Complete accumulated response from streaming
+            context: Bot context
+        """
+        ...
+
+    @abstractmethod
+    async def on_error(
+        self, error: Exception, message: str, context: BotContext
+    ) -> None:
+        """Called when an error occurs during message processing.
+
+        Args:
+            error: The exception that occurred
+            message: User message that caused the error
+            context: Bot context
+        """
+        ...
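
The class docstring above fixes the hook order but not the dispatch code, which lives in dataknobs_bots/bot/base.py and is not shown in this diff. The sketch below is therefore an assumption about how a bot might drive these hooks, not the package's actual implementation: before_message for every middleware, then generation, then after_message on success or on_error on failure.

```python
# Hypothetical dispatcher illustrating the hook order described in the
# Middleware docstring; the real wiring in bot/base.py may differ.
from collections.abc import Awaitable, Callable

from dataknobs_bots.bot.context import BotContext
from dataknobs_bots.middleware import Middleware


async def run_with_middleware(
    middlewares: list[Middleware],
    message: str,
    context: BotContext,
    generate: Callable[[str, BotContext], Awaitable[str]],
) -> str:
    for mw in middlewares:
        await mw.before_message(message, context)
    try:
        response = await generate(message, context)
    except Exception as error:
        # Let every middleware observe the failure before re-raising.
        for mw in middlewares:
            await mw.on_error(error, message, context)
        raise
    for mw in middlewares:
        await mw.after_message(response, context)
    return response
```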