dataknobs_bots-0.2.4-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dataknobs_bots/__init__.py +42 -0
- dataknobs_bots/api/__init__.py +42 -0
- dataknobs_bots/api/dependencies.py +140 -0
- dataknobs_bots/api/exceptions.py +289 -0
- dataknobs_bots/bot/__init__.py +15 -0
- dataknobs_bots/bot/base.py +1091 -0
- dataknobs_bots/bot/context.py +102 -0
- dataknobs_bots/bot/manager.py +430 -0
- dataknobs_bots/bot/registry.py +629 -0
- dataknobs_bots/config/__init__.py +39 -0
- dataknobs_bots/config/resolution.py +353 -0
- dataknobs_bots/knowledge/__init__.py +82 -0
- dataknobs_bots/knowledge/query/__init__.py +25 -0
- dataknobs_bots/knowledge/query/expander.py +262 -0
- dataknobs_bots/knowledge/query/transformer.py +288 -0
- dataknobs_bots/knowledge/rag.py +738 -0
- dataknobs_bots/knowledge/retrieval/__init__.py +23 -0
- dataknobs_bots/knowledge/retrieval/formatter.py +249 -0
- dataknobs_bots/knowledge/retrieval/merger.py +279 -0
- dataknobs_bots/memory/__init__.py +56 -0
- dataknobs_bots/memory/base.py +38 -0
- dataknobs_bots/memory/buffer.py +58 -0
- dataknobs_bots/memory/vector.py +188 -0
- dataknobs_bots/middleware/__init__.py +11 -0
- dataknobs_bots/middleware/base.py +92 -0
- dataknobs_bots/middleware/cost.py +421 -0
- dataknobs_bots/middleware/logging.py +184 -0
- dataknobs_bots/reasoning/__init__.py +65 -0
- dataknobs_bots/reasoning/base.py +50 -0
- dataknobs_bots/reasoning/react.py +299 -0
- dataknobs_bots/reasoning/simple.py +51 -0
- dataknobs_bots/registry/__init__.py +41 -0
- dataknobs_bots/registry/backend.py +181 -0
- dataknobs_bots/registry/memory.py +244 -0
- dataknobs_bots/registry/models.py +102 -0
- dataknobs_bots/registry/portability.py +210 -0
- dataknobs_bots/tools/__init__.py +5 -0
- dataknobs_bots/tools/knowledge_search.py +113 -0
- dataknobs_bots/utils/__init__.py +1 -0
- dataknobs_bots-0.2.4.dist-info/METADATA +591 -0
- dataknobs_bots-0.2.4.dist-info/RECORD +42 -0
- dataknobs_bots-0.2.4.dist-info/WHEEL +4 -0
dataknobs_bots/memory/vector.py
@@ -0,0 +1,188 @@
+"""Vector-based semantic memory implementation."""
+
+from datetime import datetime
+from typing import Any
+from uuid import uuid4
+
+import numpy as np
+
+from .base import Memory
+
+
+class VectorMemory(Memory):
+    """Vector-based semantic memory using dataknobs-data vector stores.
+
+    This implementation stores messages with vector embeddings and retrieves
+    relevant messages based on semantic similarity.
+
+    Attributes:
+        vector_store: Vector store backend from dataknobs_data.vector.stores
+        embedding_provider: LLM provider for generating embeddings
+        max_results: Maximum number of results to return
+        similarity_threshold: Minimum similarity score for results
+    """
+
+    def __init__(
+        self,
+        vector_store: Any,
+        embedding_provider: Any,
+        max_results: int = 5,
+        similarity_threshold: float = 0.7,
+    ):
+        """Initialize vector memory.
+
+        Args:
+            vector_store: Vector store backend instance
+            embedding_provider: LLM provider with embed() method
+            max_results: Maximum number of similar messages to return
+            similarity_threshold: Minimum similarity score (0-1)
+        """
+        self.vector_store = vector_store
+        self.embedding_provider = embedding_provider
+        self.max_results = max_results
+        self.similarity_threshold = similarity_threshold
+
+    @classmethod
+    async def from_config(cls, config: dict[str, Any]) -> "VectorMemory":
+        """Create VectorMemory from configuration.
+
+        Args:
+            config: Configuration dictionary with:
+                - backend: Vector store backend type
+                - dimension: Vector dimension (optional, depends on backend)
+                - collection: Collection/index name (optional)
+                - embedding_provider: LLM provider name for embeddings
+                - embedding_model: Model to use for embeddings
+                - max_results: Max results to return (default 5)
+                - similarity_threshold: Min similarity score (default 0.7)
+
+        Returns:
+            Configured VectorMemory instance
+        """
+        from dataknobs_data.vector.stores import VectorStoreFactory
+        from dataknobs_llm.llm import LLMProviderFactory
+
+        # Create vector store
+        store_config = {
+            "backend": config.get("backend", "memory"),
+            "dimensions": config.get("dimension", 1536),
+        }
+
+        # Add optional store parameters
+        if "collection" in config:
+            store_config["collection_name"] = config["collection"]
+        if "persist_path" in config:
+            store_config["persist_path"] = config["persist_path"]
+
+        # Merge any additional store_params
+        if "store_params" in config:
+            store_config.update(config["store_params"])
+
+        factory = VectorStoreFactory()
+        vector_store = factory.create(**store_config)
+        await vector_store.initialize()
+
+        # Create embedding provider
+        llm_factory = LLMProviderFactory(is_async=True)
+        embedding_provider = llm_factory.create({
+            "provider": config.get("embedding_provider", "openai"),
+            "model": config.get("embedding_model", "text-embedding-ada-002"),
+        })
+        await embedding_provider.initialize()
+
+        return cls(
+            vector_store=vector_store,
+            embedding_provider=embedding_provider,
+            max_results=config.get("max_results", 5),
+            similarity_threshold=config.get("similarity_threshold", 0.7),
+        )
+
+    async def add_message(
+        self, content: str, role: str, metadata: dict[str, Any] | None = None
+    ) -> None:
+        """Add message with vector embedding.
+
+        Args:
+            content: Message content
+            role: Message role
+            metadata: Optional metadata
+        """
+        # Generate embedding
+        embedding = await self.embedding_provider.embed(content)
+
+        # Convert to numpy array if needed
+        if not isinstance(embedding, np.ndarray):
+            embedding = np.array(embedding, dtype=np.float32)
+
+        # Prepare metadata
+        msg_metadata = {
+            "content": content,
+            "role": role,
+            "timestamp": datetime.now().isoformat(),
+            "id": str(uuid4()),
+        }
+        if metadata:
+            msg_metadata.update(metadata)
+
+        # Store in vector store
+        await self.vector_store.add_vectors(
+            vectors=[embedding], ids=[msg_metadata["id"]], metadata=[msg_metadata]
+        )
+
+    async def get_context(self, current_message: str) -> list[dict[str, Any]]:
+        """Get semantically relevant messages.
+
+        Args:
+            current_message: Current message to find context for
+
+        Returns:
+            List of relevant message dictionaries sorted by similarity
+        """
+        # Generate query embedding
+        query_embedding = await self.embedding_provider.embed(current_message)
+
+        # Convert to numpy array if needed
+        if not isinstance(query_embedding, np.ndarray):
+            query_embedding = np.array(query_embedding, dtype=np.float32)
+
+        # Search for similar vectors
+        results = await self.vector_store.search(
+            query_vector=query_embedding,
+            k=self.max_results,
+            include_metadata=True,
+        )
+
+        # Format results
+        context = []
+        for _vector_id, similarity, msg_metadata in results:
+            if msg_metadata and similarity >= self.similarity_threshold:
+                context.append(
+                    {
+                        "content": msg_metadata.get("content", ""),
+                        "role": msg_metadata.get("role", ""),
+                        "similarity": similarity,
+                        "metadata": msg_metadata,
+                    }
+                )
+
+        return context
+
+    async def clear(self) -> None:
+        """Clear all vectors from memory.
+
+        Note: This deletes all vectors in the store. Use with caution
+        if the store is shared across multiple memory instances.
+        """
+        # Get all vector IDs and delete them
+        # Note: This is a simplified implementation
+        # In production, you might want to track IDs separately
+        # or use collection-level clearing if supported
+        if hasattr(self.vector_store, "clear"):
+            await self.vector_store.clear()
+        else:
+            # Fallback: delete individual vectors if we track them
+            # For now, we'll raise an error suggesting to use a new instance
+            raise NotImplementedError(
+                "Vector store does not support clearing. "
+                "Consider creating a new VectorMemory instance with a fresh collection."
+            )
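A minimal usage sketch for the module above, assuming dataknobs-bots 0.2.4 is installed along with its dataknobs-data and dataknobs-llm companions and that an OpenAI key is available for the default embedding provider. The config keys and method signatures come from the `from_config`, `add_message`, and `get_context` definitions in the hunk; the sample messages are illustrative only.

```python
# Sketch only: exercises VectorMemory as added in dataknobs_bots/memory/vector.py.
# Assumes OPENAI_API_KEY is set for the default "openai" embedding provider.
import asyncio

from dataknobs_bots.memory.vector import VectorMemory


async def main() -> None:
    # Config keys per the from_config() docstring; the "memory" backend and
    # text-embedding-ada-002 model are the defaults shown in the diff.
    memory = await VectorMemory.from_config(
        {
            "backend": "memory",
            "dimension": 1536,
            "max_results": 3,
            "similarity_threshold": 0.6,
        }
    )

    await memory.add_message("The deploy runs at 02:00 UTC.", role="user")
    await memory.add_message("Rollbacks go through the blue/green switch.", role="assistant")

    # Retrieves semantically similar prior messages for the new message.
    context = await memory.get_context("When does the deployment happen?")
    for item in context:
        print(f"{item['similarity']:.2f} [{item['role']}] {item['content']}")


asyncio.run(main())
```

Note that `similarity_threshold` is applied after the store returns its top `k = max_results` hits, so callers may receive fewer than `max_results` messages.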
dataknobs_bots/middleware/__init__.py
@@ -0,0 +1,11 @@
+"""Middleware components for bot request/response lifecycle."""
+
+from .base import Middleware
+from .cost import CostTrackingMiddleware
+from .logging import LoggingMiddleware
+
+__all__ = [
+    "Middleware",
+    "CostTrackingMiddleware",
+    "LoggingMiddleware",
+]
dataknobs_bots/middleware/base.py
@@ -0,0 +1,92 @@
+"""Base middleware interface for bot request/response lifecycle."""
+
+from abc import ABC, abstractmethod
+from typing import Any
+
+from dataknobs_bots.bot.context import BotContext
+
+
+class Middleware(ABC):
+    """Abstract base class for bot middleware.
+
+    Middleware provides hooks into the bot request/response lifecycle:
+    - before_message: Called before processing user message
+    - after_message: Called after generating bot response (non-streaming)
+    - post_stream: Called after streaming response completes
+    - on_error: Called when an error occurs
+
+    Example:
+        ```python
+        class MyMiddleware(Middleware):
+            async def before_message(self, message: str, context: BotContext) -> None:
+                print(f"Processing: {message}")
+
+            async def after_message(
+                self, response: str, context: BotContext, **kwargs: Any
+            ) -> None:
+                print(f"Response: {response}")
+
+            async def post_stream(
+                self, message: str, response: str, context: BotContext
+            ) -> None:
+                print(f"Streamed response to '{message}': {response}")
+
+            async def on_error(
+                self, error: Exception, message: str, context: BotContext
+            ) -> None:
+                print(f"Error: {error}")
+        ```
+    """
+
+    @abstractmethod
+    async def before_message(self, message: str, context: BotContext) -> None:
+        """Called before processing user message.
+
+        Args:
+            message: User's input message
+            context: Bot context with conversation and user info
+        """
+        ...
+
+    @abstractmethod
+    async def after_message(
+        self, response: str, context: BotContext, **kwargs: Any
+    ) -> None:
+        """Called after generating bot response (non-streaming).
+
+        Args:
+            response: Bot's generated response
+            context: Bot context
+            **kwargs: Additional data (e.g., tokens_used, response_time_ms, provider, model)
+        """
+        ...
+
+    @abstractmethod
+    async def post_stream(
+        self, message: str, response: str, context: BotContext
+    ) -> None:
+        """Called after streaming response completes.
+
+        This hook is called after stream_chat() finishes streaming all chunks.
+        It provides both the original user message and the complete accumulated
+        response, useful for logging, analytics, or post-processing.
+
+        Args:
+            message: Original user message that triggered the stream
+            response: Complete accumulated response from streaming
+            context: Bot context
+        """
+        ...
+
+    @abstractmethod
+    async def on_error(
+        self, error: Exception, message: str, context: BotContext
+    ) -> None:
+        """Called when an error occurs during message processing.
+
+        Args:
+            error: The exception that occurred
+            message: User message that caused the error
+            context: Bot context
+        """
+        ...
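The hunk above defines the hooks but not their call sites; the actual dispatch lives in `dataknobs_bots/bot/base.py` (+1091 lines, not expanded in this diff view). The sketch below is a hypothetical driver loop illustrating the ordering the docstrings imply for the non-streaming path; `handle_message` and `run_llm` are stand-ins, not dataknobs-bots APIs.

```python
# Hypothetical sketch of how a bot might drive the Middleware hooks above;
# the real dispatch in dataknobs_bots/bot/base.py is not shown in this diff.
from collections.abc import Awaitable, Callable

from dataknobs_bots.bot.context import BotContext
from dataknobs_bots.middleware import Middleware


async def handle_message(
    message: str,
    context: BotContext,
    middlewares: list[Middleware],
    run_llm: Callable[[str, BotContext], Awaitable[str]],  # stand-in for generation
) -> str:
    # before_message runs for every middleware prior to generation.
    for mw in middlewares:
        await mw.before_message(message, context)
    try:
        response = await run_llm(message, context)
    except Exception as error:
        # on_error gives each middleware a chance to log or record the failure.
        for mw in middlewares:
            await mw.on_error(error, message, context)
        raise
    # Non-streaming path: after_message receives optional kwargs such as
    # tokens_used or response_time_ms, per the docstring above.
    for mw in middlewares:
        await mw.after_message(response, context, response_time_ms=0)
    return response
```

A streaming path would instead accumulate chunks as they are emitted and, once the stream completes, call `post_stream(message, full_response, context)` rather than `after_message`, per the `post_stream` docstring.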