dataknobs-bots 0.2.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dataknobs_bots/__init__.py +42 -0
- dataknobs_bots/api/__init__.py +42 -0
- dataknobs_bots/api/dependencies.py +140 -0
- dataknobs_bots/api/exceptions.py +289 -0
- dataknobs_bots/bot/__init__.py +15 -0
- dataknobs_bots/bot/base.py +1091 -0
- dataknobs_bots/bot/context.py +102 -0
- dataknobs_bots/bot/manager.py +430 -0
- dataknobs_bots/bot/registry.py +629 -0
- dataknobs_bots/config/__init__.py +39 -0
- dataknobs_bots/config/resolution.py +353 -0
- dataknobs_bots/knowledge/__init__.py +82 -0
- dataknobs_bots/knowledge/query/__init__.py +25 -0
- dataknobs_bots/knowledge/query/expander.py +262 -0
- dataknobs_bots/knowledge/query/transformer.py +288 -0
- dataknobs_bots/knowledge/rag.py +738 -0
- dataknobs_bots/knowledge/retrieval/__init__.py +23 -0
- dataknobs_bots/knowledge/retrieval/formatter.py +249 -0
- dataknobs_bots/knowledge/retrieval/merger.py +279 -0
- dataknobs_bots/memory/__init__.py +56 -0
- dataknobs_bots/memory/base.py +38 -0
- dataknobs_bots/memory/buffer.py +58 -0
- dataknobs_bots/memory/vector.py +188 -0
- dataknobs_bots/middleware/__init__.py +11 -0
- dataknobs_bots/middleware/base.py +92 -0
- dataknobs_bots/middleware/cost.py +421 -0
- dataknobs_bots/middleware/logging.py +184 -0
- dataknobs_bots/reasoning/__init__.py +65 -0
- dataknobs_bots/reasoning/base.py +50 -0
- dataknobs_bots/reasoning/react.py +299 -0
- dataknobs_bots/reasoning/simple.py +51 -0
- dataknobs_bots/registry/__init__.py +41 -0
- dataknobs_bots/registry/backend.py +181 -0
- dataknobs_bots/registry/memory.py +244 -0
- dataknobs_bots/registry/models.py +102 -0
- dataknobs_bots/registry/portability.py +210 -0
- dataknobs_bots/tools/__init__.py +5 -0
- dataknobs_bots/tools/knowledge_search.py +113 -0
- dataknobs_bots/utils/__init__.py +1 -0
- dataknobs_bots-0.2.4.dist-info/METADATA +591 -0
- dataknobs_bots-0.2.4.dist-info/RECORD +42 -0
- dataknobs_bots-0.2.4.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,244 @@
|
|
|
1
|
+
"""In-memory implementation of RegistryBackend."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
from datetime import datetime, timezone
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
from .models import Registration
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class InMemoryBackend:
    """In-memory implementation of RegistryBackend.

    Registrations are kept in a plain dict keyed by bot ID, so all state is
    lost when the process exits or when ``close()`` / ``clear()`` is called.

    Suitable for:
    - Testing without database dependencies
    - Single-instance deployments
    - Development environments

    Not suitable for:
    - Multi-instance deployments (state is neither shared nor persisted)
    - Production with persistence requirements

    Concurrency: every method that reads or writes the store holds an
    ``asyncio.Lock``, which serializes coroutines running on one event loop.
    ``asyncio.Lock`` is not thread-safe, so this does not make an instance
    safe to share between OS threads.

    Example:
        ```python
        backend = InMemoryBackend()
        await backend.initialize()

        reg = await backend.register("my-bot", {"llm": {...}})
        print(f"Created at: {reg.created_at}")

        config = await backend.get_config("my-bot")
        print(f"Config: {config}")

        # List all bots
        for reg in await backend.list_active():
            print(f"Bot: {reg.bot_id}")

        # Cleanup
        await backend.close()
        ```
    """

    def __init__(self) -> None:
        """Create an empty, not-yet-initialized backend."""
        self._registrations: dict[str, Registration] = {}
        self._lock = asyncio.Lock()
        self._initialized = False

    async def initialize(self) -> None:
        """Mark the backend as initialized (there is nothing to set up)."""
        self._initialized = True

    async def close(self) -> None:
        """Close the backend, discarding every stored registration."""
        async with self._lock:
            self._registrations.clear()
            self._initialized = False

    async def register(
        self,
        bot_id: str,
        config: dict[str, Any],
        status: str = "active",
    ) -> Registration:
        """Register a new bot or replace an existing registration.

        When ``bot_id`` is already registered, the original ``created_at`` is
        kept; ``updated_at`` and ``last_accessed_at`` are always set to now.

        Args:
            bot_id: Unique bot identifier
            config: Bot configuration dictionary (stored by reference)
            status: Registration status (default: active)

        Returns:
            The Registration that is now stored for ``bot_id``
        """
        async with self._lock:
            now = datetime.now(timezone.utc)
            existing = self._registrations.get(bot_id)

            reg = Registration(
                bot_id=bot_id,
                config=config,
                status=status,
                # An update keeps the original creation time.
                created_at=existing.created_at if existing is not None else now,
                updated_at=now,
                last_accessed_at=now,
            )

            self._registrations[bot_id] = reg
            return reg

    async def get(self, bot_id: str) -> Registration | None:
        """Get a registration and refresh its access time.

        The stored entry is replaced by a copy whose ``last_accessed_at`` is
        the current UTC time; that copy is what gets returned. The status is
        not checked, so inactive registrations are returned too.

        Args:
            bot_id: Bot identifier

        Returns:
            Registration if found, None otherwise
        """
        async with self._lock:
            reg = self._registrations.get(bot_id)
            if reg is None:
                return None

            refreshed = Registration(
                bot_id=reg.bot_id,
                config=reg.config,
                status=reg.status,
                created_at=reg.created_at,
                updated_at=reg.updated_at,
                last_accessed_at=datetime.now(timezone.utc),
            )
            self._registrations[bot_id] = refreshed
            return refreshed

    async def get_config(self, bot_id: str) -> dict[str, Any] | None:
        """Get just the config for a bot.

        Delegates to ``get()``, so the access time is refreshed as well.

        Args:
            bot_id: Bot identifier

        Returns:
            Config dict if found, None otherwise
        """
        reg = await self.get(bot_id)
        return reg.config if reg is not None else None

    async def exists(self, bot_id: str) -> bool:
        """Check whether an active registration exists.

        Args:
            bot_id: Bot identifier

        Returns:
            True if a registration exists and its status is "active"
        """
        async with self._lock:
            reg = self._registrations.get(bot_id)
            return reg is not None and reg.status == "active"

    async def unregister(self, bot_id: str) -> bool:
        """Hard delete a registration.

        Args:
            bot_id: Bot identifier

        Returns:
            True if deleted, False if not found
        """
        async with self._lock:
            if bot_id in self._registrations:
                del self._registrations[bot_id]
                return True
            return False

    async def deactivate(self, bot_id: str) -> bool:
        """Soft delete a registration by setting its status to "inactive".

        ``updated_at`` is set to now; ``created_at`` and ``last_accessed_at``
        are carried over unchanged.

        Args:
            bot_id: Bot identifier

        Returns:
            True if deactivated, False if not found
        """
        async with self._lock:
            reg = self._registrations.get(bot_id)
            if reg is None:
                return False

            self._registrations[bot_id] = Registration(
                bot_id=reg.bot_id,
                config=reg.config,
                status="inactive",
                created_at=reg.created_at,
                updated_at=datetime.now(timezone.utc),
                last_accessed_at=reg.last_accessed_at,
            )
            return True

    async def list_active(self) -> list[Registration]:
        """List active registrations.

        Returns:
            List of Registration objects whose status is "active"
        """
        async with self._lock:
            return [
                reg for reg in self._registrations.values() if reg.status == "active"
            ]

    async def list_all(self) -> list[Registration]:
        """List all registrations regardless of status.

        Returns:
            List of all Registration objects
        """
        async with self._lock:
            return list(self._registrations.values())

    async def list_ids(self) -> list[str]:
        """List the IDs of active bots.

        Returns:
            List of active bot IDs
        """
        async with self._lock:
            return [
                reg.bot_id
                for reg in self._registrations.values()
                if reg.status == "active"
            ]

    async def count(self) -> int:
        """Count active registrations.

        Returns:
            Number of registrations whose status is "active"
        """
        async with self._lock:
            return sum(
                1 for reg in self._registrations.values() if reg.status == "active"
            )

    async def clear(self) -> None:
        """Remove all registrations without changing the initialized flag."""
        async with self._lock:
            self._registrations.clear()

    def __repr__(self) -> str:
        """Return a short summary; the count includes inactive registrations."""
        return f"InMemoryBackend(count={len(self._registrations)})"
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
"""Registration model for bot registry."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass, field
|
|
6
|
+
from datetime import datetime, timezone
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def _utc_now() -> datetime:
    """Return the current time as a timezone-aware UTC datetime."""
    return datetime.now(timezone.utc)


@dataclass
class Registration:
    """A bot configuration together with its lifecycle metadata.

    Registry backends store one of these per bot: the configuration itself
    plus a status and three timestamps. Timestamps that are not supplied
    default to the current UTC time.

    Attributes:
        bot_id: Unique bot identifier
        config: Bot configuration dictionary (should be portable)
        status: Registration status (active, inactive, error)
        created_at: When the registration was created
        updated_at: When the registration was last updated
        last_accessed_at: When the bot was last accessed

    Example:
        ```python
        reg = Registration(
            bot_id="my-bot",
            config={"bot": {"llm": {"$resource": "default", "type": "llm_providers"}}},
        )
        print(f"Bot {reg.bot_id} created at {reg.created_at}")

        # Serialize for storage
        data = reg.to_dict()

        # Restore from storage
        restored = Registration.from_dict(data)
        ```
    """

    bot_id: str
    config: dict[str, Any]
    status: str = "active"
    created_at: datetime = field(default_factory=_utc_now)
    updated_at: datetime = field(default_factory=_utc_now)
    last_accessed_at: datetime = field(default_factory=_utc_now)

    def to_dict(self) -> dict[str, Any]:
        """Serialize to a plain dictionary.

        Returns:
            Dictionary with the same keys as the fields; timestamps are
            ISO 8601 strings, or None when the timestamp is unset
        """

        def as_iso(stamp: datetime | None) -> str | None:
            # Unset (falsy) timestamps serialize as None.
            if stamp:
                return stamp.isoformat()
            return None

        return {
            "bot_id": self.bot_id,
            "config": self.config,
            "status": self.status,
            "created_at": as_iso(self.created_at),
            "updated_at": as_iso(self.updated_at),
            "last_accessed_at": as_iso(self.last_accessed_at),
        }

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> Registration:
        """Build a Registration from a dictionary such as ``to_dict()`` output.

        ``bot_id`` and ``config`` are required. A missing ``status`` defaults
        to "active", and a missing or empty timestamp defaults to the current
        UTC time.

        Args:
            data: Dictionary with registration data

        Returns:
            Registration instance
        """

        def read_stamp(key: str) -> datetime:
            raw = data.get(key)
            if raw:
                return datetime.fromisoformat(raw)
            return datetime.now(timezone.utc)

        # Required keys are read first so a missing one raises KeyError
        # before any timestamp is parsed.
        bot_id = data["bot_id"]
        config = data["config"]
        status = data.get("status", "active")

        return cls(
            bot_id=bot_id,
            config=config,
            status=status,
            created_at=read_stamp("created_at"),
            updated_at=read_stamp("updated_at"),
            last_accessed_at=read_stamp("last_accessed_at"),
        )

    def __repr__(self) -> str:
        """Return a compact summary showing the ID, status and creation time."""
        created = self.created_at.isoformat() if self.created_at else None
        return (
            f"Registration(bot_id={self.bot_id!r}, status={self.status!r}, "
            f"created_at={created})"
        )
|
|
@@ -0,0 +1,210 @@
|
|
|
1
|
+
"""Portability validation utilities for bot configurations.
|
|
2
|
+
|
|
3
|
+
This module provides utilities to validate that bot configurations
|
|
4
|
+
are portable across environments. Portable configs use $resource
|
|
5
|
+
references instead of hardcoded values like local paths or localhost URLs.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import re
|
|
11
|
+
from typing import Any
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class PortabilityError(Exception):
    r"""Signals that a config holds values tied to one machine.

    Values treated as non-portable:
    - Local file paths (/Users/..., /home/..., C:\Users\...)
    - Localhost URLs (localhost:port, 127.0.0.1, 0.0.0.0)

    A portable config refers to such things through $resource references,
    which are resolved at runtime for the environment in use.

    Example:
        ```python
        # This will raise PortabilityError
        validate_portability({
            "storage": {"path": "/Users/dev/data"}  # Local path!
        })

        # This is OK
        validate_portability({
            "storage": {"$resource": "default", "type": "databases"}
        })
        ```
    """
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
# Regexes whose presence suggests a resolved, machine-specific value
# (not portable), each paired with the label used in the issue message.
# They are matched against str(config), i.e. the dict's repr, so Windows
# paths show up with doubled backslashes; `\\+` accepts one or more.
SUSPICIOUS_PATTERNS: list[tuple[str, str]] = [
    (r"/Users/\w+", "macOS home directory"),
    (r"/home/\w+", "Linux home directory"),
    (r"C:\\+Users\\+\w+", "Windows home directory"),  # Matches C:\Users or C:\\Users
    (r"localhost:\d+", "localhost with port"),
    (r"127\.0\.0\.1", "localhost IP"),
    (r"0\.0\.0\.0", "all interfaces IP"),
]

# Patterns that are OK (environment variable placeholders)
SAFE_PATTERNS: list[str] = [
    r"\$\{[^}]+\}",  # ${VAR} or ${VAR:default}
    r"\$[A-Z_][A-Z0-9_]*",  # $VAR
]


def validate_portability(
    config: dict[str, Any],
    raise_on_error: bool = True,
) -> list[str]:
    """Validate that a config is portable (no resolved local values).

    The config is rendered with ``str()`` and scanned for every entry in
    ``SUSPICIOUS_PATTERNS``. Each occurrence is judged on its own: it is
    ignored only when an environment-variable placeholder precedes it inside
    the same quoted value (e.g. ``"${ROOT}/home/app"``). A hard-coded path is
    therefore still reported even if the same text also appears behind a
    placeholder elsewhere in the config.

    Args:
        config: Configuration dictionary to validate
        raise_on_error: If True, raise PortabilityError; otherwise return issues

    Returns:
        List of portability issues found (empty if portable), one entry per
        offending occurrence

    Raises:
        PortabilityError: If non-portable and raise_on_error=True

    Example:
        ```python
        # This will raise PortabilityError
        validate_portability({
            "llm": {"api_key": "sk-..."},  # OK - not a path
            "storage": {"path": "/Users/dev/data"},  # NOT OK - local path
        })

        # Check without raising
        issues = validate_portability(config, raise_on_error=False)
        if issues:
            print(f"Found {len(issues)} portability issues")

        # This is OK - uses $resource references
        validate_portability({
            "llm": {"$resource": "default", "type": "llm_providers"},
            "storage": {"$resource": "db", "type": "databases"},
        })

        # Environment variables are OK
        validate_portability({
            "storage": {"path": "${DATA_PATH}"},  # OK - env var placeholder
        })
        ```
    """
    config_str = str(config)
    issues: list[str] = []

    for pattern, description in SUSPICIOUS_PATTERNS:
        # finditer gives each occurrence's position, so the env-var check
        # can look at the text around this occurrence only.
        for found in re.finditer(pattern, config_str):
            text = found.group(0)
            if not _is_in_safe_pattern(text, config_str, start=found.start()):
                issues.append(f"Found {description}: '{text}'")

    if issues and raise_on_error:
        raise PortabilityError(
            "Config appears to contain resolved local values that would break "
            "portability. Store portable config with $resource references instead.\n"
            "Issues found:\n" + "\n".join(f"  - {issue}" for issue in issues)
        )

    return issues


def _is_in_safe_pattern(
    match: str,
    config_str: str,
    start: int | None = None,
) -> bool:
    """Check if a suspicious match follows an env var placeholder.

    Args:
        match: The suspicious string that was matched
        config_str: The full config string
        start: Index in ``config_str`` where this occurrence of ``match``
            begins. When given, only the text between the nearest preceding
            quote character and ``start`` is inspected. When omitted, the
            legacy behaviour applies: any place in ``config_str`` where a
            placeholder is followed (without an intervening quote) by
            ``match`` counts, regardless of which occurrence it is.

    Returns:
        True if the match appears after an env var pattern in the same
        quoted value
    """
    if start is None:
        # Legacy text-based check, kept for two-argument callers.
        # e.g., "${HOME}/data" contains "/home" but it's inside ${...}
        return any(
            re.search(f"{safe_pattern}[^'\"]*{re.escape(match)}", config_str)
            for safe_pattern in SAFE_PATTERNS
        )

    # Text from the nearest preceding quote up to this occurrence; rfind
    # returns -1 when there is no quote, which makes the slice start at 0.
    last_quote = max(
        config_str.rfind("'", 0, start),
        config_str.rfind('"', 0, start),
    )
    prefix = config_str[last_quote + 1:start]
    return any(re.search(safe_pattern, prefix) for safe_pattern in SAFE_PATTERNS)
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def has_resource_references(config: dict[str, Any]) -> bool:
    """Report whether the config mentions ``$resource`` anywhere.

    This is a plain substring test on ``str(config)``, so the marker is
    found at any nesting depth and in keys or values alike. A config that
    contains it is a portable one that still needs environment resolution
    before use.

    Args:
        config: Configuration dictionary

    Returns:
        True if config contains $resource references

    Example:
        ```python
        # Portable config with $resource refs
        config = {
            "bot": {
                "llm": {"$resource": "default", "type": "llm_providers"},
            }
        }
        assert has_resource_references(config) is True

        # Resolved config (no $resource refs)
        config = {
            "bot": {
                "llm": {"provider": "openai", "model": "gpt-4"},
            }
        }
        assert has_resource_references(config) is False
        ```
    """
    marker = "$resource"
    rendered = str(config)
    return marker in rendered
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
def is_portable(config: dict[str, Any]) -> bool:
    """Decide whether a config looks portable.

    The config counts as portable when either of these holds:
    - It contains $resource references (for late binding), or
    - It contains no suspicious local values

    The $resource check comes first and short-circuits, so a config with
    $resource references is reported as portable without being scanned for
    local values.

    Args:
        config: Configuration dictionary

    Returns:
        True if config appears to be portable

    Example:
        ```python
        # Portable: uses $resource
        assert is_portable({"llm": {"$resource": "default"}}) is True

        # Portable: no local paths
        assert is_portable({"llm": {"provider": "openai"}}) is True

        # Not portable: contains local path
        assert is_portable({"path": "/Users/dev/data"}) is False
        ```
    """
    # $resource refs are resolved later, so their presence is enough;
    # otherwise the config is portable only if the scan finds nothing.
    return has_resource_references(config) or not validate_portability(
        config, raise_on_error=False
    )
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
"""Knowledge search tool for RAG integration."""
|
|
2
|
+
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
from dataknobs_llm.tools import Tool
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class KnowledgeSearchTool(Tool):
    """Tool that lets an LLM query the bot's knowledge base.

    The tool forwards a text query to the wrapped knowledge base and returns
    the matching chunks in a compact dictionary form.

    Example:
        ```python
        # Create tool with knowledge base
        tool = KnowledgeSearchTool(knowledge_base=kb)

        # Register with bot
        bot.tool_registry.register_tool(tool)

        # LLM can now call the tool
        results = await tool.execute(
            query="How do I configure the database?",
            max_results=3
        )
        ```
    """

    def __init__(self, knowledge_base: Any, name: str = "knowledge_search"):
        """Set up the tool around a knowledge base.

        Args:
            knowledge_base: Object to search; ``execute`` awaits its
                ``query(text, k=...)`` method
            name: Tool name (default: knowledge_search)
        """
        tool_description = (
            "Search the knowledge base for relevant information. "
            "Use this when you need to find documentation, examples, or "
            "specific information to answer user questions."
        )
        super().__init__(name=name, description=tool_description)
        self.knowledge_base = knowledge_base

    @property
    def schema(self) -> dict[str, Any]:
        """Describe the tool parameters as JSON Schema.

        Returns:
            Schema with a required ``query`` string and an optional
            ``max_results`` integer (1-10, default 3)
        """
        query_spec = {
            "type": "string",
            "description": "The search query or question to find information about",
        }
        max_results_spec = {
            "type": "integer",
            "description": "Maximum number of results to return",
            "default": 3,
            "minimum": 1,
            "maximum": 10,
        }
        return {
            "type": "object",
            "properties": {
                "query": query_spec,
                "max_results": max_results_spec,
            },
            "required": ["query"],
        }

    async def execute(self, query: str, max_results: int = 3, **kwargs: Any) -> dict[str, Any]:
        """Run a knowledge base search.

        Args:
            query: Search query text
            max_results: Maximum number of results (default: 3); values
                outside 1-10 are clamped into that range
            **kwargs: Additional arguments (ignored)

        Returns:
            Dictionary with search results:
                - query: Original query
                - results: List of dicts with ``text``, ``source``,
                  ``heading`` and ``similarity`` (rounded to 3 decimals)
                - num_results: Number of results found

        Example:
            ```python
            result = await tool.execute(
                query="How do I configure the database?",
                max_results=3
            )
            for chunk in result['results']:
                print(f"{chunk['heading']}: {chunk['text']}")
            ```
        """
        # Keep the requested size inside the 1-10 range the schema advertises.
        limit = max(1, min(10, max_results))

        hits = await self.knowledge_base.query(query, k=limit)

        formatted = []
        for hit in hits:
            formatted.append(
                {
                    "text": hit["text"],
                    "source": hit["source"],
                    # The knowledge base calls this field "heading_path".
                    "heading": hit["heading_path"],
                    "similarity": round(hit["similarity"], 3),
                }
            )

        return {
            "query": query,
            "results": formatted,
            "num_results": len(hits),
        }
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Utility functions and helpers for the dataknobs_bots package."""
|