dataknobs-bots 0.2.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. dataknobs_bots/__init__.py +42 -0
  2. dataknobs_bots/api/__init__.py +42 -0
  3. dataknobs_bots/api/dependencies.py +140 -0
  4. dataknobs_bots/api/exceptions.py +289 -0
  5. dataknobs_bots/bot/__init__.py +15 -0
  6. dataknobs_bots/bot/base.py +1091 -0
  7. dataknobs_bots/bot/context.py +102 -0
  8. dataknobs_bots/bot/manager.py +430 -0
  9. dataknobs_bots/bot/registry.py +629 -0
  10. dataknobs_bots/config/__init__.py +39 -0
  11. dataknobs_bots/config/resolution.py +353 -0
  12. dataknobs_bots/knowledge/__init__.py +82 -0
  13. dataknobs_bots/knowledge/query/__init__.py +25 -0
  14. dataknobs_bots/knowledge/query/expander.py +262 -0
  15. dataknobs_bots/knowledge/query/transformer.py +288 -0
  16. dataknobs_bots/knowledge/rag.py +738 -0
  17. dataknobs_bots/knowledge/retrieval/__init__.py +23 -0
  18. dataknobs_bots/knowledge/retrieval/formatter.py +249 -0
  19. dataknobs_bots/knowledge/retrieval/merger.py +279 -0
  20. dataknobs_bots/memory/__init__.py +56 -0
  21. dataknobs_bots/memory/base.py +38 -0
  22. dataknobs_bots/memory/buffer.py +58 -0
  23. dataknobs_bots/memory/vector.py +188 -0
  24. dataknobs_bots/middleware/__init__.py +11 -0
  25. dataknobs_bots/middleware/base.py +92 -0
  26. dataknobs_bots/middleware/cost.py +421 -0
  27. dataknobs_bots/middleware/logging.py +184 -0
  28. dataknobs_bots/reasoning/__init__.py +65 -0
  29. dataknobs_bots/reasoning/base.py +50 -0
  30. dataknobs_bots/reasoning/react.py +299 -0
  31. dataknobs_bots/reasoning/simple.py +51 -0
  32. dataknobs_bots/registry/__init__.py +41 -0
  33. dataknobs_bots/registry/backend.py +181 -0
  34. dataknobs_bots/registry/memory.py +244 -0
  35. dataknobs_bots/registry/models.py +102 -0
  36. dataknobs_bots/registry/portability.py +210 -0
  37. dataknobs_bots/tools/__init__.py +5 -0
  38. dataknobs_bots/tools/knowledge_search.py +113 -0
  39. dataknobs_bots/utils/__init__.py +1 -0
  40. dataknobs_bots-0.2.4.dist-info/METADATA +591 -0
  41. dataknobs_bots-0.2.4.dist-info/RECORD +42 -0
  42. dataknobs_bots-0.2.4.dist-info/WHEEL +4 -0
@@ -0,0 +1,244 @@
1
+ """In-memory implementation of RegistryBackend."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ from datetime import datetime, timezone
7
+ from typing import Any
8
+
9
+ from .models import Registration
10
+
11
+
12
class InMemoryBackend:
    """Dictionary-backed RegistryBackend that lives entirely in process memory.

    Good fit for:
    - Tests that must not depend on a database
    - Single-instance deployments
    - Development environments

    Poor fit for:
    - Multi-instance deployments (state is neither shared nor persisted)
    - Production systems with persistence requirements

    Every access to the underlying dict is serialized through an
    ``asyncio.Lock``. That guards against interleaving coroutines on one
    event loop; it is not a cross-thread lock.

    Example:
        ```python
        backend = InMemoryBackend()
        await backend.initialize()

        reg = await backend.register("my-bot", {"llm": {...}})
        print(f"Created at: {reg.created_at}")

        config = await backend.get_config("my-bot")
        print(f"Config: {config}")

        # List all bots
        for reg in await backend.list_active():
            print(f"Bot: {reg.bot_id}")

        # Cleanup
        await backend.close()
        ```
    """

    def __init__(self) -> None:
        """Create an empty, not-yet-initialized backend."""
        self._registrations: dict[str, Registration] = {}
        self._lock = asyncio.Lock()
        self._initialized = False

    def _iter_active(self):
        """Yield stored registrations whose status is "active".

        Callers are expected to hold ``self._lock`` while consuming this.
        """
        for entry in self._registrations.values():
            if entry.status == "active":
                yield entry

    async def initialize(self) -> None:
        """Mark the backend as initialized; there is nothing to connect to."""
        self._initialized = True

    async def close(self) -> None:
        """Drop every stored registration and mark the backend as closed."""
        async with self._lock:
            self._registrations.clear()
        self._initialized = False

    async def register(
        self,
        bot_id: str,
        config: dict[str, Any],
        status: str = "active",
    ) -> Registration:
        """Create a registration, or replace the one already stored.

        Args:
            bot_id: Unique bot identifier
            config: Bot configuration dictionary
            status: Registration status (default: active)

        Returns:
            The Registration that is now stored for ``bot_id``
        """
        async with self._lock:
            now = datetime.now(timezone.utc)
            previous = self._registrations.get(bot_id)
            # An update keeps the original creation time; a new entry starts now.
            created = now if previous is None else previous.created_at

            entry = Registration(
                bot_id=bot_id,
                config=config,
                status=status,
                created_at=created,
                updated_at=now,
                last_accessed_at=now,
            )
            self._registrations[bot_id] = entry
            return entry

    async def get(self, bot_id: str) -> Registration | None:
        """Fetch a registration and stamp it with a new access time.

        Args:
            bot_id: Bot identifier

        Returns:
            The refreshed Registration, or None when ``bot_id`` is unknown
        """
        async with self._lock:
            current = self._registrations.get(bot_id)
            if not current:
                return None

            # Stored entries are replaced rather than mutated in place.
            touched = Registration(
                bot_id=current.bot_id,
                config=current.config,
                status=current.status,
                created_at=current.created_at,
                updated_at=current.updated_at,
                last_accessed_at=datetime.now(timezone.utc),
            )
            self._registrations[bot_id] = touched
            return touched

    async def get_config(self, bot_id: str) -> dict[str, Any] | None:
        """Fetch only the config of a registration (also refreshes access time).

        Args:
            bot_id: Bot identifier

        Returns:
            Config dict if found, None otherwise
        """
        found = await self.get(bot_id)
        if found:
            return found.config
        return None

    async def exists(self, bot_id: str) -> bool:
        """Report whether an active registration is stored for ``bot_id``.

        Args:
            bot_id: Bot identifier

        Returns:
            True if registration exists and is active
        """
        async with self._lock:
            entry = self._registrations.get(bot_id)
            if entry is None:
                return False
            return entry.status == "active"

    async def unregister(self, bot_id: str) -> bool:
        """Remove a registration entirely (hard delete).

        Args:
            bot_id: Bot identifier

        Returns:
            True if deleted, False if not found
        """
        async with self._lock:
            try:
                del self._registrations[bot_id]
            except KeyError:
                return False
            return True

    async def deactivate(self, bot_id: str) -> bool:
        """Mark a registration inactive without removing it (soft delete).

        Args:
            bot_id: Bot identifier

        Returns:
            True if deactivated, False if not found
        """
        async with self._lock:
            if bot_id not in self._registrations:
                return False

            current = self._registrations[bot_id]
            # Only status and updated_at change; the access time is preserved.
            self._registrations[bot_id] = Registration(
                bot_id=current.bot_id,
                config=current.config,
                status="inactive",
                created_at=current.created_at,
                updated_at=datetime.now(timezone.utc),
                last_accessed_at=current.last_accessed_at,
            )
            return True

    async def list_active(self) -> list[Registration]:
        """Return the registrations whose status is active.

        Returns:
            List of active Registration objects
        """
        async with self._lock:
            return list(self._iter_active())

    async def list_all(self) -> list[Registration]:
        """Return every registration regardless of status.

        Returns:
            List of all Registration objects
        """
        async with self._lock:
            return [*self._registrations.values()]

    async def list_ids(self) -> list[str]:
        """Return the ids of the active registrations.

        Returns:
            List of active bot IDs
        """
        async with self._lock:
            return [entry.bot_id for entry in self._iter_active()]

    async def count(self) -> int:
        """Count the active registrations.

        Returns:
            Number of active registrations
        """
        async with self._lock:
            return sum(1 for _ in self._iter_active())

    async def clear(self) -> None:
        """Remove every registration without changing the initialized flag."""
        async with self._lock:
            self._registrations.clear()

    def __repr__(self) -> str:
        """Show the class name and the total number of stored registrations."""
        total = len(self._registrations)
        return f"InMemoryBackend(count={total})"
@@ -0,0 +1,102 @@
1
+ """Registration model for bot registry."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass, field
6
+ from datetime import datetime, timezone
7
+ from typing import Any
8
+
9
+
10
@dataclass
class Registration:
    """Bot registration with metadata.

    Stores a bot configuration along with lifecycle metadata like
    timestamps and status. Used by registry backends to persist
    bot configurations.

    Attributes:
        bot_id: Unique bot identifier
        config: Bot configuration dictionary (should be portable)
        status: Registration status (active, inactive, error)
        created_at: When the registration was created
        updated_at: When the registration was last updated
        last_accessed_at: When the bot was last accessed

    Example:
        ```python
        reg = Registration(
            bot_id="my-bot",
            config={"bot": {"llm": {"$resource": "default", "type": "llm_providers"}}},
        )
        print(f"Bot {reg.bot_id} created at {reg.created_at}")

        # Serialize for storage
        data = reg.to_dict()

        # Restore from storage
        restored = Registration.from_dict(data)
        ```
    """

    bot_id: str
    config: dict[str, Any]
    status: str = "active"
    # Each timestamp defaults to "now" in UTC, evaluated per instance.
    created_at: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
    updated_at: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
    last_accessed_at: datetime = field(default_factory=lambda: datetime.now(timezone.utc))

    def to_dict(self) -> dict[str, Any]:
        """Convert to dictionary for serialization.

        Returns:
            Dictionary representation with ISO format timestamps (a timestamp
            that is unset/falsy is emitted as None)
        """
        return {
            "bot_id": self.bot_id,
            "config": self.config,
            "status": self.status,
            "created_at": self.created_at.isoformat() if self.created_at else None,
            "updated_at": self.updated_at.isoformat() if self.updated_at else None,
            "last_accessed_at": (
                self.last_accessed_at.isoformat() if self.last_accessed_at else None
            ),
        }

    @staticmethod
    def _parse_timestamp(value: Any) -> datetime:
        """Turn one stored timestamp value into a datetime.

        Args:
            value: ISO 8601 string, datetime instance, or a missing/empty value

        Returns:
            The parsed datetime; the current UTC time when ``value`` is
            missing or empty; ``value`` itself when it is already a datetime

        Raises:
            ValueError: If a string value is not valid ISO 8601
            TypeError: If the value is neither a string nor a datetime
        """
        if not value:
            return datetime.now(timezone.utc)
        if isinstance(value, datetime):
            # Some stores hand back native datetimes; nothing to parse.
            return value
        if isinstance(value, str) and value.endswith("Z"):
            # datetime.fromisoformat only accepts the "Z" suffix from
            # Python 3.11 on, so spell the UTC offset out explicitly.
            value = f"{value[:-1]}+00:00"
        return datetime.fromisoformat(value)

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> Registration:
        """Create from dictionary.

        Args:
            data: Dictionary with registration data. ``bot_id`` and ``config``
                are required; ``status`` defaults to "active"; each timestamp
                may be an ISO 8601 string (a trailing "Z" is accepted), a
                datetime, or absent (current UTC time is used)

        Returns:
            Registration instance

        Raises:
            KeyError: If ``bot_id`` or ``config`` is missing
            ValueError: If a timestamp string is not valid ISO 8601
        """
        return cls(
            bot_id=data["bot_id"],
            config=data["config"],
            status=data.get("status", "active"),
            created_at=cls._parse_timestamp(data.get("created_at")),
            updated_at=cls._parse_timestamp(data.get("updated_at")),
            last_accessed_at=cls._parse_timestamp(data.get("last_accessed_at")),
        )

    def __repr__(self) -> str:
        """Short representation; the config is left out on purpose."""
        return (
            f"Registration(bot_id={self.bot_id!r}, status={self.status!r}, "
            f"created_at={self.created_at.isoformat() if self.created_at else None})"
        )
@@ -0,0 +1,210 @@
1
+ """Portability validation utilities for bot configurations.
2
+
3
+ This module provides utilities to validate that bot configurations
4
+ are portable across environments. Portable configs use $resource
5
+ references instead of hardcoded values like local paths or localhost URLs.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import re
11
+ from typing import Any
12
+
13
+
14
class PortabilityError(Exception):
    r"""Signals that a config holds values tied to one machine or environment.

    Values treated as non-portable:
    - Local file paths (/Users/..., /home/..., C:\Users\...)
    - Localhost URLs (localhost:port, 127.0.0.1, 0.0.0.0)

    A portable config expresses such settings as $resource references,
    which are resolved at runtime for the environment at hand.

    Example:
        ```python
        # This will raise PortabilityError
        validate_portability({
            "storage": {"path": "/Users/dev/data"}  # Local path!
        })

        # This is OK
        validate_portability({
            "storage": {"$resource": "default", "type": "databases"}
        })
        ```
    """
39
+
40
+
41
+ # Patterns that indicate resolved local values (not portable)
42
+ # Note: Windows paths may appear with single or double backslashes depending
43
+ # on whether we're matching against repr() output or actual string values
44
+ SUSPICIOUS_PATTERNS: list[tuple[str, str]] = [
45
+ (r"/Users/\w+", "macOS home directory"),
46
+ (r"/home/\w+", "Linux home directory"),
47
+ (r"C:\\+Users\\+\w+", "Windows home directory"), # Matches C:\Users or C:\\Users
48
+ (r"localhost:\d+", "localhost with port"),
49
+ (r"127\.0\.0\.1", "localhost IP"),
50
+ (r"0\.0\.0\.0", "all interfaces IP"),
51
+ ]
52
+
53
+ # Patterns that are OK (environment variable placeholders)
54
+ SAFE_PATTERNS: list[str] = [
55
+ r"\$\{[^}]+\}", # ${VAR} or ${VAR:default}
56
+ r"\$[A-Z_][A-Z0-9_]*", # $VAR
57
+ ]
58
+
59
+
60
+ def validate_portability(
61
+ config: dict[str, Any],
62
+ raise_on_error: bool = True,
63
+ ) -> list[str]:
64
+ """Validate that a config is portable (no resolved local values).
65
+
66
+ Checks for patterns that indicate resolved environment values
67
+ that would break portability across environments.
68
+
69
+ Args:
70
+ config: Configuration dictionary to validate
71
+ raise_on_error: If True, raise PortabilityError; otherwise return issues
72
+
73
+ Returns:
74
+ List of portability issues found (empty if portable)
75
+
76
+ Raises:
77
+ PortabilityError: If non-portable and raise_on_error=True
78
+
79
+ Example:
80
+ ```python
81
+ # This will raise PortabilityError
82
+ validate_portability({
83
+ "llm": {"api_key": "sk-..."}, # OK - not a path
84
+ "storage": {"path": "/Users/dev/data"}, # NOT OK - local path
85
+ })
86
+
87
+ # Check without raising
88
+ issues = validate_portability(config, raise_on_error=False)
89
+ if issues:
90
+ print(f"Found {len(issues)} portability issues")
91
+
92
+ # This is OK - uses $resource references
93
+ validate_portability({
94
+ "llm": {"$resource": "default", "type": "llm_providers"},
95
+ "storage": {"$resource": "db", "type": "databases"},
96
+ })
97
+
98
+ # Environment variables are OK
99
+ validate_portability({
100
+ "storage": {"path": "${DATA_PATH}"}, # OK - env var placeholder
101
+ })
102
+ ```
103
+ """
104
+ config_str = str(config)
105
+ issues: list[str] = []
106
+
107
+ for pattern, description in SUSPICIOUS_PATTERNS:
108
+ matches = re.findall(pattern, config_str)
109
+ for match in matches:
110
+ # Check if this match is inside a safe pattern (env var)
111
+ is_safe = _is_in_safe_pattern(match, config_str)
112
+
113
+ if not is_safe:
114
+ issues.append(f"Found {description}: '{match}'")
115
+
116
+ if issues and raise_on_error:
117
+ raise PortabilityError(
118
+ "Config appears to contain resolved local values that would break "
119
+ "portability. Store portable config with $resource references instead.\n"
120
+ "Issues found:\n" + "\n".join(f" - {issue}" for issue in issues)
121
+ )
122
+
123
+ return issues
124
+
125
+
126
+ def _is_in_safe_pattern(match: str, config_str: str) -> bool:
127
+ """Check if a suspicious match is inside a safe pattern (env var).
128
+
129
+ Args:
130
+ match: The suspicious string that was matched
131
+ config_str: The full config string
132
+
133
+ Returns:
134
+ True if the match appears inside an env var pattern
135
+ """
136
+ for safe_pattern in SAFE_PATTERNS:
137
+ # Check if the suspicious pattern appears inside a safe pattern
138
+ # e.g., "${HOME}/data" contains "/home" but it's inside ${...}
139
+ combined_pattern = f"{safe_pattern}[^'\"]*{re.escape(match)}"
140
+ if re.search(combined_pattern, config_str):
141
+ return True
142
+ return False
143
+
144
+
145
def has_resource_references(config: dict[str, Any]) -> bool:
    """Tell whether the text "$resource" occurs anywhere in the config.

    The test is a plain substring search over ``str(config)``, so it looks
    at keys and values at every nesting level. A config carrying such
    references is portable but needs environment resolution before use.

    Args:
        config: Configuration dictionary

    Returns:
        True if config contains $resource references

    Example:
        ```python
        # Portable config with $resource refs
        config = {
            "bot": {
                "llm": {"$resource": "default", "type": "llm_providers"},
            }
        }
        assert has_resource_references(config) is True

        # Resolved config (no $resource refs)
        config = {
            "bot": {
                "llm": {"provider": "openai", "model": "gpt-4"},
            }
        }
        assert has_resource_references(config) is False
        ```
    """
    rendered = str(config)
    return rendered.find("$resource") != -1
177
+
178
+
179
def is_portable(config: dict[str, Any]) -> bool:
    """Decide whether a config looks safe to move between environments.

    The config counts as portable when either of these holds:
    - It contains $resource references (for late binding), or
    - It contains no suspicious local values

    The $resource test runs first; when it succeeds, the scan for local
    values is skipped entirely.

    Args:
        config: Configuration dictionary

    Returns:
        True if config appears to be portable

    Example:
        ```python
        # Portable: uses $resource
        assert is_portable({"llm": {"$resource": "default"}}) is True

        # Portable: no local paths
        assert is_portable({"llm": {"provider": "openai"}}) is True

        # Not portable: contains local path
        assert is_portable({"path": "/Users/dev/data"}) is False
        ```
    """
    # $resource refs are resolved later, so their presence alone is enough;
    # otherwise the config must be free of suspicious patterns.
    return has_resource_references(config) or not validate_portability(
        config, raise_on_error=False
    )
@@ -0,0 +1,5 @@
1
+ """Tools for DynaBot."""
2
+
3
+ from .knowledge_search import KnowledgeSearchTool
4
+
5
+ __all__ = ["KnowledgeSearchTool"]
@@ -0,0 +1,113 @@
1
+ """Knowledge search tool for RAG integration."""
2
+
3
+ from typing import Any
4
+
5
+ from dataknobs_llm.tools import Tool
6
+
7
+
8
class KnowledgeSearchTool(Tool):
    """Tool that lets an LLM query the bot's knowledge base.

    During a conversation the model can call this tool to pull relevant
    chunks out of the knowledge base.

    Example:
        ```python
        # Create tool with knowledge base
        tool = KnowledgeSearchTool(knowledge_base=kb)

        # Register with bot
        bot.tool_registry.register_tool(tool)

        # LLM can now call the tool
        results = await tool.execute(
            query="How do I configure the database?",
            max_results=3
        )
        ```
    """

    def __init__(self, knowledge_base: Any, name: str = "knowledge_search"):
        """Set up the tool around a knowledge base.

        Args:
            knowledge_base: RAGKnowledgeBase instance to search
            name: Tool name (default: knowledge_search)
        """
        summary = (
            "Search the knowledge base for relevant information. "
            "Use this when you need to find documentation, examples, or "
            "specific information to answer user questions."
        )
        super().__init__(name=name, description=summary)
        self.knowledge_base = knowledge_base

    @property
    def schema(self) -> dict[str, Any]:
        """Describe the accepted parameters as JSON Schema.

        Returns:
            JSON Schema for the tool parameters
        """
        query_spec = {
            "type": "string",
            "description": "The search query or question to find information about",
        }
        limit_spec = {
            "type": "integer",
            "description": "Maximum number of results to return",
            "default": 3,
            "minimum": 1,
            "maximum": 10,
        }
        return {
            "type": "object",
            "properties": {
                "query": query_spec,
                "max_results": limit_spec,
            },
            "required": ["query"],
        }

    async def execute(self, query: str, max_results: int = 3, **kwargs: Any) -> dict[str, Any]:
        """Run a search against the knowledge base.

        Args:
            query: Search query text
            max_results: Maximum number of results (default: 3); values
                outside 1..10 are clamped into that range
            **kwargs: Additional arguments (ignored)

        Returns:
            Dictionary with search results:
                - query: Original query
                - results: List of relevant chunks, each with text, source,
                  heading and similarity (rounded to 3 decimals)
                - num_results: Number of results found

        Example:
            ```python
            result = await tool.execute(
                query="How do I configure the database?",
                max_results=3
            )
            for chunk in result['results']:
                print(f"{chunk['heading']}: {chunk['text']}")
            ```
        """
        # Keep the requested count inside the 1..10 range the schema advertises.
        limit = min(10, max_results)
        limit = max(1, limit)

        hits = await self.knowledge_base.query(query, k=limit)

        # Re-shape each hit into the flat structure returned to the model.
        formatted = []
        for hit in hits:
            formatted.append(
                {
                    "text": hit["text"],
                    "source": hit["source"],
                    "heading": hit["heading_path"],
                    "similarity": round(hit["similarity"], 3),
                }
            )

        return {
            "query": query,
            "results": formatted,
            "num_results": len(hits),
        }
@@ -0,0 +1 @@
1
+ """Utility functions and helpers for the dataknobs_bots package."""