dtSpark 1.0.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dtSpark/__init__.py +0 -0
- dtSpark/_description.txt +1 -0
- dtSpark/_full_name.txt +1 -0
- dtSpark/_licence.txt +21 -0
- dtSpark/_metadata.yaml +6 -0
- dtSpark/_name.txt +1 -0
- dtSpark/_version.txt +1 -0
- dtSpark/aws/__init__.py +7 -0
- dtSpark/aws/authentication.py +296 -0
- dtSpark/aws/bedrock.py +578 -0
- dtSpark/aws/costs.py +318 -0
- dtSpark/aws/pricing.py +580 -0
- dtSpark/cli_interface.py +2645 -0
- dtSpark/conversation_manager.py +3050 -0
- dtSpark/core/__init__.py +12 -0
- dtSpark/core/application.py +3355 -0
- dtSpark/core/context_compaction.py +735 -0
- dtSpark/daemon/__init__.py +104 -0
- dtSpark/daemon/__main__.py +10 -0
- dtSpark/daemon/action_monitor.py +213 -0
- dtSpark/daemon/daemon_app.py +730 -0
- dtSpark/daemon/daemon_manager.py +289 -0
- dtSpark/daemon/execution_coordinator.py +194 -0
- dtSpark/daemon/pid_file.py +169 -0
- dtSpark/database/__init__.py +482 -0
- dtSpark/database/autonomous_actions.py +1191 -0
- dtSpark/database/backends.py +329 -0
- dtSpark/database/connection.py +122 -0
- dtSpark/database/conversations.py +520 -0
- dtSpark/database/credential_prompt.py +218 -0
- dtSpark/database/files.py +205 -0
- dtSpark/database/mcp_ops.py +355 -0
- dtSpark/database/messages.py +161 -0
- dtSpark/database/schema.py +673 -0
- dtSpark/database/tool_permissions.py +186 -0
- dtSpark/database/usage.py +167 -0
- dtSpark/files/__init__.py +4 -0
- dtSpark/files/manager.py +322 -0
- dtSpark/launch.py +39 -0
- dtSpark/limits/__init__.py +10 -0
- dtSpark/limits/costs.py +296 -0
- dtSpark/limits/tokens.py +342 -0
- dtSpark/llm/__init__.py +17 -0
- dtSpark/llm/anthropic_direct.py +446 -0
- dtSpark/llm/base.py +146 -0
- dtSpark/llm/context_limits.py +438 -0
- dtSpark/llm/manager.py +177 -0
- dtSpark/llm/ollama.py +578 -0
- dtSpark/mcp_integration/__init__.py +5 -0
- dtSpark/mcp_integration/manager.py +653 -0
- dtSpark/mcp_integration/tool_selector.py +225 -0
- dtSpark/resources/config.yaml.template +631 -0
- dtSpark/safety/__init__.py +22 -0
- dtSpark/safety/llm_service.py +111 -0
- dtSpark/safety/patterns.py +229 -0
- dtSpark/safety/prompt_inspector.py +442 -0
- dtSpark/safety/violation_logger.py +346 -0
- dtSpark/scheduler/__init__.py +20 -0
- dtSpark/scheduler/creation_tools.py +599 -0
- dtSpark/scheduler/execution_queue.py +159 -0
- dtSpark/scheduler/executor.py +1152 -0
- dtSpark/scheduler/manager.py +395 -0
- dtSpark/tools/__init__.py +4 -0
- dtSpark/tools/builtin.py +833 -0
- dtSpark/web/__init__.py +20 -0
- dtSpark/web/auth.py +152 -0
- dtSpark/web/dependencies.py +37 -0
- dtSpark/web/endpoints/__init__.py +17 -0
- dtSpark/web/endpoints/autonomous_actions.py +1125 -0
- dtSpark/web/endpoints/chat.py +621 -0
- dtSpark/web/endpoints/conversations.py +353 -0
- dtSpark/web/endpoints/main_menu.py +547 -0
- dtSpark/web/endpoints/streaming.py +421 -0
- dtSpark/web/server.py +578 -0
- dtSpark/web/session.py +167 -0
- dtSpark/web/ssl_utils.py +195 -0
- dtSpark/web/static/css/dark-theme.css +427 -0
- dtSpark/web/static/js/actions.js +1101 -0
- dtSpark/web/static/js/chat.js +614 -0
- dtSpark/web/static/js/main.js +496 -0
- dtSpark/web/static/js/sse-client.js +242 -0
- dtSpark/web/templates/actions.html +408 -0
- dtSpark/web/templates/base.html +93 -0
- dtSpark/web/templates/chat.html +814 -0
- dtSpark/web/templates/conversations.html +350 -0
- dtSpark/web/templates/goodbye.html +81 -0
- dtSpark/web/templates/login.html +90 -0
- dtSpark/web/templates/main_menu.html +983 -0
- dtSpark/web/templates/new_conversation.html +191 -0
- dtSpark/web/web_interface.py +137 -0
- dtspark-1.0.4.dist-info/METADATA +187 -0
- dtspark-1.0.4.dist-info/RECORD +96 -0
- dtspark-1.0.4.dist-info/WHEEL +5 -0
- dtspark-1.0.4.dist-info/entry_points.txt +3 -0
- dtspark-1.0.4.dist-info/licenses/LICENSE +21 -0
- dtspark-1.0.4.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,225 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Tool selection module for optimising token usage.
|
|
3
|
+
|
|
4
|
+
This module implements intelligent tool selection to reduce the number
|
|
5
|
+
of tools sent with each API request, significantly reducing token consumption.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import logging
|
|
9
|
+
from typing import List, Dict, Any, Set
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class ToolSelector:
    """Selects relevant tools based on conversation context.

    Tools are plain dicts; this class reads their ``'name'``, ``'description'``
    and ``'server'`` keys. Selection works in three stages:

    1. Every tool whose ``'server'`` is ``'builtin'`` is always included.
    2. Categories are detected by looking for ``CATEGORY_KEYWORDS`` in the user
       message and the last five history messages; tools whose name or
       description contains one of the matching ``TOOL_CATEGORIES`` patterns
       are added.
    3. Any remaining capacity (up to ``max_tools_per_request``) is filled with
       not-yet-selected tools in their original order.

    Note: all keyword/pattern matching is plain substring matching on
    lower-cased text, so short keywords also match inside longer words
    (e.g. ``'ttp'`` is found in ``'http'``).
    """

    # Define tool categories and their associated tool name patterns
    TOOL_CATEGORIES = {
        'aws_security': ['prowler', 'scan', 'findings', 'security', 'compliance', 'vulnerability'],
        'docker': ['container', 'docker', 'image', 'compose', 'network', 'volume'],
        'documentation': ['note', 'obsidian', 'create_note', 'update_note', 'search_notes', 'vault'],
        'threat_intelligence': ['opencti', 'indicator', 'threat', 'malware', 'stix', 'observable'],
        'aws_infrastructure': ['ec2', 's3', 'lambda', 'cloudwatch', 'iam', 'vpc', 'rds', 'dynamodb', 'diagram'],
        'elasticsearch': ['elasticsearch', 'search', 'index', 'query', 'aggregation'],
        'ragstore': ['ragstore', 'rag', 'embedding', 'vector', 'semantic'],
    }

    # Keywords in user messages that trigger specific categories
    CATEGORY_KEYWORDS = {
        'aws_security': ['security', 'prowler', 'scan', 'findings', 'vulnerabilities', 'compliance',
                         'threat', 'risk', 'audit', 'cis', 'benchmark'],
        'docker': ['container', 'docker', 'image', 'compose', 'containerised', 'containerized'],
        'documentation': ['note', 'obsidian', 'document', 'report', 'write', 'create note',
                          'update note', 'markdown', 'vault'],
        'threat_intelligence': ['threat', 'indicator', 'malware', 'opencti', 'ioc', 'attack',
                                'campaign', 'actor', 'ttp'],
        'aws_infrastructure': ['ec2', 's3', 'lambda', 'resource', 'aws', 'cloud', 'infrastructure',
                               'vpc', 'subnet', 'instance', 'bucket', 'function', 'diagram'],
        'elasticsearch': ['elasticsearch', 'search', 'query', 'index', 'log', 'aggregate'],
        'ragstore': ['ragstore', 'rag', 'embedding', 'semantic', 'vector', 'similarity'],
    }

    def __init__(self, max_tools_per_request: int = 30):
        """
        Initialise the tool selector.

        Args:
            max_tools_per_request: Maximum number of tools to include in a single request
        """
        self.max_tools_per_request = max_tools_per_request
        logging.info(f"ToolSelector initialised with max {max_tools_per_request} tools per request")

    def select_tools(self, all_tools: List[Dict[str, Any]], user_message: str,
                     conversation_history: List[Dict[str, Any]] = None) -> List[Dict[str, Any]]:
        """
        Select relevant tools based on user message and conversation context.

        Built-in tools are always returned, even if they alone exceed
        ``max_tools_per_request``; the limit only stops further additions.
        Tools are de-duplicated by name.

        Args:
            all_tools: List of all available tools
            user_message: The current user message
            conversation_history: Recent conversation messages (optional)

        Returns:
            List of selected tools (empty if ``all_tools`` is empty)
        """
        if not all_tools:
            logging.debug("No tools available for selection")
            return []

        # Always include built-in tools (they're small and always useful)
        selected_tools = [t for t in all_tools if t.get('server') == 'builtin']
        logging.debug(f"Included {len(selected_tools)} built-in tools")

        # Detect relevant categories based on user message and history
        relevant_categories = self._detect_categories(user_message, conversation_history)

        if not relevant_categories:
            # If no specific categories detected, include a diverse sample
            logging.info("No specific tool categories detected, selecting diverse sample")
            return self._select_diverse_sample(all_tools, selected_tools)

        logging.info(f"Detected relevant categories: {relevant_categories}")

        # Build set of relevant tool name patterns
        relevant_patterns = set()
        for category in relevant_categories:
            relevant_patterns.update(self.TOOL_CATEGORIES.get(category, []))

        # Track selected tool names to avoid duplicates. The same key function
        # is used everywhere so a nameless built-in is not selected twice.
        selected_tool_names = {self._tool_name(t) for t in selected_tools}

        # Select tools that match the relevant patterns
        for tool in all_tools:
            if len(selected_tools) >= self.max_tools_per_request:
                break

            tool_name = self._tool_name(tool)
            if tool_name in selected_tool_names:
                continue

            if self._matches_any(tool, relevant_patterns):
                selected_tools.append(tool)
                selected_tool_names.add(tool_name)

        # If still below limit and we have room, add some general-purpose tools
        if len(selected_tools) < self.max_tools_per_request:
            remaining = self.max_tools_per_request - len(selected_tools)
            logging.debug(f"Adding up to {remaining} additional tools to reach limit")
            self._fill_remaining(all_tools, selected_tools, selected_tool_names)

        logging.info(f"Selected {len(selected_tools)} tools from {len(all_tools)} available " +
                     f"(categories: {', '.join(relevant_categories)})")

        # Log which tools were selected for debugging. Names are coerced to str
        # so a tool without a usable name cannot break the join.
        tool_names = [str(self._tool_name(t)) for t in selected_tools]
        logging.debug(f"Selected tools: {', '.join(tool_names[:10])}{'...' if len(tool_names) > 10 else ''}")

        return selected_tools

    @staticmethod
    def _tool_name(tool: Dict[str, Any]) -> Any:
        """Return the tool's name, or '' when the name is missing or None."""
        return tool.get('name') or ''

    @staticmethod
    def _matches_any(tool: Dict[str, Any], patterns) -> bool:
        """
        Check whether any pattern is a substring of the tool's name or description.

        A missing or ``None`` name/description is treated as an empty string
        (``dict.get`` only applies its default when the key is absent, so an
        explicit ``None`` value must be handled here).

        Args:
            tool: Tool definition dict
            patterns: Iterable of lower-case pattern strings

        Returns:
            True if at least one pattern matches
        """
        name_lower = str(tool.get('name') or '').lower()
        desc_lower = str(tool.get('description') or '').lower()
        return any(pattern in name_lower or pattern in desc_lower for pattern in patterns)

    def _fill_remaining(self, all_tools: List[Dict[str, Any]],
                        selected: List[Dict[str, Any]], selected_names: Set[Any]) -> None:
        """
        Append not-yet-selected tools, in order, until the limit is reached.

        Mutates ``selected`` and ``selected_names`` in place.

        Args:
            all_tools: All available tools
            selected: Tools selected so far
            selected_names: Names of the tools in ``selected``
        """
        for tool in all_tools:
            if len(selected) >= self.max_tools_per_request:
                break

            tool_name = self._tool_name(tool)
            if tool_name not in selected_names:
                selected.append(tool)
                selected_names.add(tool_name)

    def _detect_categories(self, user_message: str,
                           conversation_history: List[Dict[str, Any]] = None) -> Set[str]:
        """
        Detect relevant tool categories from user message and history.

        Matching is a case-insensitive substring test against
        ``CATEGORY_KEYWORDS``. A ``None`` or empty message contributes nothing.

        Args:
            user_message: Current user message
            conversation_history: Recent conversation messages; only the last
                five are inspected. Each item may be a dict (its ``'content'``
                value is stringified) or any other object (stringified whole).

        Returns:
            Set of relevant category names
        """
        categories = set()

        # Analyse user message
        message_lower = str(user_message or '').lower()
        for category, keywords in self.CATEGORY_KEYWORDS.items():
            if any(keyword in message_lower for keyword in keywords):
                categories.add(category)
                logging.debug(f"Category '{category}' detected from user message")

        # Analyse recent conversation history (last 5 messages)
        if conversation_history:
            for msg in conversation_history[-5:]:
                # Handle both string content and dict content
                raw_content = msg.get('content', '') if isinstance(msg, dict) else msg
                content = str(raw_content).lower()

                for category, keywords in self.CATEGORY_KEYWORDS.items():
                    if category in categories:
                        continue
                    if any(keyword in content for keyword in keywords):
                        categories.add(category)
                        logging.debug(f"Category '{category}' detected from conversation history")

        return categories

    def _select_diverse_sample(self, all_tools: List[Dict[str, Any]],
                               already_selected: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """
        Select a diverse sample of tools when no specific categories are detected.

        Attempts to get tools from each category to provide broad coverage,
        then fills any remaining capacity with unselected tools in order.

        Args:
            all_tools: All available tools
            already_selected: Tools already selected (e.g., built-in tools);
                this list is copied, not modified

        Returns:
            List of selected tools
        """
        selected = list(already_selected)
        selected_tool_names = {self._tool_name(t) for t in selected}

        # Calculate how many tools to get from each category. max(1, ...) keeps
        # the quota positive even when few (or no) slots remain.
        remaining_slots = self.max_tools_per_request - len(selected)
        tools_per_category = max(1, remaining_slots // len(self.TOOL_CATEGORIES))

        logging.debug(f"Selecting ~{tools_per_category} tools from each category for diversity")

        for category_patterns in self.TOOL_CATEGORIES.values():
            if len(selected) >= self.max_tools_per_request:
                break

            added = 0
            for tool in all_tools:
                tool_name = self._tool_name(tool)
                if tool_name in selected_tool_names:
                    continue

                # Check if tool matches this category
                if self._matches_any(tool, category_patterns):
                    selected.append(tool)
                    selected_tool_names.add(tool_name)
                    added += 1
                    if added >= tools_per_category or len(selected) >= self.max_tools_per_request:
                        break

        # If still below limit, add remaining tools
        if len(selected) < self.max_tools_per_request:
            self._fill_remaining(all_tools, selected, selected_tool_names)

        logging.info(f"Selected {len(selected)} diverse tools (no specific category detected)")
        return selected
|