dtSpark 1.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96) hide show
  1. dtSpark/__init__.py +0 -0
  2. dtSpark/_description.txt +1 -0
  3. dtSpark/_full_name.txt +1 -0
  4. dtSpark/_licence.txt +21 -0
  5. dtSpark/_metadata.yaml +6 -0
  6. dtSpark/_name.txt +1 -0
  7. dtSpark/_version.txt +1 -0
  8. dtSpark/aws/__init__.py +7 -0
  9. dtSpark/aws/authentication.py +296 -0
  10. dtSpark/aws/bedrock.py +578 -0
  11. dtSpark/aws/costs.py +318 -0
  12. dtSpark/aws/pricing.py +580 -0
  13. dtSpark/cli_interface.py +2645 -0
  14. dtSpark/conversation_manager.py +3050 -0
  15. dtSpark/core/__init__.py +12 -0
  16. dtSpark/core/application.py +3355 -0
  17. dtSpark/core/context_compaction.py +735 -0
  18. dtSpark/daemon/__init__.py +104 -0
  19. dtSpark/daemon/__main__.py +10 -0
  20. dtSpark/daemon/action_monitor.py +213 -0
  21. dtSpark/daemon/daemon_app.py +730 -0
  22. dtSpark/daemon/daemon_manager.py +289 -0
  23. dtSpark/daemon/execution_coordinator.py +194 -0
  24. dtSpark/daemon/pid_file.py +169 -0
  25. dtSpark/database/__init__.py +482 -0
  26. dtSpark/database/autonomous_actions.py +1191 -0
  27. dtSpark/database/backends.py +329 -0
  28. dtSpark/database/connection.py +122 -0
  29. dtSpark/database/conversations.py +520 -0
  30. dtSpark/database/credential_prompt.py +218 -0
  31. dtSpark/database/files.py +205 -0
  32. dtSpark/database/mcp_ops.py +355 -0
  33. dtSpark/database/messages.py +161 -0
  34. dtSpark/database/schema.py +673 -0
  35. dtSpark/database/tool_permissions.py +186 -0
  36. dtSpark/database/usage.py +167 -0
  37. dtSpark/files/__init__.py +4 -0
  38. dtSpark/files/manager.py +322 -0
  39. dtSpark/launch.py +39 -0
  40. dtSpark/limits/__init__.py +10 -0
  41. dtSpark/limits/costs.py +296 -0
  42. dtSpark/limits/tokens.py +342 -0
  43. dtSpark/llm/__init__.py +17 -0
  44. dtSpark/llm/anthropic_direct.py +446 -0
  45. dtSpark/llm/base.py +146 -0
  46. dtSpark/llm/context_limits.py +438 -0
  47. dtSpark/llm/manager.py +177 -0
  48. dtSpark/llm/ollama.py +578 -0
  49. dtSpark/mcp_integration/__init__.py +5 -0
  50. dtSpark/mcp_integration/manager.py +653 -0
  51. dtSpark/mcp_integration/tool_selector.py +225 -0
  52. dtSpark/resources/config.yaml.template +631 -0
  53. dtSpark/safety/__init__.py +22 -0
  54. dtSpark/safety/llm_service.py +111 -0
  55. dtSpark/safety/patterns.py +229 -0
  56. dtSpark/safety/prompt_inspector.py +442 -0
  57. dtSpark/safety/violation_logger.py +346 -0
  58. dtSpark/scheduler/__init__.py +20 -0
  59. dtSpark/scheduler/creation_tools.py +599 -0
  60. dtSpark/scheduler/execution_queue.py +159 -0
  61. dtSpark/scheduler/executor.py +1152 -0
  62. dtSpark/scheduler/manager.py +395 -0
  63. dtSpark/tools/__init__.py +4 -0
  64. dtSpark/tools/builtin.py +833 -0
  65. dtSpark/web/__init__.py +20 -0
  66. dtSpark/web/auth.py +152 -0
  67. dtSpark/web/dependencies.py +37 -0
  68. dtSpark/web/endpoints/__init__.py +17 -0
  69. dtSpark/web/endpoints/autonomous_actions.py +1125 -0
  70. dtSpark/web/endpoints/chat.py +621 -0
  71. dtSpark/web/endpoints/conversations.py +353 -0
  72. dtSpark/web/endpoints/main_menu.py +547 -0
  73. dtSpark/web/endpoints/streaming.py +421 -0
  74. dtSpark/web/server.py +578 -0
  75. dtSpark/web/session.py +167 -0
  76. dtSpark/web/ssl_utils.py +195 -0
  77. dtSpark/web/static/css/dark-theme.css +427 -0
  78. dtSpark/web/static/js/actions.js +1101 -0
  79. dtSpark/web/static/js/chat.js +614 -0
  80. dtSpark/web/static/js/main.js +496 -0
  81. dtSpark/web/static/js/sse-client.js +242 -0
  82. dtSpark/web/templates/actions.html +408 -0
  83. dtSpark/web/templates/base.html +93 -0
  84. dtSpark/web/templates/chat.html +814 -0
  85. dtSpark/web/templates/conversations.html +350 -0
  86. dtSpark/web/templates/goodbye.html +81 -0
  87. dtSpark/web/templates/login.html +90 -0
  88. dtSpark/web/templates/main_menu.html +983 -0
  89. dtSpark/web/templates/new_conversation.html +191 -0
  90. dtSpark/web/web_interface.py +137 -0
  91. dtspark-1.0.4.dist-info/METADATA +187 -0
  92. dtspark-1.0.4.dist-info/RECORD +96 -0
  93. dtspark-1.0.4.dist-info/WHEEL +5 -0
  94. dtspark-1.0.4.dist-info/entry_points.txt +3 -0
  95. dtspark-1.0.4.dist-info/licenses/LICENSE +21 -0
  96. dtspark-1.0.4.dist-info/top_level.txt +1 -0
@@ -0,0 +1,225 @@
1
+ """
2
+ Tool selection module for optimising token usage.
3
+
4
+ This module implements intelligent tool selection to reduce the number
5
+ of tools sent with each API request, significantly reducing token consumption.
6
+ """
7
+
8
+ import logging
9
+ from typing import List, Dict, Any, Set
10
+
11
+
12
class ToolSelector:
    """
    Selects relevant tools based on conversation context.

    Tools are plain dictionaries. The selector reads the ``server``, ``name``
    and ``description`` keys; a missing or ``None`` name/description is
    treated as an empty string so that sparsely populated tool definitions
    never cause a crash during selection.
    """

    # Define tool categories and their associated tool name patterns.
    # A tool belongs to a category when any pattern is a substring of its
    # lower-cased name or description.
    TOOL_CATEGORIES = {
        'aws_security': ['prowler', 'scan', 'findings', 'security', 'compliance', 'vulnerability'],
        'docker': ['container', 'docker', 'image', 'compose', 'network', 'volume'],
        'documentation': ['note', 'obsidian', 'create_note', 'update_note', 'search_notes', 'vault'],
        'threat_intelligence': ['opencti', 'indicator', 'threat', 'malware', 'stix', 'observable'],
        'aws_infrastructure': ['ec2', 's3', 'lambda', 'cloudwatch', 'iam', 'vpc', 'rds', 'dynamodb', 'diagram'],
        'elasticsearch': ['elasticsearch', 'search', 'index', 'query', 'aggregation'],
        'ragstore': ['ragstore', 'rag', 'embedding', 'vector', 'semantic'],
    }

    # Keywords in user messages that trigger specific categories.
    # NOTE(review): matching is plain substring containment, so short keywords
    # can fire inside unrelated words (e.g. 'ttp' in 'http', 'rag' in
    # 'storage'). Left unchanged here to preserve existing selection results.
    CATEGORY_KEYWORDS = {
        'aws_security': ['security', 'prowler', 'scan', 'findings', 'vulnerabilities', 'compliance',
                         'threat', 'risk', 'audit', 'cis', 'benchmark'],
        'docker': ['container', 'docker', 'image', 'compose', 'containerised', 'containerized'],
        'documentation': ['note', 'obsidian', 'document', 'report', 'write', 'create note',
                          'update note', 'markdown', 'vault'],
        'threat_intelligence': ['threat', 'indicator', 'malware', 'opencti', 'ioc', 'attack',
                                'campaign', 'actor', 'ttp'],
        'aws_infrastructure': ['ec2', 's3', 'lambda', 'resource', 'aws', 'cloud', 'infrastructure',
                               'vpc', 'subnet', 'instance', 'bucket', 'function', 'diagram'],
        'elasticsearch': ['elasticsearch', 'search', 'query', 'index', 'log', 'aggregate'],
        'ragstore': ['ragstore', 'rag', 'embedding', 'semantic', 'vector', 'similarity'],
    }

    def __init__(self, max_tools_per_request: int = 30):
        """
        Initialise the tool selector.

        Args:
            max_tools_per_request: Maximum number of tools to include in a single request
        """
        self.max_tools_per_request = max_tools_per_request
        logging.info(f"ToolSelector initialised with max {max_tools_per_request} tools per request")

    @staticmethod
    def _tool_name(tool: Dict[str, Any]) -> str:
        """Return the tool's name as a string; a missing or None name becomes ''."""
        return str(tool.get('name') or '')

    def _matches_patterns(self, tool: Dict[str, Any], patterns) -> bool:
        """Return True if any pattern is a substring of the tool's lower-cased name or description."""
        name_lower = self._tool_name(tool).lower()
        # `or ''` guards against an explicit `description: None`, which
        # dict.get's default would not cover.
        desc_lower = str(tool.get('description') or '').lower()
        return any(pattern in name_lower or pattern in desc_lower for pattern in patterns)

    def _fill_remaining(self, all_tools: List[Dict[str, Any]],
                        selected: List[Dict[str, Any]],
                        selected_names: Set[str]) -> None:
        """Append not-yet-selected tools (in input order) to `selected` in place until the limit is reached."""
        for tool in all_tools:
            if len(selected) >= self.max_tools_per_request:
                break

            name = self._tool_name(tool)
            if name not in selected_names:
                selected.append(tool)
                selected_names.add(name)

    def select_tools(self, all_tools: List[Dict[str, Any]], user_message: str,
                     conversation_history: List[Dict[str, Any]] = None) -> List[Dict[str, Any]]:
        """
        Select relevant tools based on user message and conversation context.

        Built-in tools (``server == 'builtin'``) are always included, even if
        there are more of them than ``max_tools_per_request``. Tools matching
        the detected categories are added next, and any remaining slots are
        filled with the other tools in their original order. Tools are
        de-duplicated by name.

        Args:
            all_tools: List of all available tools
            user_message: The current user message (None is treated as empty)
            conversation_history: Recent conversation messages, or None

        Returns:
            List of selected tools
        """
        if not all_tools:
            logging.debug("No tools available for selection")
            return []

        # Always include built-in tools (they're small and always useful)
        selected_tools = [t for t in all_tools if t.get('server') == 'builtin']
        logging.debug(f"Included {len(selected_tools)} built-in tools")

        # Detect relevant categories based on user message and history
        relevant_categories = self._detect_categories(user_message, conversation_history)

        if not relevant_categories:
            # If no specific categories detected, include a diverse sample
            logging.info("No specific tool categories detected, selecting diverse sample")
            return self._select_diverse_sample(all_tools, selected_tools)

        logging.info(f"Detected relevant categories: {relevant_categories}")

        # Build set of relevant tool name patterns
        relevant_patterns = set()
        for category in relevant_categories:
            relevant_patterns.update(self.TOOL_CATEGORIES.get(category, []))

        # Track selected tool names to avoid duplicates. Names are normalised
        # through _tool_name so the lookup below uses the same key.
        selected_tool_names = {self._tool_name(t) for t in selected_tools}

        # Select tools that match the relevant patterns
        for tool in all_tools:
            if len(selected_tools) >= self.max_tools_per_request:
                break

            tool_name = self._tool_name(tool)
            if tool_name in selected_tool_names:
                continue

            if self._matches_patterns(tool, relevant_patterns):
                selected_tools.append(tool)
                selected_tool_names.add(tool_name)

        # If still below limit and we have room, add some general-purpose tools
        if len(selected_tools) < self.max_tools_per_request:
            remaining = self.max_tools_per_request - len(selected_tools)
            logging.debug(f"Adding up to {remaining} additional tools to reach limit")
            self._fill_remaining(all_tools, selected_tools, selected_tool_names)

        # Sorted so the log line is deterministic (set order is not).
        logging.info(f"Selected {len(selected_tools)} tools from {len(all_tools)} available " +
                     f"(categories: {', '.join(sorted(relevant_categories))})")

        # Log which tools were selected for debugging
        tool_names = [self._tool_name(t) for t in selected_tools]
        logging.debug(f"Selected tools: {', '.join(tool_names[:10])}{'...' if len(tool_names) > 10 else ''}")

        return selected_tools

    def _detect_categories(self, user_message: str,
                           conversation_history: List[Dict[str, Any]] = None) -> Set[str]:
        """
        Detect relevant tool categories from user message and history.

        A category is detected when any of its CATEGORY_KEYWORDS occurs as a
        substring of the lower-cased text. Only the last five history entries
        are inspected.

        Args:
            user_message: Current user message (None is treated as empty)
            conversation_history: Recent conversation messages, or None

        Returns:
            Set of relevant category names
        """
        categories = set()

        # Analyse user message
        message_lower = (user_message or '').lower()
        for category, keywords in self.CATEGORY_KEYWORDS.items():
            if any(keyword in message_lower for keyword in keywords):
                categories.add(category)
                logging.debug(f"Category '{category}' detected from user message")

        # Analyse recent conversation history (last 5 messages)
        if conversation_history:
            for msg in conversation_history[-5:]:
                # Handle both dict messages (use their 'content') and anything
                # else (use its string form). Non-string content such as a list
                # of content blocks is stringified before matching.
                if isinstance(msg, dict):
                    content = str(msg.get('content', '')).lower()
                else:
                    content = str(msg).lower()

                for category, keywords in self.CATEGORY_KEYWORDS.items():
                    if category in categories:
                        continue
                    if any(keyword in content for keyword in keywords):
                        categories.add(category)
                        logging.debug(f"Category '{category}' detected from conversation history")

        return categories

    def _select_diverse_sample(self, all_tools: List[Dict[str, Any]],
                               already_selected: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """
        Select a diverse sample of tools when no specific categories are detected.

        Attempts to get tools from each category to provide broad coverage,
        then fills any remaining slots with the other tools in input order.

        Args:
            all_tools: All available tools
            already_selected: Tools already selected (e.g., built-in tools);
                this list is copied, not modified

        Returns:
            List of selected tools
        """
        selected = list(already_selected)
        selected_tool_names = {self._tool_name(t) for t in selected}

        # Calculate how many tools to get from each category (at least one,
        # even when fewer slots remain than there are categories)
        remaining_slots = self.max_tools_per_request - len(selected)
        tools_per_category = max(1, remaining_slots // len(self.TOOL_CATEGORIES))

        logging.debug(f"Selecting ~{tools_per_category} tools from each category for diversity")

        for category_patterns in self.TOOL_CATEGORIES.values():
            if len(selected) >= self.max_tools_per_request:
                break

            added = 0
            for tool in all_tools:
                tool_name = self._tool_name(tool)
                if tool_name in selected_tool_names:
                    continue

                # Check if tool matches this category
                if self._matches_patterns(tool, category_patterns):
                    selected.append(tool)
                    selected_tool_names.add(tool_name)
                    added += 1
                    if added >= tools_per_category or len(selected) >= self.max_tools_per_request:
                        break

        # If still below limit, add remaining tools
        if len(selected) < self.max_tools_per_request:
            self._fill_remaining(all_tools, selected, selected_tool_names)

        logging.info(f"Selected {len(selected)} diverse tools (no specific category detected)")
        return selected