dtSpark 1.1.0a3__py3-none-any.whl → 1.1.0a7__py3-none-any.whl

This diff compares the contents of two publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in the public registry.
Files changed (54)
  1. dtSpark/_version.txt +1 -1
  2. dtSpark/aws/authentication.py +1 -1
  3. dtSpark/aws/bedrock.py +238 -239
  4. dtSpark/aws/costs.py +9 -5
  5. dtSpark/aws/pricing.py +25 -21
  6. dtSpark/cli_interface.py +77 -68
  7. dtSpark/conversation_manager.py +54 -47
  8. dtSpark/core/application.py +114 -91
  9. dtSpark/core/context_compaction.py +241 -226
  10. dtSpark/daemon/__init__.py +36 -22
  11. dtSpark/daemon/action_monitor.py +46 -17
  12. dtSpark/daemon/daemon_app.py +126 -104
  13. dtSpark/daemon/daemon_manager.py +59 -23
  14. dtSpark/daemon/pid_file.py +3 -2
  15. dtSpark/database/autonomous_actions.py +3 -0
  16. dtSpark/database/credential_prompt.py +52 -54
  17. dtSpark/files/manager.py +6 -12
  18. dtSpark/limits/__init__.py +1 -1
  19. dtSpark/limits/tokens.py +2 -2
  20. dtSpark/llm/anthropic_direct.py +246 -141
  21. dtSpark/llm/ollama.py +3 -1
  22. dtSpark/mcp_integration/manager.py +4 -4
  23. dtSpark/mcp_integration/tool_selector.py +83 -77
  24. dtSpark/resources/config.yaml.template +11 -0
  25. dtSpark/safety/patterns.py +45 -46
  26. dtSpark/safety/prompt_inspector.py +8 -1
  27. dtSpark/scheduler/creation_tools.py +273 -181
  28. dtSpark/scheduler/executor.py +503 -221
  29. dtSpark/tools/builtin.py +70 -53
  30. dtSpark/web/endpoints/autonomous_actions.py +12 -9
  31. dtSpark/web/endpoints/chat.py +8 -6
  32. dtSpark/web/endpoints/conversations.py +18 -9
  33. dtSpark/web/endpoints/main_menu.py +132 -105
  34. dtSpark/web/endpoints/streaming.py +2 -2
  35. dtSpark/web/server.py +70 -5
  36. dtSpark/web/ssl_utils.py +3 -3
  37. dtSpark/web/static/css/dark-theme.css +8 -29
  38. dtSpark/web/static/js/chat.js +6 -8
  39. dtSpark/web/static/js/main.js +8 -8
  40. dtSpark/web/static/js/sse-client.js +130 -122
  41. dtSpark/web/templates/actions.html +5 -5
  42. dtSpark/web/templates/base.html +15 -0
  43. dtSpark/web/templates/chat.html +10 -10
  44. dtSpark/web/templates/conversations.html +6 -2
  45. dtSpark/web/templates/goodbye.html +2 -2
  46. dtSpark/web/templates/main_menu.html +19 -17
  47. dtSpark/web/web_interface.py +2 -2
  48. {dtspark-1.1.0a3.dist-info → dtspark-1.1.0a7.dist-info}/METADATA +9 -2
  49. dtspark-1.1.0a7.dist-info/RECORD +96 -0
  50. dtspark-1.1.0a3.dist-info/RECORD +0 -96
  51. {dtspark-1.1.0a3.dist-info → dtspark-1.1.0a7.dist-info}/WHEEL +0 -0
  52. {dtspark-1.1.0a3.dist-info → dtspark-1.1.0a7.dist-info}/entry_points.txt +0 -0
  53. {dtspark-1.1.0a3.dist-info → dtspark-1.1.0a7.dist-info}/licenses/LICENSE +0 -0
  54. {dtspark-1.1.0a3.dist-info → dtspark-1.1.0a7.dist-info}/top_level.txt +0 -0
dtSpark/limits/tokens.py CHANGED
@@ -45,7 +45,7 @@ class TokenManager:
         self.current_output_override = 0  # Additional output tokens allowed
         self.override_expires = None  # When the override expires

-    def check_limits_before_request(self, model_id: str, region: str,
+    def check_limits_before_request(self, _model_id: str, region: str,
                                     input_tokens: int, max_output_tokens: int) -> Tuple[bool, str, LimitStatus]:
        """
        Check if a request would exceed the token limits.
@@ -249,7 +249,7 @@ class TokenManager:
        message = "Token Limit Reached: "

        if input_exceeded and output_exceeded:
-            message += f"Both limits exceeded. "
+            message += "Both limits exceeded. "
        elif input_exceeded:
            message += f"Input limit exceeded: {current_input:,}/{input_limit:,} used, {request_input:,} requested. "
        else:
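
The `model_id` → `_model_id` rename marks the parameter as intentionally unused while keeping the method signature stable for callers; a leading underscore is the conventional way to tell linters the argument is deliberately ignored. A minimal sketch of the convention — the limit values and check logic below are simplified for illustration, not dtSpark's actual logic:

```python
from typing import Tuple

# Sketch of the leading-underscore convention: _model_id stays in the
# signature (callers are unaffected) but is deliberately unused, so linters
# such as pylint/ruff stop flagging it as an unused argument.
def check_limits_before_request(_model_id: str, region: str,
                                input_tokens: int, max_output_tokens: int) -> Tuple[bool, str]:
    limit = 100_000  # hypothetical combined token budget, for illustration only
    requested = input_tokens + max_output_tokens
    if requested <= limit:
        return True, ""
    # The {:,} format spec renders thousands separators, as in the real messages
    return False, f"Token Limit Reached: {requested:,}/{limit:,} requested in {region}"

print(check_limits_before_request("claude-sonnet-4", "ap-southeast-2", 95_000, 8_192))
```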
dtSpark/llm/anthropic_direct.py CHANGED
@@ -190,147 +190,8 @@ class AnthropicService(LLMService):
        }

        try:
-            # Use provided max_tokens or fall back to default from config
-            requested_max_tokens = max_tokens if max_tokens != 4096 else self.default_max_tokens
-
-            # Get model's max output tokens to ensure we don't exceed it
-            model_max_output = self.get_model_max_tokens(self.current_model_id)
-
-            # Cap max_tokens to model's limit
-            actual_max_tokens = min(requested_max_tokens, model_max_output)
-            if actual_max_tokens < requested_max_tokens:
-                logging.info(
-                    f"Capping max_tokens from {requested_max_tokens} to {actual_max_tokens} "
-                    f"(model {self.current_model_id} limit)"
-                )
-
-            # Convert messages to Anthropic format
-            anthropic_messages = self._convert_messages_to_anthropic(messages)
-
-            # Build API parameters
-            api_params = {
-                'model': self.current_model_id,
-                'messages': anthropic_messages,
-                'max_tokens': actual_max_tokens,
-                'temperature': temperature
-            }
-
-            if system:
-                api_params['system'] = system
-
-            if tools:
-                api_params['tools'] = self._convert_tools_to_anthropic(tools)
-                logging.debug(f"Sending {len(api_params['tools'])} tools to Anthropic API")
-
-            logging.debug(f"Invoking Anthropic model: {self.current_model_id}")
-            logging.debug(f"API params (excluding messages): {{'model': api_params['model'], 'max_tokens': api_params['max_tokens'], 'temperature': api_params['temperature'], 'has_system': 'system' in api_params, 'has_tools': 'tools' in api_params, 'num_tools': len(api_params.get('tools', []))}}")
-
-            # Use streaming to avoid 10-minute timeout
-            # Accumulate response from stream
-            text_parts = []
-            content_blocks = []
-            tool_use_blocks = []
-            stop_reason = None
-            usage_info = {'input_tokens': 0, 'output_tokens': 0}
-
-            # Implement rate limit handling with exponential backoff
-            for retry_attempt in range(self.rate_limit_max_retries):
-                try:
-                    with self.client.messages.stream(**api_params) as stream:
-                        for event in stream:
-                            # Handle different event types
-                            if hasattr(event, 'type'):
-                                if event.type == 'content_block_start':
-                                    # Track content blocks as they start
-                                    pass
-                                elif event.type == 'content_block_delta':
-                                    # Accumulate text deltas
-                                    if hasattr(event, 'delta'):
-                                        if hasattr(event.delta, 'type'):
-                                            if event.delta.type == 'text_delta':
-                                                text_parts.append(event.delta.text)
-                                elif event.type == 'message_stop':
-                                    # Message complete
-                                    pass
-                                elif event.type == 'message_delta':
-                                    # Update stop reason and usage
-                                    if hasattr(event, 'delta') and hasattr(event.delta, 'stop_reason'):
-                                        stop_reason = event.delta.stop_reason
-                                    if hasattr(event, 'usage'):
-                                        usage_info['output_tokens'] = event.usage.output_tokens
-
-                        # Get final message to extract full content and usage
-                        final_message = stream.get_final_message()
-
-                        # Extract usage information
-                        if hasattr(final_message, 'usage'):
-                            usage_info['input_tokens'] = final_message.usage.input_tokens
-                            usage_info['output_tokens'] = final_message.usage.output_tokens
-
-                        # Extract stop reason
-                        if hasattr(final_message, 'stop_reason'):
-                            stop_reason = final_message.stop_reason
-
-                        # Extract content blocks (including tool use)
-                        if hasattr(final_message, 'content'):
-                            for block in final_message.content:
-                                if hasattr(block, 'type'):
-                                    if block.type == 'text':
-                                        content_blocks.append({
-                                            'type': 'text',
-                                            'text': block.text
-                                        })
-                                    elif block.type == 'tool_use':
-                                        tool_block = {
-                                            'type': 'tool_use',
-                                            'id': block.id,
-                                            'name': block.name,
-                                            'input': block.input
-                                        }
-                                        tool_use_blocks.append(tool_block)
-                                        content_blocks.append(tool_block)
-
-                    # Successfully completed - break out of retry loop
-                    break
-
-                except RateLimitError as e:
-                    # Handle rate limit errors with exponential backoff
-                    if retry_attempt < self.rate_limit_max_retries - 1:
-                        wait_time = self.rate_limit_base_delay ** retry_attempt
-                        logging.warning(
-                            f"Rate limit exceeded (attempt {retry_attempt + 1}/{self.rate_limit_max_retries}). "
-                            f"Waiting {wait_time:.1f} seconds before retrying..."
-                        )
-                        logging.debug(f"Rate limit error details: {str(e)}")
-                        time.sleep(wait_time)
-                    else:
-                        # Final retry failed
-                        logging.error(
-                            f"Rate limit exceeded after {self.rate_limit_max_retries} attempts. "
-                            f"Please reduce request frequency or contact Anthropic for rate limit increase."
-                        )
-                        logging.error(f"Rate limit error details: {str(e)}")
-                        return {
-                            'error': True,
-                            'error_code': 'RateLimitExceeded',
-                            'error_message': f"Rate limit exceeded after {self.rate_limit_max_retries} retry attempts. {str(e)}",
-                            'error_type': 'RateLimitError'
-                        }
-
-            # Build response in standard format
-            response = {
-                'stop_reason': stop_reason,
-                'usage': usage_info,
-                'content_blocks': content_blocks,
-                'content': ''.join(text_parts)
-            }
-
-            # Add tool_use if present
-            if tool_use_blocks:
-                response['tool_use'] = tool_use_blocks
-                response['stop_reason'] = 'tool_use'
-
-            return response
+            api_params = self._build_api_params(messages, max_tokens, temperature, tools, system)
+            return self._execute_streaming_request(api_params)

        except Exception as e:
            logging.error(f"Anthropic API error: {e}")
@@ -341,6 +202,250 @@ class AnthropicService(LLMService):
                'error_type': 'RequestError'
            }

+    def _build_api_params(
+        self,
+        messages: List[Dict[str, Any]],
+        max_tokens: int,
+        temperature: float,
+        tools: Optional[List[Dict[str, Any]]],
+        system: Optional[str]
+    ) -> Dict[str, Any]:
+        """
+        Build API parameters for an Anthropic request.
+
+        Handles max_tokens capping, message conversion, and tool conversion.
+
+        Args:
+            messages: Conversation messages
+            max_tokens: Maximum tokens to generate
+            temperature: Sampling temperature
+            tools: Optional tool definitions
+            system: Optional system prompt
+
+        Returns:
+            Dictionary of API parameters ready for the Anthropic client
+        """
+        # Use provided max_tokens or fall back to default from config
+        requested_max_tokens = max_tokens if max_tokens != 4096 else self.default_max_tokens
+
+        # Get model's max output tokens to ensure we don't exceed it
+        model_max_output = self.get_model_max_tokens(self.current_model_id)
+
+        # Cap max_tokens to model's limit
+        actual_max_tokens = min(requested_max_tokens, model_max_output)
+        if actual_max_tokens < requested_max_tokens:
+            logging.info(
+                f"Capping max_tokens from {requested_max_tokens} to {actual_max_tokens} "
+                f"(model {self.current_model_id} limit)"
+            )
+
+        # Convert messages to Anthropic format
+        anthropic_messages = self._convert_messages_to_anthropic(messages)
+
+        # Build API parameters
+        api_params = {
+            'model': self.current_model_id,
+            'messages': anthropic_messages,
+            'max_tokens': actual_max_tokens,
+            'temperature': temperature
+        }
+
+        if system:
+            api_params['system'] = system
+
+        if tools:
+            api_params['tools'] = self._convert_tools_to_anthropic(tools)
+            logging.debug(f"Sending {len(api_params['tools'])} tools to Anthropic API")
+
+        logging.debug(f"Invoking Anthropic model: {self.current_model_id}")
+        self._log_api_params(api_params)
+
+        return api_params
+
+    def _log_api_params(self, api_params: Dict[str, Any]) -> None:
+        """Log API parameters for debugging (excluding message content)."""
+        debug_info = {
+            'model': api_params['model'],
+            'max_tokens': api_params['max_tokens'],
+            'temperature': api_params['temperature'],
+            'has_system': 'system' in api_params,
+            'has_tools': 'tools' in api_params,
+            'num_tools': len(api_params.get('tools', []))
+        }
+        logging.debug("API params (excluding messages): %s", debug_info)
+
+    def _execute_streaming_request(self, api_params: Dict[str, Any]) -> Dict[str, Any]:
+        """
+        Execute a streaming request with rate limit retry logic.
+
+        Args:
+            api_params: Pre-built API parameters
+
+        Returns:
+            Response dictionary in standard format, or error dictionary
+        """
+        text_parts = []
+        content_blocks = []
+        tool_use_blocks = []
+        stop_reason = None
+        usage_info = {'input_tokens': 0, 'output_tokens': 0}
+
+        for retry_attempt in range(self.rate_limit_max_retries):
+            try:
+                with self.client.messages.stream(**api_params) as stream:
+                    self._process_stream_events(stream, text_parts)
+
+                    final_message = stream.get_final_message()
+                    self._extract_final_message_data(
+                        final_message, usage_info, content_blocks, tool_use_blocks
+                    )
+                    if hasattr(final_message, 'stop_reason'):
+                        stop_reason = final_message.stop_reason
+
+                # Successfully completed - break out of retry loop
+                break
+
+            except RateLimitError as e:
+                error_response = self._handle_rate_limit_error(e, retry_attempt)
+                if error_response is not None:
+                    return error_response
+
+        return self._build_response(text_parts, content_blocks, tool_use_blocks, stop_reason, usage_info)
+
+    def _process_stream_events(
+        self,
+        stream,
+        text_parts: List[str]
+    ) -> None:
+        """
+        Process streaming events, accumulating text deltas.
+
+        Args:
+            stream: The Anthropic streaming response
+            text_parts: List to accumulate text delta strings into
+        """
+        for event in stream:
+            if not hasattr(event, 'type'):
+                continue
+
+            if event.type == 'content_block_delta':
+                self._handle_content_block_delta(event, text_parts)
+
+    def _handle_content_block_delta(self, event, text_parts: List[str]) -> None:
+        """Handle a content_block_delta event by extracting text."""
+        if not hasattr(event, 'delta'):
+            return
+        if not hasattr(event.delta, 'type'):
+            return
+        if event.delta.type == 'text_delta':
+            text_parts.append(event.delta.text)
+
+    def _extract_final_message_data(
+        self,
+        final_message,
+        usage_info: Dict[str, int],
+        content_blocks: List[Dict[str, Any]],
+        tool_use_blocks: List[Dict[str, Any]]
+    ) -> None:
+        """
+        Extract usage, stop reason, and content blocks from the final message.
+
+        Args:
+            final_message: The final message object from the stream
+            usage_info: Dictionary to update with token usage
+            content_blocks: List to append content blocks to
+            tool_use_blocks: List to append tool use blocks to
+        """
+        if hasattr(final_message, 'usage'):
+            usage_info['input_tokens'] = final_message.usage.input_tokens
+            usage_info['output_tokens'] = final_message.usage.output_tokens
+
+        if not hasattr(final_message, 'content'):
+            return
+
+        for block in final_message.content:
+            if not hasattr(block, 'type'):
+                continue
+            if block.type == 'text':
+                content_blocks.append({'type': 'text', 'text': block.text})
+            elif block.type == 'tool_use':
+                tool_block = {
+                    'type': 'tool_use',
+                    'id': block.id,
+                    'name': block.name,
+                    'input': block.input
+                }
+                tool_use_blocks.append(tool_block)
+                content_blocks.append(tool_block)
+
+    def _handle_rate_limit_error(self, error: Exception, retry_attempt: int) -> Optional[Dict[str, Any]]:
+        """
+        Handle a rate limit error with exponential backoff.
+
+        Args:
+            error: The RateLimitError exception
+            retry_attempt: Current retry attempt index (0-based)
+
+        Returns:
+            None if retrying (caller should continue), or an error dict if retries exhausted
+        """
+        if retry_attempt < self.rate_limit_max_retries - 1:
+            wait_time = self.rate_limit_base_delay ** retry_attempt
+            logging.warning(
+                f"Rate limit exceeded (attempt {retry_attempt + 1}/{self.rate_limit_max_retries}). "
+                f"Waiting {wait_time:.1f} seconds before retrying..."
+            )
+            logging.debug(f"Rate limit error details: {str(error)}")
+            time.sleep(wait_time)
+            return None
+
+        # Final retry failed
+        logging.error(
+            f"Rate limit exceeded after {self.rate_limit_max_retries} attempts. "
+            "Please reduce request frequency or contact Anthropic for rate limit increase."
+        )
+        logging.error(f"Rate limit error details: {str(error)}")
+        return {
+            'error': True,
+            'error_code': 'RateLimitExceeded',
+            'error_message': f"Rate limit exceeded after {self.rate_limit_max_retries} retry attempts. {str(error)}",
+            'error_type': 'RateLimitError'
+        }
+
+    def _build_response(
+        self,
+        text_parts: List[str],
+        content_blocks: List[Dict[str, Any]],
+        tool_use_blocks: List[Dict[str, Any]],
+        stop_reason: Optional[str],
+        usage_info: Dict[str, int]
+    ) -> Dict[str, Any]:
+        """
+        Build the standard response dictionary from accumulated stream data.
+
+        Args:
+            text_parts: Accumulated text parts from streaming
+            content_blocks: All content blocks (text and tool use)
+            tool_use_blocks: Tool use blocks specifically
+            stop_reason: The stop reason from the API
+            usage_info: Token usage information
+
+        Returns:
+            Response dictionary in standard format
+        """
+        response = {
+            'stop_reason': stop_reason,
+            'usage': usage_info,
+            'content_blocks': content_blocks,
+            'content': ''.join(text_parts)
+        }
+
+        if tool_use_blocks:
+            response['tool_use'] = tool_use_blocks
+            response['stop_reason'] = 'tool_use'
+
+        return response
+
    def _convert_messages_to_anthropic(
        self,
        messages: List[Dict[str, Any]]
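
One consequence of `_handle_rate_limit_error` computing its wait as `rate_limit_base_delay ** retry_attempt` is that the first retry always waits `base ** 0 = 1` second, whatever the configured base. A small sketch of the resulting schedule, assuming a base delay of 2.0 and 4 max retries (the actual configured values are not shown in this diff):

```python
# Backoff schedule implied by: wait_time = self.rate_limit_base_delay ** retry_attempt
# Assumed values for illustration; dtSpark's configured defaults are not in this hunk.
rate_limit_base_delay = 2.0
rate_limit_max_retries = 4

# The final attempt returns an error dict instead of sleeping, so only
# max_retries - 1 waits actually occur.
for retry_attempt in range(rate_limit_max_retries - 1):
    wait_time = rate_limit_base_delay ** retry_attempt
    print(f"attempt {retry_attempt + 1}: wait {wait_time:.1f}s")
# attempt 1: wait 1.0s
# attempt 2: wait 2.0s
# attempt 3: wait 4.0s
```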
dtSpark/llm/ollama.py CHANGED
@@ -79,9 +79,11 @@ class OllamaService(LLMService):
            base_url = base_url + '/'

        # Create httpx client with SSL verification disabled and proper base URL
+        # SSL verification is intentionally disabled here - controlled by verify_ssl constructor parameter
+        # which is set from user configuration (for self-signed certificates on local Ollama instances)
        custom_http_client = httpx.Client(
            base_url=base_url,
-            verify=False,
+            verify=False, # NOSONAR - intentional, gated by verify_ssl config
            timeout=httpx.Timeout(timeout=120.0)
        )

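The NOSONAR comment asserts the hard-coded `verify=False` is gated by a `verify_ssl` configuration value, though this hunk still passes the literal. For reference, `httpx.Client` accepts a boolean (or an `ssl.SSLContext`) for `verify`, so config-driven gating typically looks like the sketch below; the `verify_ssl` parameter name is taken from the comment, and the wiring is an assumption, not dtSpark's actual code:

```python
import httpx

# Sketch, assuming a verify_ssl flag sourced from user configuration.
# Passing it straight through lets self-signed local Ollama instances work
# (verify_ssl=False) without disabling verification for everyone else.
def build_ollama_http_client(base_url: str, verify_ssl: bool) -> httpx.Client:
    if not base_url.endswith('/'):
        base_url = base_url + '/'
    return httpx.Client(
        base_url=base_url,
        verify=verify_ssl,
        timeout=httpx.Timeout(timeout=120.0),
    )

client = build_ollama_http_client("https://localhost:11434", verify_ssl=False)
```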
dtSpark/mcp_integration/manager.py CHANGED
@@ -185,7 +185,7 @@ class MCPClient:
            httpx_client = httpx.AsyncClient(
                headers=headers if headers else None,
                timeout=self.config.timeout,
-                verify=False
+                verify=False # NOSONAR - intentional, gated by ssl_verify config
            )

            # Use streamable HTTP client with headers
@@ -226,7 +226,7 @@ class MCPClient:
            httpx_client = httpx.AsyncClient(
                headers=headers if headers else None,
                timeout=self.config.timeout,
-                verify=False
+                verify=False # NOSONAR - intentional, gated by ssl_verify config
            )

            # Use SSE client with headers
@@ -266,7 +266,7 @@ class MCPClient:
                logging.error(f"MCP session initialization cancelled for {self.config.name} "
                              f"(server may have returned an error)")
                await self._cleanup_failed_connection()
-                return False
+                raise

            self._connected = True
            logging.info(f"Connected to MCP server: {self.config.name} (transport: {self.config.transport})")
@@ -276,7 +276,7 @@ class MCPClient:
            logging.error(f"Connection cancelled for MCP server {self.config.name} "
                          f"(check server URL and authentication)")
            await self._cleanup_failed_connection()
-            return False
+            raise
        except Exception as e:
            error_msg = str(e)
            # Provide more helpful error messages for common issues
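
Replacing `return False` with `raise` changes the `connect()` contract: cancellation now propagates to the caller instead of being reported as an ordinary connection failure, which is what asyncio expects (suppressing `CancelledError` can stall task teardown). A caller-side sketch under that assumption; the `client.connect()` shape is inferred from this hunk, not the full dtSpark API:

```python
import asyncio
import logging

# Sketch: after the change above, callers can distinguish cancellation from
# ordinary failure. CancelledError is logged and re-raised so the event loop
# can finish tearing down the task.
async def connect_with_cleanup(client) -> bool:
    try:
        return await client.connect()  # assumed to return True once _connected is set
    except asyncio.CancelledError:
        logging.info("MCP connection attempt cancelled")
        raise  # never swallow CancelledError in asyncio code
    except Exception as exc:
        logging.error(f"MCP connection failed: {exc}")
        return False
```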
dtSpark/mcp_integration/tool_selector.py CHANGED
@@ -78,7 +78,6 @@ class ToolSelector:
        relevant_categories = self._detect_categories(user_message, conversation_history)

        if not relevant_categories:
-            # If no specific categories detected, include a diverse sample
            logging.info("No specific tool categories detected, selecting diverse sample")
            return self._select_diverse_sample(all_tools, selected_tools)

@@ -89,49 +88,57 @@ class ToolSelector:
        for category in relevant_categories:
            relevant_patterns.update(self.TOOL_CATEGORIES.get(category, []))

-        # Track selected tool names to avoid duplicates
        selected_tool_names = {t.get('name') for t in selected_tools}

-        # Select tools that match the relevant patterns
+        # Select tools matching the relevant patterns, then backfill to limit
+        self._add_matching_tools(all_tools, selected_tools, selected_tool_names, relevant_patterns)
+        self._backfill_tools(all_tools, selected_tools, selected_tool_names)
+
+        logging.info(f"Selected {len(selected_tools)} tools from {len(all_tools)} available "
+                     f"(categories: {', '.join(relevant_categories)})")
+        self._log_selected_tools(selected_tools)
+
+        return selected_tools
+
+    def _add_matching_tools(self, all_tools: List[Dict[str, Any]],
+                            selected: List[Dict[str, Any]],
+                            selected_names: Set[str],
+                            patterns: Set[str]) -> None:
+        """Add tools whose name or description matches any of the given patterns."""
        for tool in all_tools:
-            if len(selected_tools) >= self.max_tools_per_request:
+            if len(selected) >= self.max_tools_per_request:
                break
-
            tool_name = tool.get('name', '')
-            if tool_name in selected_tool_names:
+            if tool_name in selected_names:
                continue
-
            tool_name_lower = tool_name.lower()
            tool_desc = tool.get('description', '').lower()
+            if any(p in tool_name_lower or p in tool_desc for p in patterns):
+                selected.append(tool)
+                selected_names.add(tool_name)
+
+    def _backfill_tools(self, all_tools: List[Dict[str, Any]],
+                        selected: List[Dict[str, Any]],
+                        selected_names: Set[str]) -> None:
+        """Fill remaining slots up to max_tools_per_request with unselected tools."""
+        if len(selected) >= self.max_tools_per_request:
+            return
+        remaining = self.max_tools_per_request - len(selected)
+        logging.debug(f"Adding up to {remaining} additional tools to reach limit")
+        for tool in all_tools:
+            if len(selected) >= self.max_tools_per_request:
+                break
+            tool_name = tool.get('name', '')
+            if tool_name not in selected_names:
+                selected.append(tool)
+                selected_names.add(tool_name)

-            # Check if tool name or description matches any relevant pattern
-            if any(pattern in tool_name_lower or pattern in tool_desc for pattern in relevant_patterns):
-                selected_tools.append(tool)
-                selected_tool_names.add(tool_name)
-
-        # If still below limit and we have room, add some general-purpose tools
-        if len(selected_tools) < self.max_tools_per_request:
-            remaining = self.max_tools_per_request - len(selected_tools)
-            logging.debug(f"Adding up to {remaining} additional tools to reach limit")
-
-            for tool in all_tools:
-                if len(selected_tools) >= self.max_tools_per_request:
-                    break
-
-                tool_name = tool.get('name', '')
-                if tool_name not in selected_tool_names:
-                    selected_tools.append(tool)
-                    selected_tool_names.add(tool_name)
-
-        logging.info(f"Selected {len(selected_tools)} tools from {len(all_tools)} available " +
-                     f"(categories: {', '.join(relevant_categories)})")
-
-        # Log which tools were selected for debugging
+    @staticmethod
+    def _log_selected_tools(selected_tools: List[Dict[str, Any]]) -> None:
+        """Log the names of selected tools for debugging."""
        tool_names = [t.get('name') for t in selected_tools]
        logging.debug(f"Selected tools: {', '.join(tool_names[:10])}{'...' if len(tool_names) > 10 else ''}")

-        return selected_tools
-
    def _detect_categories(self, user_message: str,
                           conversation_history: List[Dict[str, Any]] = None) -> Set[str]:
        """
@@ -147,30 +154,30 @@ class ToolSelector:
        categories = set()

        # Analyse user message
-        message_lower = user_message.lower()
-        for category, keywords in self.CATEGORY_KEYWORDS.items():
-            if any(keyword in message_lower for keyword in keywords):
-                categories.add(category)
-                logging.debug(f"Category '{category}' detected from user message")
+        self._match_categories(user_message.lower(), categories, source='user message')

        # Analyse recent conversation history (last 5 messages)
        if conversation_history:
-            recent_messages = conversation_history[-5:]
-            for msg in recent_messages:
-                # Handle both string content and dict content
-                if isinstance(msg, dict):
-                    content = str(msg.get('content', '')).lower()
-                else:
-                    content = str(msg).lower()
-
-                for category, keywords in self.CATEGORY_KEYWORDS.items():
-                    if any(keyword in content for keyword in keywords):
-                        if category not in categories:
-                            categories.add(category)
-                            logging.debug(f"Category '{category}' detected from conversation history")
+            for msg in conversation_history[-5:]:
+                content = self._extract_message_content(msg)
+                self._match_categories(content, categories, source='conversation history')

        return categories

+    def _match_categories(self, text: str, categories: Set[str], source: str) -> None:
+        """Match keyword categories against text and add new matches to the set."""
+        for category, keywords in self.CATEGORY_KEYWORDS.items():
+            if category not in categories and any(kw in text for kw in keywords):
+                categories.add(category)
+                logging.debug(f"Category '{category}' detected from {source}")
+
+    @staticmethod
+    def _extract_message_content(msg) -> str:
+        """Extract lowercased text content from a message (dict or string)."""
+        if isinstance(msg, dict):
+            return str(msg.get('content', '')).lower()
+        return str(msg).lower()
+
    def _select_diverse_sample(self, all_tools: List[Dict[str, Any]],
                               already_selected: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """
@@ -197,34 +204,33 @@ class ToolSelector:
        for category, category_patterns in self.TOOL_CATEGORIES.items():
            if len(selected) >= self.max_tools_per_request:
                break
+            self._add_category_tools(
+                all_tools, selected, selected_tool_names,
+                category_patterns, tools_per_category,
+            )

-            added = 0
-            for tool in all_tools:
-                tool_name = tool.get('name', '')
-                if tool_name in selected_tool_names:
-                    continue
-
-                tool_name_lower = tool_name.lower()
-                tool_desc = tool.get('description', '').lower()
-
-                # Check if tool matches this category
-                if any(pattern in tool_name_lower or pattern in tool_desc for pattern in category_patterns):
-                    selected.append(tool)
-                    selected_tool_names.add(tool_name)
-                    added += 1
-                    if added >= tools_per_category or len(selected) >= self.max_tools_per_request:
-                        break
-
-        # If still below limit, add remaining tools
-        if len(selected) < self.max_tools_per_request:
-            for tool in all_tools:
-                if len(selected) >= self.max_tools_per_request:
-                    break
-
-                tool_name = tool.get('name', '')
-                if tool_name not in selected_tool_names:
-                    selected.append(tool)
-                    selected_tool_names.add(tool_name)
+        # Backfill any remaining slots
+        self._backfill_tools(all_tools, selected, selected_tool_names)

        logging.info(f"Selected {len(selected)} diverse tools (no specific category detected)")
        return selected
+
+    def _add_category_tools(self, all_tools: List[Dict[str, Any]],
+                            selected: List[Dict[str, Any]],
+                            selected_names: Set[str],
+                            patterns: List[str],
+                            max_count: int) -> None:
+        """Add up to max_count tools matching the given category patterns."""
+        added = 0
+        for tool in all_tools:
+            if added >= max_count or len(selected) >= self.max_tools_per_request:
+                break
+            tool_name = tool.get('name', '')
+            if tool_name in selected_names:
+                continue
+            tool_name_lower = tool_name.lower()
+            tool_desc = tool.get('description', '').lower()
+            if any(p in tool_name_lower or p in tool_desc for p in patterns):
+                selected.append(tool)
+                selected_names.add(tool_name)
+                added += 1
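
Both `_add_matching_tools` and `_add_category_tools` use the same rule: a tool qualifies when any pattern appears as a substring of its lowercased name or description. A standalone sketch of that rule (the sample tools and patterns are invented for illustration):

```python
from typing import Any, Dict, List, Set

# Standalone version of the substring-matching rule used by the selector.
def select_matching(all_tools: List[Dict[str, Any]], patterns: Set[str],
                    max_tools: int) -> List[Dict[str, Any]]:
    selected: List[Dict[str, Any]] = []
    seen: Set[str] = set()
    for tool in all_tools:
        if len(selected) >= max_tools:
            break
        name = tool.get('name', '')
        if name in seen:
            continue
        name_lower = name.lower()
        desc = tool.get('description', '').lower()
        # A single pattern hit in either field selects the tool
        if any(p in name_lower or p in desc for p in patterns):
            selected.append(tool)
            seen.add(name)
    return selected

tools = [
    {'name': 'read_file', 'description': 'Read a file from disk'},
    {'name': 'web_search', 'description': 'Search the web for pages'},
    {'name': 'list_files', 'description': 'List directory contents'},
]
print([t['name'] for t in select_matching(tools, {'file'}, max_tools=2)])
# ['read_file', 'list_files']
```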