dtSpark 1.1.0a3__py3-none-any.whl → 1.1.0a7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dtSpark/_version.txt +1 -1
- dtSpark/aws/authentication.py +1 -1
- dtSpark/aws/bedrock.py +238 -239
- dtSpark/aws/costs.py +9 -5
- dtSpark/aws/pricing.py +25 -21
- dtSpark/cli_interface.py +77 -68
- dtSpark/conversation_manager.py +54 -47
- dtSpark/core/application.py +114 -91
- dtSpark/core/context_compaction.py +241 -226
- dtSpark/daemon/__init__.py +36 -22
- dtSpark/daemon/action_monitor.py +46 -17
- dtSpark/daemon/daemon_app.py +126 -104
- dtSpark/daemon/daemon_manager.py +59 -23
- dtSpark/daemon/pid_file.py +3 -2
- dtSpark/database/autonomous_actions.py +3 -0
- dtSpark/database/credential_prompt.py +52 -54
- dtSpark/files/manager.py +6 -12
- dtSpark/limits/__init__.py +1 -1
- dtSpark/limits/tokens.py +2 -2
- dtSpark/llm/anthropic_direct.py +246 -141
- dtSpark/llm/ollama.py +3 -1
- dtSpark/mcp_integration/manager.py +4 -4
- dtSpark/mcp_integration/tool_selector.py +83 -77
- dtSpark/resources/config.yaml.template +11 -0
- dtSpark/safety/patterns.py +45 -46
- dtSpark/safety/prompt_inspector.py +8 -1
- dtSpark/scheduler/creation_tools.py +273 -181
- dtSpark/scheduler/executor.py +503 -221
- dtSpark/tools/builtin.py +70 -53
- dtSpark/web/endpoints/autonomous_actions.py +12 -9
- dtSpark/web/endpoints/chat.py +8 -6
- dtSpark/web/endpoints/conversations.py +18 -9
- dtSpark/web/endpoints/main_menu.py +132 -105
- dtSpark/web/endpoints/streaming.py +2 -2
- dtSpark/web/server.py +70 -5
- dtSpark/web/ssl_utils.py +3 -3
- dtSpark/web/static/css/dark-theme.css +8 -29
- dtSpark/web/static/js/chat.js +6 -8
- dtSpark/web/static/js/main.js +8 -8
- dtSpark/web/static/js/sse-client.js +130 -122
- dtSpark/web/templates/actions.html +5 -5
- dtSpark/web/templates/base.html +15 -0
- dtSpark/web/templates/chat.html +10 -10
- dtSpark/web/templates/conversations.html +6 -2
- dtSpark/web/templates/goodbye.html +2 -2
- dtSpark/web/templates/main_menu.html +19 -17
- dtSpark/web/web_interface.py +2 -2
- {dtspark-1.1.0a3.dist-info → dtspark-1.1.0a7.dist-info}/METADATA +9 -2
- dtspark-1.1.0a7.dist-info/RECORD +96 -0
- dtspark-1.1.0a3.dist-info/RECORD +0 -96
- {dtspark-1.1.0a3.dist-info → dtspark-1.1.0a7.dist-info}/WHEEL +0 -0
- {dtspark-1.1.0a3.dist-info → dtspark-1.1.0a7.dist-info}/entry_points.txt +0 -0
- {dtspark-1.1.0a3.dist-info → dtspark-1.1.0a7.dist-info}/licenses/LICENSE +0 -0
- {dtspark-1.1.0a3.dist-info → dtspark-1.1.0a7.dist-info}/top_level.txt +0 -0
dtSpark/limits/tokens.py
CHANGED
@@ -45,7 +45,7 @@ class TokenManager:
         self.current_output_override = 0  # Additional output tokens allowed
         self.override_expires = None  # When the override expires
 
-    def check_limits_before_request(self,
+    def check_limits_before_request(self, _model_id: str, region: str,
                                     input_tokens: int, max_output_tokens: int) -> Tuple[bool, str, LimitStatus]:
         """
         Check if a request would exceed the token limits.
@@ -249,7 +249,7 @@
         message = "Token Limit Reached: "
 
         if input_exceeded and output_exceeded:
-            message +=
+            message += "Both limits exceeded. "
        elif input_exceeded:
             message += f"Input limit exceeded: {current_input:,}/{input_limit:,} used, {request_input:,} requested. "
         else:
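For illustration, here is how a caller might invoke the widened signature. This is a hypothetical usage sketch: only the parameter list and the Tuple[bool, str, LimitStatus] return shape come from the diff above; the instance name and the concrete values are invented.

# Hypothetical usage - instance name and values are illustrative only
allowed, message, status = token_manager.check_limits_before_request(
    "claude-sonnet-4",        # _model_id: accepted but currently unused (hence the underscore)
    "us-east-1",              # region
    input_tokens=12_000,      # tokens already consumed by the prompt
    max_output_tokens=4_096,  # worst-case generation budget
)
if not allowed:
    print(message)  # e.g. "Token Limit Reached: Both limits exceeded. ..."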
dtSpark/llm/anthropic_direct.py
CHANGED
@@ -190,147 +190,8 @@ class AnthropicService(LLMService):
             }
 
         try:
-
-
-
-            # Get model's max output tokens to ensure we don't exceed it
-            model_max_output = self.get_model_max_tokens(self.current_model_id)
-
-            # Cap max_tokens to model's limit
-            actual_max_tokens = min(requested_max_tokens, model_max_output)
-            if actual_max_tokens < requested_max_tokens:
-                logging.info(
-                    f"Capping max_tokens from {requested_max_tokens} to {actual_max_tokens} "
-                    f"(model {self.current_model_id} limit)"
-                )
-
-            # Convert messages to Anthropic format
-            anthropic_messages = self._convert_messages_to_anthropic(messages)
-
-            # Build API parameters
-            api_params = {
-                'model': self.current_model_id,
-                'messages': anthropic_messages,
-                'max_tokens': actual_max_tokens,
-                'temperature': temperature
-            }
-
-            if system:
-                api_params['system'] = system
-
-            if tools:
-                api_params['tools'] = self._convert_tools_to_anthropic(tools)
-                logging.debug(f"Sending {len(api_params['tools'])} tools to Anthropic API")
-
-            logging.debug(f"Invoking Anthropic model: {self.current_model_id}")
-            logging.debug(f"API params (excluding messages): {{'model': api_params['model'], 'max_tokens': api_params['max_tokens'], 'temperature': api_params['temperature'], 'has_system': 'system' in api_params, 'has_tools': 'tools' in api_params, 'num_tools': len(api_params.get('tools', []))}}")
-
-            # Use streaming to avoid 10-minute timeout
-            # Accumulate response from stream
-            text_parts = []
-            content_blocks = []
-            tool_use_blocks = []
-            stop_reason = None
-            usage_info = {'input_tokens': 0, 'output_tokens': 0}
-
-            # Implement rate limit handling with exponential backoff
-            for retry_attempt in range(self.rate_limit_max_retries):
-                try:
-                    with self.client.messages.stream(**api_params) as stream:
-                        for event in stream:
-                            # Handle different event types
-                            if hasattr(event, 'type'):
-                                if event.type == 'content_block_start':
-                                    # Track content blocks as they start
-                                    pass
-                                elif event.type == 'content_block_delta':
-                                    # Accumulate text deltas
-                                    if hasattr(event, 'delta'):
-                                        if hasattr(event.delta, 'type'):
-                                            if event.delta.type == 'text_delta':
-                                                text_parts.append(event.delta.text)
-                                elif event.type == 'message_stop':
-                                    # Message complete
-                                    pass
-                                elif event.type == 'message_delta':
-                                    # Update stop reason and usage
-                                    if hasattr(event, 'delta') and hasattr(event.delta, 'stop_reason'):
-                                        stop_reason = event.delta.stop_reason
-                                    if hasattr(event, 'usage'):
-                                        usage_info['output_tokens'] = event.usage.output_tokens
-
-                        # Get final message to extract full content and usage
-                        final_message = stream.get_final_message()
-
-                        # Extract usage information
-                        if hasattr(final_message, 'usage'):
-                            usage_info['input_tokens'] = final_message.usage.input_tokens
-                            usage_info['output_tokens'] = final_message.usage.output_tokens
-
-                        # Extract stop reason
-                        if hasattr(final_message, 'stop_reason'):
-                            stop_reason = final_message.stop_reason
-
-                        # Extract content blocks (including tool use)
-                        if hasattr(final_message, 'content'):
-                            for block in final_message.content:
-                                if hasattr(block, 'type'):
-                                    if block.type == 'text':
-                                        content_blocks.append({
-                                            'type': 'text',
-                                            'text': block.text
-                                        })
-                                    elif block.type == 'tool_use':
-                                        tool_block = {
-                                            'type': 'tool_use',
-                                            'id': block.id,
-                                            'name': block.name,
-                                            'input': block.input
-                                        }
-                                        tool_use_blocks.append(tool_block)
-                                        content_blocks.append(tool_block)
-
-                    # Successfully completed - break out of retry loop
-                    break
-
-                except RateLimitError as e:
-                    # Handle rate limit errors with exponential backoff
-                    if retry_attempt < self.rate_limit_max_retries - 1:
-                        wait_time = self.rate_limit_base_delay ** retry_attempt
-                        logging.warning(
-                            f"Rate limit exceeded (attempt {retry_attempt + 1}/{self.rate_limit_max_retries}). "
-                            f"Waiting {wait_time:.1f} seconds before retrying..."
-                        )
-                        logging.debug(f"Rate limit error details: {str(e)}")
-                        time.sleep(wait_time)
-                    else:
-                        # Final retry failed
-                        logging.error(
-                            f"Rate limit exceeded after {self.rate_limit_max_retries} attempts. "
-                            f"Please reduce request frequency or contact Anthropic for rate limit increase."
-                        )
-                        logging.error(f"Rate limit error details: {str(e)}")
-                        return {
-                            'error': True,
-                            'error_code': 'RateLimitExceeded',
-                            'error_message': f"Rate limit exceeded after {self.rate_limit_max_retries} retry attempts. {str(e)}",
-                            'error_type': 'RateLimitError'
-                        }
-
-            # Build response in standard format
-            response = {
-                'stop_reason': stop_reason,
-                'usage': usage_info,
-                'content_blocks': content_blocks,
-                'content': ''.join(text_parts)
-            }
-
-            # Add tool_use if present
-            if tool_use_blocks:
-                response['tool_use'] = tool_use_blocks
-                response['stop_reason'] = 'tool_use'
-
-            return response
+            api_params = self._build_api_params(messages, max_tokens, temperature, tools, system)
+            return self._execute_streaming_request(api_params)
 
         except Exception as e:
             logging.error(f"Anthropic API error: {e}")
@@ -341,6 +202,250 @@
                 'error_type': 'RequestError'
             }
 
+    def _build_api_params(
+        self,
+        messages: List[Dict[str, Any]],
+        max_tokens: int,
+        temperature: float,
+        tools: Optional[List[Dict[str, Any]]],
+        system: Optional[str]
+    ) -> Dict[str, Any]:
+        """
+        Build API parameters for an Anthropic request.
+
+        Handles max_tokens capping, message conversion, and tool conversion.
+
+        Args:
+            messages: Conversation messages
+            max_tokens: Maximum tokens to generate
+            temperature: Sampling temperature
+            tools: Optional tool definitions
+            system: Optional system prompt
+
+        Returns:
+            Dictionary of API parameters ready for the Anthropic client
+        """
+        # Use provided max_tokens or fall back to default from config
+        requested_max_tokens = max_tokens if max_tokens != 4096 else self.default_max_tokens
+
+        # Get model's max output tokens to ensure we don't exceed it
+        model_max_output = self.get_model_max_tokens(self.current_model_id)
+
+        # Cap max_tokens to model's limit
+        actual_max_tokens = min(requested_max_tokens, model_max_output)
+        if actual_max_tokens < requested_max_tokens:
+            logging.info(
+                f"Capping max_tokens from {requested_max_tokens} to {actual_max_tokens} "
+                f"(model {self.current_model_id} limit)"
+            )
+
+        # Convert messages to Anthropic format
+        anthropic_messages = self._convert_messages_to_anthropic(messages)
+
+        # Build API parameters
+        api_params = {
+            'model': self.current_model_id,
+            'messages': anthropic_messages,
+            'max_tokens': actual_max_tokens,
+            'temperature': temperature
+        }
+
+        if system:
+            api_params['system'] = system
+
+        if tools:
+            api_params['tools'] = self._convert_tools_to_anthropic(tools)
+            logging.debug(f"Sending {len(api_params['tools'])} tools to Anthropic API")
+
+        logging.debug(f"Invoking Anthropic model: {self.current_model_id}")
+        self._log_api_params(api_params)
+
+        return api_params
+
+    def _log_api_params(self, api_params: Dict[str, Any]) -> None:
+        """Log API parameters for debugging (excluding message content)."""
+        debug_info = {
+            'model': api_params['model'],
+            'max_tokens': api_params['max_tokens'],
+            'temperature': api_params['temperature'],
+            'has_system': 'system' in api_params,
+            'has_tools': 'tools' in api_params,
+            'num_tools': len(api_params.get('tools', []))
+        }
+        logging.debug("API params (excluding messages): %s", debug_info)
+
+    def _execute_streaming_request(self, api_params: Dict[str, Any]) -> Dict[str, Any]:
+        """
+        Execute a streaming request with rate limit retry logic.
+
+        Args:
+            api_params: Pre-built API parameters
+
+        Returns:
+            Response dictionary in standard format, or error dictionary
+        """
+        text_parts = []
+        content_blocks = []
+        tool_use_blocks = []
+        stop_reason = None
+        usage_info = {'input_tokens': 0, 'output_tokens': 0}
+
+        for retry_attempt in range(self.rate_limit_max_retries):
+            try:
+                with self.client.messages.stream(**api_params) as stream:
+                    self._process_stream_events(stream, text_parts)
+
+                    final_message = stream.get_final_message()
+                    self._extract_final_message_data(
+                        final_message, usage_info, content_blocks, tool_use_blocks
+                    )
+                    if hasattr(final_message, 'stop_reason'):
+                        stop_reason = final_message.stop_reason
+
+                # Successfully completed - break out of retry loop
+                break
+
+            except RateLimitError as e:
+                error_response = self._handle_rate_limit_error(e, retry_attempt)
+                if error_response is not None:
+                    return error_response
+
+        return self._build_response(text_parts, content_blocks, tool_use_blocks, stop_reason, usage_info)
+
+    def _process_stream_events(
+        self,
+        stream,
+        text_parts: List[str]
+    ) -> None:
+        """
+        Process streaming events, accumulating text deltas.
+
+        Args:
+            stream: The Anthropic streaming response
+            text_parts: List to accumulate text delta strings into
+        """
+        for event in stream:
+            if not hasattr(event, 'type'):
+                continue
+
+            if event.type == 'content_block_delta':
+                self._handle_content_block_delta(event, text_parts)
+
+    def _handle_content_block_delta(self, event, text_parts: List[str]) -> None:
+        """Handle a content_block_delta event by extracting text."""
+        if not hasattr(event, 'delta'):
+            return
+        if not hasattr(event.delta, 'type'):
+            return
+        if event.delta.type == 'text_delta':
+            text_parts.append(event.delta.text)
+
+    def _extract_final_message_data(
+        self,
+        final_message,
+        usage_info: Dict[str, int],
+        content_blocks: List[Dict[str, Any]],
+        tool_use_blocks: List[Dict[str, Any]]
+    ) -> None:
+        """
+        Extract usage, stop reason, and content blocks from the final message.
+
+        Args:
+            final_message: The final message object from the stream
+            usage_info: Dictionary to update with token usage
+            content_blocks: List to append content blocks to
+            tool_use_blocks: List to append tool use blocks to
+        """
+        if hasattr(final_message, 'usage'):
+            usage_info['input_tokens'] = final_message.usage.input_tokens
+            usage_info['output_tokens'] = final_message.usage.output_tokens
+
+        if not hasattr(final_message, 'content'):
+            return
+
+        for block in final_message.content:
+            if not hasattr(block, 'type'):
+                continue
+            if block.type == 'text':
+                content_blocks.append({'type': 'text', 'text': block.text})
+            elif block.type == 'tool_use':
+                tool_block = {
+                    'type': 'tool_use',
+                    'id': block.id,
+                    'name': block.name,
+                    'input': block.input
+                }
+                tool_use_blocks.append(tool_block)
+                content_blocks.append(tool_block)
+
+    def _handle_rate_limit_error(self, error: Exception, retry_attempt: int) -> Optional[Dict[str, Any]]:
+        """
+        Handle a rate limit error with exponential backoff.
+
+        Args:
+            error: The RateLimitError exception
+            retry_attempt: Current retry attempt index (0-based)
+
+        Returns:
+            None if retrying (caller should continue), or an error dict if retries exhausted
+        """
+        if retry_attempt < self.rate_limit_max_retries - 1:
+            wait_time = self.rate_limit_base_delay ** retry_attempt
+            logging.warning(
+                f"Rate limit exceeded (attempt {retry_attempt + 1}/{self.rate_limit_max_retries}). "
+                f"Waiting {wait_time:.1f} seconds before retrying..."
+            )
+            logging.debug(f"Rate limit error details: {str(error)}")
+            time.sleep(wait_time)
+            return None
+
+        # Final retry failed
+        logging.error(
+            f"Rate limit exceeded after {self.rate_limit_max_retries} attempts. "
+            "Please reduce request frequency or contact Anthropic for rate limit increase."
+        )
+        logging.error(f"Rate limit error details: {str(error)}")
+        return {
+            'error': True,
+            'error_code': 'RateLimitExceeded',
+            'error_message': f"Rate limit exceeded after {self.rate_limit_max_retries} retry attempts. {str(error)}",
+            'error_type': 'RateLimitError'
+        }
+
+    def _build_response(
+        self,
+        text_parts: List[str],
+        content_blocks: List[Dict[str, Any]],
+        tool_use_blocks: List[Dict[str, Any]],
+        stop_reason: Optional[str],
+        usage_info: Dict[str, int]
+    ) -> Dict[str, Any]:
+        """
+        Build the standard response dictionary from accumulated stream data.
+
+        Args:
+            text_parts: Accumulated text parts from streaming
+            content_blocks: All content blocks (text and tool use)
+            tool_use_blocks: Tool use blocks specifically
+            stop_reason: The stop reason from the API
+            usage_info: Token usage information
+
+        Returns:
+            Response dictionary in standard format
+        """
+        response = {
+            'stop_reason': stop_reason,
+            'usage': usage_info,
+            'content_blocks': content_blocks,
+            'content': ''.join(text_parts)
+        }
+
+        if tool_use_blocks:
+            response['tool_use'] = tool_use_blocks
+            response['stop_reason'] = 'tool_use'
+
+        return response
+
     def _convert_messages_to_anthropic(
         self,
         messages: List[Dict[str, Any]]
dtSpark/llm/ollama.py
CHANGED
@@ -79,9 +79,11 @@ class OllamaService(LLMService):
             base_url = base_url + '/'
 
         # Create httpx client with SSL verification disabled and proper base URL
+        # SSL verification is intentionally disabled here - controlled by verify_ssl constructor parameter
+        # which is set from user configuration (for self-signed certificates on local Ollama instances)
         custom_http_client = httpx.Client(
             base_url=base_url,
-            verify=False,
+            verify=False,  # NOSONAR - intentional, gated by verify_ssl config
             timeout=httpx.Timeout(timeout=120.0)
         )
 
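The # NOSONAR marker only silences the static-analysis warning; the safety argument in the new comments is that verify=False is reached only when the user's verify_ssl setting opts out. A hedged sketch of that configuration-driven construction follows; the wiring shown is an assumption based on the diff's comments, not code from the package.

import httpx

def make_ollama_client(base_url: str, verify_ssl: bool) -> httpx.Client:
    """Build an httpx client whose TLS verification follows user config.

    verify_ssl=False is only for self-signed certificates on local
    Ollama instances, as the diff's comment explains.
    """
    return httpx.Client(
        base_url=base_url,
        verify=verify_ssl,  # True uses the default CA bundle; False opts out
        timeout=httpx.Timeout(timeout=120.0),
    )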
dtSpark/mcp_integration/manager.py
CHANGED
@@ -185,7 +185,7 @@ class MCPClient:
         httpx_client = httpx.AsyncClient(
             headers=headers if headers else None,
             timeout=self.config.timeout,
-            verify=False
+            verify=False  # NOSONAR - intentional, gated by ssl_verify config
         )
 
         # Use streamable HTTP client with headers
@@ -226,7 +226,7 @@ class MCPClient:
         httpx_client = httpx.AsyncClient(
             headers=headers if headers else None,
             timeout=self.config.timeout,
-            verify=False
+            verify=False  # NOSONAR - intentional, gated by ssl_verify config
         )
 
         # Use SSE client with headers
@@ -266,7 +266,7 @@ class MCPClient:
             logging.error(f"MCP session initialization cancelled for {self.config.name} "
                           f"(server may have returned an error)")
             await self._cleanup_failed_connection()
-
+            raise
 
         self._connected = True
         logging.info(f"Connected to MCP server: {self.config.name} (transport: {self.config.transport})")
@@ -276,7 +276,7 @@ class MCPClient:
             logging.error(f"Connection cancelled for MCP server {self.config.name} "
                           f"(check server URL and authentication)")
             await self._cleanup_failed_connection()
-
+            raise
         except Exception as e:
             error_msg = str(e)
             # Provide more helpful error messages for common issues
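The two added raise statements matter more than they look: judging from the surrounding context, what appear to be cancellation handlers previously ended after cleanup, so execution fell through to self._connected = True and the client was marked connected despite the failed handshake. A paraphrased sketch of the corrected shape is below; the class and method names only mirror the diff, this is not the package's full code.

import asyncio
import logging

class Client:
    """Paraphrased sketch of the cleanup-then-reraise pattern."""

    def __init__(self) -> None:
        self._connected = False

    async def _initialise_session(self) -> None: ...         # stub
    async def _cleanup_failed_connection(self) -> None: ...  # stub

    async def connect(self) -> None:
        try:
            await self._initialise_session()
        except asyncio.CancelledError:
            logging.error("Session initialisation cancelled")
            await self._cleanup_failed_connection()
            # Without the re-raise, control fell through and the client
            # was marked connected despite the failed handshake.
            raise
        self._connected = True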
dtSpark/mcp_integration/tool_selector.py
CHANGED
@@ -78,7 +78,6 @@ class ToolSelector:
         relevant_categories = self._detect_categories(user_message, conversation_history)
 
         if not relevant_categories:
-            # If no specific categories detected, include a diverse sample
             logging.info("No specific tool categories detected, selecting diverse sample")
             return self._select_diverse_sample(all_tools, selected_tools)
 
@@ -89,49 +88,57 @@ class ToolSelector:
         for category in relevant_categories:
             relevant_patterns.update(self.TOOL_CATEGORIES.get(category, []))
 
-        # Track selected tool names to avoid duplicates
         selected_tool_names = {t.get('name') for t in selected_tools}
 
-        # Select tools
+        # Select tools matching the relevant patterns, then backfill to limit
+        self._add_matching_tools(all_tools, selected_tools, selected_tool_names, relevant_patterns)
+        self._backfill_tools(all_tools, selected_tools, selected_tool_names)
+
+        logging.info(f"Selected {len(selected_tools)} tools from {len(all_tools)} available "
+                     f"(categories: {', '.join(relevant_categories)})")
+        self._log_selected_tools(selected_tools)
+
+        return selected_tools
+
+    def _add_matching_tools(self, all_tools: List[Dict[str, Any]],
+                            selected: List[Dict[str, Any]],
+                            selected_names: Set[str],
+                            patterns: Set[str]) -> None:
+        """Add tools whose name or description matches any of the given patterns."""
         for tool in all_tools:
-            if len(selected_tools) >= self.max_tools_per_request:
+            if len(selected) >= self.max_tools_per_request:
                 break
-
             tool_name = tool.get('name', '')
-            if tool_name in selected_tool_names:
+            if tool_name in selected_names:
                 continue
-
             tool_name_lower = tool_name.lower()
             tool_desc = tool.get('description', '').lower()
+            if any(p in tool_name_lower or p in tool_desc for p in patterns):
+                selected.append(tool)
+                selected_names.add(tool_name)
+
+    def _backfill_tools(self, all_tools: List[Dict[str, Any]],
+                        selected: List[Dict[str, Any]],
+                        selected_names: Set[str]) -> None:
+        """Fill remaining slots up to max_tools_per_request with unselected tools."""
+        if len(selected) >= self.max_tools_per_request:
+            return
+        remaining = self.max_tools_per_request - len(selected)
+        logging.debug(f"Adding up to {remaining} additional tools to reach limit")
+        for tool in all_tools:
+            if len(selected) >= self.max_tools_per_request:
+                break
+            tool_name = tool.get('name', '')
+            if tool_name not in selected_names:
+                selected.append(tool)
+                selected_names.add(tool_name)
 
-
-            if any(pattern in tool_name_lower or pattern in tool_desc for pattern in relevant_patterns):
-                selected_tools.append(tool)
-                selected_tool_names.add(tool_name)
-
-        # If still below limit and we have room, add some general-purpose tools
-        if len(selected_tools) < self.max_tools_per_request:
-            remaining = self.max_tools_per_request - len(selected_tools)
-            logging.debug(f"Adding up to {remaining} additional tools to reach limit")
-
-            for tool in all_tools:
-                if len(selected_tools) >= self.max_tools_per_request:
-                    break
-
-                tool_name = tool.get('name', '')
-                if tool_name not in selected_tool_names:
-                    selected_tools.append(tool)
-                    selected_tool_names.add(tool_name)
-
-        logging.info(f"Selected {len(selected_tools)} tools from {len(all_tools)} available " +
-                     f"(categories: {', '.join(relevant_categories)})")
-
-        # Log which tools were selected for debugging
+    @staticmethod
+    def _log_selected_tools(selected_tools: List[Dict[str, Any]]) -> None:
+        """Log the names of selected tools for debugging."""
         tool_names = [t.get('name') for t in selected_tools]
         logging.debug(f"Selected tools: {', '.join(tool_names[:10])}{'...' if len(tool_names) > 10 else ''}")
 
-        return selected_tools
-
     def _detect_categories(self, user_message: str,
                            conversation_history: List[Dict[str, Any]] = None) -> Set[str]:
         """
@@ -147,30 +154,30 @@
         categories = set()
 
         # Analyse user message
-
-        for category, keywords in self.CATEGORY_KEYWORDS.items():
-            if any(keyword in message_lower for keyword in keywords):
-                categories.add(category)
-                logging.debug(f"Category '{category}' detected from user message")
+        self._match_categories(user_message.lower(), categories, source='user message')
 
         # Analyse recent conversation history (last 5 messages)
         if conversation_history:
-
-
-            for msg in conversation_history[-5:]:
-                if isinstance(msg, dict):
-                    content = str(msg.get('content', '')).lower()
-                else:
-                    content = str(msg).lower()
-
-                for category, keywords in self.CATEGORY_KEYWORDS.items():
-                    if any(keyword in content for keyword in keywords):
-                        if category not in categories:
-                            categories.add(category)
-                            logging.debug(f"Category '{category}' detected from conversation history")
+            for msg in conversation_history[-5:]:
+                content = self._extract_message_content(msg)
+                self._match_categories(content, categories, source='conversation history')
 
         return categories
 
+    def _match_categories(self, text: str, categories: Set[str], source: str) -> None:
+        """Match keyword categories against text and add new matches to the set."""
+        for category, keywords in self.CATEGORY_KEYWORDS.items():
+            if category not in categories and any(kw in text for kw in keywords):
+                categories.add(category)
+                logging.debug(f"Category '{category}' detected from {source}")
+
+    @staticmethod
+    def _extract_message_content(msg) -> str:
+        """Extract lowercased text content from a message (dict or string)."""
+        if isinstance(msg, dict):
+            return str(msg.get('content', '')).lower()
+        return str(msg).lower()
+
     def _select_diverse_sample(self, all_tools: List[Dict[str, Any]],
                                already_selected: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
         """
@@ -197,34 +204,33 @@
         for category, category_patterns in self.TOOL_CATEGORIES.items():
             if len(selected) >= self.max_tools_per_request:
                 break
+            self._add_category_tools(
+                all_tools, selected, selected_tool_names,
+                category_patterns, tools_per_category,
+            )
 
-            added = 0
-            for tool in all_tools:
-                tool_name = tool.get('name', '')
-                if tool_name in selected_tool_names:
-                    continue
-
-                tool_name_lower = tool_name.lower()
-                tool_desc = tool.get('description', '').lower()
-
-                # Check if tool matches this category
-                if any(pattern in tool_name_lower or pattern in tool_desc for pattern in category_patterns):
-                    selected.append(tool)
-                    selected_tool_names.add(tool_name)
-                    added += 1
-                    if added >= tools_per_category or len(selected) >= self.max_tools_per_request:
-                        break
-
-        # If still below limit, add remaining tools
-        if len(selected) < self.max_tools_per_request:
-            for tool in all_tools:
-                if len(selected) >= self.max_tools_per_request:
-                    break
-
-                tool_name = tool.get('name', '')
-                if tool_name not in selected_tool_names:
-                    selected.append(tool)
-                    selected_tool_names.add(tool_name)
+        # Backfill any remaining slots
+        self._backfill_tools(all_tools, selected, selected_tool_names)
 
         logging.info(f"Selected {len(selected)} diverse tools (no specific category detected)")
         return selected
+
+    def _add_category_tools(self, all_tools: List[Dict[str, Any]],
+                            selected: List[Dict[str, Any]],
+                            selected_names: Set[str],
+                            patterns: List[str],
+                            max_count: int) -> None:
+        """Add up to max_count tools matching the given category patterns."""
+        added = 0
+        for tool in all_tools:
+            if added >= max_count or len(selected) >= self.max_tools_per_request:
+                break
+            tool_name = tool.get('name', '')
+            if tool_name in selected_names:
+                continue
+            tool_name_lower = tool_name.lower()
+            tool_desc = tool.get('description', '').lower()
+            if any(p in tool_name_lower or p in tool_desc for p in patterns):
+                selected.append(tool)
+                selected_names.add(tool_name)
+                added += 1