chuk-tool-processor 0.6__py3-none-any.whl → 0.6.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

@@ -1,15 +1,22 @@
 #!/usr/bin/env python
 # chuk_tool_processor/mcp/mcp_tool.py
 """
-MCP tool shim that delegates execution to a StreamManager,
-handling its own lazy bootstrap when needed.
+MCP tool shim that delegates execution to a StreamManager.
 
-FIXED: Added subprocess serialization support by implementing __getstate__ and __setstate__
+FIXED: Removed config file management - MCPTool should only handle execution,
+not configuration or bootstrapping. Configuration is handled at registration time.
+
+CORE PRINCIPLE: MCPTool wraps a StreamManager and delegates calls to it.
+If the StreamManager becomes unavailable, return graceful errors rather than
+trying to recreate it with config files.
 """
 from __future__ import annotations
 
 import asyncio
+import time
+from enum import Enum
 from typing import Any, Dict, List, Optional
+from dataclasses import dataclass
 
 from chuk_tool_processor.logging import get_logger
 from chuk_tool_processor.mcp.stream_manager import StreamManager
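Per the new module docstring, an MCPTool is handed an already-initialised StreamManager at registration time and delegates every call to it. A minimal usage sketch under that assumption (`sm` stands in for a StreamManager created by the registration layer; the `timezone` argument is illustrative, not part of the package):

    from chuk_tool_processor.mcp.mcp_tool import MCPTool

    async def call_time_tool(sm) -> None:
        # Wrap the remote tool; kwargs passed to execute() become the MCP arguments.
        tool = MCPTool("get_current_time", stream_manager=sm, default_timeout=10.0)
        result = await tool.execute(timezone="UTC")
        print(result)  # tool content, or a structured error dict if sm is unavailable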
@@ -17,98 +24,101 @@ from chuk_tool_processor.mcp.stream_manager import StreamManager
 logger = get_logger("chuk_tool_processor.mcp.mcp_tool")
 
 
+class ConnectionState(Enum):
+    """Connection states for the MCP tool."""
+    HEALTHY = "healthy"
+    DEGRADED = "degraded"
+    DISCONNECTED = "disconnected"
+    FAILED = "failed"
+
+
+@dataclass
+class RecoveryConfig:
+    """Configuration for connection recovery behavior."""
+    max_retries: int = 3
+    initial_backoff: float = 1.0
+    max_backoff: float = 30.0
+    backoff_multiplier: float = 2.0
+    circuit_breaker_threshold: int = 5
+    circuit_breaker_timeout: float = 60.0
+
+
+@dataclass
+class ConnectionStats:
+    """Statistics for connection monitoring."""
+    total_calls: int = 0
+    successful_calls: int = 0
+    failed_calls: int = 0
+    connection_errors: int = 0
+    last_success_time: Optional[float] = None
+    last_failure_time: Optional[float] = None
+
+
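With the defaults above, a failing call is attempted up to max_retries + 1 = 4 times, waiting 1 s, 2 s and 4 s between attempts (doubling each time, capped at max_backoff = 30 s); after 5 consecutive failures the circuit breaker opens and calls are refused for 60 s. A custom policy can be supplied per tool, for example (illustrative values, reusing the `sm` placeholder from the sketch above):

    # Tighter policy for a latency-sensitive deployment.
    config = RecoveryConfig(
        max_retries=1,                 # one retry after the initial attempt
        initial_backoff=0.5,           # wait 0.5 s before that retry
        circuit_breaker_threshold=3,   # open the circuit after 3 consecutive failures
        circuit_breaker_timeout=30.0,  # allow calls again 30 s after it opens
    )
    tool = MCPTool("get_current_time", stream_manager=sm, recovery_config=config)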
 class MCPTool:
     """
     Wrap a remote MCP tool so it can be called like a local tool.
-
-    You may pass an existing ``StreamManager`` *positionally* (for legacy
-    code) or via the named parameter.
-
-    If no ``StreamManager`` is supplied the class will start one on first
-    use via ``setup_mcp_stdio``.
 
-    FIXED: Added serialization support for subprocess execution.
+    SIMPLIFIED: This class now focuses only on execution delegation.
+    It does NOT handle configuration files or StreamManager bootstrapping.
     """
 
-    # ------------------------------------------------------------------ #
     def __init__(
         self,
         tool_name: str = "",
         stream_manager: Optional[StreamManager] = None,
         *,
-        cfg_file: str = "",
-        servers: Optional[List[str]] = None,
-        server_names: Optional[Dict[int, str]] = None,
-        namespace: str = "stdio",
-        default_timeout: Optional[float] = None
+        default_timeout: Optional[float] = None,
+        enable_resilience: bool = True,
+        recovery_config: Optional[RecoveryConfig] = None,
     ) -> None:
         if not tool_name:
-            raise ValueError(
-                "MCPTool requires a tool_name. "
-                "This error usually occurs during subprocess serialization. "
-                "Make sure the tool is properly registered with a name."
-            )
+            raise ValueError("MCPTool requires a tool_name")
 
         self.tool_name = tool_name
         self._sm: Optional[StreamManager] = stream_manager
-        self.default_timeout = default_timeout
-
-        # Boot-strap parameters (only needed if _sm is None)
-        self._cfg_file = cfg_file
-        self._servers = servers or []
-        self._server_names = server_names or {}
-        self._namespace = namespace
-
-        # Create lock only when needed (not during deserialization)
-        self._sm_lock: Optional[asyncio.Lock] = None
+        self.default_timeout = default_timeout or 30.0
 
-    def _ensure_lock(self) -> asyncio.Lock:
-        """Ensure the lock exists, creating it if necessary."""
-        if self._sm_lock is None:
-            self._sm_lock = asyncio.Lock()
-        return self._sm_lock
+        # Resilience features
+        self.enable_resilience = enable_resilience
+        self.recovery_config = recovery_config or RecoveryConfig()
+
+        # State tracking (only if resilience enabled)
+        if self.enable_resilience:
+            self.connection_state = ConnectionState.HEALTHY if stream_manager else ConnectionState.DISCONNECTED
+            self.stats = ConnectionStats()
+
+            # Circuit breaker state
+            self._circuit_open = False
+            self._circuit_open_time: Optional[float] = None
+            self._consecutive_failures = 0
 
     # ------------------------------------------------------------------ #
     # Serialization support for subprocess execution
     # ------------------------------------------------------------------ #
     def __getstate__(self) -> Dict[str, Any]:
         """
-        Custom serialization for pickle support.
+        Serialize for subprocess execution.
 
-        Excludes non-serializable async components and stream manager.
-        The subprocess will recreate these as needed.
+        SIMPLIFIED: Only preserve essential execution state, not configuration.
+        The StreamManager will be None after deserialization - that's expected.
         """
         state = self.__dict__.copy()
 
         # Remove non-serializable items
-        state['_sm'] = None  # StreamManager will be recreated in subprocess
-        state['_sm_lock'] = None  # Lock will be recreated when needed
-
-        # Ensure we have the necessary configuration for subprocess
-        # If no servers specified, default to the tool name (common pattern)
-        if not state.get('_servers'):
-            # Extract server name from tool_name (e.g., "get_current_time" -> "time")
-            # This is a heuristic - adjust based on your naming convention
-            if 'time' in self.tool_name.lower():
-                state['_servers'] = ['time']
-                state['_server_names'] = {0: 'time'}
-            else:
-                # Default fallback - use the tool name itself
-                state['_servers'] = [self.tool_name]
-                state['_server_names'] = {0: self.tool_name}
+        state['_sm'] = None  # StreamManager will be None in subprocess
 
-        # Ensure we have a config file path
-        if not state.get('_cfg_file'):
-            state['_cfg_file'] = 'server_config.json'
+        # Reset connection state for subprocess
+        if self.enable_resilience:
+            state['connection_state'] = ConnectionState.DISCONNECTED
 
-        logger.debug(f"Serializing MCPTool '{self.tool_name}' for subprocess with servers: {state['_servers']}")
+        logger.debug(f"Serializing MCPTool '{self.tool_name}' for subprocess")
 
         return state
 
     def __setstate__(self, state: Dict[str, Any]) -> None:
         """
-        Custom deserialization for pickle support.
+        Deserialize after subprocess execution.
 
-        Restores the object state and ensures required fields are set.
+        SIMPLIFIED: Just restore state. StreamManager will be None and that's fine.
         """
         self.__dict__.update(state)
 
@@ -116,102 +126,246 @@ class MCPTool:
         if not hasattr(self, 'tool_name') or not self.tool_name:
             raise ValueError("Invalid MCPTool state: missing tool_name")
 
-        # Initialize transient fields
+        # StreamManager will be None in subprocess - that's expected
         self._sm = None
-        self._sm_lock = None
+
+        # Initialize resilience state if enabled
+        if self.enable_resilience:
+            if not hasattr(self, 'connection_state'):
+                self.connection_state = ConnectionState.DISCONNECTED
+            if not hasattr(self, 'stats'):
+                self.stats = ConnectionStats()
 
         logger.debug(f"Deserialized MCPTool '{self.tool_name}' in subprocess")
 
     # ------------------------------------------------------------------ #
-    async def _ensure_stream_manager(self) -> StreamManager:
-        """
-        Lazily create / attach a StreamManager.
-
-        Importing ``setup_mcp_stdio`` *inside* this function prevents the
-        circular-import seen earlier. ★
-        """
-        if self._sm is not None:
-            return self._sm
-
-        # Use the lock, creating it if needed
-        async with self._ensure_lock():
-            if self._sm is None:  # re-check inside lock
-                logger.info(
-                    "Boot-strapping MCP stdio transport for '%s'", self.tool_name
-                )
-
-                # ★ local import avoids circular dependency
-                from chuk_tool_processor.mcp.setup_mcp_stdio import setup_mcp_stdio
-
-                _, self._sm = await setup_mcp_stdio(
-                    config_file=self._cfg_file,
-                    servers=self._servers,
-                    server_names=self._server_names,
-                    namespace=self._namespace,
-                )
-
-        return self._sm  # type: ignore[return-value]
-
     async def execute(self, timeout: Optional[float] = None, **kwargs: Any) -> Any:
         """
-        Invoke the remote MCP tool, guaranteeing that *one* timeout is enforced.
-
-        Parameters
-        ----------
-        timeout : float | None
-            If provided, forward this to StreamManager. Otherwise fall back
-            to ``self.default_timeout``.
-        **kwargs
-            Arguments forwarded to the tool.
-
-        Returns
-        -------
-        Any
-            The ``content`` of the remote tool response.
-
-        Raises
-        ------
-        RuntimeError
-            The remote tool returned an error payload.
-        asyncio.TimeoutError
-            The call exceeded the chosen timeout.
+        Execute the tool, returning graceful errors if StreamManager unavailable.
+
+        SIMPLIFIED: If no StreamManager, return a structured error response
+        instead of trying to bootstrap one.
         """
-        sm = await self._ensure_stream_manager()
+        # Check if we have a StreamManager
+        if self._sm is None:
+            return {
+                "error": f"Tool '{self.tool_name}' is not available (no stream manager)",
+                "tool_name": self.tool_name,
+                "available": False,
+                "reason": "disconnected"
+            }
 
-        # Pick the timeout we will enforce (may be None = no limit).
-        effective_timeout: Optional[float] = (
-            timeout if timeout is not None else self.default_timeout
-        )
+        # If resilience is disabled, use simple execution
+        if not self.enable_resilience:
+            return await self._simple_execute(timeout, **kwargs)
+
+        # Resilient execution
+        return await self._resilient_execute(timeout, **kwargs)
+
+    async def _simple_execute(self, timeout: Optional[float] = None, **kwargs: Any) -> Any:
+        """Simple execution without resilience features."""
+        effective_timeout = timeout if timeout is not None else self.default_timeout
 
-        call_kwargs: dict[str, Any] = {
+        call_kwargs = {
             "tool_name": self.tool_name,
             "arguments": kwargs,
         }
         if effective_timeout is not None:
             call_kwargs["timeout"] = effective_timeout
-            logger.debug(
-                "Forwarding timeout=%ss to StreamManager for tool '%s'",
-                effective_timeout,
-                self.tool_name,
-            )
 
         try:
-            result = await sm.call_tool(**call_kwargs)
+            result = await self._sm.call_tool(**call_kwargs)
         except asyncio.TimeoutError:
-            logger.warning(
-                "MCP tool '%s' timed out after %ss",
-                self.tool_name,
-                effective_timeout,
-            )
+            logger.warning(f"MCP tool '{self.tool_name}' timed out after {effective_timeout}s")
             raise
 
         if result.get("isError"):
             err = result.get("error", "Unknown error")
-            logger.error("Remote MCP error from '%s': %s", self.tool_name, err)
+            logger.error(f"Remote MCP error from '{self.tool_name}': {err}")
             raise RuntimeError(err)
 
         return result.get("content")
 
+    async def _resilient_execute(self, timeout: Optional[float] = None, **kwargs: Any) -> Any:
+        """Resilient execution with circuit breaker and health checks."""
+        # Check circuit breaker
+        if self._is_circuit_open():
+            return {
+                "error": f"Circuit breaker open for tool '{self.tool_name}' - too many recent failures",
+                "tool_name": self.tool_name,
+                "available": False,
+                "reason": "circuit_breaker"
+            }
+
+        effective_timeout = timeout if timeout is not None else self.default_timeout
+        self.stats.total_calls += 1
+
+        # Check if StreamManager is healthy
+        if not await self._is_stream_manager_healthy():
+            await self._record_failure(is_connection_error=True)
+            return {
+                "error": f"Tool '{self.tool_name}' is not available (unhealthy connection)",
+                "tool_name": self.tool_name,
+                "available": False,
+                "reason": "unhealthy"
+            }
+
+        # Try execution with retries
+        max_attempts = self.recovery_config.max_retries + 1
+        backoff = self.recovery_config.initial_backoff
+
+        for attempt in range(max_attempts):
+            try:
+                result = await self._execute_with_timeout(effective_timeout, **kwargs)
+                await self._record_success()
+                return result
+
+            except asyncio.TimeoutError:
+                error_msg = f"Tool '{self.tool_name}' timed out after {effective_timeout}s"
+                logger.warning(error_msg)
+                await self._record_failure()
+
+                if attempt == max_attempts - 1:
+                    return {
+                        "error": error_msg,
+                        "tool_name": self.tool_name,
+                        "available": False,
+                        "reason": "timeout"
+                    }
+
+            except Exception as e:
+                error_str = str(e)
+                is_connection_error = self._is_connection_error(e)
+
+                logger.warning(f"Tool '{self.tool_name}' attempt {attempt + 1} failed: {error_str}")
+                await self._record_failure(is_connection_error)
+
+                if attempt == max_attempts - 1:
+                    return {
+                        "error": f"Tool execution failed after {max_attempts} attempts: {error_str}",
+                        "tool_name": self.tool_name,
+                        "available": False,
+                        "reason": "execution_failed"
+                    }
+
+            # Exponential backoff
+            if attempt < max_attempts - 1:
+                logger.debug(f"Waiting {backoff:.1f}s before retry {attempt + 2}")
+                await asyncio.sleep(backoff)
+                backoff = min(backoff * self.recovery_config.backoff_multiplier, self.recovery_config.max_backoff)
+
+        # Should never reach here
+        return {
+            "error": f"Tool '{self.tool_name}' failed after all attempts",
+            "tool_name": self.tool_name,
+            "available": False,
+            "reason": "exhausted_retries"
+        }
+
+    async def _execute_with_timeout(self, timeout: float, **kwargs: Any) -> Any:
+        """Execute the tool with timeout."""
+        call_kwargs = {
+            "tool_name": self.tool_name,
+            "arguments": kwargs,
+        }
+        if timeout is not None:
+            call_kwargs["timeout"] = timeout
+
+        try:
+            result = await asyncio.wait_for(
+                self._sm.call_tool(**call_kwargs),
+                timeout=(timeout + 5.0) if timeout else None
+            )
+
+            if result.get("isError"):
+                error = result.get("error", "Unknown error")
+                raise RuntimeError(f"Tool execution failed: {error}")
+
+            return result.get("content")
+
+        except asyncio.TimeoutError:
+            self.connection_state = ConnectionState.DEGRADED
+            raise
+        except Exception as e:
+            if self._is_connection_error(e):
+                self.connection_state = ConnectionState.DISCONNECTED
+            else:
+                self.connection_state = ConnectionState.DEGRADED
+            raise
+
+    async def _is_stream_manager_healthy(self) -> bool:
+        """Check if the StreamManager is healthy."""
+        if self._sm is None:
+            return False
+
+        try:
+            ping_results = await asyncio.wait_for(self._sm.ping_servers(), timeout=3.0)
+            healthy_count = sum(1 for result in ping_results if result.get("ok", False))
+            return healthy_count > 0
+        except Exception as e:
+            logger.debug(f"Health check failed for '{self.tool_name}': {e}")
+            return False
+
+    def _is_connection_error(self, exception: Exception) -> bool:
+        """Determine if an exception indicates a connection problem."""
+        error_str = str(exception).lower()
+        connection_indicators = [
+            "connection lost", "connection closed", "connection refused",
+            "broken pipe", "timeout", "eof", "pipe closed", "process died",
+            "no route to host", "no server found"
+        ]
+        return any(indicator in error_str for indicator in connection_indicators)
+
+    async def _record_success(self) -> None:
+        """Record a successful execution."""
+        self.stats.successful_calls += 1
+        self.stats.last_success_time = time.time()
+        self._consecutive_failures = 0
+
+        # Close circuit breaker if it was open
+        if self._circuit_open:
+            self._circuit_open = False
+            self._circuit_open_time = None
+            self.connection_state = ConnectionState.HEALTHY
+            logger.info(f"Circuit breaker closed for tool '{self.tool_name}' after successful execution")
+
+    async def _record_failure(self, is_connection_error: bool = False) -> None:
+        """Record a failed execution."""
+        self.stats.failed_calls += 1
+        self.stats.last_failure_time = time.time()
+
+        if is_connection_error:
+            self.stats.connection_errors += 1
+            self.connection_state = ConnectionState.DISCONNECTED
+        else:
+            self.connection_state = ConnectionState.DEGRADED
+
+        self._consecutive_failures += 1
+
+        # Check if we should open the circuit breaker
+        if (self._consecutive_failures >= self.recovery_config.circuit_breaker_threshold and
+                not self._circuit_open):
+            self._circuit_open = True
+            self._circuit_open_time = time.time()
+            self.connection_state = ConnectionState.FAILED
+            logger.error(f"Circuit breaker opened for tool '{self.tool_name}' after {self._consecutive_failures} consecutive failures")
+
+    def _is_circuit_open(self) -> bool:
+        """Check if the circuit breaker is currently open."""
+        if not self._circuit_open:
+            return False
+
+        # Check if enough time has passed to close the circuit
+        if (self._circuit_open_time and
+                time.time() - self._circuit_open_time >= self.recovery_config.circuit_breaker_timeout):
+            self._circuit_open = False
+            self._circuit_open_time = None
+            self.connection_state = ConnectionState.HEALTHY
+            logger.info(f"Circuit breaker reset for tool '{self.tool_name}' after timeout")
+            return False
+
+        return True
+
     # ------------------------------------------------------------------ #
     # Legacy method name support
     async def _aexecute(self, timeout: Optional[float] = None, **kwargs: Any) -> Any:
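Because execute() now reports unavailability by returning a structured dict (with "available": False and a "reason" of disconnected, circuit_breaker, unhealthy, timeout, execution_failed or exhausted_retries) rather than raising, callers need to distinguish that payload from normal tool output. A small sketch of such a check (the `city` argument is illustrative):

    async def call_and_check(tool: MCPTool) -> None:
        result = await tool.execute(city="Berlin")
        if isinstance(result, dict) and result.get("available") is False:
            # Graceful-error payload produced by the resilience paths above.
            print(f"{result['tool_name']} unavailable: {result['reason']}")
        else:
            print("Tool output:", result)

Note that with resilience disabled, _simple_execute still raises asyncio.TimeoutError or RuntimeError; only the missing-StreamManager case takes the dict form there.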
@@ -219,25 +373,73 @@ class MCPTool:
         return await self.execute(timeout=timeout, **kwargs)
 
     # ------------------------------------------------------------------ #
-    # Utility methods for debugging
+    # Utility and monitoring methods
     # ------------------------------------------------------------------ #
-    def is_serializable(self) -> bool:
-        """Check if this tool can be serialized (for debugging)."""
-        try:
-            import pickle
-            pickle.dumps(self)
-            return True
-        except Exception:
-            return False
+    def is_available(self) -> bool:
+        """Check if this tool is currently available."""
+        return (self._sm is not None and
+                not self._is_circuit_open() and
+                self.connection_state in [ConnectionState.HEALTHY, ConnectionState.DEGRADED])
 
-    def get_serialization_info(self) -> Dict[str, Any]:
-        """Get information about what would be serialized."""
-        state = self.__getstate__()
+    def get_stats(self) -> Dict[str, Any]:
+        """Get connection and execution statistics."""
+        if not self.enable_resilience:
+            return {
+                "tool_name": self.tool_name,
+                "resilience_enabled": False,
+                "available": self._sm is not None
+            }
+
+        success_rate = 0.0
+        if self.stats.total_calls > 0:
+            success_rate = (self.stats.successful_calls / self.stats.total_calls) * 100
+
         return {
-            "tool_name": state.get("tool_name"),
-            "namespace": state.get("_namespace"),
-            "servers": state.get("_servers"),
-            "cfg_file": state.get("_cfg_file"),
-            "has_stream_manager": state.get("_sm") is not None,
-            "serializable_size": len(str(state))
-        }
+            "tool_name": self.tool_name,
+            "resilience_enabled": True,
+            "available": self.is_available(),
+            "state": self.connection_state.value,
+            "circuit_open": self._circuit_open,
+            "total_calls": self.stats.total_calls,
+            "successful_calls": self.stats.successful_calls,
+            "failed_calls": self.stats.failed_calls,
+            "connection_errors": self.stats.connection_errors,
+            "success_rate": success_rate,
+            "consecutive_failures": self._consecutive_failures,
+            "has_stream_manager": self._sm is not None,
+        }
+
+    def reset_circuit_breaker(self) -> None:
+        """Manually reset the circuit breaker."""
+        if not self.enable_resilience:
+            return
+
+        self._circuit_open = False
+        self._circuit_open_time = None
+        self._consecutive_failures = 0
+        self.connection_state = ConnectionState.HEALTHY
+        logger.info(f"Circuit breaker manually reset for tool '{self.tool_name}'")
+
+    def disable_resilience(self) -> None:
+        """Disable resilience features for this tool instance."""
+        self.enable_resilience = False
+        logger.info(f"Resilience features disabled for tool '{self.tool_name}'")
+
+    def set_stream_manager(self, stream_manager: Optional[StreamManager]) -> None:
+        """
+        Set or update the StreamManager for this tool.
+
+        This can be used by external systems to reconnect tools after
+        StreamManager recovery at a higher level.
+        """
+        self._sm = stream_manager
+        if stream_manager is not None:
+            self.connection_state = ConnectionState.HEALTHY
+            if self._circuit_open:
+                self._circuit_open = False
+                self._circuit_open_time = None
+                logger.info(f"Circuit breaker closed for tool '{self.tool_name}' due to new stream manager")
+        else:
+            self.connection_state = ConnectionState.DISCONNECTED
+
+        logger.debug(f"StreamManager {'set' if stream_manager else 'cleared'} for tool '{self.tool_name}'")
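The monitoring surface added in this last hunk lets whatever owns the tool observe its health and, after recovering a connection at a higher level, re-attach a StreamManager without rebuilding the MCPTool. A sketch of that supervision step (assumes the caller holds the tool plus a freshly recovered StreamManager named `new_sm`):

    def supervise(tool: MCPTool, new_sm) -> None:
        stats = tool.get_stats()
        print(f"{stats['tool_name']}: available={stats['available']}, "
              f"success_rate={stats.get('success_rate', 0):.1f}%")
        if not stats["available"]:
            tool.set_stream_manager(new_sm)   # re-attach; closes an open circuit breaker
            tool.reset_circuit_breaker()      # also clears the consecutive-failure count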