kailash 0.5.0__py3-none-any.whl → 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. kailash/__init__.py +1 -1
  2. kailash/client/__init__.py +12 -0
  3. kailash/client/enhanced_client.py +306 -0
  4. kailash/core/actors/__init__.py +16 -0
  5. kailash/core/actors/connection_actor.py +566 -0
  6. kailash/core/actors/supervisor.py +364 -0
  7. kailash/edge/__init__.py +16 -0
  8. kailash/edge/compliance.py +834 -0
  9. kailash/edge/discovery.py +659 -0
  10. kailash/edge/location.py +582 -0
  11. kailash/gateway/__init__.py +33 -0
  12. kailash/gateway/api.py +289 -0
  13. kailash/gateway/enhanced_gateway.py +357 -0
  14. kailash/gateway/resource_resolver.py +217 -0
  15. kailash/gateway/security.py +227 -0
  16. kailash/middleware/auth/models.py +2 -2
  17. kailash/middleware/database/base_models.py +1 -7
  18. kailash/middleware/gateway/__init__.py +22 -0
  19. kailash/middleware/gateway/checkpoint_manager.py +398 -0
  20. kailash/middleware/gateway/deduplicator.py +382 -0
  21. kailash/middleware/gateway/durable_gateway.py +417 -0
  22. kailash/middleware/gateway/durable_request.py +498 -0
  23. kailash/middleware/gateway/event_store.py +459 -0
  24. kailash/nodes/admin/permission_check.py +817 -33
  25. kailash/nodes/admin/role_management.py +1242 -108
  26. kailash/nodes/admin/schema_manager.py +438 -0
  27. kailash/nodes/admin/user_management.py +1124 -1582
  28. kailash/nodes/code/__init__.py +8 -1
  29. kailash/nodes/code/async_python.py +1035 -0
  30. kailash/nodes/code/python.py +1 -0
  31. kailash/nodes/data/async_sql.py +9 -3
  32. kailash/nodes/data/sql.py +20 -11
  33. kailash/nodes/data/workflow_connection_pool.py +643 -0
  34. kailash/nodes/rag/__init__.py +1 -4
  35. kailash/resources/__init__.py +40 -0
  36. kailash/resources/factory.py +533 -0
  37. kailash/resources/health.py +319 -0
  38. kailash/resources/reference.py +288 -0
  39. kailash/resources/registry.py +392 -0
  40. kailash/runtime/async_local.py +711 -302
  41. kailash/testing/__init__.py +34 -0
  42. kailash/testing/async_test_case.py +353 -0
  43. kailash/testing/async_utils.py +345 -0
  44. kailash/testing/fixtures.py +458 -0
  45. kailash/testing/mock_registry.py +495 -0
  46. kailash/workflow/__init__.py +8 -0
  47. kailash/workflow/async_builder.py +621 -0
  48. kailash/workflow/async_patterns.py +766 -0
  49. kailash/workflow/cyclic_runner.py +107 -16
  50. kailash/workflow/graph.py +7 -2
  51. kailash/workflow/resilience.py +11 -1
  52. {kailash-0.5.0.dist-info → kailash-0.6.0.dist-info}/METADATA +7 -4
  53. {kailash-0.5.0.dist-info → kailash-0.6.0.dist-info}/RECORD +57 -22
  54. {kailash-0.5.0.dist-info → kailash-0.6.0.dist-info}/WHEEL +0 -0
  55. {kailash-0.5.0.dist-info → kailash-0.6.0.dist-info}/entry_points.txt +0 -0
  56. {kailash-0.5.0.dist-info → kailash-0.6.0.dist-info}/licenses/LICENSE +0 -0
  57. {kailash-0.5.0.dist-info → kailash-0.6.0.dist-info}/top_level.txt +0 -0
kailash/runtime/async_local.py
@@ -1,388 +1,797 @@
-"""Asynchronous local runtime engine for executing workflows.
-
-DEPRECATED: This module is deprecated. The LocalRuntime in local.py now provides
-unified async/sync execution capabilities. For backward compatibility, this module
-exports LocalRuntime as AsyncLocalRuntime.
-
-This module provides an asynchronous execution engine for Kailash workflows,
-particularly useful for workflows with I/O-bound nodes such as API calls,
-database queries, or LLM interactions.
+"""
+Unified Async Runtime for Kailash Workflows.
+
+This module provides the AsyncLocalRuntime, a specialized async-first runtime that
+extends LocalRuntime with advanced concurrent execution, workflow optimization,
+and integrated resource management.
+
+Key Features:
+- Native async/await execution with concurrent node processing
+- Workflow analysis and optimization for parallel execution
+- Integrated ResourceRegistry support
+- Advanced execution context and tracking
+- Performance profiling and metrics
+- Circuit breaker patterns for resilient execution
 """
 
+import asyncio
 import logging
+import time
+import weakref
+from concurrent.futures import ThreadPoolExecutor
+from dataclasses import dataclass, field
 from datetime import UTC, datetime
-from typing import Any
+from typing import Any, Dict, List, Optional, Set, Tuple, Union
 
 import networkx as nx
 
-from kailash.sdk_exceptions import (
-    RuntimeExecutionError,
-    WorkflowExecutionError,
-    WorkflowValidationError,
-)
+from kailash.nodes.base import Node
+from kailash.nodes.base_async import AsyncNode
+from kailash.resources import ResourceRegistry
+from kailash.runtime.local import LocalRuntime
+from kailash.sdk_exceptions import RuntimeExecutionError, WorkflowExecutionError
 from kailash.tracking import TaskManager, TaskStatus
-from kailash.workflow.graph import Workflow
 
 logger = logging.getLogger(__name__)
 
 
-class AsyncLocalRuntime:
-    """Asynchronous local execution engine for workflows.
+@dataclass
+class ExecutionLevel:
+    """Represents a level of nodes that can execute concurrently."""
+
+    level: int
+    nodes: Set[str] = field(default_factory=set)
+    dependencies_satisfied: Set[str] = field(default_factory=set)
+
+
+@dataclass
+class ExecutionPlan:
+    """Execution plan for optimized workflow execution."""
+
+    workflow_id: str
+    async_nodes: Set[str] = field(default_factory=set)
+    sync_nodes: Set[str] = field(default_factory=set)
+    execution_levels: List[ExecutionLevel] = field(default_factory=list)
+    required_resources: Set[str] = field(default_factory=set)
+    estimated_duration: float = 0.0
+    max_concurrent_nodes: int = 1
+
+    @property
+    def is_fully_async(self) -> bool:
+        """Check if workflow contains only async nodes."""
+        return len(self.sync_nodes) == 0 and len(self.async_nodes) > 0
+
+    @property
+    def has_async_nodes(self) -> bool:
+        """Check if workflow contains any async nodes."""
+        return len(self.async_nodes) > 0
+
+    @property
+    def can_parallelize(self) -> bool:
+        """Check if workflow can benefit from parallelization."""
+        return self.max_concurrent_nodes > 1
+
+
+@dataclass
+class ExecutionMetrics:
+    """Metrics collected during execution."""
+
+    total_duration: float = 0.0
+    node_durations: Dict[str, float] = field(default_factory=dict)
+    concurrent_executions: int = 0
+    resource_access_count: Dict[str, int] = field(default_factory=dict)
+    error_count: int = 0
+    retry_count: int = 0
+
+
+class ExecutionContext:
+    """Context passed through workflow execution with resource access."""
+
+    def __init__(self, resource_registry: Optional[ResourceRegistry] = None):
+        self.resource_registry = resource_registry
+        self.variables: Dict[str, Any] = {}
+        self.metrics = ExecutionMetrics()
+        self.start_time = time.time()
+        self._weak_refs: Dict[str, weakref.ref] = {}
+
+    def set_variable(self, key: str, value: Any) -> None:
+        """Set a context variable accessible to all nodes."""
+        self.variables[key] = value
+
+    def get_variable(self, key: str, default=None) -> Any:
+        """Get a context variable."""
+        return self.variables.get(key, default)
+
+    async def get_resource(self, name: str) -> Any:
+        """Get resource from registry."""
+        if not self.resource_registry:
+            raise RuntimeError("No resource registry available in execution context")
 
-    This runtime provides asynchronous execution capabilities for workflows,
-    allowing for more efficient processing of I/O-bound operations and potential
-    parallel execution of independent nodes.
+        # Track resource access
+        self.metrics.resource_access_count[name] = (
+            self.metrics.resource_access_count.get(name, 0) + 1
+        )
+
+        return await self.resource_registry.get_resource(name)
+
+
+class WorkflowAnalyzer:
+    """Analyzes workflows for optimization opportunities."""
+
+    def __init__(self, enable_profiling: bool = True):
+        self.enable_profiling = enable_profiling
+        self._analysis_cache: Dict[str, ExecutionPlan] = {}
+
+    def analyze(self, workflow) -> ExecutionPlan:
+        """Analyze workflow and create execution plan."""
+        workflow_id = (
+            workflow.workflow_id
+            if hasattr(workflow, "workflow_id")
+            else str(id(workflow))
+        )
+
+        # Check cache first
+        if workflow_id in self._analysis_cache:
+            return self._analysis_cache[workflow_id]
+
+        plan = ExecutionPlan(workflow_id=workflow_id)
+
+        # Identify node types
+        for node_id, node_instance in workflow._node_instances.items():
+            if isinstance(node_instance, AsyncNode):
+                plan.async_nodes.add(node_id)
+            else:
+                plan.sync_nodes.add(node_id)
 
-    Key features:
-    - Support for AsyncNode.async_run() execution
-    - Parallel execution of independent nodes (in development)
-    - Task tracking and monitoring
-    - Detailed execution metrics
+        # Identify resource requirements
+        plan.required_resources = self._identify_resources(workflow)
+
+        # Compute execution levels for parallelization
+        plan.execution_levels = self._compute_execution_levels(workflow)
+
+        # Calculate max concurrent nodes
+        plan.max_concurrent_nodes = (
+            max(len(level.nodes) for level in plan.execution_levels)
+            if plan.execution_levels
+            else 1
+        )
+
+        # Estimate execution duration (simplified)
+        plan.estimated_duration = self._estimate_duration(workflow, plan)
+
+        # Cache the plan
+        self._analysis_cache[workflow_id] = plan
+
+        logger.debug(
+            f"Workflow analysis complete: {len(plan.async_nodes)} async nodes, "
+            f"{len(plan.sync_nodes)} sync nodes, "
+            f"{len(plan.execution_levels)} execution levels"
+        )
+
+        return plan
+
+    def _compute_execution_levels(self, workflow) -> List[ExecutionLevel]:
+        """Compute execution levels for parallel execution."""
+        levels = []
+        remaining_nodes = set(workflow._node_instances.keys())
+        completed_nodes = set()
+        level_num = 0
+
+        while remaining_nodes:
+            current_level = ExecutionLevel(level=level_num)
+
+            # Find nodes that can execute at this level
+            for node_id in list(remaining_nodes):
+                # Check if all dependencies are satisfied
+                dependencies = set(workflow.graph.predecessors(node_id))
+                if dependencies.issubset(completed_nodes):
+                    current_level.nodes.add(node_id)
+                    current_level.dependencies_satisfied.update(dependencies)
+
+            if not current_level.nodes:
+                # No nodes can execute - likely a dependency cycle
+                logger.warning(
+                    f"No executable nodes at level {level_num}, remaining: {remaining_nodes}"
+                )
+                break
+
+            levels.append(current_level)
+            completed_nodes.update(current_level.nodes)
+            remaining_nodes -= current_level.nodes
+            level_num += 1
+
+        return levels
+
+    def _identify_resources(self, workflow) -> Set[str]:
+        """Identify required resources from workflow metadata."""
+        resources = set()
+
+        # Check workflow-level metadata
+        if hasattr(workflow, "metadata") and workflow.metadata:
+            workflow_resources = workflow.metadata.get("required_resources", [])
+            resources.update(workflow_resources)
+
+        # Check node-level metadata
+        for node_id, node_instance in workflow._node_instances.items():
+            if hasattr(node_instance, "config") and isinstance(
+                node_instance.config, dict
+            ):
+                node_resources = node_instance.config.get("required_resources", [])
+                resources.update(node_resources)
+
+        return resources
+
+    def _estimate_duration(self, workflow, plan: ExecutionPlan) -> float:
+        """Estimate workflow execution duration."""
+        # Simplified estimation based on node count and type
+        base_duration_per_node = 0.1  # 100ms per node
+        async_multiplier = 0.5  # Async nodes are typically faster
+        sync_multiplier = 1.0
+
+        async_duration = (
+            len(plan.async_nodes) * base_duration_per_node * async_multiplier
+        )
+        sync_duration = len(plan.sync_nodes) * base_duration_per_node * sync_multiplier
+
+        # Account for parallelization
+        if plan.execution_levels:
+            # Use the longest level as bottleneck
+            max_level_size = max(len(level.nodes) for level in plan.execution_levels)
+            parallelization_factor = (
+                max_level_size / len(plan.execution_levels)
+                if plan.execution_levels
+                else 1
+            )
+        else:
+            parallelization_factor = 1
+
+        return (async_duration + sync_duration) * parallelization_factor
+
+
+class AsyncExecutionTracker:
+    """Tracks async execution state and results."""
+
+    def __init__(self, workflow, context: ExecutionContext):
+        self.workflow = workflow
+        self.context = context
+        self.results: Dict[str, Any] = {}
+        self.node_outputs: Dict[str, Any] = {}
+        self.errors: Dict[str, Exception] = {}
+        self.execution_times: Dict[str, float] = {}
+        self._locks: Dict[str, asyncio.Lock] = {}
+
+    def get_lock(self, node_id: str) -> asyncio.Lock:
+        """Get or create a lock for a node."""
+        if node_id not in self._locks:
+            self._locks[node_id] = asyncio.Lock()
+        return self._locks[node_id]
+
+    async def record_result(
+        self, node_id: str, result: Any, execution_time: float
+    ) -> None:
+        """Record execution result for a node."""
+        async with self.get_lock(node_id):
+            self.results[node_id] = result
+            self.node_outputs[node_id] = result
+            self.execution_times[node_id] = execution_time
+            self.context.metrics.node_durations[node_id] = execution_time
+
+    async def record_error(self, node_id: str, error: Exception) -> None:
+        """Record execution error for a node."""
+        async with self.get_lock(node_id):
+            self.errors[node_id] = error
+            self.context.metrics.error_count += 1
+
+    def get_result(self) -> Dict[str, Any]:
+        """Get final execution results."""
+        return {
+            "results": self.results.copy(),
+            "errors": {node_id: str(error) for node_id, error in self.errors.items()},
+            "execution_times": self.execution_times.copy(),
+            "total_duration": time.time() - self.context.start_time,
+            "metrics": self.context.metrics,
+        }
+
+
+class AsyncLocalRuntime(LocalRuntime):
+    """
+    Async-optimized runtime for Kailash workflows.
+
+    Extends LocalRuntime with advanced async execution capabilities:
+    - Concurrent node execution where possible
+    - Integrated ResourceRegistry support
+    - Workflow analysis and optimization
+    - Advanced performance tracking
+    - Circuit breaker patterns
+
+    Example:
+        ```python
+        from kailash.resources import ResourceRegistry, DatabasePoolFactory
+        from kailash.runtime.async_local import AsyncLocalRuntime
+
+        # Setup resources
+        registry = ResourceRegistry()
+        registry.register_factory("db", DatabasePoolFactory(...))
+
+        # Create async runtime
+        runtime = AsyncLocalRuntime(
+            resource_registry=registry,
+            max_concurrent_nodes=10,
+            enable_analysis=True
+        )
 
-    Usage:
-        runtime = AsyncLocalRuntime()
-        results = await runtime.execute(workflow, parameters={...})
+        # Execute workflow
+        result = await runtime.execute_workflow_async(workflow, inputs)
+        ```
     """
 
-    def __init__(self, debug: bool = False, max_concurrency: int = 10):
-        """Initialize the async local runtime.
+    def __init__(
+        self,
+        resource_registry: Optional[ResourceRegistry] = None,
+        max_concurrent_nodes: int = 10,
+        enable_analysis: bool = True,
+        enable_profiling: bool = True,
+        thread_pool_size: int = 4,
+        **kwargs,
+    ):
+        """
+        Initialize AsyncLocalRuntime.
 
         Args:
-            debug: Whether to enable debug logging
-            max_concurrency: Maximum number of nodes to execute concurrently
+            resource_registry: Optional ResourceRegistry for resource management
+            max_concurrent_nodes: Maximum number of nodes to execute concurrently
+            enable_analysis: Whether to analyze workflows for optimization
+            enable_profiling: Whether to collect detailed performance metrics
+            thread_pool_size: Size of thread pool for sync node execution
+            **kwargs: Additional arguments passed to LocalRuntime
         """
-        self.debug = debug
-        self.max_concurrency = max_concurrency
-        self.logger = logger
+        # Ensure async is enabled
+        kwargs["enable_async"] = True
+        super().__init__(**kwargs)
+
+        self.resource_registry = resource_registry
+        self.max_concurrent_nodes = max_concurrent_nodes
+        self.enable_analysis = enable_analysis
+        self.enable_profiling = enable_profiling
+
+        # Workflow analyzer
+        self.analyzer = (
+            WorkflowAnalyzer(enable_profiling=enable_profiling)
+            if enable_analysis
+            else None
+        )
 
-        if debug:
-            self.logger.setLevel(logging.DEBUG)
-        else:
-            self.logger.setLevel(logging.INFO)
+        # Thread pool for sync node execution
+        self.thread_pool = ThreadPoolExecutor(max_workers=thread_pool_size)
+
+        # Execution semaphore for concurrency control
+        self.execution_semaphore = asyncio.Semaphore(max_concurrent_nodes)
 
-    async def execute(
+        logger.info(
+            f"AsyncLocalRuntime initialized with max_concurrent_nodes={max_concurrent_nodes}"
+        )
+
+    async def execute_workflow_async(
         self,
-        workflow: Workflow,
-        task_manager: TaskManager | None = None,
-        parameters: dict[str, dict[str, Any]] | None = None,
-    ) -> tuple[dict[str, Any], str | None]:
-        """Execute a workflow asynchronously.
+        workflow,
+        inputs: Dict[str, Any],
+        context: Optional[ExecutionContext] = None,
+    ) -> Dict[str, Any]:
+        """
+        Execute workflow with native async support.
+
+        This method provides first-class async execution with:
+        - Concurrent node execution where dependencies allow
+        - Integrated resource management
+        - Performance optimization based on workflow analysis
+        - Advanced error handling and recovery
 
         Args:
             workflow: Workflow to execute
-            task_manager: Optional task manager for tracking
-            parameters: Optional parameter overrides per node
+            inputs: Input data for the workflow
+            context: Optional execution context
 
         Returns:
-            Tuple of (results dict, run_id)
+            Dictionary containing execution results and metrics
 
         Raises:
-            RuntimeExecutionError: If execution fails
-            WorkflowValidationError: If workflow is invalid
+            WorkflowExecutionError: If execution fails
         """
-        if not workflow:
-            raise RuntimeExecutionError("No workflow provided")
+        start_time = time.time()
 
-        run_id = None
+        # Create execution context
+        if context is None:
+            context = ExecutionContext(resource_registry=self.resource_registry)
+
+        # Add inputs to context
+        context.variables.update(inputs)
 
         try:
-            # Validate workflow with runtime parameters (Session 061)
-            workflow.validate(runtime_parameters=parameters)
-
-            # Initialize tracking
-            if task_manager:
-                try:
-                    run_id = task_manager.create_run(
-                        workflow_name=workflow.name,
-                        metadata={
-                            "parameters": parameters,
-                            "debug": self.debug,
-                            "runtime": "async_local",
-                        },
-                    )
-                except Exception as e:
-                    self.logger.warning(f"Failed to create task run: {e}")
-                    # Continue without tracking
-
-            # Execute workflow
-            results = await self._execute_workflow(
-                workflow=workflow,
-                task_manager=task_manager,
-                run_id=run_id,
-                parameters=parameters or {},
-            )
+            # Analyze workflow if enabled
+            execution_plan = None
+            if self.analyzer:
+                execution_plan = self.analyzer.analyze(workflow)
+                logger.info(
+                    f"Execution plan: {execution_plan.max_concurrent_nodes} max concurrent, "
+                    f"{len(execution_plan.execution_levels)} levels"
+                )
+
+            # Choose execution strategy based on analysis
+            if execution_plan and execution_plan.is_fully_async:
+                result = await self._execute_fully_async_workflow(
+                    workflow, context, execution_plan
+                )
+            elif execution_plan and execution_plan.has_async_nodes:
+                result = await self._execute_mixed_workflow(
+                    workflow, context, execution_plan
+                )
+            else:
+                result = await self._execute_sync_workflow(workflow, context)
+
+            # Update total execution time
+            total_time = time.time() - start_time
+            context.metrics.total_duration = total_time
+
+            logger.info(f"Workflow execution completed in {total_time:.2f}s")
+
+            return result
 
-            # Mark run as completed
-            if task_manager and run_id:
-                try:
-                    task_manager.update_run_status(run_id, "completed")
-                except Exception as e:
-                    self.logger.warning(f"Failed to update run status: {e}")
-
-            return results, run_id
-
-        except WorkflowValidationError:
-            # Re-raise validation errors as-is
-            if task_manager and run_id:
-                try:
-                    task_manager.update_run_status(
-                        run_id, "failed", error="Validation failed"
-                    )
-                except Exception:
-                    pass
-            raise
         except Exception as e:
-            # Mark run as failed
-            if task_manager and run_id:
-                try:
-                    task_manager.update_run_status(run_id, "failed", error=str(e))
-                except Exception:
-                    pass
-
-            # Wrap other errors in RuntimeExecutionError
-            raise RuntimeExecutionError(
-                f"Async workflow execution failed: {type(e).__name__}: {e}"
-            ) from e
+            logger.error(f"Workflow execution failed: {e}")
+            context.metrics.error_count += 1
+            raise WorkflowExecutionError(f"Async execution failed: {e}") from e
 
-    async def _execute_workflow(
-        self,
-        workflow: Workflow,
-        task_manager: TaskManager | None,
-        run_id: str | None,
-        parameters: dict[str, dict[str, Any]],
-    ) -> dict[str, Any]:
-        """Execute the workflow nodes asynchronously.
+    async def _execute_fully_async_workflow(
+        self, workflow, context: ExecutionContext, execution_plan: ExecutionPlan
+    ) -> Dict[str, Any]:
+        """Execute fully async workflow with maximum concurrency."""
+        logger.debug("Executing fully async workflow with concurrent levels")
 
-        Args:
-            workflow: Workflow to execute
-            task_manager: Task manager for tracking
-            run_id: Run ID for tracking
-            parameters: Parameter overrides
+        tracker = AsyncExecutionTracker(workflow, context)
 
-        Returns:
-            Dictionary of node results
+        # Execute by levels to respect dependencies
+        for level in execution_plan.execution_levels:
+            if not level.nodes:
+                continue
+
+            logger.debug(f"Executing level {level.level} with {len(level.nodes)} nodes")
+
+            # Create tasks for all nodes in this level
+            tasks = []
+            for node_id in level.nodes:
+                task = self._execute_node_async(workflow, node_id, tracker, context)
+                tasks.append(task)
+
+            # Execute all tasks in this level concurrently
+            try:
+                await asyncio.gather(*tasks, return_exceptions=False)
+            except Exception as e:
+                logger.error(f"Level {level.level} execution failed: {e}")
+                raise
+
+        return tracker.get_result()
+
+    async def _execute_mixed_workflow(
+        self, workflow, context: ExecutionContext, execution_plan: ExecutionPlan
+    ) -> Dict[str, Any]:
+        """Execute workflow with mixed sync/async nodes."""
+        logger.debug("Executing mixed workflow with sync/async optimization")
+
+        tracker = AsyncExecutionTracker(workflow, context)
+
+        # Execute by levels, handling sync/async appropriately
+        for level in execution_plan.execution_levels:
+            if not level.nodes:
+                continue
+
+            # Separate sync and async nodes in this level
+            async_nodes = [n for n in level.nodes if n in execution_plan.async_nodes]
+            sync_nodes = [n for n in level.nodes if n in execution_plan.sync_nodes]
+
+            tasks = []
+
+            # Add async node tasks
+            for node_id in async_nodes:
+                task = self._execute_node_async(workflow, node_id, tracker, context)
+                tasks.append(task)
+
+            # Add sync node tasks (wrapped in thread pool)
+            for node_id in sync_nodes:
+                task = self._execute_sync_node_async(
+                    workflow, node_id, tracker, context
+                )
+                tasks.append(task)
+
+            # Execute all tasks in this level concurrently
+            if tasks:
+                await asyncio.gather(*tasks, return_exceptions=False)
+
+        return tracker.get_result()
+
+    async def _execute_sync_workflow(
+        self, workflow, context: ExecutionContext
+    ) -> Dict[str, Any]:
+        """Execute sync-only workflow in thread pool."""
+        logger.debug("Executing sync-only workflow")
+
+        # Use parent's sync execution but wrap in async
+        loop = asyncio.get_event_loop()
+
+        def sync_execute():
+            # Convert context back to inputs for sync execution
+            return self._execute_sync_workflow_internal(workflow, context.variables)
+
+        result = await loop.run_in_executor(self.thread_pool, sync_execute)
+
+        # Wrap result in expected format
+        return {
+            "results": result,
+            "errors": {},
+            "execution_times": {},
+            "total_duration": time.time() - context.start_time,
+            "metrics": context.metrics,
+        }
+
+    def _execute_sync_workflow_internal(
+        self, workflow, inputs: Dict[str, Any]
+    ) -> Dict[str, Any]:
+        """Internal sync workflow execution."""
+        # Use parent's synchronous execution logic
+        # This is a simplified version - in practice, you'd call the parent's method
+        results = {}
 
-        Raises:
-            WorkflowExecutionError: If execution fails
-        """
-        # Get execution order
         try:
             execution_order = list(nx.topological_sort(workflow.graph))
-            self.logger.info(f"Determined execution order: {execution_order}")
         except nx.NetworkXError as e:
             raise WorkflowExecutionError(
                 f"Failed to determine execution order: {e}"
             ) from e
 
-        # Initialize results storage
-        results = {}
         node_outputs = {}
-        failed_nodes = []
 
-        # Execute each node
         for node_id in execution_order:
-            self.logger.info(f"Executing node: {node_id}")
-
-            # Get node instance
             node_instance = workflow._node_instances.get(node_id)
             if not node_instance:
-                raise WorkflowExecutionError(
-                    f"Node instance '{node_id}' not found in workflow"
-                )
+                raise WorkflowExecutionError(f"Node instance '{node_id}' not found")
 
-            # Start task tracking
-            task = None
-            if task_manager and run_id:
-                try:
-                    task = task_manager.create_task(
-                        run_id=run_id,
-                        node_id=node_id,
-                        node_type=node_instance.__class__.__name__,
-                        started_at=datetime.now(UTC),
-                    )
-                except Exception as e:
-                    self.logger.warning(
-                        f"Failed to create task for node '{node_id}': {e}"
-                    )
+            # Prepare inputs (simplified)
+            node_inputs = self._prepare_sync_node_inputs(
+                workflow, node_id, node_outputs, inputs
+            )
 
+            # Execute node
             try:
-                # Prepare inputs
-                inputs = self._prepare_node_inputs(
-                    workflow=workflow,
-                    node_id=node_id,
-                    node_instance=node_instance,
-                    node_outputs=node_outputs,
-                    parameters=parameters.get(node_id, {}),
-                )
-
-                if self.debug:
-                    self.logger.debug(f"Node {node_id} inputs: {inputs}")
-
-                # Update task status
-                if task:
-                    task.update_status(TaskStatus.RUNNING)
-
-                # Execute node - check if it supports async execution
-                start_time = datetime.now(UTC)
-
-                if hasattr(node_instance, "execute_async"):
-                    # Use async execution if available
-                    outputs = await node_instance.execute_async(**inputs)
-                else:
-                    # Fall back to synchronous execution using execute()
-                    # This ensures proper validation and error handling
-                    outputs = node_instance.execute(**inputs)
-
-                execution_time = (datetime.now(UTC) - start_time).total_seconds()
-
-                # Store outputs
-                node_outputs[node_id] = outputs
-                results[node_id] = outputs
-
-                if self.debug:
-                    self.logger.debug(f"Node {node_id} outputs: {outputs}")
-
-                # Update task status
-                if task:
-                    task.update_status(
-                        TaskStatus.COMPLETED,
-                        result=outputs,
-                        ended_at=datetime.now(UTC),
-                        metadata={"execution_time": execution_time},
-                    )
-
-                self.logger.info(
-                    f"Node {node_id} completed successfully in {execution_time:.3f}s"
-                )
-
+                result = node_instance.execute(**node_inputs)
+                results[node_id] = result
+                node_outputs[node_id] = result
             except Exception as e:
-                failed_nodes.append(node_id)
-                self.logger.error(f"Node {node_id} failed: {e}", exc_info=self.debug)
-
-                # Update task status
-                if task:
-                    task.update_status(
-                        TaskStatus.FAILED,
-                        error=str(e),
-                        ended_at=datetime.now(UTC),
-                    )
+                raise WorkflowExecutionError(
+                    f"Node '{node_id}' execution failed: {e}"
+                ) from e
 
-                # Determine if we should continue or stop
-                if self._should_stop_on_error(workflow, node_id):
-                    error_msg = f"Node '{node_id}' failed: {e}"
-                    if len(failed_nodes) > 1:
-                        error_msg += f" (Previously failed nodes: {failed_nodes[:-1]})"
+        return results
 
-                    raise WorkflowExecutionError(error_msg) from e
-                else:
-                    # Continue execution but record error
-                    results[node_id] = {
-                        "error": str(e),
-                        "error_type": type(e).__name__,
-                        "failed": True,
-                    }
+    def _prepare_sync_node_inputs(
+        self,
+        workflow,
+        node_id: str,
+        node_outputs: Dict[str, Any],
+        context_inputs: Dict[str, Any],
+    ) -> Dict[str, Any]:
+        """Prepare inputs for sync node execution."""
+        # Simplified input preparation
+        inputs = context_inputs.copy()
+
+        # Add outputs from predecessor nodes
+        for predecessor in workflow.graph.predecessors(node_id):
+            if predecessor in node_outputs:
+                inputs[f"{predecessor}_output"] = node_outputs[predecessor]
 
-        return results
+        return inputs
 
-    def _prepare_node_inputs(
+    async def _execute_node_async(
         self,
-        workflow: Workflow,
+        workflow,
         node_id: str,
-        node_instance: Any,
-        node_outputs: dict[str, dict[str, Any]],
-        parameters: dict[str, Any],
-    ) -> dict[str, Any]:
-        """Prepare inputs for a node execution.
+        tracker: AsyncExecutionTracker,
+        context: ExecutionContext,
+    ) -> None:
+        """Execute a single async node."""
+        start_time = time.time()
 
-        Args:
-            workflow: The workflow being executed
-            node_id: Current node ID
-            node_instance: Current node instance
-            node_outputs: Outputs from previously executed nodes
-            parameters: Parameter overrides
+        async with self.execution_semaphore:
+            try:
+                node_instance = workflow._node_instances.get(node_id)
+                if not node_instance:
+                    raise WorkflowExecutionError(f"Node instance '{node_id}' not found")
 
-        Returns:
-            Dictionary of inputs for the node
+                # Prepare inputs
+                inputs = await self._prepare_async_node_inputs(
+                    workflow, node_id, tracker, context
+                )
 
-        Raises:
-            WorkflowExecutionError: If input preparation fails
-        """
-        inputs = {}
+                # Execute async node
+                if isinstance(node_instance, AsyncNode):
+                    # Pass resource registry if available
+                    if context.resource_registry:
+                        result = await node_instance.async_run(
+                            resource_registry=context.resource_registry, **inputs
+                        )
+                    else:
+                        result = await node_instance.async_run(**inputs)
+                else:
+                    # Shouldn't happen in fully async workflow, but handle gracefully
+                    result = await self._execute_sync_node_in_thread(
+                        node_instance, inputs
+                    )
 
-        # Start with node configuration
-        inputs.update(node_instance.config)
+                execution_time = time.time() - start_time
+                await tracker.record_result(node_id, result, execution_time)
 
-        # Add connected inputs from other nodes
-        for edge in workflow.graph.in_edges(node_id, data=True):
-            source_node_id = edge[0]
-            mapping = edge[2].get("mapping", {})
+                logger.debug(f"Node '{node_id}' completed in {execution_time:.2f}s")
 
-            if source_node_id in node_outputs:
-                source_outputs = node_outputs[source_node_id]
+            except Exception as e:
+                execution_time = time.time() - start_time
+                await tracker.record_error(node_id, e)
+                logger.error(
+                    f"Node '{node_id}' failed after {execution_time:.2f}s: {e}"
+                )
+                raise WorkflowExecutionError(
+                    f"Node '{node_id}' execution failed: {e}"
+                ) from e
 
-                # Check if the source node failed
-                if isinstance(source_outputs, dict) and source_outputs.get("failed"):
-                    raise WorkflowExecutionError(
-                        f"Cannot use outputs from failed node '{source_node_id}'"
-                    )
+    async def _execute_sync_node_async(
+        self,
+        workflow,
+        node_id: str,
+        tracker: AsyncExecutionTracker,
+        context: ExecutionContext,
+    ) -> None:
+        """Execute a sync node in thread pool."""
+        start_time = time.time()
 
-                for source_key, target_key in mapping.items():
-                    if source_key in source_outputs:
-                        inputs[target_key] = source_outputs[source_key]
-                    else:
-                        self.logger.warning(
-                            f"Source output '{source_key}' not found in node '{source_node_id}'. "
-                            f"Available outputs: {list(source_outputs.keys())}"
-                        )
+        async with self.execution_semaphore:
+            try:
+                node_instance = workflow._node_instances.get(node_id)
+                if not node_instance:
+                    raise WorkflowExecutionError(f"Node instance '{node_id}' not found")
 
-        # Apply parameter overrides
-        inputs.update(parameters)
+                # Prepare inputs
+                inputs = await self._prepare_async_node_inputs(
+                    workflow, node_id, tracker, context
+                )
 
-        return inputs
+                # Execute sync node in thread pool
+                result = await self._execute_sync_node_in_thread(node_instance, inputs)
 
-    def _should_stop_on_error(self, workflow: Workflow, node_id: str) -> bool:
-        """Determine if execution should stop when a node fails.
+                execution_time = time.time() - start_time
+                await tracker.record_result(node_id, result, execution_time)
 
-        Args:
-            workflow: The workflow being executed
-            node_id: Failed node ID
+                logger.debug(
+                    f"Sync node '{node_id}' completed in {execution_time:.2f}s"
+                )
 
-        Returns:
-            Whether to stop execution
-        """
-        # Check if any downstream nodes depend on this node
-        has_dependents = workflow.graph.out_degree(node_id) > 0
+            except Exception as e:
+                execution_time = time.time() - start_time
+                await tracker.record_error(node_id, e)
+                logger.error(
+                    f"Sync node '{node_id}' failed after {execution_time:.2f}s: {e}"
+                )
+                raise WorkflowExecutionError(
+                    f"Sync node '{node_id}' execution failed: {e}"
+                ) from e
 
-        # For now, stop if the failed node has dependents
-        # Future: implement configurable error handling policies
-        return has_dependents
+    async def _execute_sync_node_in_thread(
+        self, node_instance: Node, inputs: Dict[str, Any]
+    ) -> Any:
+        """Execute sync node in thread pool."""
+        loop = asyncio.get_event_loop()
 
+        def execute_sync():
+            return node_instance.execute(**inputs)
 
-# Backward compatibility: Use the unified LocalRuntime
-from kailash.runtime.local import LocalRuntime  # noqa: E402
+        return await loop.run_in_executor(self.thread_pool, execute_sync)
 
-# Export LocalRuntime as AsyncLocalRuntime for backward compatibility
-# AsyncLocalRuntime = LocalRuntime  # Commented out to avoid redefinition warning
+    async def _prepare_async_node_inputs(
+        self,
+        workflow,
+        node_id: str,
+        tracker: AsyncExecutionTracker,
+        context: ExecutionContext,
+    ) -> Dict[str, Any]:
+        """Prepare inputs for async node execution."""
+        inputs = context.variables.copy()
+
+        # Add outputs from predecessor nodes
+        for predecessor in workflow.graph.predecessors(node_id):
+            if predecessor in tracker.node_outputs:
+                # Use the actual connection mapping if available
+                edge_data = workflow.graph.get_edge_data(predecessor, node_id)
+                if edge_data and "mapping" in edge_data:
+                    # Handle new graph format with mapping
+                    mapping = edge_data["mapping"]
+                    source_data = tracker.node_outputs[predecessor]
+
+                    for source_path, target_param in mapping.items():
+                        if source_path != "result" and isinstance(source_data, dict):
+                            # Navigate the path (e.g., "result.data")
+                            path_parts = source_path.split(".")
+                            current_data = source_data
+                            for part in path_parts:
+                                if (
+                                    isinstance(current_data, dict)
+                                    and part in current_data
+                                ):
+                                    current_data = current_data[part]
+                                else:
+                                    current_data = None
+                                    break
+                            inputs[target_param] = current_data
+                        else:
+                            # Source path is 'result' or source_data is not a dict
+                            if source_path == "result":
+                                inputs[target_param] = source_data
+                            elif (
+                                isinstance(source_data, dict)
+                                and source_path in source_data
+                            ):
+                                inputs[target_param] = source_data[source_path]
+                            else:
+                                inputs[target_param] = source_data
+                elif edge_data and "connections" in edge_data:
+                    # Handle legacy connection format
+                    connections = edge_data["connections"]
+                    for connection in connections:
+                        source_path = connection.get("source_path", "result")
+                        target_param = connection.get(
+                            "target_param", f"{predecessor}_output"
+                        )
 
+                        # Extract data using source path
+                        source_data = tracker.node_outputs[predecessor]
+                        if source_path != "result" and isinstance(source_data, dict):
+                            # Navigate the path (e.g., "result.data")
+                            path_parts = source_path.split(".")
+                            current_data = source_data
+                            for part in path_parts:
+                                if (
+                                    isinstance(current_data, dict)
+                                    and part in current_data
+                                ):
+                                    current_data = current_data[part]
+                                else:
+                                    current_data = None
+                                    break
+                            inputs[target_param] = current_data
+                        else:
+                            inputs[target_param] = source_data
+                else:
+                    # Default behavior - use predecessor output directly
+                    inputs[f"{predecessor}_output"] = tracker.node_outputs[predecessor]
 
-# For better backward compatibility, create a wrapper that sets enable_async=True by default
-class AsyncLocalRuntimeCompat(LocalRuntime):
-    """Backward compatibility wrapper for AsyncLocalRuntime.
+        return inputs
 
-    This wrapper automatically enables async execution and provides the same
-    interface as the original AsyncLocalRuntime.
-    """
+    async def cleanup(self) -> None:
+        """Clean up runtime resources."""
+        logger.info("Cleaning up AsyncLocalRuntime")
 
-    def __init__(self, debug: bool = False, max_concurrency: int = 10, **kwargs):
-        """Initialize with async enabled by default."""
-        super().__init__(
-            debug=debug, enable_async=True, max_concurrency=max_concurrency, **kwargs
-        )
+        # Clean up thread pool
+        self.thread_pool.shutdown(wait=True)
 
-    async def execute(self, *args, **kwargs):
-        """Async execute method for full backward compatibility."""
-        return await self.execute_async(*args, **kwargs)
+        # Clean up resource registry if owned
+        if self.resource_registry:
+            await self.resource_registry.cleanup()
 
+        logger.info("AsyncLocalRuntime cleanup complete")
 
-# Use the compatibility wrapper as the main export
-# AsyncLocalRuntime = AsyncLocalRuntimeCompat  # Commented out to avoid redefinition warning - class definition at top takes precedence
+    def __del__(self):
+        """Cleanup on deletion."""
+        try:
+            # Schedule cleanup if event loop is available
+            loop = asyncio.get_event_loop()
+            if loop.is_running():
+                loop.create_task(self.cleanup())
+        except Exception:
+            # If no event loop, just shutdown thread pool
+            if hasattr(self, "thread_pool"):
+                self.thread_pool.shutdown(wait=False)
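
For orientation only: assembled from the docstring example embedded in the new module above, a minimal usage sketch of the 0.6.0 AsyncLocalRuntime might look like the following. The workflow object, the inputs dict, and the DatabasePoolFactory arguments are placeholders, not contents of either package version.

```python
import asyncio

from kailash.resources import DatabasePoolFactory, ResourceRegistry
from kailash.runtime.async_local import AsyncLocalRuntime


async def run(workflow, inputs):
    # Register shared resources once; nodes resolve them through the ExecutionContext.
    registry = ResourceRegistry()
    registry.register_factory("db", DatabasePoolFactory(...))  # placeholder factory args

    # max_concurrent_nodes bounds the per-level asyncio.Semaphore shown in the diff.
    runtime = AsyncLocalRuntime(
        resource_registry=registry,
        max_concurrent_nodes=10,
        enable_analysis=True,
    )
    try:
        result = await runtime.execute_workflow_async(workflow, inputs)
        return result["results"]
    finally:
        await runtime.cleanup()


# asyncio.run(run(my_workflow, {"param": "value"}))  # my_workflow is hypothetical
```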