kailash 0.4.2__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff shows the changes between publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
@@ -0,0 +1,420 @@
+"""Resource management utilities for the Kailash SDK.
+
+This module provides context managers and utilities for efficient resource
+management across the SDK, ensuring proper cleanup and preventing memory leaks.
+"""
+
+import asyncio
+import logging
+import threading
+import weakref
+from collections import defaultdict
+from contextlib import asynccontextmanager, contextmanager
+from datetime import UTC, datetime
+from typing import Any, Callable, Dict, Generic, Optional, Set, TypeVar
+
+logger = logging.getLogger(__name__)
+
+T = TypeVar("T")
+
+
+class ResourcePool(Generic[T]):
+    """Generic resource pool for connection pooling and resource reuse.
+
+    This class provides a thread-safe pool for managing expensive resources
+    like database connections, HTTP clients, etc.
+    """
+
+    def __init__(
+        self,
+        factory: Callable[[], T],
+        max_size: int = 10,
+        timeout: float = 30.0,
+        cleanup: Optional[Callable[[T], None]] = None,
+    ):
+        """Initialize the resource pool.
+
+        Args:
+            factory: Function to create new resources
+            max_size: Maximum pool size
+            timeout: Timeout for acquiring resources
+            cleanup: Optional cleanup function for resources
+        """
+        self._factory = factory
+        self._max_size = max_size
+        self._timeout = timeout
+        self._cleanup = cleanup
+
+        self._pool: list[T] = []
+        self._in_use: Set[T] = set()
+        self._lock = threading.Lock()
+        self._semaphore = threading.Semaphore(max_size)
+        self._created_count = 0
+
+    @contextmanager
+    def acquire(self):
+        """Acquire a resource from the pool.
+
+        Yields:
+            Resource instance
+
+        Raises:
+            TimeoutError: If resource cannot be acquired within timeout
+        """
+        if not self._semaphore.acquire(timeout=self._timeout):
+            raise TimeoutError(f"Failed to acquire resource within {self._timeout}s")
+
+        resource = None
+        try:
+            with self._lock:
+                # Try to get from pool
+                if self._pool:
+                    resource = self._pool.pop()
+                else:
+                    # Create new resource if under limit
+                    if self._created_count < self._max_size:
+                        resource = self._factory()
+                        self._created_count += 1
+                    else:
+                        raise RuntimeError("Pool exhausted")
+
+                self._in_use.add(resource)
+
+            yield resource
+
+        finally:
+            if resource is not None:
+                with self._lock:
+                    self._in_use.discard(resource)
+                    self._pool.append(resource)
+            self._semaphore.release()
+
+    def cleanup_all(self):
+        """Clean up all resources in the pool."""
+        with self._lock:
+            # Clean up pooled resources
+            for resource in self._pool:
+                if self._cleanup:
+                    try:
+                        self._cleanup(resource)
+                    except Exception as e:
+                        logger.error(f"Error cleaning up resource: {e}")
+
+            # Clean up in-use resources (best effort)
+            for resource in self._in_use:
+                if self._cleanup:
+                    try:
+                        self._cleanup(resource)
+                    except Exception as e:
+                        logger.error(f"Error cleaning up in-use resource: {e}")
+
+            self._pool.clear()
+            self._in_use.clear()
+            self._created_count = 0
+
+
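
A minimal usage sketch for the `ResourcePool` above. The `sqlite3` factory, the pool sizes, and the `kailash.resources` import path are illustrative assumptions; the diff does not name the new module, so adjust the import to wherever this file lands in the package.

```python
import sqlite3

from kailash.resources import ResourcePool  # hypothetical import path

# Pool up to five SQLite connections and close them on teardown.
pool = ResourcePool(
    factory=lambda: sqlite3.connect("example.db"),
    max_size=5,
    timeout=10.0,
    cleanup=lambda conn: conn.close(),
)

with pool.acquire() as conn:  # blocks up to `timeout` seconds for a free slot
    conn.execute("SELECT 1")

pool.cleanup_all()  # closes every pooled connection
```
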
+class AsyncResourcePool(Generic[T]):
+    """Async version of ResourcePool for async resources."""
+
+    def __init__(
+        self,
+        factory: Callable[[], T],
+        max_size: int = 10,
+        timeout: float = 30.0,
+        cleanup: Optional[Callable[[T], Any]] = None,
+    ):
+        """Initialize the async resource pool.
+
+        Args:
+            factory: Async function to create new resources
+            max_size: Maximum pool size
+            timeout: Timeout for acquiring resources
+            cleanup: Optional async cleanup function
+        """
+        self._factory = factory
+        self._max_size = max_size
+        self._timeout = timeout
+        self._cleanup = cleanup
+
+        self._pool: list[T] = []
+        self._in_use: Set[T] = set()
+        self._lock = asyncio.Lock()
+        self._semaphore = asyncio.Semaphore(max_size)
+        self._created_count = 0
+
+    @asynccontextmanager
+    async def acquire(self):
+        """Acquire a resource from the pool asynchronously.
+
+        Yields:
+            Resource instance
+
+        Raises:
+            TimeoutError: If resource cannot be acquired within timeout
+        """
+        try:
+            await asyncio.wait_for(self._semaphore.acquire(), timeout=self._timeout)
+        except asyncio.TimeoutError:
+            raise TimeoutError(f"Failed to acquire resource within {self._timeout}s")
+
+        resource = None
+        try:
+            async with self._lock:
+                # Try to get from pool
+                if self._pool:
+                    resource = self._pool.pop()
+                else:
+                    # Create new resource if under limit
+                    if self._created_count < self._max_size:
+                        if asyncio.iscoroutinefunction(self._factory):
+                            resource = await self._factory()
+                        else:
+                            resource = self._factory()
+                        self._created_count += 1
+                    else:
+                        raise RuntimeError("Pool exhausted")
+
+                self._in_use.add(resource)
+
+            yield resource
+
+        finally:
+            if resource is not None:
+                async with self._lock:
+                    self._in_use.discard(resource)
+                    self._pool.append(resource)
+            self._semaphore.release()
+
+    async def cleanup_all(self):
+        """Clean up all resources in the pool asynchronously."""
+        async with self._lock:
+            # Clean up pooled resources
+            for resource in self._pool:
+                if self._cleanup:
+                    try:
+                        if asyncio.iscoroutinefunction(self._cleanup):
+                            await self._cleanup(resource)
+                        else:
+                            self._cleanup(resource)
+                    except Exception as e:
+                        logger.error(f"Error cleaning up resource: {e}")
+
+            # Clean up in-use resources (best effort)
+            for resource in self._in_use:
+                if self._cleanup:
+                    try:
+                        if asyncio.iscoroutinefunction(self._cleanup):
+                            await self._cleanup(resource)
+                        else:
+                            self._cleanup(resource)
+                    except Exception as e:
+                        logger.error(f"Error cleaning up in-use resource: {e}")
+
+            self._pool.clear()
+            self._in_use.clear()
+            self._created_count = 0
+
+
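
The async pool mirrors the sync one but accepts either sync or async factories and cleanup callables (it checks `asyncio.iscoroutinefunction` before awaiting). A sketch under the same hypothetical import path, with a stand-in resource class:

```python
import asyncio

from kailash.resources import AsyncResourcePool  # hypothetical import path


class FakeConnection:
    """Stand-in for an expensive async resource such as a DB connection."""

    async def close(self) -> None:
        pass


async def make_connection() -> FakeConnection:
    # Async factory; the pool detects and awaits it.
    return FakeConnection()


async def main() -> None:
    pool = AsyncResourcePool(factory=make_connection, max_size=3,
                             cleanup=FakeConnection.close)
    async with pool.acquire() as conn:
        print("got", conn)
    await pool.cleanup_all()


asyncio.run(main())
```
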
+class ResourceTracker:
+    """Track and manage resources across the SDK to prevent leaks."""
+
+    def __init__(self):
+        self._resources: Dict[str, weakref.WeakSet] = defaultdict(weakref.WeakSet)
+        self._metrics: Dict[str, Dict[str, Any]] = defaultdict(dict)
+        self._lock = threading.Lock()
+
+    def register(self, resource_type: str, resource: Any):
+        """Register a resource for tracking.
+
+        Args:
+            resource_type: Type/category of resource
+            resource: Resource instance to track
+        """
+        with self._lock:
+            self._resources[resource_type].add(resource)
+
+            # Update metrics
+            if resource_type not in self._metrics:
+                self._metrics[resource_type] = {
+                    "created": 0,
+                    "active": 0,
+                    "peak": 0,
+                    "last_created": None,
+                }
+
+            self._metrics[resource_type]["created"] += 1
+            self._metrics[resource_type]["active"] = len(self._resources[resource_type])
+            self._metrics[resource_type]["peak"] = max(
+                self._metrics[resource_type]["peak"],
+                self._metrics[resource_type]["active"],
+            )
+            self._metrics[resource_type]["last_created"] = datetime.now(UTC)
+
+    def get_metrics(self) -> Dict[str, Dict[str, Any]]:
+        """Get current resource metrics.
+
+        Returns:
+            Dictionary of metrics by resource type
+        """
+        with self._lock:
+            # Update active counts
+            for resource_type in self._metrics:
+                self._metrics[resource_type]["active"] = len(
+                    self._resources[resource_type]
+                )
+
+            return dict(self._metrics)
+
+    def get_active_resources(
+        self, resource_type: Optional[str] = None
+    ) -> Dict[str, int]:
+        """Get count of active resources.
+
+        Args:
+            resource_type: Optional filter by type
+
+        Returns:
+            Dictionary of resource type to active count
+        """
+        with self._lock:
+            if resource_type:
+                return {resource_type: len(self._resources.get(resource_type, set()))}
+            else:
+                return {
+                    rtype: len(resources)
+                    for rtype, resources in self._resources.items()
+                }
+
+
+# Global resource tracker instance
+_resource_tracker = ResourceTracker()
+
+
+def get_resource_tracker() -> ResourceTracker:
+    """Get the global resource tracker instance."""
+    return _resource_tracker
+
+
+@contextmanager
+def managed_resource(
+    resource_type: str, resource: Any, cleanup: Optional[Callable] = None
+):
+    """Context manager for tracking and cleaning up resources.
+
+    Args:
+        resource_type: Type/category of resource
+        resource: Resource instance
+        cleanup: Optional cleanup function
+
+    Yields:
+        The resource instance
+    """
+    _resource_tracker.register(resource_type, resource)
+
+    try:
+        yield resource
+    finally:
+        if cleanup:
+            try:
+                cleanup(resource)
+            except Exception as e:
+                logger.error(f"Error cleaning up {resource_type}: {e}")
+
+
+@asynccontextmanager
+async def async_managed_resource(
+    resource_type: str, resource: Any, cleanup: Optional[Callable] = None
+):
+    """Async context manager for tracking and cleaning up resources.
+
+    Args:
+        resource_type: Type/category of resource
+        resource: Resource instance
+        cleanup: Optional async cleanup function
+
+    Yields:
+        The resource instance
+    """
+    _resource_tracker.register(resource_type, resource)
+
+    try:
+        yield resource
+    finally:
+        if cleanup:
+            try:
+                if asyncio.iscoroutinefunction(cleanup):
+                    await cleanup(resource)
+                else:
+                    cleanup(resource)
+            except Exception as e:
+                logger.error(f"Error cleaning up {resource_type}: {e}")
+
+
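
`managed_resource` combines tracking and cleanup in one context manager, and the module-level tracker reports created/active/peak counts per resource type. A sketch, with the same caveat about the import path:

```python
from kailash.resources import get_resource_tracker, managed_resource  # hypothetical import path

# Track a file handle under the "file_handle" category and close it on exit.
with managed_resource("file_handle", open("data.csv"), cleanup=lambda f: f.close()) as fh:
    header = fh.readline()

# The tracker holds weak references, so counts drop once resources are garbage collected.
print(get_resource_tracker().get_metrics())
# e.g. {"file_handle": {"created": 1, "active": 1, "peak": 1, "last_created": ...}}
```
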
+class ConcurrencyLimiter:
+    """Limit concurrent operations to prevent resource exhaustion."""
+
+    def __init__(self, max_concurrent: int = 10):
+        """Initialize the concurrency limiter.
+
+        Args:
+            max_concurrent: Maximum concurrent operations
+        """
+        self._semaphore = threading.Semaphore(max_concurrent)
+        self._active = 0
+        self._peak = 0
+        self._lock = threading.Lock()
+
+    @contextmanager
+    def limit(self):
+        """Context manager to limit concurrency."""
+        self._semaphore.acquire()
+        with self._lock:
+            self._active += 1
+            self._peak = max(self._peak, self._active)
+
+        try:
+            yield
+        finally:
+            with self._lock:
+                self._active -= 1
+            self._semaphore.release()
+
+    def get_stats(self) -> Dict[str, int]:
+        """Get concurrency statistics."""
+        with self._lock:
+            return {"active": self._active, "peak": self._peak}
+
+
+class AsyncConcurrencyLimiter:
+    """Async version of ConcurrencyLimiter."""
+
+    def __init__(self, max_concurrent: int = 10):
+        """Initialize the async concurrency limiter.
+
+        Args:
+            max_concurrent: Maximum concurrent operations
+        """
+        self._semaphore = asyncio.Semaphore(max_concurrent)
+        self._active = 0
+        self._peak = 0
+        self._lock = asyncio.Lock()
+
+    @asynccontextmanager
+    async def limit(self):
+        """Async context manager to limit concurrency."""
+        await self._semaphore.acquire()
+        async with self._lock:
+            self._active += 1
+            self._peak = max(self._peak, self._active)
+
+        try:
+            yield
+        finally:
+            async with self._lock:
+                self._active -= 1
+            self._semaphore.release()
+
+    async def get_stats(self) -> Dict[str, int]:
+        """Get concurrency statistics."""
+        async with self._lock:
+            return {"active": self._active, "peak": self._peak}
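
`ConcurrencyLimiter` caps how many callers sit inside a guarded section at once, and `get_stats()` reports current and peak occupancy. A sketch (again assuming a `kailash.resources` import path):

```python
from concurrent.futures import ThreadPoolExecutor

from kailash.resources import ConcurrencyLimiter  # hypothetical import path

limiter = ConcurrencyLimiter(max_concurrent=4)


def handle(item: int) -> int:
    with limiter.limit():  # at most four threads execute this block concurrently
        return item * item


with ThreadPoolExecutor(max_workers=16) as executor:
    results = list(executor.map(handle, range(100)))

print(limiter.get_stats())  # e.g. {"active": 0, "peak": 4}
```
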
@@ -21,7 +21,7 @@ class WorkflowBuilder:
 
     def add_node(
         self,
-        node_type: str,
+        node_type: str | type | Any,
         node_id: str | None = None,
         config: dict[str, Any] | None = None,
     ) -> str:
@@ -29,9 +29,9 @@ class WorkflowBuilder:
         Add a node to the workflow.
 
         Args:
-            node_type: Node type name
+            node_type: Node type name (string), Node class, or Node instance
             node_id: Unique identifier for this node (auto-generated if not provided)
-            config: Configuration for the node
+            config: Configuration for the node (ignored if node_type is an instance)
 
         Returns:
             Node ID (useful for method chaining)
@@ -48,11 +48,80 @@ class WorkflowBuilder:
                 f"Node ID '{node_id}' already exists in workflow"
             )
 
-        self.nodes[node_id] = {"type": node_type, "config": config or {}}
+        # Import Node here to avoid circular imports
+        from kailash.nodes.base import Node
+
+        # Handle different input types
+        if isinstance(node_type, str):
+            # String node type name
+            self.nodes[node_id] = {"type": node_type, "config": config or {}}
+            type_name = node_type
+        elif isinstance(node_type, type) and issubclass(node_type, Node):
+            # Node class
+            self.nodes[node_id] = {
+                "type": node_type.__name__,
+                "config": config or {},
+                "class": node_type,
+            }
+            type_name = node_type.__name__
+        elif hasattr(node_type, "__class__") and issubclass(node_type.__class__, Node):
+            # Node instance
+            self.nodes[node_id] = {
+                "instance": node_type,
+                "type": node_type.__class__.__name__,
+            }
+            type_name = node_type.__class__.__name__
+        else:
+            raise WorkflowValidationError(
+                f"Invalid node type: {type(node_type)}. "
+                "Expected: str (node type name), Node class, or Node instance"
+            )
 
-        logger.info(f"Added node '{node_id}' of type '{node_type}'")
+        logger.info(f"Added node '{node_id}' of type '{type_name}'")
         return node_id
 
+    def add_node_instance(self, node_instance: Any, node_id: str | None = None) -> str:
+        """
+        Add a node instance to the workflow.
+
+        This is a convenience method for adding pre-configured node instances.
+
+        Args:
+            node_instance: Pre-configured node instance
+            node_id: Unique identifier for this node (auto-generated if not provided)
+
+        Returns:
+            Node ID
+
+        Raises:
+            WorkflowValidationError: If node_id is already used or instance is invalid
+        """
+        return self.add_node(node_instance, node_id)
+
+    def add_node_type(
+        self,
+        node_type: str,
+        node_id: str | None = None,
+        config: dict[str, Any] | None = None,
+    ) -> str:
+        """
+        Add a node by type name to the workflow.
+
+        This is the original string-based method, provided for clarity and backward compatibility.
+
+        Args:
+            node_type: Node type name as string
+            node_id: Unique identifier for this node (auto-generated if not provided)
+            config: Configuration for the node
+
+        Returns:
+            Node ID
+
+        Raises:
+            WorkflowValidationError: If node_id is already used
+        """
+        return self.add_node(node_type, node_id, config)
+
     def add_connection(
         self, from_node: str, from_output: str, to_node: str, to_input: str
     ) -> None:
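
With this change, `add_node()` accepts a type name, a `Node` subclass, or a pre-built instance, and `add_node_type()` / `add_node_instance()` make the intent explicit. A sketch of the three forms; `MyCustomNode`, the `"CSVReaderNode"` type name, the `WorkflowBuilder` import path, and the trailing `build()` call are placeholders inferred from this diff, not guaranteed SDK names:

```python
from kailash.workflow.builder import WorkflowBuilder  # hypothetical import path

from myapp.nodes import MyCustomNode  # hypothetical Node subclass

builder = WorkflowBuilder()

# 1. String type name (the original behaviour; also available as add_node_type()).
builder.add_node("CSVReaderNode", "reader", config={"file_path": "in.csv"})

# 2. Node class plus config; the class is stored and instantiated at build time.
builder.add_node(MyCustomNode, "transform", config={"threshold": 0.5})

# 3. Pre-configured instance; any config argument is ignored
#    (also available as add_node_instance()).
builder.add_node(MyCustomNode(threshold=0.9), "writer")

workflow = builder.build()
```
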
@@ -149,11 +218,25 @@ class WorkflowBuilder:
         # Add nodes to workflow
         for node_id, node_info in self.nodes.items():
             try:
-                node_type = node_info["type"]
-                node_config = node_info.get("config", {})
-
-                # Add the node to workflow
-                workflow._add_node_internal(node_id, node_type, node_config)
+                if "instance" in node_info:
+                    # Node instance was provided
+                    workflow.add_node(
+                        node_id=node_id, node_or_type=node_info["instance"]
+                    )
+                elif "class" in node_info:
+                    # Node class was provided
+                    node_class = node_info["class"]
+                    node_config = node_info.get("config", {})
+                    workflow.add_node(
+                        node_id=node_id, node_or_type=node_class, **node_config
+                    )
+                else:
+                    # String node type
+                    node_type = node_info["type"]
+                    node_config = node_info.get("config", {})
+                    workflow.add_node(
+                        node_id=node_id, node_or_type=node_type, **node_config
+                    )
             except Exception as e:
                 raise WorkflowValidationError(
                     f"Failed to add node '{node_id}' to workflow: {e}"
@@ -516,12 +516,11 @@ class CyclicWorkflowExecutor:
                 f"Cycle {cycle_id} iteration now at {cycle_state.iteration} (after update)"
             )
 
-            # Check max iterations (built into monitor.record_iteration)
-            if cycle_state.iteration >= cycle_config.get(
-                "max_iterations", float("inf")
-            ):
+            # Check max iterations - loop_count represents actual iterations executed
+            max_iterations = cycle_config.get("max_iterations", float("inf"))
+            if loop_count >= max_iterations:
                 logger.info(
-                    f"Cycle {cycle_id} reached max iterations: {cycle_state.iteration}"
+                    f"Cycle {cycle_id} reached max iterations: {loop_count}/{max_iterations}"
                 )
                 should_terminate = True
 
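
The practical effect: `loop_count` counts actual cycle-body executions, so with `max_iterations` set to 3 the cycle runs the body exactly three times before terminating, independent of how `cycle_state.iteration` is offset. A plain-Python sketch of the termination rule (not SDK code):

```python
max_iterations = 3
loop_count = 0
should_terminate = False

while not should_terminate:
    loop_count += 1  # one actual execution of the cycle body
    # ... execute cycle nodes, propagate mapped outputs ...
    if loop_count >= max_iterations:
        should_terminate = True

assert loop_count == 3  # the body ran exactly max_iterations times
```
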
@@ -643,9 +642,6 @@ class CyclicWorkflowExecutor:
             if is_cycle_edge and is_cycle_iteration and previous_iteration_results:
                 # For cycle edges after first iteration, use previous iteration results
                 pred_output = previous_iteration_results.get(pred)
-                logger.debug(
-                    f"Using previous iteration result for {pred} -> {node_id}: {type(pred_output)} keys={list(pred_output.keys()) if isinstance(pred_output, dict) else 'not dict'}"
-                )
             elif pred in state.node_outputs:
                 # For non-cycle edges or first iteration, use normal state
                 pred_output = state.node_outputs[pred]
@@ -658,10 +654,6 @@ class CyclicWorkflowExecutor:
 
             # Apply mapping
             mapping = edge_data.get("mapping", {})
-            if is_cycle_edge and is_cycle_iteration:
-                logger.debug(
-                    f"Applying cycle mapping: {mapping} from {pred} to {node_id}"
-                )
             for src_key, dst_key in mapping.items():
                 # Handle nested output access
                 if "." in src_key:
@@ -677,10 +669,6 @@ class CyclicWorkflowExecutor:
                     inputs[dst_key] = value
                 elif isinstance(pred_output, dict) and src_key in pred_output:
                     inputs[dst_key] = pred_output[src_key]
-                    if is_cycle_edge and is_cycle_iteration:
-                        logger.debug(
-                            f"Mapped {src_key}={pred_output[src_key]} to {dst_key}"
-                        )
                 elif src_key == "output":
                     # Default output mapping
                     inputs[dst_key] = pred_output
@@ -706,10 +694,6 @@ class CyclicWorkflowExecutor:
         # Recursively filter None values from context to avoid security validation errors
         context = self._filter_none_values(context)
 
-        # Debug inputs before merging
-        if cycle_state and cycle_state.iteration > 0:
-            logger.debug(f"Inputs gathered from connections: {inputs}")
-
 
         # Merge node config with inputs
  merged_inputs = {**node.config}
@@ -769,11 +753,6 @@ class CyclicWorkflowExecutor:
         logger.debug(
             f"Executing node: {node_id} (iteration: {cycle_state.iteration if cycle_state else 'N/A'})"
         )
-        logger.debug(f"Node inputs: {list(merged_inputs.keys())}")
-        if cycle_state:
-            logger.debug(
-                f"Input values - value: {merged_inputs.get('value')}, counter: {merged_inputs.get('counter')}"
-            )
 
         try:
             with collector.collect(node_id=node_id) as metrics_context:
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: kailash
-Version: 0.4.2
+Version: 0.5.0
 Summary: Python SDK for the Kailash container-node architecture
 Home-page: https://github.com/integrum/kailash-python-sdk
 Author: Integrum
@@ -120,12 +120,13 @@ Dynamic: requires-python
 - 🏭 **Session 067 Enhancements**: Business workflow templates, data lineage tracking, automatic credential rotation
 - 🔄 **Zero-Downtime Operations**: Automatic credential rotation with enterprise notifications and audit trails
 - 🌉 **Enterprise Middleware (v0.4.0)**: Production-ready middleware architecture with real-time agent-frontend communication, dynamic workflows, and AI chat integration
+- ⚡ **Performance Revolution (v0.5.0)**: 10-100x faster parameter resolution, clear async/sync separation, automatic resource management
 
 ## 🏗️ Project Architecture
 
 The Kailash project is organized into three distinct layers:
 
-### Core Architecture (v0.4.0)
+### Core Architecture (v0.5.0)
 ```
 ┌─────────────────┐   ┌──────────────────┐   ┌─────────────────┐
 │    Frontend     │   │    Middleware    │   │  Kailash Core   │
@@ -150,9 +151,10 @@ kailash_python_sdk/
 1. **SDK Layer** (`src/kailash/`) - The core framework providing:
    - Nodes: Reusable computational units (100+ built-in)
    - Workflows: DAG-based orchestration with cyclic support
-   - Runtime: Unified execution engine (async + enterprise)
-   - Middleware: Enterprise communication layer (NEW in v0.4.0)
+   - Runtime: Unified execution engine with optimized async/sync separation (v0.5.0)
+   - Middleware: Enterprise communication layer (v0.4.0)
    - Security: RBAC/ABAC access control with audit logging
+   - Performance: LRU parameter caching, automatic resource pooling (NEW in v0.5.0)
 
 2. **Application Layer** (`apps/`) - Complete applications including:
    - User Management System (Django++ capabilities)