PyPI - kailash - Versions diffs - 0.3.2__py3-none-any.whl → 0.4.0__py3-none-any.whl - Mend

kailash 0.3.2-py3-none-any.whl → 0.4.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (146) hide show

kailash/__init__.py +33 -1
kailash/access_control/__init__.py +129 -0
kailash/access_control/managers.py +461 -0
kailash/access_control/rule_evaluators.py +467 -0
kailash/access_control_abac.py +825 -0
kailash/config/__init__.py +27 -0
kailash/config/database_config.py +359 -0
kailash/database/__init__.py +28 -0
kailash/database/execution_pipeline.py +499 -0
kailash/middleware/__init__.py +306 -0
kailash/middleware/auth/__init__.py +33 -0
kailash/middleware/auth/access_control.py +436 -0
kailash/middleware/auth/auth_manager.py +422 -0
kailash/middleware/auth/jwt_auth.py +477 -0
kailash/middleware/auth/kailash_jwt_auth.py +616 -0
kailash/middleware/communication/__init__.py +37 -0
kailash/middleware/communication/ai_chat.py +989 -0
kailash/middleware/communication/api_gateway.py +802 -0
kailash/middleware/communication/events.py +470 -0
kailash/middleware/communication/realtime.py +710 -0
kailash/middleware/core/__init__.py +21 -0
kailash/middleware/core/agent_ui.py +890 -0
kailash/middleware/core/schema.py +643 -0
kailash/middleware/core/workflows.py +396 -0
kailash/middleware/database/__init__.py +63 -0
kailash/middleware/database/base.py +113 -0
kailash/middleware/database/base_models.py +525 -0
kailash/middleware/database/enums.py +106 -0
kailash/middleware/database/migrations.py +12 -0
kailash/{api/database.py → middleware/database/models.py} +183 -291
kailash/middleware/database/repositories.py +685 -0
kailash/middleware/database/session_manager.py +19 -0
kailash/middleware/mcp/__init__.py +38 -0
kailash/middleware/mcp/client_integration.py +585 -0
kailash/middleware/mcp/enhanced_server.py +576 -0
kailash/nodes/__init__.py +25 -3
kailash/nodes/admin/__init__.py +35 -0
kailash/nodes/admin/audit_log.py +794 -0
kailash/nodes/admin/permission_check.py +864 -0
kailash/nodes/admin/role_management.py +823 -0
kailash/nodes/admin/security_event.py +1519 -0
kailash/nodes/admin/user_management.py +944 -0
kailash/nodes/ai/a2a.py +24 -7
kailash/nodes/ai/ai_providers.py +1 -0
kailash/nodes/ai/embedding_generator.py +11 -11
kailash/nodes/ai/intelligent_agent_orchestrator.py +99 -11
kailash/nodes/ai/llm_agent.py +407 -2
kailash/nodes/ai/self_organizing.py +85 -10
kailash/nodes/api/auth.py +287 -6
kailash/nodes/api/rest.py +151 -0
kailash/nodes/auth/__init__.py +17 -0
kailash/nodes/auth/directory_integration.py +1228 -0
kailash/nodes/auth/enterprise_auth_provider.py +1328 -0
kailash/nodes/auth/mfa.py +2338 -0
kailash/nodes/auth/risk_assessment.py +872 -0
kailash/nodes/auth/session_management.py +1093 -0
kailash/nodes/auth/sso.py +1040 -0
kailash/nodes/base.py +344 -13
kailash/nodes/base_cycle_aware.py +4 -2
kailash/nodes/base_with_acl.py +1 -1
kailash/nodes/code/python.py +283 -10
kailash/nodes/compliance/__init__.py +9 -0
kailash/nodes/compliance/data_retention.py +1888 -0
kailash/nodes/compliance/gdpr.py +2004 -0
kailash/nodes/data/__init__.py +22 -2
kailash/nodes/data/async_connection.py +469 -0
kailash/nodes/data/async_sql.py +757 -0
kailash/nodes/data/async_vector.py +598 -0
kailash/nodes/data/readers.py +767 -0
kailash/nodes/data/retrieval.py +360 -1
kailash/nodes/data/sharepoint_graph.py +397 -21
kailash/nodes/data/sql.py +94 -5
kailash/nodes/data/streaming.py +68 -8
kailash/nodes/data/vector_db.py +54 -4
kailash/nodes/enterprise/__init__.py +13 -0
kailash/nodes/enterprise/batch_processor.py +741 -0
kailash/nodes/enterprise/data_lineage.py +497 -0
kailash/nodes/logic/convergence.py +31 -9
kailash/nodes/logic/operations.py +14 -3
kailash/nodes/mixins/__init__.py +8 -0
kailash/nodes/mixins/event_emitter.py +201 -0
kailash/nodes/mixins/mcp.py +9 -4
kailash/nodes/mixins/security.py +165 -0
kailash/nodes/monitoring/__init__.py +7 -0
kailash/nodes/monitoring/performance_benchmark.py +2497 -0
kailash/nodes/rag/__init__.py +284 -0
kailash/nodes/rag/advanced.py +1615 -0
kailash/nodes/rag/agentic.py +773 -0
kailash/nodes/rag/conversational.py +999 -0
kailash/nodes/rag/evaluation.py +875 -0
kailash/nodes/rag/federated.py +1188 -0
kailash/nodes/rag/graph.py +721 -0
kailash/nodes/rag/multimodal.py +671 -0
kailash/nodes/rag/optimized.py +933 -0
kailash/nodes/rag/privacy.py +1059 -0
kailash/nodes/rag/query_processing.py +1335 -0
kailash/nodes/rag/realtime.py +764 -0
kailash/nodes/rag/registry.py +547 -0
kailash/nodes/rag/router.py +837 -0
kailash/nodes/rag/similarity.py +1854 -0
kailash/nodes/rag/strategies.py +566 -0
kailash/nodes/rag/workflows.py +575 -0
kailash/nodes/security/__init__.py +19 -0
kailash/nodes/security/abac_evaluator.py +1411 -0
kailash/nodes/security/audit_log.py +91 -0
kailash/nodes/security/behavior_analysis.py +1893 -0
kailash/nodes/security/credential_manager.py +401 -0
kailash/nodes/security/rotating_credentials.py +760 -0
kailash/nodes/security/security_event.py +132 -0
kailash/nodes/security/threat_detection.py +1103 -0
kailash/nodes/testing/__init__.py +9 -0
kailash/nodes/testing/credential_testing.py +499 -0
kailash/nodes/transform/__init__.py +10 -2
kailash/nodes/transform/chunkers.py +592 -1
kailash/nodes/transform/processors.py +484 -14
kailash/nodes/validation.py +321 -0
kailash/runtime/access_controlled.py +1 -1
kailash/runtime/async_local.py +41 -7
kailash/runtime/docker.py +1 -1
kailash/runtime/local.py +474 -55
kailash/runtime/parallel.py +1 -1
kailash/runtime/parallel_cyclic.py +1 -1
kailash/runtime/testing.py +210 -2
kailash/utils/migrations/__init__.py +25 -0
kailash/utils/migrations/generator.py +433 -0
kailash/utils/migrations/models.py +231 -0
kailash/utils/migrations/runner.py +489 -0
kailash/utils/secure_logging.py +342 -0
kailash/workflow/__init__.py +16 -0
kailash/workflow/cyclic_runner.py +3 -4
kailash/workflow/graph.py +70 -2
kailash/workflow/resilience.py +249 -0
kailash/workflow/templates.py +726 -0
{kailash-0.3.2.dist-info → kailash-0.4.0.dist-info}/METADATA +253 -20
kailash-0.4.0.dist-info/RECORD +223 -0
kailash/api/__init__.py +0 -17
kailash/api/__main__.py +0 -6
kailash/api/studio_secure.py +0 -893
kailash/mcp/__main__.py +0 -13
kailash/mcp/server_new.py +0 -336
kailash/mcp/servers/__init__.py +0 -12
kailash-0.3.2.dist-info/RECORD +0 -136
{kailash-0.3.2.dist-info → kailash-0.4.0.dist-info}/WHEEL +0 -0
{kailash-0.3.2.dist-info → kailash-0.4.0.dist-info}/entry_points.txt +0 -0
{kailash-0.3.2.dist-info → kailash-0.4.0.dist-info}/licenses/LICENSE +0 -0
{kailash-0.3.2.dist-info → kailash-0.4.0.dist-info}/top_level.txt +0 -0

kailash/database/execution_pipeline.py ADDED Viewed

@@ -0,0 +1,499 @@
+"""Database execution pipeline for clean separation of concerns.
+This module provides a pipeline-based approach to database operations,
+separating permission checking, query execution, and data masking into
+clear, testable stages.
+"""
+import logging
+import time
+from abc import ABC, abstractmethod
+from dataclasses import dataclass
+from typing import Any, Dict, List, Optional, Union
+from kailash.access_control import NodePermission, UserContext
+from kailash.sdk_exceptions import NodeExecutionError
+logger = logging.getLogger(__name__)
+@dataclass
+class ExecutionContext:
+    """Input shared by every stage of the database execution pipeline.
+    One instance describes a single query request. The built-in stages in
+    this module only read it; none of them assigns to its fields.
+    """
+    # SQL text; checked by QueryValidationStage and handed to the executor.
+    query: str
+    # Bind parameters forwarded to the executor together with the query.
+    parameters: Optional[Union[Dict[str, Any], List[Any]]] = None
+    # Requesting user; when falsy, permission checks and masking are skipped.
+    user_context: Optional[UserContext] = None
+    # Node identifier passed to the access control manager and used in logs.
+    node_name: str = "unknown_node"
+    # Requested result shape; DataMaskingStage only masks when this is "dict".
+    result_format: str = "dict"
+    # Extra context forwarded to the access control manager's permission check.
+    runtime_context: Optional[Dict[str, Any]] = None
+@dataclass
+class ExecutionResult:
+    """Output produced by the database execution pipeline.
+    Created by QueryExecutionStage, rebuilt by DataMaskingStage with masked
+    rows, and synthesised (empty) by the pipeline when no stage produced one.
+    """
+    # Query payload; a list of rows when the executor returns structured data.
+    data: Any
+    # Number of rows reported by (or derived from) the executor result.
+    row_count: int
+    # Column names; empty when the executor did not supply them.
+    columns: List[str]
+    # Elapsed wall-clock seconds, measured with time.time().
+    execution_time: float
+    # Optional executor-supplied extras; None when not provided.
+    metadata: Optional[Dict[str, Any]] = None
+class PipelineStage(ABC):
+    """Interface implemented by every step of the database execution pipeline."""
+    @abstractmethod
+    async def process(
+        self, context: ExecutionContext, result: Optional[ExecutionResult] = None
+    ) -> Optional[ExecutionResult]:
+        """Run this stage of the pipeline.
+        Args:
+            context: Execution context shared by all stages.
+            result: Output of the preceding stage, or ``None`` when no stage
+                has produced a result yet.
+        Returns:
+            The result to hand to the following stage, or ``None`` (which the
+            pipeline may treat as a request to stop).
+        """
+    @abstractmethod
+    def get_stage_name(self) -> str:
+        """Return the identifier of this pipeline stage."""
+class PermissionCheckStage(PipelineStage):
+    """Pipeline stage that enforces execute permission before a query runs."""
+    def __init__(self, access_control_manager=None):
+        """Create the stage.
+        Args:
+            access_control_manager: Object whose ``check_node_access`` method
+                decides whether the user may execute the node. When falsy the
+                stage lets every request through.
+        """
+        self.logger = logging.getLogger(f"{__name__}.PermissionCheckStage")
+        self.access_control_manager = access_control_manager
+    async def process(
+        self, context: ExecutionContext, result: Optional[ExecutionResult] = None
+    ) -> Optional[ExecutionResult]:
+        """Verify that the user in ``context`` may execute the node.
+        Args:
+            context: Execution context carrying the user and node name.
+            result: Result from the previous stage; returned unchanged.
+        Returns:
+            The ``result`` argument, untouched.
+        Raises:
+            NodeExecutionError: If the access decision is not allowed.
+        """
+        # Without both a manager and a user there is nothing to check against.
+        if not (self.access_control_manager and context.user_context):
+            self.logger.debug(
+                "Skipping permission check - no access control or user context"
+            )
+            return result
+        verdict = self.access_control_manager.check_node_access(
+            context.user_context,
+            context.node_name,
+            NodePermission.EXECUTE,
+            context.runtime_context,
+        )
+        if verdict.allowed:
+            self.logger.debug(
+                f"Permission granted for {context.node_name}: {verdict.reason}"
+            )
+            return result
+        raise NodeExecutionError(f"Access denied: {verdict.reason}")
+    def get_stage_name(self) -> str:
+        """Return the identifier of this stage."""
+        return "permission_check"
+class QueryValidationStage(PipelineStage):
+    """Pipeline stage that sanity-checks the SQL text before it is executed."""
+    def __init__(self, validation_rules: Optional[Dict[str, Any]] = None):
+        """Create the stage.
+        Args:
+            validation_rules: Custom validation rules. They are stored on the
+                instance; the checks in this class do not consult them.
+        """
+        self.logger = logging.getLogger(f"{__name__}.QueryValidationStage")
+        self.validation_rules = validation_rules or {}
+    async def process(
+        self, context: ExecutionContext, result: Optional[ExecutionResult] = None
+    ) -> Optional[ExecutionResult]:
+        """Reject empty queries and flag risky keywords.
+        Args:
+            context: Execution context carrying the query text.
+            result: Result from the previous stage; returned unchanged.
+        Returns:
+            The ``result`` argument, untouched.
+        Raises:
+            NodeExecutionError: If the query is empty.
+        """
+        sql = context.query
+        if not sql:
+            raise NodeExecutionError("Query cannot be empty")
+        # Keyword scan only logs warnings; it never blocks the query.
+        self._validate_query_safety(sql)
+        self.logger.debug(f"Query validation passed for: {sql[:100]}...")
+        return result
+    def _validate_query_safety(self, query: str) -> None:
+        """Log a warning for each risky SQL keyword found in ``query``.
+        Matching is case-insensitive and on whole words only.
+        Args:
+            query: SQL text to inspect; an empty value is ignored.
+        """
+        import re
+        if not query:
+            return
+        flagged_keywords = (
+            "DROP",
+            "DELETE",
+            "TRUNCATE",
+            "ALTER",
+            "CREATE",
+            "GRANT",
+            "REVOKE",
+            "EXEC",
+            "EXECUTE",
+            "SHUTDOWN",
+            "BACKUP",
+            "RESTORE",
+        )
+        normalized = query.upper().strip()
+        for keyword in flagged_keywords:
+            if not re.search(rf"\b{re.escape(keyword)}\b", normalized):
+                continue
+            # NOTE: deliberately warn-only; a stricter deployment could raise
+            # NodeExecutionError here to block such queries entirely.
+            self.logger.warning(
+                f"Query contains potentially dangerous keyword: {keyword}"
+            )
+    def get_stage_name(self) -> str:
+        """Return the identifier of this stage."""
+        return "query_validation"
+class QueryExecutionStage(PipelineStage):
+    """Pipeline stage that runs the SQL query through the configured executor."""
+    def __init__(self, query_executor):
+        """Create the stage.
+        Args:
+            query_executor: Either an object exposing an awaitable
+                ``execute_query(query, parameters, result_format)`` method, or
+                an awaitable callable taking ``(query, parameters)``.
+        """
+        self.logger = logging.getLogger(f"{__name__}.QueryExecutionStage")
+        self.query_executor = query_executor
+    async def process(
+        self, context: ExecutionContext, result: Optional[ExecutionResult] = None
+    ) -> Optional[ExecutionResult]:
+        """Execute the query and wrap the outcome in an ``ExecutionResult``.
+        Args:
+            context: Execution context carrying query, parameters and format.
+            result: Result from the previous stage; not used by this stage.
+        Returns:
+            A new ``ExecutionResult``. A dict returned by the executor is read
+            through its ``data``/``row_count``/``columns``/``metadata`` keys;
+            any other value becomes ``data`` as-is.
+        Raises:
+            NodeExecutionError: If the executor (or result wrapping) raises.
+        """
+        started = time.time()
+        try:
+            executor = self.query_executor
+            # Prefer the richer executor interface; otherwise treat the
+            # executor itself as an awaitable callable.
+            if hasattr(executor, "execute_query"):
+                pending = executor.execute_query(
+                    context.query, context.parameters, context.result_format
+                )
+            else:
+                pending = executor(context.query, context.parameters)
+            raw = await pending
+            elapsed = time.time() - started
+            if not isinstance(raw, dict):
+                # Raw result: only a list has a meaningful length; anything
+                # else is counted as a single row.
+                row_total = len(raw) if isinstance(raw, list) else 1
+                return ExecutionResult(
+                    data=raw,
+                    row_count=row_total,
+                    columns=[],
+                    execution_time=elapsed,
+                )
+            # Structured result: pull the known keys, with defaults.
+            return ExecutionResult(
+                data=raw.get("data", []),
+                row_count=raw.get("row_count", 0),
+                columns=raw.get("columns", []),
+                execution_time=elapsed,
+                metadata=raw.get("metadata", {}),
+            )
+        except Exception as e:
+            elapsed = time.time() - started
+            self.logger.error(f"Query execution failed after {elapsed:.3f}s: {e}")
+            raise NodeExecutionError(f"Database query failed: {e}") from e
+    def get_stage_name(self) -> str:
+        """Return the identifier of this stage."""
+        return "query_execution"
+class DataMaskingStage(PipelineStage):
+    """Pipeline stage that masks result rows according to the requesting user."""
+    def __init__(self, access_control_manager=None):
+        """Create the stage.
+        Args:
+            access_control_manager: Object that may expose
+                ``apply_data_masking(user_context, node_name, row)``. When it
+                is falsy, or lacks that method, rows pass through unmasked.
+        """
+        self.logger = logging.getLogger(f"{__name__}.DataMaskingStage")
+        self.access_control_manager = access_control_manager
+    async def process(
+        self, context: ExecutionContext, result: Optional[ExecutionResult] = None
+    ) -> Optional[ExecutionResult]:
+        """Mask each dict row of ``result`` for the user in ``context``.
+        Args:
+            context: Execution context carrying the user, node name and format.
+            result: Result produced by the execution stage, if any.
+        Returns:
+            ``result`` unchanged when there is nothing to mask or masking does
+            not apply; otherwise a new ``ExecutionResult`` whose ``data`` holds
+            the masked rows and whose other fields are copied over.
+        """
+        # Nothing to mask when no result (or no data) has been produced.
+        if not result or not result.data:
+            return result
+        manager = self.access_control_manager
+        if not (manager and context.user_context):
+            self.logger.debug(
+                "Skipping data masking - no access control or user context"
+            )
+            return result
+        # Masking is only defined for a list of rows in "dict" format.
+        maskable = context.result_format == "dict" and isinstance(result.data, list)
+        if not maskable:
+            self.logger.debug("Skipping data masking - data format not supported")
+            return result
+        # Dict rows go through the manager when it supports masking; every
+        # other row is kept as-is.
+        masked_rows = [
+            (
+                manager.apply_data_masking(
+                    context.user_context, context.node_name, row
+                )
+                if isinstance(row, dict) and hasattr(manager, "apply_data_masking")
+                else row
+            )
+            for row in result.data
+        ]
+        return ExecutionResult(
+            data=masked_rows,
+            row_count=result.row_count,
+            columns=result.columns,
+            execution_time=result.execution_time,
+            metadata=result.metadata,
+        )
+    def get_stage_name(self) -> str:
+        """Return the identifier of this stage."""
+        return "data_masking"
+class DatabaseExecutionPipeline:
+    """Run database operations through an ordered list of pipeline stages.
+    The default pipeline is assembled in this order:
+    1. Permission checking
+    2. Query validation
+    3. Custom pre-execution stages
+    4. Query execution (only when a ``query_executor`` is supplied)
+    5. Data masking
+    6. Custom stages named ``"post_processing"``
+    Example:
+        >>> pipeline = DatabaseExecutionPipeline(
+        ...     access_control_manager=access_manager,
+        ...     query_executor=my_executor
+        ... )
+        >>>
+        >>> context = ExecutionContext(
+        ...     query="SELECT * FROM users",
+        ...     user_context=user,
+        ...     node_name="user_query"
+        ... )
+        >>>
+        >>> result = await pipeline.execute(context)
+    """
+    def __init__(
+        self,
+        access_control_manager=None,
+        query_executor=None,
+        validation_rules: Optional[Dict[str, Any]] = None,
+        custom_stages: Optional[List[PipelineStage]] = None,
+    ):
+        """Build the default stage list.
+        Args:
+            access_control_manager: Access control manager used for the
+                permission check and for data masking.
+            query_executor: Object that can execute database queries; when
+                falsy no execution stage is registered.
+            validation_rules: Custom validation rules for queries.
+            custom_stages: Additional stages. A stage named
+                ``"post_processing"`` runs after data masking, a stage named
+                ``"query_execution"`` is ignored (the built-in execution stage
+                is used), and every other stage runs before execution.
+        """
+        self.access_control_manager = access_control_manager
+        self.query_executor = query_executor
+        self.logger = logging.getLogger(f"{__name__}.DatabaseExecutionPipeline")
+        extra_stages = list(custom_stages or [])
+        # A "post_processing" stage must be registered exactly once, after
+        # masking; it is therefore excluded from the pre-execution group.
+        pre_execution = [
+            stage
+            for stage in extra_stages
+            if stage.get_stage_name() not in ("query_execution", "post_processing")
+        ]
+        post_execution = [
+            stage
+            for stage in extra_stages
+            if stage.get_stage_name() == "post_processing"
+        ]
+        # 1. Permission check, 2. query validation
+        self.stages: List[PipelineStage] = [
+            PermissionCheckStage(access_control_manager),
+            QueryValidationStage(validation_rules),
+        ]
+        # 3. Custom stages (before execution)
+        self.stages.extend(pre_execution)
+        # 4. Query execution
+        if query_executor:
+            self.stages.append(QueryExecutionStage(query_executor))
+        # 5. Data masking
+        self.stages.append(DataMaskingStage(access_control_manager))
+        # 6. Custom stages (after execution)
+        self.stages.extend(post_execution)
+        self.logger.info(f"Initialized pipeline with {len(self.stages)} stages")
+    async def execute(self, context: ExecutionContext) -> ExecutionResult:
+        """Execute the full database pipeline.
+        Stages run in list order, each receiving the previous stage's result.
+        Stages that run before any result exists (permission check,
+        validation) naturally pass ``None`` along, so ``None`` only stops the
+        pipeline when a stage discards a result it was given.
+        Args:
+            context: Execution context with query, user, etc.
+        Returns:
+            The final stage result, or an empty ``ExecutionResult`` when no
+            stage produced one.
+        Raises:
+            NodeExecutionError: If a built-in stage fails. Any exception
+                raised by a stage is logged and re-raised unchanged.
+        """
+        self.logger.debug(f"Starting pipeline execution for {context.node_name}")
+        result: Optional[ExecutionResult] = None
+        pipeline_start = time.time()
+        try:
+            stage_total = len(self.stages)
+            for number, stage in enumerate(self.stages, start=1):
+                stage_name = stage.get_stage_name()
+                previous = result
+                stage_start = time.time()
+                try:
+                    result = await stage.process(context, previous)
+                except Exception as e:
+                    self.logger.error(f"Pipeline failed at stage {stage_name}: {e}")
+                    raise
+                stage_time = time.time() - stage_start
+                self.logger.debug(
+                    f"Stage {number}/{stage_total} ({stage_name}) "
+                    f"completed in {stage_time:.3f}s"
+                )
+                # Stop only when a stage dropped an existing result. Treating
+                # every None as "stop" would halt at the validation stage,
+                # before the query has been executed.
+                if result is None and previous is not None:
+                    self.logger.warning(f"Pipeline stopped at stage: {stage_name}")
+                    break
+            pipeline_time = time.time() - pipeline_start
+            self.logger.info(f"Pipeline execution completed in {pipeline_time:.3f}s")
+            # Ensure callers always receive a result object.
+            if result is None:
+                result = ExecutionResult(
+                    data=[],
+                    row_count=0,
+                    columns=[],
+                    execution_time=pipeline_time,
+                )
+            return result
+        except Exception as e:
+            pipeline_time = time.time() - pipeline_start
+            self.logger.error(
+                f"Pipeline execution failed after {pipeline_time:.3f}s: {e}"
+            )
+            raise
+    def add_stage(self, stage: PipelineStage, position: Optional[int] = None) -> None:
+        """Add a custom stage to the pipeline.
+        Args:
+            stage: Pipeline stage to add.
+            position: Index to insert at, with ``list.insert`` semantics
+                (``None`` appends to the end).
+        """
+        if position is None:
+            self.stages.append(stage)
+            logged_position = len(self.stages) - 1
+        else:
+            self.stages.insert(position, stage)
+            # Compare against None explicitly: position 0 is a valid index and
+            # must not be mistaken for "not given".
+            logged_position = position
+        self.logger.info(
+            f"Added stage {stage.get_stage_name()} at position {logged_position}"
+        )
+    def remove_stage(self, stage_name: str) -> bool:
+        """Remove every stage with the given name from the pipeline.
+        Args:
+            stage_name: Name of the stage(s) to remove.
+        Returns:
+            True if at least one stage was found and removed.
+        """
+        remaining = [s for s in self.stages if s.get_stage_name() != stage_name]
+        removed = len(remaining) < len(self.stages)
+        self.stages = remaining
+        if removed:
+            self.logger.info(f"Removed stage {stage_name}")
+        return removed
+    def get_stage_info(self) -> List[Dict[str, str]]:
+        """Describe the pipeline stages in execution order.
+        Returns:
+            One dictionary per stage with its ``name`` and class ``type``.
+        """
+        return [
+            {
+                "name": stage.get_stage_name(),
+                "type": type(stage).__name__,
+            }
+            for stage in self.stages
+        ]
+# Public API of this module: the names exposed by a star-import.
+__all__ = [
+    "ExecutionContext",
+    "ExecutionResult",
+    "PipelineStage",
+    "PermissionCheckStage",
+    "QueryValidationStage",
+    "QueryExecutionStage",
+    "DataMaskingStage",
+    "DatabaseExecutionPipeline",
+]

kailash 0.3.2__py3-none-any.whl → 0.4.0__py3-none-any.whl

kailash 0.3.2-py3-none-any.whl → 0.4.0-py3-none-any.whl