mdb-engine 0.1.6-py3-none-any.whl → 0.2.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. mdb_engine/__init__.py +104 -11
  2. mdb_engine/auth/ARCHITECTURE.md +112 -0
  3. mdb_engine/auth/README.md +648 -11
  4. mdb_engine/auth/__init__.py +136 -29
  5. mdb_engine/auth/audit.py +592 -0
  6. mdb_engine/auth/base.py +252 -0
  7. mdb_engine/auth/casbin_factory.py +264 -69
  8. mdb_engine/auth/config_helpers.py +7 -6
  9. mdb_engine/auth/cookie_utils.py +3 -7
  10. mdb_engine/auth/csrf.py +373 -0
  11. mdb_engine/auth/decorators.py +3 -10
  12. mdb_engine/auth/dependencies.py +47 -50
  13. mdb_engine/auth/helpers.py +3 -3
  14. mdb_engine/auth/integration.py +53 -80
  15. mdb_engine/auth/jwt.py +2 -6
  16. mdb_engine/auth/middleware.py +77 -34
  17. mdb_engine/auth/oso_factory.py +18 -38
  18. mdb_engine/auth/provider.py +270 -171
  19. mdb_engine/auth/rate_limiter.py +504 -0
  20. mdb_engine/auth/restrictions.py +8 -24
  21. mdb_engine/auth/session_manager.py +14 -29
  22. mdb_engine/auth/shared_middleware.py +600 -0
  23. mdb_engine/auth/shared_users.py +759 -0
  24. mdb_engine/auth/token_store.py +14 -28
  25. mdb_engine/auth/users.py +54 -113
  26. mdb_engine/auth/utils.py +213 -15
  27. mdb_engine/cli/commands/generate.py +545 -9
  28. mdb_engine/cli/commands/validate.py +3 -7
  29. mdb_engine/cli/utils.py +3 -3
  30. mdb_engine/config.py +7 -21
  31. mdb_engine/constants.py +65 -0
  32. mdb_engine/core/README.md +117 -6
  33. mdb_engine/core/__init__.py +39 -7
  34. mdb_engine/core/app_registration.py +22 -41
  35. mdb_engine/core/app_secrets.py +290 -0
  36. mdb_engine/core/connection.py +18 -9
  37. mdb_engine/core/encryption.py +223 -0
  38. mdb_engine/core/engine.py +1057 -93
  39. mdb_engine/core/index_management.py +12 -16
  40. mdb_engine/core/manifest.py +459 -150
  41. mdb_engine/core/ray_integration.py +435 -0
  42. mdb_engine/core/seeding.py +10 -18
  43. mdb_engine/core/service_initialization.py +12 -23
  44. mdb_engine/core/types.py +2 -5
  45. mdb_engine/database/README.md +140 -17
  46. mdb_engine/database/__init__.py +17 -6
  47. mdb_engine/database/abstraction.py +25 -37
  48. mdb_engine/database/connection.py +11 -18
  49. mdb_engine/database/query_validator.py +367 -0
  50. mdb_engine/database/resource_limiter.py +204 -0
  51. mdb_engine/database/scoped_wrapper.py +713 -196
  52. mdb_engine/dependencies.py +426 -0
  53. mdb_engine/di/__init__.py +34 -0
  54. mdb_engine/di/container.py +248 -0
  55. mdb_engine/di/providers.py +205 -0
  56. mdb_engine/di/scopes.py +139 -0
  57. mdb_engine/embeddings/README.md +54 -24
  58. mdb_engine/embeddings/__init__.py +31 -24
  59. mdb_engine/embeddings/dependencies.py +37 -154
  60. mdb_engine/embeddings/service.py +11 -25
  61. mdb_engine/exceptions.py +92 -0
  62. mdb_engine/indexes/README.md +30 -13
  63. mdb_engine/indexes/__init__.py +1 -0
  64. mdb_engine/indexes/helpers.py +1 -1
  65. mdb_engine/indexes/manager.py +50 -114
  66. mdb_engine/memory/README.md +2 -2
  67. mdb_engine/memory/__init__.py +1 -2
  68. mdb_engine/memory/service.py +30 -87
  69. mdb_engine/observability/README.md +4 -2
  70. mdb_engine/observability/__init__.py +26 -9
  71. mdb_engine/observability/health.py +8 -9
  72. mdb_engine/observability/metrics.py +32 -12
  73. mdb_engine/repositories/__init__.py +34 -0
  74. mdb_engine/repositories/base.py +325 -0
  75. mdb_engine/repositories/mongo.py +233 -0
  76. mdb_engine/repositories/unit_of_work.py +166 -0
  77. mdb_engine/routing/README.md +1 -1
  78. mdb_engine/routing/__init__.py +1 -3
  79. mdb_engine/routing/websockets.py +25 -60
  80. mdb_engine-0.2.0.dist-info/METADATA +313 -0
  81. mdb_engine-0.2.0.dist-info/RECORD +96 -0
  82. mdb_engine-0.1.6.dist-info/METADATA +0 -213
  83. mdb_engine-0.1.6.dist-info/RECORD +0 -75
  84. {mdb_engine-0.1.6.dist-info → mdb_engine-0.2.0.dist-info}/WHEEL +0 -0
  85. {mdb_engine-0.1.6.dist-info → mdb_engine-0.2.0.dist-info}/entry_points.txt +0 -0
  86. {mdb_engine-0.1.6.dist-info → mdb_engine-0.2.0.dist-info}/licenses/LICENSE +0 -0
  87. {mdb_engine-0.1.6.dist-info → mdb_engine-0.2.0.dist-info}/top_level.txt +0 -0
mdb_engine/database/query_validator.py
@@ -0,0 +1,367 @@
+"""
+Query validation for MongoDB Engine.
+
+This module provides comprehensive query validation to prevent NoSQL injection,
+block dangerous operators, and enforce query complexity limits.
+
+Security Features:
+- Blocks dangerous MongoDB operators ($where, $eval, $function, $accumulator)
+- Prevents deeply nested queries
+- Limits regex complexity to prevent ReDoS attacks
+- Validates aggregation pipelines
+- Prevents NoSQL injection patterns
+"""
+
+import logging
+import re
+from typing import Any, Dict, List, Optional, Set
+
+from ..constants import (
+    DANGEROUS_OPERATORS,
+    MAX_PIPELINE_STAGES,
+    MAX_QUERY_DEPTH,
+    MAX_REGEX_COMPLEXITY,
+    MAX_REGEX_LENGTH,
+    MAX_SORT_FIELDS,
+)
+from ..exceptions import QueryValidationError
+
+logger = logging.getLogger(__name__)
+
+
+class QueryValidator:
+    """
+    Validates MongoDB queries for security and safety.
+
+    This class provides comprehensive validation to prevent:
+    - NoSQL injection attacks
+    - Dangerous operator usage
+    - Resource exhaustion via complex queries
+    - ReDoS attacks via complex regex patterns
+    """
+
+    def __init__(
+        self,
+        max_depth: int = MAX_QUERY_DEPTH,
+        max_pipeline_stages: int = MAX_PIPELINE_STAGES,
+        max_regex_length: int = MAX_REGEX_LENGTH,
+        max_regex_complexity: int = MAX_REGEX_COMPLEXITY,
+        dangerous_operators: Optional[Set[str]] = None,
+    ):
+        """
+        Initialize the query validator.
+
+        Args:
+            max_depth: Maximum nesting depth for queries
+            max_pipeline_stages: Maximum stages in aggregation pipelines
+            max_regex_length: Maximum length for regex patterns
+            max_regex_complexity: Maximum complexity score for regex patterns
+            dangerous_operators: Set of dangerous operators to block
+                (defaults to DANGEROUS_OPERATORS)
+        """
+        self.max_depth = max_depth
+        self.max_pipeline_stages = max_pipeline_stages
+        self.max_regex_length = max_regex_length
+        self.max_regex_complexity = max_regex_complexity
+        # Merge custom dangerous operators with defaults
+        if dangerous_operators is not None:
+            # Convert DANGEROUS_OPERATORS tuple to set for union operation
+            default_ops = (
+                set(DANGEROUS_OPERATORS)
+                if isinstance(DANGEROUS_OPERATORS, tuple)
+                else DANGEROUS_OPERATORS
+            )
+            self.dangerous_operators = default_ops | set(dangerous_operators)
+        else:
+            # Convert tuple to set for consistency
+            self.dangerous_operators = (
+                set(DANGEROUS_OPERATORS)
+                if isinstance(DANGEROUS_OPERATORS, tuple)
+                else DANGEROUS_OPERATORS
+            )
+
+    def validate_filter(self, filter: Optional[Dict[str, Any]], path: str = "") -> None:
+        """
+        Validate a MongoDB query filter.
+
+        Args:
+            filter: The query filter to validate
+            path: JSON path for error reporting (used recursively)
+
+        Raises:
+            QueryValidationError: If the filter contains dangerous operators or exceeds limits
+        """
+        if not filter:
+            return
+
+        if not isinstance(filter, dict):
+            raise QueryValidationError(
+                f"Query filter must be a dictionary, got {type(filter).__name__}",
+                query_type="filter",
+                path=path,
+            )
+
+        # Check for dangerous operators and validate depth
+        self._check_dangerous_operators(filter, path)
+        self._check_query_depth(filter, path, depth=0)
+
+    def validate_pipeline(self, pipeline: List[Dict[str, Any]]) -> None:
+        """
+        Validate an aggregation pipeline.
+
+        Args:
+            pipeline: The aggregation pipeline to validate
+
+        Raises:
+            QueryValidationError: If the pipeline exceeds limits or contains dangerous operators
+        """
+        if not pipeline:
+            return
+
+        if not isinstance(pipeline, list):
+            raise QueryValidationError(
+                f"Aggregation pipeline must be a list, got {type(pipeline).__name__}",
+                query_type="pipeline",
+            )
+
+        # Check pipeline length
+        if len(pipeline) > self.max_pipeline_stages:
+            raise QueryValidationError(
+                f"Aggregation pipeline exceeds maximum stages: "
+                f"{len(pipeline)} > {self.max_pipeline_stages}",
+                query_type="pipeline",
+                context={
+                    "stages": len(pipeline),
+                    "max_stages": self.max_pipeline_stages,
+                },
+            )
+
+        # Validate each stage
+        for idx, stage in enumerate(pipeline):
+            if not isinstance(stage, dict):
+                raise QueryValidationError(
+                    f"Pipeline stage {idx} must be a dictionary, got {type(stage).__name__}",
+                    query_type="pipeline",
+                    path=f"$[{idx}]",
+                )
+
+            # Check for dangerous operators in each stage
+            stage_path = f"$[{idx}]"
+            self._check_dangerous_operators(stage, stage_path)
+            self._check_query_depth(stage, stage_path, depth=0)
+
+    def validate_regex(self, pattern: str, path: str = "") -> None:
+        """
+        Validate a regex pattern to prevent ReDoS attacks.
+
+        Args:
+            pattern: The regex pattern to validate
+            path: JSON path for error reporting
+
+        Raises:
+            QueryValidationError: If the regex pattern is too complex or long
+        """
+        if not isinstance(pattern, str):
+            return  # Not a regex pattern
+
+        # Check length
+        if len(pattern) > self.max_regex_length:
+            raise QueryValidationError(
+                f"Regex pattern exceeds maximum length: "
+                f"{len(pattern)} > {self.max_regex_length}",
+                query_type="regex",
+                path=path,
+                context={
+                    "length": len(pattern),
+                    "max_length": self.max_regex_length,
+                },
+            )
+
+        # Check complexity (simple heuristic: count quantifiers and alternations)
+        complexity = self._calculate_regex_complexity(pattern)
+        if complexity > self.max_regex_complexity:
+            raise QueryValidationError(
+                f"Regex pattern exceeds maximum complexity: "
+                f"{complexity} > {self.max_regex_complexity}",
+                query_type="regex",
+                path=path,
+                context={
+                    "complexity": complexity,
+                    "max_complexity": self.max_regex_complexity,
+                },
+            )
+
+        # Try to compile the regex to catch syntax errors early
+        try:
+            re.compile(pattern)
+        except re.error as e:
+            raise QueryValidationError(
+                f"Invalid regex pattern: {e}",
+                query_type="regex",
+                path=path,
+            ) from e
+
+    def validate_sort(self, sort: Optional[Any]) -> None:
+        """
+        Validate a sort specification.
+
+        Args:
+            sort: The sort specification to validate
+
+        Raises:
+            QueryValidationError: If the sort specification exceeds limits
+        """
+        if not sort:
+            return
+
+        # Count sort fields
+        sort_fields = self._extract_sort_fields(sort)
+        if len(sort_fields) > MAX_SORT_FIELDS:
+            raise QueryValidationError(
+                f"Sort specification exceeds maximum fields: "
+                f"{len(sort_fields)} > {MAX_SORT_FIELDS}",
+                query_type="sort",
+                context={
+                    "fields": len(sort_fields),
+                    "max_fields": MAX_SORT_FIELDS,
+                },
+            )
+
+    def _check_dangerous_operators(
+        self, query: Dict[str, Any], path: str = "", depth: int = 0
+    ) -> None:
+        """
+        Recursively check for dangerous operators in a query.
+
+        Args:
+            query: The query dictionary to check
+            path: Current JSON path for error reporting
+            depth: Current nesting depth
+
+        Raises:
+            QueryValidationError: If a dangerous operator is found
+        """
+        if depth > self.max_depth:
+            raise QueryValidationError(
+                f"Query exceeds maximum nesting depth: {depth} > {self.max_depth}",
+                query_type="filter",
+                path=path,
+                context={"depth": depth, "max_depth": self.max_depth},
+            )
+
+        for key, value in query.items():
+            current_path = f"{path}.{key}" if path else key
+
+            # Check if key is a dangerous operator
+            if key in self.dangerous_operators:
+                logger.warning(
+                    f"Security: Dangerous operator '{key}' detected in query "
+                    f"at path '{current_path}'"
+                )
+                raise QueryValidationError(
+                    f"Dangerous operator '{key}' is not allowed for security reasons. "
+                    f"Found at path: {current_path}",
+                    query_type="filter",
+                    operator=key,
+                    path=current_path,
+                )
+
+            # Recursively check nested dictionaries
+            if isinstance(value, dict):
+                # Check for $regex operator and validate pattern
+                if "$regex" in value:
+                    regex_pattern = value["$regex"]
+                    if isinstance(regex_pattern, str):
+                        self.validate_regex(regex_pattern, f"{current_path}.$regex")
+                self._check_dangerous_operators(value, current_path, depth + 1)
+            elif isinstance(value, list):
+                # Check list elements
+                for idx, item in enumerate(value):
+                    if isinstance(item, dict):
+                        item_path = f"{current_path}[{idx}]"
+                        # Check for $regex in list items
+                        if "$regex" in item and isinstance(item["$regex"], str):
+                            self.validate_regex(item["$regex"], f"{item_path}.$regex")
+                        self._check_dangerous_operators(item, item_path, depth + 1)
+            elif isinstance(value, str) and key == "$regex":
+                # Direct $regex value (less common but possible)
+                self.validate_regex(value, current_path)
+
+    def _check_query_depth(self, query: Dict[str, Any], path: str = "", depth: int = 0) -> None:
+        """
+        Check query nesting depth.
+
+        Args:
+            query: The query dictionary to check
+            path: Current JSON path for error reporting
+            depth: Current nesting depth
+
+        Raises:
+            QueryValidationError: If query depth exceeds maximum
+        """
+        if depth > self.max_depth:
+            raise QueryValidationError(
+                f"Query exceeds maximum nesting depth: {depth} > {self.max_depth}",
+                query_type="filter",
+                path=path,
+                context={"depth": depth, "max_depth": self.max_depth},
+            )
+
+        # Recursively check nested dictionaries
+        for key, value in query.items():
+            current_path = f"{path}.{key}" if path else key
+
+            if isinstance(value, dict):
+                self._check_query_depth(value, current_path, depth + 1)
+            elif isinstance(value, list):
+                for idx, item in enumerate(value):
+                    if isinstance(item, dict):
+                        item_path = f"{current_path}[{idx}]"
+                        self._check_query_depth(item, item_path, depth + 1)
+
+    def _calculate_regex_complexity(self, pattern: str) -> int:
+        """
+        Calculate a complexity score for a regex pattern.
+
+        This is a simple heuristic to detect potentially dangerous regex patterns
+        that could cause ReDoS attacks.
+
+        Args:
+            pattern: The regex pattern
+
+        Returns:
+            Complexity score (higher = more complex)
+        """
+        complexity = 0
+
+        # Count quantifiers (can cause backtracking)
+        complexity += len(re.findall(r"[*+?{]", pattern))
+
+        # Count alternations (can cause exponential growth)
+        complexity += len(re.findall(r"\|", pattern))
+
+        # Count nested groups (can cause deep backtracking)
+        complexity += len(re.findall(r"\([^)]*\([^)]*\)", pattern))
+
+        # Count lookahead/lookbehind (can be expensive)
+        complexity += len(re.findall(r"\(\?[=!<>]", pattern))
+
+        return complexity
+
+    def _extract_sort_fields(self, sort: Any) -> List[str]:
+        """
+        Extract field names from a sort specification.
+
+        Args:
+            sort: Sort specification (list of tuples, dict, or single tuple)
+
+        Returns:
+            List of field names
+        """
+        if isinstance(sort, list):
+            return [field for field, _ in sort if isinstance(field, str)]
+        elif isinstance(sort, dict):
+            return list(sort.keys())
+        elif isinstance(sort, tuple) and len(sort) == 2:
+            return [sort[0]] if isinstance(sort[0], str) else []
+        return []
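For orientation, here is a minimal usage sketch (not part of the diff) that exercises the public methods added above. The filter, pipeline, and sort values are illustrative only, and how the engine's scoped wrapper wires the validator in automatically is not shown in this file.

# Usage sketch (not from the diff): exercising the QueryValidator API added above.
# The filter/pipeline/sort values are made up for illustration.
from mdb_engine.database.query_validator import QueryValidator
from mdb_engine.exceptions import QueryValidationError

validator = QueryValidator()  # limits default to the values in mdb_engine.constants

try:
    validator.validate_filter({"status": "active", "age": {"$gte": 18}})
    validator.validate_pipeline([{"$match": {"status": "active"}}, {"$limit": 10}])
    validator.validate_sort([("created_at", -1)])
except QueryValidationError as exc:
    # The exception carries query_type, operator, and path details (see above).
    print(f"Rejected query: {exc}")

# A filter using a blocked operator such as $where raises QueryValidationError.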
mdb_engine/database/resource_limiter.py
@@ -0,0 +1,204 @@
+"""
+Resource limiting for MongoDB Engine.
+
+This module provides resource limit enforcement to prevent resource exhaustion
+and ensure fair resource usage across applications.
+
+Features:
+- Query timeout enforcement
+- Result size limits
+- Document size validation
+- Connection limit tracking
+"""
+
+import logging
+from typing import Any, Dict, Optional
+
+from bson import encode as bson_encode
+from bson.errors import InvalidDocument
+
+from ..constants import (
+    DEFAULT_MAX_TIME_MS,
+    MAX_CURSOR_BATCH_SIZE,
+    MAX_DOCUMENT_SIZE,
+    MAX_QUERY_RESULT_SIZE,
+    MAX_QUERY_TIME_MS,
+)
+from ..exceptions import ResourceLimitExceeded
+
+logger = logging.getLogger(__name__)
+
+
+class ResourceLimiter:
+    """
+    Enforces resource limits on MongoDB operations.
+
+    This class provides resource limit enforcement to prevent:
+    - Query timeouts
+    - Excessive result sizes
+    - Oversized documents
+    - Resource exhaustion
+    """
+
+    def __init__(
+        self,
+        default_timeout_ms: int = DEFAULT_MAX_TIME_MS,
+        max_timeout_ms: int = MAX_QUERY_TIME_MS,
+        max_result_size: int = MAX_QUERY_RESULT_SIZE,
+        max_batch_size: int = MAX_CURSOR_BATCH_SIZE,
+        max_document_size: int = MAX_DOCUMENT_SIZE,
+    ):
+        """
+        Initialize the resource limiter.
+
+        Args:
+            default_timeout_ms: Default query timeout in milliseconds
+            max_timeout_ms: Maximum allowed query timeout in milliseconds
+            max_result_size: Maximum number of documents in a result set
+            max_batch_size: Maximum batch size for cursor operations
+            max_document_size: Maximum document size in bytes
+        """
+        self.default_timeout_ms = default_timeout_ms
+        self.max_timeout_ms = max_timeout_ms
+        self.max_result_size = max_result_size
+        self.max_batch_size = max_batch_size
+        self.max_document_size = max_document_size
+
+    def enforce_query_timeout(
+        self, kwargs: Dict[str, Any], default_timeout: Optional[int] = None
+    ) -> Dict[str, Any]:
+        """
+        Enforce query timeout by adding maxTimeMS if not present.
+
+        Args:
+            kwargs: Query keyword arguments
+            default_timeout: Default timeout to use (defaults to self.default_timeout_ms)
+
+        Returns:
+            Updated kwargs with maxTimeMS added if needed
+        """
+        kwargs = dict(kwargs)  # Create a copy to avoid mutating original
+
+        default = default_timeout if default_timeout is not None else self.default_timeout_ms
+
+        # Check if maxTimeMS is already set
+        if "maxTimeMS" in kwargs:
+            user_timeout = kwargs["maxTimeMS"]
+            # Validate user-provided timeout doesn't exceed maximum
+            if user_timeout > self.max_timeout_ms:
+                logger.warning(
+                    f"Query timeout {user_timeout}ms exceeds maximum {self.max_timeout_ms}ms. "
+                    f"Capping to {self.max_timeout_ms}ms"
+                )
+                kwargs["maxTimeMS"] = self.max_timeout_ms
+        else:
+            # Add default timeout
+            kwargs["maxTimeMS"] = default
+
+        return kwargs
+
+    def enforce_result_limit(self, limit: Optional[int], max_limit: Optional[int] = None) -> int:
+        """
+        Enforce maximum result limit.
+
+        Args:
+            limit: Requested limit (None means no limit)
+            max_limit: Maximum allowed limit (defaults to self.max_result_size)
+
+        Returns:
+            Enforced limit value (capped to maximum if needed)
+        """
+        max_allowed = max_limit if max_limit is not None else self.max_result_size
+
+        if limit is None:
+            # No limit requested, return max allowed
+            return max_allowed
+
+        if limit > max_allowed:
+            logger.warning(
+                f"Result limit {limit} exceeds maximum {max_allowed}. " f"Capping to {max_allowed}"
+            )
+            return max_allowed
+
+        return limit
+
+    def enforce_batch_size(self, batch_size: Optional[int], max_batch: Optional[int] = None) -> int:
+        """
+        Enforce maximum batch size for cursor operations.
+
+        Args:
+            batch_size: Requested batch size (None means use default)
+            max_batch: Maximum allowed batch size (defaults to self.max_batch_size)
+
+        Returns:
+            Enforced batch size
+        """
+        max_allowed = max_batch if max_batch is not None else self.max_batch_size
+
+        if batch_size is None:
+            return max_allowed
+
+        if batch_size > max_allowed:
+            logger.warning(
+                f"Batch size {batch_size} exceeds maximum {max_allowed}. "
+                f"Capping to {max_allowed}"
+            )
+            return max_allowed
+
+        return batch_size
+
+    def validate_document_size(self, document: Dict[str, Any]) -> None:
+        """
+        Validate that a document doesn't exceed size limits.
+
+        Uses actual BSON encoding for accurate size calculation.
+
+        Args:
+            document: Document to validate
+
+        Raises:
+            ResourceLimitExceeded: If document exceeds size limit
+        """
+        try:
+            # Use actual BSON encoding for accurate size
+            bson_bytes = bson_encode(document)
+            actual_size = len(bson_bytes)
+
+            if actual_size > self.max_document_size:
+                raise ResourceLimitExceeded(
+                    f"Document size {actual_size} bytes exceeds maximum "
+                    f"{self.max_document_size} bytes",
+                    limit_type="document_size",
+                    limit_value=self.max_document_size,
+                    actual_value=actual_size,
+                )
+        except ResourceLimitExceeded:
+            # Re-raise our validation exceptions immediately
+            raise
+        except InvalidDocument as e:
+            # If BSON encoding fails, log warning but don't fail
+            # MongoDB will catch this anyway during actual insert
+            logger.warning(f"Could not encode document as BSON for size validation: {e}")
+
+    def validate_documents_size(self, documents: list[Dict[str, Any]]) -> None:
+        """
+        Validate that multiple documents don't exceed size limits.
+
+        Args:
+            documents: List of documents to validate
+
+        Raises:
+            ResourceLimitExceeded: If any document exceeds size limit
+        """
+        for idx, doc in enumerate(documents):
+            try:
+                self.validate_document_size(doc)
+            except ResourceLimitExceeded as e:
+                # Add document index to error context
+                raise ResourceLimitExceeded(
+                    f"{e.message} (document index: {idx})",
+                    limit_type=e.limit_type,
+                    limit_value=e.limit_value,
+                    actual_value=e.actual_value,
+                    context={**(e.context or {}), "document_index": idx},
+                ) from e
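A similar sketch (again, not part of the diff) for the new ResourceLimiter; the concrete default limits come from mdb_engine/constants.py, which is not reproduced in this diff, and the sample values below are illustrative.

# Usage sketch (not from the diff): applying the ResourceLimiter caps added above.
from mdb_engine.database.resource_limiter import ResourceLimiter
from mdb_engine.exceptions import ResourceLimitExceeded

limiter = ResourceLimiter()  # limits default to the values in mdb_engine.constants

# An oversized caller-supplied maxTimeMS is capped; a missing one gets the default.
find_kwargs = limiter.enforce_query_timeout({"maxTimeMS": 10_000_000})

# None means "no limit requested" and collapses to max_result_size.
limit = limiter.enforce_result_limit(None)
batch_size = limiter.enforce_batch_size(50_000)

try:
    # Raises if any document's BSON encoding exceeds max_document_size.
    limiter.validate_documents_size([{"name": "ok"}, {"blob": "x" * 20_000_000}])
except ResourceLimitExceeded as exc:
    print(f"Rejected write: {exc}")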