PyPI - omnibase_infra - Versions diffs - 0.2.8__py3-none-any.whl → 0.2.9__py3-none-any.whl - Mend

omnibase_infra 0.2.8__py3-none-any.whl → 0.2.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (79)

omnibase_infra/mixins/mixin_async_circuit_breaker.py CHANGED Viewed

@@ -191,6 +191,7 @@ class MixinAsyncCircuitBreaker:
         reset_timeout: float = 60.0,
         service_name: str = "unknown",
         transport_type: EnumInfraTransportType = EnumInfraTransportType.HTTP,
+        half_open_successes: int = 1,
     ) -> None:
         """Initialize circuit breaker state and configuration.
@@ -202,9 +203,11 @@ class MixinAsyncCircuitBreaker:
             reset_timeout: Seconds before automatic reset (default: 60.0)
             service_name: Service identifier for error context (e.g., "kafka.dev")
             transport_type: Transport type for error context (default: HTTP)
+            half_open_successes: Successful requests required to close circuit
+                from half-open state (default: 1)
         Raises:
-            ValueError: If threshold < 1 or reset_timeout < 0
+            ValueError: If threshold < 1 or reset_timeout < 0 or half_open_successes < 1
         Example:
             ```python
@@ -215,6 +218,7 @@ class MixinAsyncCircuitBreaker:
                         reset_timeout=config.circuit_breaker_reset_timeout,
                         service_name=f"my-service.{config.environment}",
                         transport_type=EnumInfraTransportType.HTTP,
+                        half_open_successes=config.circuit_breaker_half_open_successes,
                     )
             ```
         """
@@ -243,15 +247,30 @@ class MixinAsyncCircuitBreaker:
                 parameter="reset_timeout",
                 value=reset_timeout,
             )
+        if half_open_successes < 1:
+            context = ModelInfraErrorContext.with_correlation(
+                transport_type=transport_type,
+                operation="init_circuit_breaker",
+                target_name=service_name,
+            )
+            raise ProtocolConfigurationError(
+                f"Circuit breaker half_open_successes must be >= 1, got {half_open_successes}",
+                context=context,
+                parameter="half_open_successes",
+                value=half_open_successes,
+            )
         # State variables
         self._circuit_breaker_failures = 0
         self._circuit_breaker_open = False
         self._circuit_breaker_open_until: float = 0.0
+        self._circuit_breaker_half_open = False
+        self._circuit_breaker_half_open_success_count = 0
         # Configuration
         self.circuit_breaker_threshold = threshold
         self.circuit_breaker_reset_timeout = reset_timeout
+        self.circuit_breaker_half_open_successes = half_open_successes
         self.service_name = service_name
         self._cb_transport_type = (
             transport_type  # Use private name to avoid property conflicts
@@ -265,6 +284,7 @@ class MixinAsyncCircuitBreaker:
             extra={
                 "threshold": threshold,
                 "reset_timeout": reset_timeout,
+                "half_open_successes": half_open_successes,
                 "transport_type": transport_type.value,
             },
         )
@@ -298,6 +318,7 @@ class MixinAsyncCircuitBreaker:
                         reset_timeout_seconds=60.0,
                         service_name=f"kafka.{environment}",
                         transport_type=EnumInfraTransportType.KAFKA,
+                        half_open_successes=2,
                     )
                     self._init_circuit_breaker_from_config(config)
             ```
@@ -311,6 +332,7 @@ class MixinAsyncCircuitBreaker:
             reset_timeout=config.reset_timeout_seconds,
             service_name=config.service_name,
             transport_type=config.transport_type,
+            half_open_successes=config.half_open_successes,
         )
     async def _check_circuit_breaker(
@@ -388,12 +410,15 @@ class MixinAsyncCircuitBreaker:
             if current_time >= self._circuit_breaker_open_until:
                 # Transition to HALF_OPEN (atomic write protected by caller's lock)
                 self._circuit_breaker_open = False
+                self._circuit_breaker_half_open = True
+                self._circuit_breaker_half_open_success_count = 0
                 self._circuit_breaker_failures = 0
                 logger.info(
                     f"Circuit breaker transitioning to half-open for {self.service_name}",
                     extra={
                         "service": self.service_name,
                         "operation": operation,
+                        "required_successes": self.circuit_breaker_half_open_successes,
                     },
                 )
             else:
@@ -484,6 +509,26 @@ class MixinAsyncCircuitBreaker:
         # Increment failure counter (atomic write protected by caller's lock)
         self._circuit_breaker_failures += 1
+        # If in half-open state, any failure immediately re-opens the circuit
+        if self._circuit_breaker_half_open:
+            self._circuit_breaker_open = True
+            self._circuit_breaker_half_open = False
+            self._circuit_breaker_half_open_success_count = 0
+            self._circuit_breaker_open_until = (
+                time.time() + self.circuit_breaker_reset_timeout
+            )
+            logger.warning(
+                f"Circuit breaker re-opened for {self.service_name} after failure in half-open state",
+                extra={
+                    "service": self.service_name,
+                    "operation": operation,
+                    "reset_timeout": self.circuit_breaker_reset_timeout,
+                    "correlation_id": str(correlation_id) if correlation_id else None,
+                },
+            )
+            return
         # Check if threshold reached
         if self._circuit_breaker_failures >= self.circuit_breaker_threshold:
             # Transition to OPEN state (atomic write protected by caller's lock)
@@ -564,6 +609,39 @@ class MixinAsyncCircuitBreaker:
             )
             # Still proceed but log the violation for debugging
+        # If in half-open state, track successes
+        if self._circuit_breaker_half_open:
+            self._circuit_breaker_half_open_success_count += 1
+            if (
+                self._circuit_breaker_half_open_success_count
+                >= self.circuit_breaker_half_open_successes
+            ):
+                # Enough successes - transition to CLOSED
+                logger.info(
+                    f"Circuit breaker closed for {self.service_name} after {self._circuit_breaker_half_open_success_count} successful requests in half-open state",
+                    extra={
+                        "service": self.service_name,
+                        "half_open_successes": self._circuit_breaker_half_open_success_count,
+                        "required_successes": self.circuit_breaker_half_open_successes,
+                    },
+                )
+                self._circuit_breaker_half_open = False
+                self._circuit_breaker_half_open_success_count = 0
+                self._circuit_breaker_failures = 0
+                self._circuit_breaker_open_until = 0.0
+            else:
+                # Still in half-open, waiting for more successes
+                logger.debug(
+                    f"Circuit breaker half-open success {self._circuit_breaker_half_open_success_count}/{self.circuit_breaker_half_open_successes} for {self.service_name}",
+                    extra={
+                        "service": self.service_name,
+                        "half_open_successes": self._circuit_breaker_half_open_success_count,
+                        "required_successes": self.circuit_breaker_half_open_successes,
+                    },
+                )
+            return
         # Log state transition if circuit was open or had failures
         if self._circuit_breaker_open or self._circuit_breaker_failures > 0:
             previous_state = "open" if self._circuit_breaker_open else "closed"
@@ -578,6 +656,8 @@ class MixinAsyncCircuitBreaker:
         # Reset state (atomic write protected by caller's lock)
         self._circuit_breaker_open = False
+        self._circuit_breaker_half_open = False
+        self._circuit_breaker_half_open_success_count = 0
         self._circuit_breaker_failures = 0
         self._circuit_breaker_open_until = 0.0
@@ -621,10 +701,15 @@ class MixinAsyncCircuitBreaker:
         # Read state variables with safe defaults for uninitialized state
         cb_open = getattr(self, "_circuit_breaker_open", False)
+        cb_half_open = getattr(self, "_circuit_breaker_half_open", False)
         cb_open_until = getattr(self, "_circuit_breaker_open_until", 0.0)
         cb_failures = getattr(self, "_circuit_breaker_failures", 0)
         cb_threshold = getattr(self, "circuit_breaker_threshold", 5)
         cb_reset_timeout = getattr(self, "circuit_breaker_reset_timeout", 60.0)
+        cb_half_open_successes = getattr(self, "circuit_breaker_half_open_successes", 1)
+        cb_half_open_success_count = getattr(
+            self, "_circuit_breaker_half_open_success_count", 0
+        )
         # Calculate state: closed, open, or half_open
         current_time = time.time()
@@ -635,6 +720,9 @@ class MixinAsyncCircuitBreaker:
             else:
                 cb_state = "open"
                 seconds_until_half_open = round(cb_open_until - current_time, 2)
+        elif cb_half_open:
+            cb_state = "half_open"
+            seconds_until_half_open = None
         else:
             cb_state = "closed"
             seconds_until_half_open = None
@@ -645,12 +733,36 @@ class MixinAsyncCircuitBreaker:
             "failures": cb_failures,
             "threshold": cb_threshold,
             "reset_timeout_seconds": cb_reset_timeout,
+            "half_open_successes_required": cb_half_open_successes,
         }
         if seconds_until_half_open is not None:
             result["seconds_until_half_open"] = seconds_until_half_open
+        if cb_state == "half_open":
+            result["half_open_success_count"] = cb_half_open_success_count
         return result
+    def get_circuit_breaker_state(self) -> dict[str, JsonType]:
+        """Return current circuit breaker state for external introspection.
+        This is the public API for accessing circuit breaker state. Adapters and
+        other external consumers should use this method rather than accessing
+        private attributes.
+        See `_get_circuit_breaker_state()` for implementation details.
+        Returns:
+            dict containing:
+                - initialized: Whether circuit breaker has been initialized
+                - state: Current state ("closed", "open", or "half_open")
+                - failures: Current failure count
+                - threshold: Configured failure threshold
+                - reset_timeout_seconds: Configured reset timeout
+                - seconds_until_half_open: Seconds until half_open (only when open)
+        """
+        return self._get_circuit_breaker_state()
 __all__ = ["EnumCircuitState", "MixinAsyncCircuitBreaker", "ModelCircuitBreakerConfig"]

omnibase_infra/models/__init__.py CHANGED Viewed

@@ -22,6 +22,11 @@ from omnibase_infra.models.dispatch import (
     ModelTopicParser,
 )
 from omnibase_infra.models.errors import ModelHandlerValidationError
+from omnibase_infra.models.event_bus import (
+    ModelConsumerRetryConfig,
+    ModelIdempotencyConfig,
+    ModelOffsetPolicyConfig,
+)
 from omnibase_infra.models.handlers import ModelHandlerIdentifier
 from omnibase_infra.models.health import ModelHealthCheckResult
 from omnibase_infra.models.logging import ModelLogContext
@@ -84,6 +89,10 @@ __all__: list[str] = [
     # Dispatch models
     "EnumDispatchStatus",
     "EnumTopicStandard",
+    # Event bus models
+    "ModelConsumerRetryConfig",
+    "ModelIdempotencyConfig",
+    "ModelOffsetPolicyConfig",
     # Resilience models
     "ModelCircuitBreakerConfig",
     # Validation models

omnibase_infra/models/event_bus/__init__.py ADDED Viewed

@@ -0,0 +1,22 @@
+# SPDX-FileCopyrightText: 2025 OmniNode Team <info@omninode.ai>
+#
+# SPDX-License-Identifier: Apache-2.0
+"""Event bus models for message consumption, idempotency, and DLQ configuration."""
+from omnibase_infra.models.event_bus.model_consumer_retry_config import (
+    ModelConsumerRetryConfig,
+)
+from omnibase_infra.models.event_bus.model_dlq_config import ModelDlqConfig
+from omnibase_infra.models.event_bus.model_idempotency_config import (
+    ModelIdempotencyConfig,
+)
+from omnibase_infra.models.event_bus.model_offset_policy_config import (
+    ModelOffsetPolicyConfig,
+)
+__all__ = [
+    "ModelConsumerRetryConfig",
+    "ModelDlqConfig",
+    "ModelIdempotencyConfig",
+    "ModelOffsetPolicyConfig",
+]

omnibase_infra/models/event_bus/model_consumer_retry_config.py ADDED Viewed

@@ -0,0 +1,367 @@
+# SPDX-License-Identifier: MIT
+# Copyright (c) 2025 OmniNode Team
+"""Consumer-side retry configuration model.
+This module provides the configuration model for consumer-side retry behavior
+when message handlers fail. It distinguishes between:
+- **Content errors** (non-retryable): Malformed messages, validation failures,
+  business logic errors. These will fail regardless of retry attempts.
+- **Infrastructure errors** (retryable): Network timeouts, temporary service
+  unavailability, rate limiting. These may succeed on retry.
+The model uses exponential backoff with optional jitter to prevent thundering
+herd problems in distributed systems.
+Example:
+    >>> config = ModelConsumerRetryConfig(
+    ...     max_attempts=5,
+    ...     backoff_ms=500,
+    ...     backoff_multiplier=2.0,
+    ...     jitter_enabled=True,
+    ... )
+    >>> config.calculate_delay_ms(attempt=3)  # Returns ~2000ms + jitter
+See Also:
+    - docs/patterns/error_recovery_patterns.md: Error recovery patterns
+    - docs/patterns/dispatcher_resilience.md: Dispatcher resilience patterns
+"""
+from __future__ import annotations
+import random
+from typing import Literal
+from pydantic import BaseModel, ConfigDict, Field, field_validator
+from omnibase_core.errors import OnexError
+class ModelConsumerRetryConfig(BaseModel):
+    """Consumer-side retry configuration.
+    Controls retry behavior when message handlers fail. Distinguishes between
+    content errors (non-retryable) and infrastructure errors (retryable).
+    Attributes:
+        max_attempts: Maximum retry attempts before giving up. Includes the
+            initial attempt, so max_attempts=3 means 1 initial + 2 retries.
+        backoff_ms: Base backoff delay in milliseconds. For exponential
+            backoff, subsequent delays are backoff_ms * (multiplier ^ attempt).
+        backoff_multiplier: Exponential backoff multiplier. A value of 2.0
+            doubles the delay with each retry.
+        jitter_enabled: When True, adds random jitter (0-25% of delay) to
+            prevent thundering herd when multiple consumers retry simultaneously.
+        backoff_strategy: Strategy for calculating delays between retries.
+            "exponential" doubles delay each retry, "fixed" uses constant delay.
+        max_backoff_ms: Maximum backoff delay cap to prevent excessive waits.
+    Example:
+        ```python
+        from omnibase_infra.models.event_bus import ModelConsumerRetryConfig
+        # Standard configuration with exponential backoff
+        config = ModelConsumerRetryConfig(
+            max_attempts=3,
+            backoff_ms=1000,
+            backoff_multiplier=2.0,
+            jitter_enabled=True,
+        )
+        # Conservative configuration for critical operations
+        config = ModelConsumerRetryConfig.create_conservative()
+        # Aggressive configuration for resilient operations
+        config = ModelConsumerRetryConfig.create_aggressive()
+        ```
+    Configuration Guidelines:
+        - Critical operations: Use lower max_attempts (2-3), higher backoff
+        - Best-effort operations: Use higher max_attempts (5+), lower backoff
+        - High-concurrency: Always enable jitter to prevent thundering herd
+    """
+    model_config = ConfigDict(
+        frozen=True,
+        extra="forbid",
+        json_schema_extra={
+            "examples": [
+                {
+                    "max_attempts": 3,
+                    "backoff_ms": 1000,
+                    "backoff_multiplier": 2.0,
+                    "jitter_enabled": True,
+                    "backoff_strategy": "exponential",
+                    "max_backoff_ms": 30000,
+                },
+                {
+                    "max_attempts": 5,
+                    "backoff_ms": 500,
+                    "backoff_multiplier": 1.5,
+                    "jitter_enabled": True,
+                    "backoff_strategy": "exponential",
+                    "max_backoff_ms": 60000,
+                },
+            ]
+        },
+    )
+    max_attempts: int = Field(
+        default=3,
+        ge=1,
+        le=10,
+        description="Maximum retry attempts before giving up (1-10). "
+        "Includes initial attempt, so 3 means 1 initial + 2 retries.",
+    )
+    backoff_ms: int = Field(
+        default=1000,
+        ge=100,
+        le=60000,
+        description="Base backoff delay in milliseconds (100-60000). "
+        "For exponential backoff, subsequent delays are backoff_ms * (multiplier ^ attempt).",
+    )
+    backoff_multiplier: float = Field(
+        default=2.0,
+        ge=1.0,
+        le=4.0,
+        description="Exponential backoff multiplier (1.0-4.0). "
+        "A value of 2.0 doubles the delay with each retry.",
+    )
+    jitter_enabled: bool = Field(
+        default=True,
+        description="Add random jitter (0-25% of delay) to backoff. "
+        "Prevents thundering herd when multiple consumers retry simultaneously.",
+    )
+    backoff_strategy: Literal["exponential", "fixed"] = Field(
+        default="exponential",
+        description="Backoff strategy: 'exponential' multiplies delay each retry, "
+        "'fixed' uses constant delay.",
+    )
+    max_backoff_ms: int = Field(
+        default=30000,
+        ge=1000,
+        le=300000,
+        description="Maximum backoff delay cap in milliseconds (1000-300000). "
+        "Prevents excessive waits in exponential backoff.",
+    )
+    @field_validator("max_backoff_ms")
+    @classmethod
+    def validate_max_backoff_greater_than_base(cls, v: int, info: object) -> int:
+        """Ensure max_backoff_ms is at least as large as backoff_ms.
+        Args:
+            v: The max_backoff_ms value to validate.
+            info: Pydantic validation info containing other field values.
+        Returns:
+            The validated max_backoff_ms value.
+        Raises:
+            ValueError: If max_backoff_ms is less than backoff_ms.
+        """
+        # Access data from validation info
+        # Use getattr for safer access pattern
+        data = getattr(info, "data", None) or {}
+        base_delay = data.get("backoff_ms", 1000)
+        if v < base_delay:
+            msg = f"max_backoff_ms ({v}) must be >= backoff_ms ({base_delay})"
+            raise ValueError(msg)
+        return v
+    def calculate_delay_ms(
+        self, attempt: int, include_jitter: bool | None = None
+    ) -> int:
+        """Calculate delay in milliseconds for a specific retry attempt.
+        Args:
+            attempt: The retry attempt number (1-based). Attempt 1 is the first
+                retry after the initial failure.
+            include_jitter: Whether to include jitter. If None, uses the
+                jitter_enabled setting from configuration.
+        Returns:
+            Delay in milliseconds for the specified attempt, capped at max_backoff_ms.
+        Raises:
+            OnexError: If attempt is less than 1 or exceeds allowed retries.
+        Example:
+            >>> config = ModelConsumerRetryConfig(backoff_ms=1000, backoff_multiplier=2.0)
+            >>> config.calculate_delay_ms(1)  # ~1000ms
+            >>> config.calculate_delay_ms(2)  # ~2000ms (max_attempts=3 allows 2 retries)
+        """
+        if attempt < 1:
+            msg = f"Attempt must be >= 1, got {attempt}"
+            raise OnexError(msg)
+        # max_attempts includes the initial attempt, so valid retries are 1 to max_attempts-1
+        max_retry = self.max_attempts - 1
+        if attempt > max_retry:
+            msg = f"Attempt {attempt} exceeds max retries {max_retry} (max_attempts={self.max_attempts} includes initial attempt)"
+            raise OnexError(msg)
+        if self.backoff_strategy == "exponential":
+            # Exponential: backoff_ms * (multiplier ^ (attempt - 1))
+            delay = self.backoff_ms * (self.backoff_multiplier ** (attempt - 1))
+        else:
+            # Fixed: constant delay
+            delay = float(self.backoff_ms)
+        # Cap at max_backoff_ms
+        delay = min(delay, float(self.max_backoff_ms))
+        # Add jitter if enabled
+        use_jitter = (
+            include_jitter if include_jitter is not None else self.jitter_enabled
+        )
+        if use_jitter:
+            # Add 0-25% random jitter
+            jitter_factor = random.random() * 0.25
+            delay += delay * jitter_factor
+        return int(delay)
+    def get_all_delays_ms(self, include_jitter: bool = False) -> list[int]:
+        """Get all delay times for the complete retry sequence.
+        Args:
+            include_jitter: Whether to include jitter in calculations.
+                Defaults to False for predictable results.
+        Returns:
+            List of delays in milliseconds for each retry attempt.
+            Since max_attempts includes the initial attempt, there are
+            max_attempts - 1 retries, and thus max_attempts - 1 delays.
+        Example:
+            >>> config = ModelConsumerRetryConfig(max_attempts=3, backoff_ms=1000)
+            >>> config.get_all_delays_ms()
+            [1000, 2000]  # 2 retries (max_attempts=3 includes initial)
+        """
+        # max_attempts includes initial attempt, so we have max_attempts - 1 retries
+        return [
+            self.calculate_delay_ms(i, include_jitter=include_jitter)
+            for i in range(1, self.max_attempts)
+        ]
+    def get_total_retry_time_ms(self) -> int:
+        """Get total time that all retries will take.
+        Returns:
+            Total time in milliseconds for all retry attempts (without jitter).
+            Since max_attempts includes the initial attempt, this sums delays
+            for max_attempts - 1 retries.
+        Example:
+            >>> config = ModelConsumerRetryConfig(max_attempts=3, backoff_ms=1000)
+            >>> config.get_total_retry_time_ms()
+            3000  # 1000 + 2000 (2 retries for max_attempts=3)
+        """
+        return sum(self.get_all_delays_ms(include_jitter=False))
+    @classmethod
+    def create_conservative(cls) -> ModelConsumerRetryConfig:
+        """Create conservative retry configuration for critical operations.
+        Conservative configuration minimizes retry attempts and uses longer
+        delays to reduce load on failing services.
+        Returns:
+            ModelConsumerRetryConfig with conservative settings.
+        Example:
+            >>> config = ModelConsumerRetryConfig.create_conservative()
+            >>> config.max_attempts
+            2
+        """
+        return cls(
+            max_attempts=2,
+            backoff_ms=2000,
+            backoff_multiplier=2.0,
+            jitter_enabled=True,
+            backoff_strategy="exponential",
+            max_backoff_ms=30000,
+        )
+    @classmethod
+    def create_standard(cls) -> ModelConsumerRetryConfig:
+        """Create standard retry configuration for typical operations.
+        Standard configuration balances reliability with reasonable latency.
+        Returns:
+            ModelConsumerRetryConfig with standard settings.
+        Example:
+            >>> config = ModelConsumerRetryConfig.create_standard()
+            >>> config.max_attempts
+            3
+        """
+        return cls(
+            max_attempts=3,
+            backoff_ms=1000,
+            backoff_multiplier=2.0,
+            jitter_enabled=True,
+            backoff_strategy="exponential",
+            max_backoff_ms=30000,
+        )
+    @classmethod
+    def create_aggressive(cls) -> ModelConsumerRetryConfig:
+        """Create aggressive retry configuration for resilient operations.
+        Aggressive configuration maximizes retry attempts with shorter delays
+        for operations that need high availability.
+        Returns:
+            ModelConsumerRetryConfig with aggressive settings.
+        Example:
+            >>> config = ModelConsumerRetryConfig.create_aggressive()
+            >>> config.max_attempts
+            5
+        """
+        return cls(
+            max_attempts=5,
+            backoff_ms=500,
+            backoff_multiplier=1.5,
+            jitter_enabled=True,
+            backoff_strategy="exponential",
+            max_backoff_ms=15000,
+        )
+    @classmethod
+    def create_no_retry(cls) -> ModelConsumerRetryConfig:
+        """Create configuration with no retries (fail-fast).
+        Use for operations where retries are not appropriate, such as
+        idempotency-sensitive operations or when circuit breaker is preferred.
+        Returns:
+            ModelConsumerRetryConfig with no retry attempts.
+        Example:
+            >>> config = ModelConsumerRetryConfig.create_no_retry()
+            >>> config.max_attempts
+            1
+        """
+        return cls(
+            max_attempts=1,
+            backoff_ms=100,
+            backoff_multiplier=1.0,
+            jitter_enabled=False,
+            backoff_strategy="fixed",
+            max_backoff_ms=1000,
+        )
+__all__: list[str] = [
+    "ModelConsumerRetryConfig",
+]

omnibase_infra 0.2.8__py3-none-any.whl → 0.2.9__py3-none-any.whl

omnibase_infra 0.2.8__py3-none-any.whl → 0.2.9__py3-none-any.whl