dory_sdk-2.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69)
  1. dory/__init__.py +70 -0
  2. dory/auto_instrument.py +142 -0
  3. dory/cli/__init__.py +5 -0
  4. dory/cli/main.py +290 -0
  5. dory/cli/templates.py +333 -0
  6. dory/config/__init__.py +23 -0
  7. dory/config/defaults.py +50 -0
  8. dory/config/loader.py +361 -0
  9. dory/config/presets.py +325 -0
  10. dory/config/schema.py +152 -0
  11. dory/core/__init__.py +27 -0
  12. dory/core/app.py +404 -0
  13. dory/core/context.py +209 -0
  14. dory/core/lifecycle.py +214 -0
  15. dory/core/meta.py +121 -0
  16. dory/core/modes.py +479 -0
  17. dory/core/processor.py +654 -0
  18. dory/core/signals.py +122 -0
  19. dory/decorators.py +142 -0
  20. dory/errors/__init__.py +117 -0
  21. dory/errors/classification.py +362 -0
  22. dory/errors/codes.py +495 -0
  23. dory/health/__init__.py +10 -0
  24. dory/health/probes.py +210 -0
  25. dory/health/server.py +306 -0
  26. dory/k8s/__init__.py +11 -0
  27. dory/k8s/annotation_watcher.py +184 -0
  28. dory/k8s/client.py +251 -0
  29. dory/k8s/pod_metadata.py +182 -0
  30. dory/logging/__init__.py +9 -0
  31. dory/logging/logger.py +175 -0
  32. dory/metrics/__init__.py +7 -0
  33. dory/metrics/collector.py +301 -0
  34. dory/middleware/__init__.py +36 -0
  35. dory/middleware/connection_tracker.py +608 -0
  36. dory/middleware/request_id.py +321 -0
  37. dory/middleware/request_tracker.py +501 -0
  38. dory/migration/__init__.py +11 -0
  39. dory/migration/configmap.py +260 -0
  40. dory/migration/serialization.py +167 -0
  41. dory/migration/state_manager.py +301 -0
  42. dory/monitoring/__init__.py +23 -0
  43. dory/monitoring/opentelemetry.py +462 -0
  44. dory/py.typed +2 -0
  45. dory/recovery/__init__.py +60 -0
  46. dory/recovery/golden_image.py +480 -0
  47. dory/recovery/golden_snapshot.py +561 -0
  48. dory/recovery/golden_validator.py +518 -0
  49. dory/recovery/partial_recovery.py +479 -0
  50. dory/recovery/recovery_decision.py +242 -0
  51. dory/recovery/restart_detector.py +142 -0
  52. dory/recovery/state_validator.py +187 -0
  53. dory/resilience/__init__.py +45 -0
  54. dory/resilience/circuit_breaker.py +454 -0
  55. dory/resilience/retry.py +389 -0
  56. dory/sidecar/__init__.py +6 -0
  57. dory/sidecar/main.py +75 -0
  58. dory/sidecar/server.py +329 -0
  59. dory/simple.py +342 -0
  60. dory/types.py +75 -0
  61. dory/utils/__init__.py +25 -0
  62. dory/utils/errors.py +59 -0
  63. dory/utils/retry.py +115 -0
  64. dory/utils/timeout.py +80 -0
  65. dory_sdk-2.1.0.dist-info/METADATA +663 -0
  66. dory_sdk-2.1.0.dist-info/RECORD +69 -0
  67. dory_sdk-2.1.0.dist-info/WHEEL +5 -0
  68. dory_sdk-2.1.0.dist-info/entry_points.txt +3 -0
  69. dory_sdk-2.1.0.dist-info/top_level.txt +1 -0
dory/core/context.py ADDED
@@ -0,0 +1,209 @@
+ """
+ ExecutionContext - Runtime context passed to processors.
+
+ Contains pod metadata, events, and utility methods. The context is
+ created by DoryApp and passed to the processor constructor.
+ """
+
+ import asyncio
+ import logging
+ import os
+ from dataclasses import dataclass, field
+ from typing import Any
+
+
+ @dataclass
+ class ExecutionContext:
+     """
+     Execution context containing pod metadata and utilities.
+
+     Attributes:
+         pod_name: Kubernetes pod name (e.g., "camera-processor-1")
+         pod_namespace: Kubernetes namespace (e.g., "default")
+         processor_id: Unique processor ID from Dory DB
+         attempt_number: Pod restart count (1, 2, 3...)
+         is_migrating: True if this is a restart due to migration
+         previous_pod_name: Name of pod we're migrating from (if applicable)
+         shutdown_requested: Event that fires when SIGTERM received
+         migration_imminent: Event that fires 10s before forced shutdown
+     """
+
+     # Pod metadata (read from K8s/env)
+     pod_name: str
+     pod_namespace: str
+     processor_id: str
+     attempt_number: int = 1
+     is_migrating: bool = False
+     previous_pod_name: str | None = None
+
+     # Async events for coordination
+     shutdown_requested: asyncio.Event = field(default_factory=asyncio.Event)
+     migration_imminent: asyncio.Event = field(default_factory=asyncio.Event)
+
+     # Internal config cache
+     _config: dict[str, Any] = field(default_factory=dict)
+     _logger: logging.Logger | None = field(default=None, repr=False)
+
+     def is_shutdown_requested(self) -> bool:
+         """
+         Check if graceful shutdown is in progress.
+
+         Processors should poll this in their run() loop to exit gracefully.
+
+         Returns:
+             True if SIGTERM received and shutdown initiated
+         """
+         return self.shutdown_requested.is_set()
+
+     def is_migration_imminent(self) -> bool:
+         """
+         Check if migration is about to happen.
+
+         If True, processor should finish in-flight operations quickly.
+
+         Returns:
+             True if migration scheduled within next 10s
+         """
+         return self.migration_imminent.is_set()
+
+     def config(self) -> dict[str, Any]:
+         """
+         Get application configuration from environment/ConfigMap.
+
+         Only returns app-specific config (CAMERA_FEED_URL, MODEL_PATH, etc.),
+         not SDK internals (DORY_* vars are filtered out).
+
+         Returns:
+             Dict with app configuration
+         """
+         return self._config
+
+     def logger(self) -> logging.Logger:
+         """
+         Get pre-configured logger with pod context.
+
+         Logger automatically includes pod_name, processor_id, namespace
+         in all log messages.
+
+         Returns:
+             Logger configured with pod context
+         """
+         if self._logger is None:
+             self._logger = logging.getLogger(f"dory.processor.{self.processor_id}")
+         return self._logger
+
+     def get_env(self, key: str, default: str | None = None) -> str | None:
+         """
+         Get environment variable value.
+
+         Args:
+             key: Environment variable name
+             default: Default value if not set
+
+         Returns:
+             Environment variable value or default
+         """
+         return os.environ.get(key, default)
+
+     @classmethod
+     def from_environment(cls) -> "ExecutionContext":
+         """
+         Create ExecutionContext from environment variables.
+
+         Reads DORY_* environment variables set by Dory orchestrator.
+
+         Returns:
+             ExecutionContext populated from environment
+         """
+         # Read Dory system env vars
+         pod_name = os.environ.get("DORY_POD_NAME", os.environ.get("POD_NAME", "unknown"))
+         pod_namespace = os.environ.get(
+             "DORY_POD_NAMESPACE", os.environ.get("POD_NAMESPACE", "default")
+         )
+
+         # Get processor_id from env or derive from pod name
+         processor_id = os.environ.get("DORY_PROCESSOR_ID", os.environ.get("PROCESSOR_ID"))
+         if not processor_id:
+             # Derive from pod name (e.g., "myapp-7f8d9c6b-x4h2j" -> "myapp")
+             processor_id = cls._derive_processor_id_from_pod_name(pod_name)
+
+         is_migrating = os.environ.get("DORY_IS_MIGRATING", "false").lower() == "true"
+         previous_pod = os.environ.get("DORY_MIGRATED_FROM")
+
+         # Parse restart count (will be updated from K8s later)
+         attempt_number = 1
+
+         # Load app config (non-DORY_ env vars)
+         app_config = {}
+         for key, value in os.environ.items():
+             if not key.startswith("DORY_") and not key.startswith("KUBERNETES_"):
+                 app_config[key] = value
+
+         return cls(
+             pod_name=pod_name,
+             pod_namespace=pod_namespace,
+             processor_id=processor_id,
+             attempt_number=attempt_number,
+             is_migrating=is_migrating,
+             previous_pod_name=previous_pod,
+             _config=app_config,
+         )
+
+     @staticmethod
+     def _derive_processor_id_from_pod_name(pod_name: str) -> str:
+         """
+         Derive processor ID from Kubernetes pod name.
+
+         Pod names typically follow the pattern:
+         - Deployment: <deployment>-<replicaset-hash>-<pod-hash> (e.g., "myapp-7f8d9c6b-x4h2j")
+         - StatefulSet: <statefulset>-<ordinal> (e.g., "myapp-0")
+
+         Args:
+             pod_name: Kubernetes pod name
+
+         Returns:
+             Derived processor ID or "unknown" if cannot be derived
+         """
+         if not pod_name or pod_name == "unknown":
+             return "unknown"
+
+         parts = pod_name.split("-")
+         if len(parts) >= 3:
+             # Deployment format: name-replicaset-pod
+             # Try to find where the hash parts start (typically 8+ chars of alphanumeric)
+             for i in range(len(parts) - 1, 0, -1):
+                 part = parts[i]
+                 # If this looks like a hash (short alphanumeric), skip it
+                 if len(part) <= 10 and part.isalnum():
+                     continue
+                 # Otherwise, include up to this point
+                 return "-".join(parts[: i + 1])
+             # If all parts look like hashes, take the first part
+             return parts[0]
+         elif len(parts) == 2:
+             # StatefulSet format: name-ordinal or simple name-hash
+             if parts[1].isdigit():
+                 return parts[0]  # StatefulSet
+             return parts[0]  # Simple deployment
+         else:
+             return pod_name
+
+     def request_shutdown(self) -> None:
+         """Signal that shutdown has been requested."""
+         self.shutdown_requested.set()
+
+     def signal_migration(self) -> None:
+         """Signal that migration will happen soon."""
+         self.migration_imminent.set()
+
+     def signal_migration_imminent(self) -> None:
+         """Signal that migration will happen soon (alias for signal_migration)."""
+         self.migration_imminent.set()
+
+     def update_config(self, config: dict[str, Any]) -> None:
+         """Update app configuration (internal use)."""
+         self._config.update(config)
+
+     def set_attempt_number(self, attempt: int) -> None:
+         """Set restart attempt number (internal use)."""
+         self.attempt_number = attempt
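
A minimal usage sketch for the context above (not part of the package diff). The run() loop, the CAMERA_FEED_URL key, and the surrounding script are hypothetical; only ExecutionContext and its methods come from dory/core/context.py.

import asyncio

from dory.core.context import ExecutionContext


async def run(ctx: ExecutionContext) -> None:
    # Hypothetical processing loop that cooperates with Dory's shutdown/migration events.
    log = ctx.logger()
    feed_url = ctx.config().get("CAMERA_FEED_URL")  # app config; DORY_* vars are filtered out
    log.info("starting %s (attempt %d)", ctx.pod_name, ctx.attempt_number)

    while not ctx.is_shutdown_requested():
        if ctx.is_migration_imminent():
            log.info("migration imminent, finishing in-flight work")
            break
        # ... process one unit of work from feed_url ...
        await asyncio.sleep(1.0)


if __name__ == "__main__":
    asyncio.run(run(ExecutionContext.from_environment()))
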
dory/core/lifecycle.py ADDED
@@ -0,0 +1,214 @@
+ """
+ LifecycleManager - Manages processor lifecycle state machine.
+
+ Handles transitions between lifecycle states and enforces valid
+ state transitions.
+ """
+
+ import asyncio
+ import logging
+ from typing import TYPE_CHECKING
+
+ from dory.types import LifecycleState
+ from dory.utils.errors import DoryStartupError, DoryShutdownError
+
+ if TYPE_CHECKING:
+     from dory.core.processor import BaseProcessor
+     from dory.core.context import ExecutionContext
+
+ logger = logging.getLogger(__name__)
+
+
+ class LifecycleManager:
+     """
+     Manages the processor lifecycle state machine.
+
+     States:
+         CREATED -> STARTING -> RUNNING -> SHUTTING_DOWN -> STOPPED
+                                   |
+                                   v
+                                FAILED (from any state on error)
+     """
+
+     # Valid state transitions
+     VALID_TRANSITIONS: dict[LifecycleState, set[LifecycleState]] = {
+         LifecycleState.CREATED: {LifecycleState.STARTING, LifecycleState.FAILED},
+         LifecycleState.STARTING: {LifecycleState.RUNNING, LifecycleState.FAILED},
+         LifecycleState.RUNNING: {LifecycleState.SHUTTING_DOWN, LifecycleState.FAILED},
+         LifecycleState.SHUTTING_DOWN: {LifecycleState.STOPPED, LifecycleState.FAILED},
+         LifecycleState.STOPPED: set(),  # Terminal state
+         LifecycleState.FAILED: set(),  # Terminal state
+     }
+
+     def __init__(self):
+         self._state = LifecycleState.CREATED
+         self._state_lock = asyncio.Lock()
+         self._state_changed = asyncio.Event()
+
+     @property
+     def state(self) -> LifecycleState:
+         """Current lifecycle state."""
+         return self._state
+
+     def is_running(self) -> bool:
+         """Check if processor is in running state."""
+         return self._state == LifecycleState.RUNNING
+
+     def is_stopped(self) -> bool:
+         """Check if processor has stopped (gracefully or failed)."""
+         return self._state in (LifecycleState.STOPPED, LifecycleState.FAILED)
+
+     def is_shutting_down(self) -> bool:
+         """Check if shutdown is in progress."""
+         return self._state == LifecycleState.SHUTTING_DOWN
+
+     async def transition_to(self, new_state: LifecycleState) -> None:
+         """
+         Transition to a new lifecycle state.
+
+         Args:
+             new_state: Target state
+
+         Raises:
+             ValueError: If transition is not valid
+         """
+         async with self._state_lock:
+             if new_state not in self.VALID_TRANSITIONS.get(self._state, set()):
+                 raise ValueError(
+                     f"Invalid state transition: {self._state.name} -> {new_state.name}"
+                 )
+
+             old_state = self._state
+             self._state = new_state
+             self._state_changed.set()
+             self._state_changed.clear()
+
+             logger.debug(f"Lifecycle transition: {old_state.name} -> {new_state.name}")
+
+     async def wait_for_state(
+         self,
+         target_states: set[LifecycleState],
+         timeout: float | None = None,
+     ) -> LifecycleState:
+         """
+         Wait for lifecycle to reach one of the target states.
+
+         Args:
+             target_states: Set of states to wait for
+             timeout: Maximum time to wait (None = forever)
+
+         Returns:
+             The state that was reached
+
+         Raises:
+             asyncio.TimeoutError: If timeout exceeded
+         """
+         while self._state not in target_states:
+             try:
+                 await asyncio.wait_for(
+                     self._state_changed.wait(),
+                     timeout=timeout,
+                 )
+             except asyncio.TimeoutError:
+                 raise
+
+         return self._state
+
+     async def run_startup(
+         self,
+         processor: "BaseProcessor",
+         timeout: float = 60.0,
+     ) -> None:
+         """
+         Run processor startup with timeout.
+
+         Args:
+             processor: Processor instance to start
+             timeout: Maximum time for startup (seconds)
+
+         Raises:
+             DoryStartupError: If startup fails or times out
+         """
+         await self.transition_to(LifecycleState.STARTING)
+
+         try:
+             await asyncio.wait_for(
+                 processor.startup(),
+                 timeout=timeout,
+             )
+             await self.transition_to(LifecycleState.RUNNING)
+             logger.info("Processor startup completed")
+
+         except asyncio.TimeoutError:
+             await self.transition_to(LifecycleState.FAILED)
+             raise DoryStartupError(f"Startup timed out after {timeout}s")
+
+         except Exception as e:
+             await self.transition_to(LifecycleState.FAILED)
+             raise DoryStartupError(f"Startup failed: {e}", cause=e)
+
+     async def run_shutdown(
+         self,
+         processor: "BaseProcessor",
+         timeout: float = 30.0,
+     ) -> None:
+         """
+         Run processor shutdown with timeout.
+
+         Args:
+             processor: Processor instance to shutdown
+             timeout: Maximum time for shutdown (seconds)
+
+         Raises:
+             DoryShutdownError: If shutdown times out
+         """
+         if self._state in (LifecycleState.STOPPED, LifecycleState.FAILED):
+             return  # Already stopped
+
+         await self.transition_to(LifecycleState.SHUTTING_DOWN)
+
+         try:
+             await asyncio.wait_for(
+                 processor.shutdown(),
+                 timeout=timeout,
+             )
+             await self.transition_to(LifecycleState.STOPPED)
+             logger.info("Processor shutdown completed")
+
+         except asyncio.TimeoutError:
+             logger.error(f"Shutdown timed out after {timeout}s, forcing exit")
+             await self.transition_to(LifecycleState.FAILED)
+             raise DoryShutdownError(f"Shutdown timed out after {timeout}s")
+
+         except Exception as e:
+             # Log but continue - shutdown should complete
+             logger.error(f"Error during shutdown: {e}")
+             await self.transition_to(LifecycleState.STOPPED)
+
+     async def run_main_loop(
+         self,
+         processor: "BaseProcessor",
+         context: "ExecutionContext",
+     ) -> None:
+         """
+         Run processor main loop until shutdown requested.
+
+         Args:
+             processor: Processor instance to run
+             context: Execution context
+         """
+         if self._state != LifecycleState.RUNNING:
+             raise ValueError(f"Cannot run: state is {self._state.name}, expected RUNNING")
+
+         try:
+             await processor.run()
+             logger.info("Processor run() completed")
+
+         except asyncio.CancelledError:
+             logger.info("Processor run() cancelled")
+             raise
+
+         except Exception as e:
+             logger.error(f"Error in processor run(): {e}")
+             await self.transition_to(LifecycleState.FAILED)
+             raise
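
A sketch (not part of the package diff) of how the startup/main-loop/shutdown helpers above might be sequenced by a host. EchoProcessor is a hypothetical stand-in; the real processor base class lives in dory/core/processor.py, which is not shown in this excerpt.

import asyncio

from dory.core.context import ExecutionContext
from dory.core.lifecycle import LifecycleManager
from dory.types import LifecycleState


class EchoProcessor:
    # Hypothetical stand-in exposing the startup()/run()/shutdown() coroutines the manager awaits.
    async def startup(self) -> None:
        pass

    async def run(self) -> None:
        await asyncio.sleep(0.1)

    async def shutdown(self) -> None:
        pass


async def main() -> None:
    manager = LifecycleManager()
    processor = EchoProcessor()
    ctx = ExecutionContext.from_environment()

    await manager.run_startup(processor, timeout=60.0)        # CREATED -> STARTING -> RUNNING
    try:
        await manager.run_main_loop(processor, context=ctx)   # returns when run() finishes
    finally:
        await manager.run_shutdown(processor, timeout=30.0)   # -> SHUTTING_DOWN -> STOPPED

    assert manager.state in (LifecycleState.STOPPED, LifecycleState.FAILED)


asyncio.run(main())
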
dory/core/meta.py ADDED
@@ -0,0 +1,121 @@
+ """
+ Metaclass for automatic handler instrumentation.
+
+ Automatically applies @auto_instrument to all async methods
+ starting with "handle_" or "_handle_".
+
+ No manual decorators needed!
+ """
+
+ import inspect
+ import logging
+ from abc import ABCMeta
+ from typing import Any
+
+ logger = logging.getLogger(__name__)
+
+
+ class AutoInstrumentMeta(ABCMeta):
+     """
+     Metaclass that automatically applies @auto_instrument to handler methods.
+
+     This eliminates the need for developers to add decorators manually.
+
+     Usage:
+         class MyProcessor(BaseProcessor, metaclass=AutoInstrumentMeta):
+             async def handle_request(self, request):
+                 # Automatically instrumented!
+                 # - Request ID generated
+                 # - Request tracked
+                 # - Span created
+                 # - Errors classified
+                 return {"status": "ok"}
+
+             async def handle_webhook(self, webhook):
+                 # Also automatically instrumented!
+                 return {"received": True}
+
+             async def internal_method(self):
+                 # NOT instrumented (doesn't start with handle_)
+                 pass
+
+     Auto-instrumented methods:
+         - async def handle_*(...): Public handlers
+         - async def _handle_*(...): Private handlers
+
+     Not instrumented:
+         - Other methods (don't start with handle_)
+         - Sync methods
+         - Lifecycle methods (startup, shutdown, run)
+     """
+
+     # List of methods that should NOT be auto-instrumented
+     EXCLUDED_METHODS = {
+         "startup",
+         "shutdown",
+         "run",
+         "get_state",
+         "restore_state",
+         "on_state_restore_failed",
+         "on_rapid_restart_detected",
+         "on_health_check_failed",
+         "reset_caches",
+         "run_loop",
+         "is_shutting_down",
+     }
+
+     def __new__(mcs, name, bases, namespace):
+         """
+         Create new class with auto-instrumented handler methods.
+
+         Args:
+             name: Class name
+             bases: Base classes
+             namespace: Class namespace (attributes and methods)
+
+         Returns:
+             New class with auto-instrumented handlers
+         """
+         # Import here to avoid circular dependency
+         try:
+             from dory.auto_instrument import auto_instrument
+         except ImportError:
+             logger.warning(
+                 "auto_instrument decorator not available, skipping auto-instrumentation"
+             )
+             return super().__new__(mcs, name, bases, namespace)
+
+         # Count of instrumented methods
+         instrumented_count = 0
+
+         # Auto-instrument handler methods
+         for attr_name, attr_value in list(namespace.items()):
+             # Check if this is an async method
+             if not inspect.iscoroutinefunction(attr_value):
+                 continue
+
+             # Check if method should be instrumented
+             should_instrument = False
+
+             # Instrument methods starting with handle_ or _handle_
+             if attr_name.startswith("handle_") or attr_name.startswith("_handle_"):
+                 should_instrument = True
+
+             # Don't instrument excluded methods
+             if attr_name in mcs.EXCLUDED_METHODS:
+                 should_instrument = False
+
+             # Don't instrument special methods
+             if attr_name.startswith("__") and attr_name.endswith("__"):
+                 should_instrument = False
+
+             # Apply auto-instrumentation
+             if should_instrument:
+                 namespace[attr_name] = auto_instrument(attr_value)
+                 instrumented_count += 1
+                 logger.debug(f"Auto-instrumented method: {name}.{attr_name}")
+
+         if instrumented_count > 0:
+             logger.info(f"Auto-instrumented {instrumented_count} methods in {name}")
+
+         return super().__new__(mcs, name, bases, namespace)
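
A sketch (not part of the package diff) of which methods the metaclass touches. PaymentProcessor is hypothetical, and the example assumes the auto_instrument wrapper (defined in dory/auto_instrument.py, not shown here) passes the handler's return value through, as the docstring's {"status": "ok"} example suggests.

import asyncio

from dory.core.meta import AutoInstrumentMeta


class PaymentProcessor(metaclass=AutoInstrumentMeta):
    # handle_* and _handle_* coroutines are wrapped by auto_instrument at class-creation time.
    async def handle_charge(self, amount: float) -> dict:
        return {"charged": amount}

    async def _handle_refund(self, amount: float) -> dict:
        return {"refunded": amount}

    async def run(self) -> None:  # excluded lifecycle method, left untouched
        pass

    def handle_report(self) -> str:  # sync method, left untouched (not a coroutine)
        return "ok"


async def main() -> None:
    proc = PaymentProcessor()
    print(await proc.handle_charge(10.0))  # instrumented call; result passes through


asyncio.run(main())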