dory_sdk-2.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69)
  1. dory/__init__.py +70 -0
  2. dory/auto_instrument.py +142 -0
  3. dory/cli/__init__.py +5 -0
  4. dory/cli/main.py +290 -0
  5. dory/cli/templates.py +333 -0
  6. dory/config/__init__.py +23 -0
  7. dory/config/defaults.py +50 -0
  8. dory/config/loader.py +361 -0
  9. dory/config/presets.py +325 -0
  10. dory/config/schema.py +152 -0
  11. dory/core/__init__.py +27 -0
  12. dory/core/app.py +404 -0
  13. dory/core/context.py +209 -0
  14. dory/core/lifecycle.py +214 -0
  15. dory/core/meta.py +121 -0
  16. dory/core/modes.py +479 -0
  17. dory/core/processor.py +654 -0
  18. dory/core/signals.py +122 -0
  19. dory/decorators.py +142 -0
  20. dory/errors/__init__.py +117 -0
  21. dory/errors/classification.py +362 -0
  22. dory/errors/codes.py +495 -0
  23. dory/health/__init__.py +10 -0
  24. dory/health/probes.py +210 -0
  25. dory/health/server.py +306 -0
  26. dory/k8s/__init__.py +11 -0
  27. dory/k8s/annotation_watcher.py +184 -0
  28. dory/k8s/client.py +251 -0
  29. dory/k8s/pod_metadata.py +182 -0
  30. dory/logging/__init__.py +9 -0
  31. dory/logging/logger.py +175 -0
  32. dory/metrics/__init__.py +7 -0
  33. dory/metrics/collector.py +301 -0
  34. dory/middleware/__init__.py +36 -0
  35. dory/middleware/connection_tracker.py +608 -0
  36. dory/middleware/request_id.py +321 -0
  37. dory/middleware/request_tracker.py +501 -0
  38. dory/migration/__init__.py +11 -0
  39. dory/migration/configmap.py +260 -0
  40. dory/migration/serialization.py +167 -0
  41. dory/migration/state_manager.py +301 -0
  42. dory/monitoring/__init__.py +23 -0
  43. dory/monitoring/opentelemetry.py +462 -0
  44. dory/py.typed +2 -0
  45. dory/recovery/__init__.py +60 -0
  46. dory/recovery/golden_image.py +480 -0
  47. dory/recovery/golden_snapshot.py +561 -0
  48. dory/recovery/golden_validator.py +518 -0
  49. dory/recovery/partial_recovery.py +479 -0
  50. dory/recovery/recovery_decision.py +242 -0
  51. dory/recovery/restart_detector.py +142 -0
  52. dory/recovery/state_validator.py +187 -0
  53. dory/resilience/__init__.py +45 -0
  54. dory/resilience/circuit_breaker.py +454 -0
  55. dory/resilience/retry.py +389 -0
  56. dory/sidecar/__init__.py +6 -0
  57. dory/sidecar/main.py +75 -0
  58. dory/sidecar/server.py +329 -0
  59. dory/simple.py +342 -0
  60. dory/types.py +75 -0
  61. dory/utils/__init__.py +25 -0
  62. dory/utils/errors.py +59 -0
  63. dory/utils/retry.py +115 -0
  64. dory/utils/timeout.py +80 -0
  65. dory_sdk-2.1.0.dist-info/METADATA +663 -0
  66. dory_sdk-2.1.0.dist-info/RECORD +69 -0
  67. dory_sdk-2.1.0.dist-info/WHEEL +5 -0
  68. dory_sdk-2.1.0.dist-info/entry_points.txt +3 -0
  69. dory_sdk-2.1.0.dist-info/top_level.txt +1 -0
dory/core/context.py ADDED
@@ -0,0 +1,209 @@
+ """
+ ExecutionContext - Runtime context passed to processors.
+
+ Contains pod metadata, events, and utility methods. The context is
+ created by DoryApp and passed to the processor constructor.
+ """
+
+ import asyncio
+ import logging
+ import os
+ from dataclasses import dataclass, field
+ from typing import Any
+
+
+ @dataclass
+ class ExecutionContext:
+     """
+     Execution context containing pod metadata and utilities.
+
+     Attributes:
+         pod_name: Kubernetes pod name (e.g., "camera-processor-1")
+         pod_namespace: Kubernetes namespace (e.g., "default")
+         processor_id: Unique processor ID from Dory DB
+         attempt_number: Pod restart count (1, 2, 3...)
+         is_migrating: True if this is a restart due to migration
+         previous_pod_name: Name of pod we're migrating from (if applicable)
+         shutdown_requested: Event that fires when SIGTERM received
+         migration_imminent: Event that fires 10s before forced shutdown
+     """
+
+     # Pod metadata (read from K8s/env)
+     pod_name: str
+     pod_namespace: str
+     processor_id: str
+     attempt_number: int = 1
+     is_migrating: bool = False
+     previous_pod_name: str | None = None
+
+     # Async events for coordination
+     shutdown_requested: asyncio.Event = field(default_factory=asyncio.Event)
+     migration_imminent: asyncio.Event = field(default_factory=asyncio.Event)
+
+     # Internal config cache
+     _config: dict[str, Any] = field(default_factory=dict)
+     _logger: logging.Logger | None = field(default=None, repr=False)
+
+     def is_shutdown_requested(self) -> bool:
+         """
+         Check if graceful shutdown is in progress.
+
+         Processors should poll this in their run() loop to exit gracefully.
+
+         Returns:
+             True if SIGTERM received and shutdown initiated
+         """
+         return self.shutdown_requested.is_set()
+
+     def is_migration_imminent(self) -> bool:
+         """
+         Check if migration is about to happen.
+
+         If True, processor should finish in-flight operations quickly.
+
+         Returns:
+             True if migration scheduled within next 10s
+         """
+         return self.migration_imminent.is_set()
+
+     def config(self) -> dict[str, Any]:
+         """
+         Get application configuration from environment/ConfigMap.
+
+         Only returns app-specific config (CAMERA_FEED_URL, MODEL_PATH, etc.),
+         not SDK internals (DORY_* vars are filtered out).
+
+         Returns:
+             Dict with app configuration
+         """
+         return self._config
+
+     def logger(self) -> logging.Logger:
+         """
+         Get pre-configured logger with pod context.
+
+         Logger automatically includes pod_name, processor_id, namespace
+         in all log messages.
+
+         Returns:
+             Logger configured with pod context
+         """
+         if self._logger is None:
+             self._logger = logging.getLogger(f"dory.processor.{self.processor_id}")
+         return self._logger
+
+     def get_env(self, key: str, default: str | None = None) -> str | None:
+         """
+         Get environment variable value.
+
+         Args:
+             key: Environment variable name
+             default: Default value if not set
+
+         Returns:
+             Environment variable value or default
+         """
+         return os.environ.get(key, default)
+
+     @classmethod
+     def from_environment(cls) -> "ExecutionContext":
+         """
+         Create ExecutionContext from environment variables.
+
+         Reads DORY_* environment variables set by Dory orchestrator.
+
+         Returns:
+             ExecutionContext populated from environment
+         """
+         # Read Dory system env vars
+         pod_name = os.environ.get("DORY_POD_NAME", os.environ.get("POD_NAME", "unknown"))
+         pod_namespace = os.environ.get(
+             "DORY_POD_NAMESPACE", os.environ.get("POD_NAMESPACE", "default")
+         )
+
+         # Get processor_id from env or derive from pod name
+         processor_id = os.environ.get("DORY_PROCESSOR_ID", os.environ.get("PROCESSOR_ID"))
+         if not processor_id:
+             # Derive from pod name (e.g., "myapp-7f8d9c6b-x4h2j" -> "myapp")
+             processor_id = cls._derive_processor_id_from_pod_name(pod_name)
+
+         is_migrating = os.environ.get("DORY_IS_MIGRATING", "false").lower() == "true"
+         previous_pod = os.environ.get("DORY_MIGRATED_FROM")
+
+         # Parse restart count (will be updated from K8s later)
+         attempt_number = 1
+
+         # Load app config (non-DORY_ env vars)
+         app_config = {}
+         for key, value in os.environ.items():
+             if not key.startswith("DORY_") and not key.startswith("KUBERNETES_"):
+                 app_config[key] = value
+
+         return cls(
+             pod_name=pod_name,
+             pod_namespace=pod_namespace,
+             processor_id=processor_id,
+             attempt_number=attempt_number,
+             is_migrating=is_migrating,
+             previous_pod_name=previous_pod,
+             _config=app_config,
+         )
+
+     @staticmethod
+     def _derive_processor_id_from_pod_name(pod_name: str) -> str:
+         """
+         Derive processor ID from Kubernetes pod name.
+
+         Pod names typically follow the pattern:
+         - Deployment: <deployment>-<replicaset-hash>-<pod-hash> (e.g., "myapp-7f8d9c6b-x4h2j")
+         - StatefulSet: <statefulset>-<ordinal> (e.g., "myapp-0")
+
+         Args:
+             pod_name: Kubernetes pod name
+
+         Returns:
+             Derived processor ID or "unknown" if cannot be derived
+         """
+         if not pod_name or pod_name == "unknown":
+             return "unknown"
+
+         parts = pod_name.split("-")
+         if len(parts) >= 3:
+             # Deployment format: name-replicaset-pod
+             # Try to find where the hash parts start (typically 8+ chars of alphanumeric)
+             for i in range(len(parts) - 1, 0, -1):
+                 part = parts[i]
+                 # If this looks like a hash (short alphanumeric), skip it
+                 if len(part) <= 10 and part.isalnum():
+                     continue
+                 # Otherwise, include up to this point
+                 return "-".join(parts[: i + 1])
+             # If all parts look like hashes, take the first part
+             return parts[0]
+         elif len(parts) == 2:
+             # StatefulSet format: name-ordinal or simple name-hash
+             if parts[1].isdigit():
+                 return parts[0]  # StatefulSet
+             return parts[0]  # Simple deployment
+         else:
+             return pod_name
+
+     def request_shutdown(self) -> None:
+         """Signal that shutdown has been requested."""
+         self.shutdown_requested.set()
+
+     def signal_migration(self) -> None:
+         """Signal that migration will happen soon."""
+         self.migration_imminent.set()
+
+     def signal_migration_imminent(self) -> None:
+         """Signal that migration will happen soon (alias for signal_migration)."""
+         self.migration_imminent.set()
+
+     def update_config(self, config: dict[str, Any]) -> None:
+         """Update app configuration (internal use)."""
+         self._config.update(config)
+
+     def set_attempt_number(self, attempt: int) -> None:
+         """Set restart attempt number (internal use)."""
+         self.attempt_number = attempt
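
A minimal usage sketch for the context above (not part of the package diff). The run() loop, the CAMERA_FEED_URL key, and the surrounding script are hypothetical; only ExecutionContext and its methods come from dory/core/context.py.

import asyncio

from dory.core.context import ExecutionContext


async def run(ctx: ExecutionContext) -> None:
    # Hypothetical processing loop that cooperates with Dory's shutdown/migration events.
    log = ctx.logger()
    feed_url = ctx.config().get("CAMERA_FEED_URL")  # app config; DORY_* vars are filtered out
    log.info("starting %s (attempt %d)", ctx.pod_name, ctx.attempt_number)

    while not ctx.is_shutdown_requested():
        if ctx.is_migration_imminent():
            log.info("migration imminent, finishing in-flight work")
            break
        # ... process one unit of work from feed_url ...
        await asyncio.sleep(1.0)


if __name__ == "__main__":
    asyncio.run(run(ExecutionContext.from_environment()))
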
dory/core/lifecycle.py ADDED
@@ -0,0 +1,214 @@
+ """
+ LifecycleManager - Manages processor lifecycle state machine.
+
+ Handles transitions between lifecycle states and enforces valid
+ state transitions.
+ """
+
+ import asyncio
+ import logging
+ from typing import TYPE_CHECKING
+
+ from dory.types import LifecycleState
+ from dory.utils.errors import DoryStartupError, DoryShutdownError
+
+ if TYPE_CHECKING:
+     from dory.core.processor import BaseProcessor
+     from dory.core.context import ExecutionContext
+
+ logger = logging.getLogger(__name__)
+
+
+ class LifecycleManager:
+     """
+     Manages the processor lifecycle state machine.
+
+     States:
+         CREATED -> STARTING -> RUNNING -> SHUTTING_DOWN -> STOPPED
+                                   |
+                                   v
+                                FAILED (from any state on error)
+     """
+
+     # Valid state transitions
+     VALID_TRANSITIONS: dict[LifecycleState, set[LifecycleState]] = {
+         LifecycleState.CREATED: {LifecycleState.STARTING, LifecycleState.FAILED},
+         LifecycleState.STARTING: {LifecycleState.RUNNING, LifecycleState.FAILED},
+         LifecycleState.RUNNING: {LifecycleState.SHUTTING_DOWN, LifecycleState.FAILED},
+         LifecycleState.SHUTTING_DOWN: {LifecycleState.STOPPED, LifecycleState.FAILED},
+         LifecycleState.STOPPED: set(),  # Terminal state
+         LifecycleState.FAILED: set(),  # Terminal state
+     }
+
+     def __init__(self):
+         self._state = LifecycleState.CREATED
+         self._state_lock = asyncio.Lock()
+         self._state_changed = asyncio.Event()
+
+     @property
+     def state(self) -> LifecycleState:
+         """Current lifecycle state."""
+         return self._state
+
+     def is_running(self) -> bool:
+         """Check if processor is in running state."""
+         return self._state == LifecycleState.RUNNING
+
+     def is_stopped(self) -> bool:
+         """Check if processor has stopped (gracefully or failed)."""
+         return self._state in (LifecycleState.STOPPED, LifecycleState.FAILED)
+
+     def is_shutting_down(self) -> bool:
+         """Check if shutdown is in progress."""
+         return self._state == LifecycleState.SHUTTING_DOWN
+
+     async def transition_to(self, new_state: LifecycleState) -> None:
+         """
+         Transition to a new lifecycle state.
+
+         Args:
+             new_state: Target state
+
+         Raises:
+             ValueError: If transition is not valid
+         """
+         async with self._state_lock:
+             if new_state not in self.VALID_TRANSITIONS.get(self._state, set()):
+                 raise ValueError(
+                     f"Invalid state transition: {self._state.name} -> {new_state.name}"
+                 )
+
+             old_state = self._state
+             self._state = new_state
+             self._state_changed.set()
+             self._state_changed.clear()
+
+             logger.debug(f"Lifecycle transition: {old_state.name} -> {new_state.name}")
+
+     async def wait_for_state(
+         self,
+         target_states: set[LifecycleState],
+         timeout: float | None = None,
+     ) -> LifecycleState:
+         """
+         Wait for lifecycle to reach one of the target states.
+
+         Args:
+             target_states: Set of states to wait for
+             timeout: Maximum time to wait (None = forever)
+
+         Returns:
+             The state that was reached
+
+         Raises:
+             asyncio.TimeoutError: If timeout exceeded
+         """
+         while self._state not in target_states:
+             try:
+                 await asyncio.wait_for(
+                     self._state_changed.wait(),
+                     timeout=timeout,
+                 )
+             except asyncio.TimeoutError:
+                 raise
+
+         return self._state
+
+     async def run_startup(
+         self,
+         processor: "BaseProcessor",
+         timeout: float = 60.0,
+     ) -> None:
+         """
+         Run processor startup with timeout.
+
+         Args:
+             processor: Processor instance to start
+             timeout: Maximum time for startup (seconds)
+
+         Raises:
+             DoryStartupError: If startup fails or times out
+         """
+         await self.transition_to(LifecycleState.STARTING)
+
+         try:
+             await asyncio.wait_for(
+                 processor.startup(),
+                 timeout=timeout,
+             )
+             await self.transition_to(LifecycleState.RUNNING)
+             logger.info("Processor startup completed")
+
+         except asyncio.TimeoutError:
+             await self.transition_to(LifecycleState.FAILED)
+             raise DoryStartupError(f"Startup timed out after {timeout}s")
+
+         except Exception as e:
+             await self.transition_to(LifecycleState.FAILED)
+             raise DoryStartupError(f"Startup failed: {e}", cause=e)
+
+     async def run_shutdown(
+         self,
+         processor: "BaseProcessor",
+         timeout: float = 30.0,
+     ) -> None:
+         """
+         Run processor shutdown with timeout.
+
+         Args:
+             processor: Processor instance to shutdown
+             timeout: Maximum time for shutdown (seconds)
+
+         Raises:
+             DoryShutdownError: If shutdown times out
+         """
+         if self._state in (LifecycleState.STOPPED, LifecycleState.FAILED):
+             return  # Already stopped
+
+         await self.transition_to(LifecycleState.SHUTTING_DOWN)
+
+         try:
+             await asyncio.wait_for(
+                 processor.shutdown(),
+                 timeout=timeout,
+             )
+             await self.transition_to(LifecycleState.STOPPED)
+             logger.info("Processor shutdown completed")
+
+         except asyncio.TimeoutError:
+             logger.error(f"Shutdown timed out after {timeout}s, forcing exit")
+             await self.transition_to(LifecycleState.FAILED)
+             raise DoryShutdownError(f"Shutdown timed out after {timeout}s")
+
+         except Exception as e:
+             # Log but continue - shutdown should complete
+             logger.error(f"Error during shutdown: {e}")
+             await self.transition_to(LifecycleState.STOPPED)
+
+     async def run_main_loop(
+         self,
+         processor: "BaseProcessor",
+         context: "ExecutionContext",
+     ) -> None:
+         """
+         Run processor main loop until shutdown requested.
+
+         Args:
+             processor: Processor instance to run
+             context: Execution context
+         """
+         if self._state != LifecycleState.RUNNING:
+             raise ValueError(f"Cannot run: state is {self._state.name}, expected RUNNING")
+
+         try:
+             await processor.run()
+             logger.info("Processor run() completed")
+
+         except asyncio.CancelledError:
+             logger.info("Processor run() cancelled")
+             raise
+
+         except Exception as e:
+             logger.error(f"Error in processor run(): {e}")
+             await self.transition_to(LifecycleState.FAILED)
+             raise
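
A sketch (not part of the package diff) of how the startup/main-loop/shutdown helpers above might be sequenced by a host. EchoProcessor is a hypothetical stand-in; the real processor base class lives in dory/core/processor.py, which is not shown in this excerpt.

import asyncio

from dory.core.context import ExecutionContext
from dory.core.lifecycle import LifecycleManager
from dory.types import LifecycleState


class EchoProcessor:
    # Hypothetical stand-in exposing the startup()/run()/shutdown() coroutines the manager awaits.
    async def startup(self) -> None:
        pass

    async def run(self) -> None:
        await asyncio.sleep(0.1)

    async def shutdown(self) -> None:
        pass


async def main() -> None:
    manager = LifecycleManager()
    processor = EchoProcessor()
    ctx = ExecutionContext.from_environment()

    await manager.run_startup(processor, timeout=60.0)        # CREATED -> STARTING -> RUNNING
    try:
        await manager.run_main_loop(processor, context=ctx)   # returns when run() finishes
    finally:
        await manager.run_shutdown(processor, timeout=30.0)   # -> SHUTTING_DOWN -> STOPPED

    assert manager.state in (LifecycleState.STOPPED, LifecycleState.FAILED)


asyncio.run(main())
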
dory/core/meta.py ADDED
@@ -0,0 +1,121 @@
+ """
+ Metaclass for automatic handler instrumentation.
+
+ Automatically applies @auto_instrument to all async methods
+ starting with "handle_" or "_handle_".
+
+ No manual decorators needed!
+ """
+
+ import inspect
+ import logging
+ from abc import ABCMeta
+ from typing import Any
+
+ logger = logging.getLogger(__name__)
+
+
+ class AutoInstrumentMeta(ABCMeta):
+     """
+     Metaclass that automatically applies @auto_instrument to handler methods.
+
+     This eliminates the need for developers to add decorators manually.
+
+     Usage:
+         class MyProcessor(BaseProcessor, metaclass=AutoInstrumentMeta):
+             async def handle_request(self, request):
+                 # Automatically instrumented!
+                 # - Request ID generated
+                 # - Request tracked
+                 # - Span created
+                 # - Errors classified
+                 return {"status": "ok"}
+
+             async def handle_webhook(self, webhook):
+                 # Also automatically instrumented!
+                 return {"received": True}
+
+             async def internal_method(self):
+                 # NOT instrumented (doesn't start with handle_)
+                 pass
+
+     Auto-instrumented methods:
+         - async def handle_*(...): Public handlers
+         - async def _handle_*(...): Private handlers
+
+     Not instrumented:
+         - Other methods (don't start with handle_)
+         - Sync methods
+         - Lifecycle methods (startup, shutdown, run)
+     """
+
+     # List of methods that should NOT be auto-instrumented
+     EXCLUDED_METHODS = {
+         "startup",
+         "shutdown",
+         "run",
+         "get_state",
+         "restore_state",
+         "on_state_restore_failed",
+         "on_rapid_restart_detected",
+         "on_health_check_failed",
+         "reset_caches",
+         "run_loop",
+         "is_shutting_down",
+     }
+
+     def __new__(mcs, name, bases, namespace):
+         """
+         Create new class with auto-instrumented handler methods.
+
+         Args:
+             name: Class name
+             bases: Base classes
+             namespace: Class namespace (attributes and methods)
+
+         Returns:
+             New class with auto-instrumented handlers
+         """
+         # Import here to avoid circular dependency
+         try:
+             from dory.auto_instrument import auto_instrument
+         except ImportError:
+             logger.warning(
+                 "auto_instrument decorator not available, skipping auto-instrumentation"
+             )
+             return super().__new__(mcs, name, bases, namespace)
+
+         # Count of instrumented methods
+         instrumented_count = 0
+
+         # Auto-instrument handler methods
+         for attr_name, attr_value in list(namespace.items()):
+             # Check if this is an async method
+             if not inspect.iscoroutinefunction(attr_value):
+                 continue
+
+             # Check if method should be instrumented
+             should_instrument = False
+
+             # Instrument methods starting with handle_ or _handle_
+             if attr_name.startswith("handle_") or attr_name.startswith("_handle_"):
+                 should_instrument = True
+
+             # Don't instrument excluded methods
+             if attr_name in mcs.EXCLUDED_METHODS:
+                 should_instrument = False
+
+             # Don't instrument special methods
+             if attr_name.startswith("__") and attr_name.endswith("__"):
+                 should_instrument = False
+
+             # Apply auto-instrumentation
+             if should_instrument:
+                 namespace[attr_name] = auto_instrument(attr_value)
+                 instrumented_count += 1
+                 logger.debug(f"Auto-instrumented method: {name}.{attr_name}")
+
+         if instrumented_count > 0:
+             logger.info(f"Auto-instrumented {instrumented_count} methods in {name}")
+
+         return super().__new__(mcs, name, bases, namespace)
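
A sketch (not part of the package diff) of which methods the metaclass touches. PaymentProcessor is hypothetical, and the example assumes the auto_instrument wrapper (defined in dory/auto_instrument.py, not shown here) passes the handler's return value through, as the docstring's {"status": "ok"} example suggests.

import asyncio

from dory.core.meta import AutoInstrumentMeta


class PaymentProcessor(metaclass=AutoInstrumentMeta):
    # handle_* and _handle_* coroutines are wrapped by auto_instrument at class-creation time.
    async def handle_charge(self, amount: float) -> dict:
        return {"charged": amount}

    async def _handle_refund(self, amount: float) -> dict:
        return {"refunded": amount}

    async def run(self) -> None:  # excluded lifecycle method, left untouched
        pass

    def handle_report(self) -> str:  # sync method, left untouched (not a coroutine)
        return "ok"


async def main() -> None:
    proc = PaymentProcessor()
    print(await proc.handle_charge(10.0))  # instrumented call; result passes through


asyncio.run(main())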