dory-sdk 2.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69)
  1. dory/__init__.py +70 -0
  2. dory/auto_instrument.py +142 -0
  3. dory/cli/__init__.py +5 -0
  4. dory/cli/main.py +290 -0
  5. dory/cli/templates.py +333 -0
  6. dory/config/__init__.py +23 -0
  7. dory/config/defaults.py +50 -0
  8. dory/config/loader.py +361 -0
  9. dory/config/presets.py +325 -0
  10. dory/config/schema.py +152 -0
  11. dory/core/__init__.py +27 -0
  12. dory/core/app.py +404 -0
  13. dory/core/context.py +209 -0
  14. dory/core/lifecycle.py +214 -0
  15. dory/core/meta.py +121 -0
  16. dory/core/modes.py +479 -0
  17. dory/core/processor.py +654 -0
  18. dory/core/signals.py +122 -0
  19. dory/decorators.py +142 -0
  20. dory/errors/__init__.py +117 -0
  21. dory/errors/classification.py +362 -0
  22. dory/errors/codes.py +495 -0
  23. dory/health/__init__.py +10 -0
  24. dory/health/probes.py +210 -0
  25. dory/health/server.py +306 -0
  26. dory/k8s/__init__.py +11 -0
  27. dory/k8s/annotation_watcher.py +184 -0
  28. dory/k8s/client.py +251 -0
  29. dory/k8s/pod_metadata.py +182 -0
  30. dory/logging/__init__.py +9 -0
  31. dory/logging/logger.py +175 -0
  32. dory/metrics/__init__.py +7 -0
  33. dory/metrics/collector.py +301 -0
  34. dory/middleware/__init__.py +36 -0
  35. dory/middleware/connection_tracker.py +608 -0
  36. dory/middleware/request_id.py +321 -0
  37. dory/middleware/request_tracker.py +501 -0
  38. dory/migration/__init__.py +11 -0
  39. dory/migration/configmap.py +260 -0
  40. dory/migration/serialization.py +167 -0
  41. dory/migration/state_manager.py +301 -0
  42. dory/monitoring/__init__.py +23 -0
  43. dory/monitoring/opentelemetry.py +462 -0
  44. dory/py.typed +2 -0
  45. dory/recovery/__init__.py +60 -0
  46. dory/recovery/golden_image.py +480 -0
  47. dory/recovery/golden_snapshot.py +561 -0
  48. dory/recovery/golden_validator.py +518 -0
  49. dory/recovery/partial_recovery.py +479 -0
  50. dory/recovery/recovery_decision.py +242 -0
  51. dory/recovery/restart_detector.py +142 -0
  52. dory/recovery/state_validator.py +187 -0
  53. dory/resilience/__init__.py +45 -0
  54. dory/resilience/circuit_breaker.py +454 -0
  55. dory/resilience/retry.py +389 -0
  56. dory/sidecar/__init__.py +6 -0
  57. dory/sidecar/main.py +75 -0
  58. dory/sidecar/server.py +329 -0
  59. dory/simple.py +342 -0
  60. dory/types.py +75 -0
  61. dory/utils/__init__.py +25 -0
  62. dory/utils/errors.py +59 -0
  63. dory/utils/retry.py +115 -0
  64. dory/utils/timeout.py +80 -0
  65. dory_sdk-2.1.0.dist-info/METADATA +663 -0
  66. dory_sdk-2.1.0.dist-info/RECORD +69 -0
  67. dory_sdk-2.1.0.dist-info/WHEEL +5 -0
  68. dory_sdk-2.1.0.dist-info/entry_points.txt +3 -0
  69. dory_sdk-2.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,389 @@
1
+ """
2
+ Retry with exponential backoff implementation.
3
+
4
+ Provides automatic retry logic with:
5
+ - Exponential backoff with jitter
6
+ - Retry budgets to prevent retry storms
7
+ - Per-exception-type retry policies
8
+ - Comprehensive metrics
9
+
10
+ Usage:
11
+ @retry_with_backoff(max_attempts=3)
12
+ async def call_api():
13
+ return await api.get()
14
+ """
15
+
16
import asyncio
import functools
import logging
import random
import time
from dataclasses import dataclass, field
from typing import Any, Callable, Optional, Tuple, Type, Union
22
+
23
+ logger = logging.getLogger(__name__)
24
+
25
+
26
class RetryExhaustedError(Exception):
    """Raised once every allowed retry attempt has failed.

    Attributes:
        attempts: Total number of attempts made before giving up.
        last_error: The final exception that caused the failure.
    """

    def __init__(self, attempts: int, last_error: Exception):
        message = (
            f"Retry exhausted after {attempts} attempts. Last error: {last_error}"
        )
        super().__init__(message)
        self.attempts = attempts
        self.last_error = last_error
35
+
36
+
37
@dataclass
class RetryPolicy:
    """
    Configuration for retry behavior.

    Attributes:
        max_attempts: Maximum number of retry attempts (default: 3)
        initial_delay: Initial delay in seconds (default: 1.0)
        max_delay: Maximum delay in seconds (default: 30.0)
        multiplier: Exponential backoff multiplier (default: 2.0)
        jitter: Add random jitter to prevent thundering herd (default: True)
        retryable_exceptions: Tuple of exceptions to retry (default: Exception)
        non_retryable_exceptions: Exceptions that should never be retried
        on_retry: Optional callback called on each retry attempt
    """

    max_attempts: int = 3
    initial_delay: float = 1.0
    max_delay: float = 30.0
    multiplier: float = 2.0
    jitter: bool = True
    retryable_exceptions: Tuple[Type[Exception], ...] = (Exception,)
    non_retryable_exceptions: Tuple[Type[Exception], ...] = ()
    on_retry: Optional[Callable[[int, Exception], None]] = None

    def calculate_delay(self, attempt: int) -> float:
        """
        Compute the backoff delay for a given (0-indexed) attempt.

        The base delay grows exponentially — initial_delay * multiplier**attempt —
        then is capped at max_delay. When jitter is enabled, a random offset of
        up to +/- 25% is applied to spread out concurrent retriers.

        Args:
            attempt: Current attempt number (0-indexed)

        Returns:
            Delay in seconds (never negative)
        """
        base = self.initial_delay * (self.multiplier ** attempt)
        capped = base if base < self.max_delay else self.max_delay

        if self.jitter:
            # +/- 25% spread prevents synchronized retry waves.
            spread = capped * 0.25
            capped += random.uniform(-spread, spread)

        # Jitter could push a tiny delay below zero; clamp it.
        return max(0, capped)

    def is_retryable(self, error: Exception) -> bool:
        """
        Decide whether an error qualifies for a retry.

        The non-retryable list wins over the retryable list when an
        exception type appears in (or is a subclass of) both.

        Args:
            error: The exception that occurred

        Returns:
            True if error should be retried, False otherwise
        """
        if isinstance(error, self.non_retryable_exceptions):
            return False
        return isinstance(error, self.retryable_exceptions)
99
+
100
+
101
@dataclass
class RetryBudget:
    """
    Retry budget to prevent retry storms.

    Limits the percentage of requests that can be retried within a time window.
    This prevents cascading failures where retries overwhelm the system.

    Attributes:
        budget_percent: Percentage of requests allowed to retry (0-100)
        window_seconds: Time window in seconds (default: 60)
        _requests: Total requests in current window
        _retries: Retry attempts in current window
        _window_start: Start time of current window
    """

    budget_percent: float = 20.0  # Allow 20% of requests to retry
    window_seconds: float = 60.0
    _requests: int = field(default=0, init=False)
    _retries: int = field(default=0, init=False)
    _window_start: float = field(default_factory=time.time, init=False)

    def can_retry(self) -> bool:
        """
        Check whether another retry fits inside the current budget.

        Returns:
            True if retry is within budget, False otherwise
        """
        self._reset_window_if_needed()

        # With no traffic recorded yet there is nothing to ration.
        if not self._requests:
            return True

        return (self._retries / self._requests) * 100 <= self.budget_percent

    def record_request(self):
        """Record a new request."""
        self._reset_window_if_needed()
        self._requests += 1

    def record_retry(self):
        """Record a retry attempt."""
        self._reset_window_if_needed()
        self._retries += 1

    def _reset_window_if_needed(self):
        """Zero the counters and restart the window once it has expired."""
        now = time.time()
        if now - self._window_start >= self.window_seconds:
            self._requests = 0
            self._retries = 0
            self._window_start = now

    def get_stats(self) -> dict:
        """Get current budget statistics for the active window."""
        self._reset_window_if_needed()
        ratio = (
            (self._retries / self._requests * 100) if self._requests > 0 else 0.0
        )
        used = (self._retries / self._requests * 100) if self._requests > 0 else 0
        return {
            "requests": self._requests,
            "retries": self._retries,
            "retry_ratio": ratio,
            "budget_remaining": self.budget_percent - used,
        }
170
+
171
+
172
class RetryContext:
    """
    Per-call bookkeeping for a retried function.

    Collects the attempt count, start time, and every raised error so
    callers can emit metrics across retry attempts.
    """

    def __init__(self, function_name: str, policy: RetryPolicy):
        self.function_name = function_name
        self.policy = policy
        self.attempt = 0
        self.start_time = time.time()
        self.errors: list = []

    def record_attempt(self, error: Optional[Exception] = None):
        """Record a retry attempt, invoking the policy's on_retry hook if set."""
        self.attempt += 1
        if error:
            self.errors.append(error)

        # User callback failures must not break the retry machinery itself.
        if self.policy.on_retry and error:
            try:
                self.policy.on_retry(self.attempt, error)
            except Exception as e:
                logger.warning(f"Error in on_retry callback: {e}")

    def get_metrics(self) -> dict:
        """Summarize attempts, elapsed seconds, and raised error types."""
        elapsed = time.time() - self.start_time
        return {
            "function": self.function_name,
            "attempts": self.attempt,
            "duration_seconds": elapsed,
            "errors": [type(err).__name__ for err in self.errors],
        }
206
+
207
+
208
def retry_with_backoff(
    max_attempts: int = 3,
    initial_delay: float = 1.0,
    max_delay: float = 30.0,
    multiplier: float = 2.0,
    jitter: bool = True,
    retryable_exceptions: Tuple[Type[Exception], ...] = (Exception,),
    non_retryable_exceptions: Tuple[Type[Exception], ...] = (),
    budget: Optional[RetryBudget] = None,
    on_retry: Optional[Callable[[int, Exception], None]] = None,
):
    """
    Decorator for automatic retry with exponential backoff.

    Works on both sync and async callables; the appropriate wrapper is
    selected automatically via asyncio.iscoroutinefunction.

    Example:
        @retry_with_backoff(max_attempts=3, initial_delay=1.0)
        async def call_api():
            return await api.get()

        # With custom exceptions
        @retry_with_backoff(
            max_attempts=5,
            retryable_exceptions=(ConnectionError, TimeoutError),
            non_retryable_exceptions=(ValueError,)
        )
        async def fetch_data():
            return await db.query()

    Args:
        max_attempts: Maximum retry attempts
        initial_delay: Initial delay in seconds
        max_delay: Maximum delay in seconds
        multiplier: Exponential backoff multiplier
        jitter: Add random jitter
        retryable_exceptions: Exceptions to retry
        non_retryable_exceptions: Exceptions to never retry
        budget: Optional RetryBudget to prevent retry storms
        on_retry: Optional callback called on each retry

    Returns:
        Decorated function with retry logic

    Raises:
        RetryExhaustedError: (from the wrapped call) when all attempts fail.
    """

    policy = RetryPolicy(
        max_attempts=max_attempts,
        initial_delay=initial_delay,
        max_delay=max_delay,
        multiplier=multiplier,
        jitter=jitter,
        retryable_exceptions=retryable_exceptions,
        non_retryable_exceptions=non_retryable_exceptions,
        on_retry=on_retry,
    )

    def decorator(func):
        # functools.wraps preserves __name__, __doc__, __module__,
        # __qualname__, and sets __wrapped__, so introspection and
        # signature tools keep working on the decorated function
        # (the previous manual __name__/__doc__ copy lost the rest).
        @functools.wraps(func)
        async def async_wrapper(*args, **kwargs):
            context = RetryContext(func.__name__, policy)

            if budget:
                budget.record_request()

            last_error = None

            for attempt in range(max_attempts):
                try:
                    result = await func(*args, **kwargs)
                    if attempt > 0:
                        logger.info(
                            f"Retry succeeded for {func.__name__} "
                            f"on attempt {attempt + 1}/{max_attempts}"
                        )
                    return result

                except Exception as e:
                    last_error = e
                    context.record_attempt(e)

                    # Check if retryable
                    if not policy.is_retryable(e):
                        logger.warning(
                            f"Non-retryable error in {func.__name__}: {type(e).__name__}"
                        )
                        raise

                    # Check if we have attempts left
                    if attempt >= max_attempts - 1:
                        logger.error(
                            f"Retry exhausted for {func.__name__} "
                            f"after {max_attempts} attempts. "
                            f"Metrics: {context.get_metrics()}"
                        )
                        raise RetryExhaustedError(max_attempts, e)

                    # Check retry budget; re-raise the original error when
                    # the budget disallows another attempt.
                    if budget and not budget.can_retry():
                        logger.warning(
                            f"Retry budget exhausted for {func.__name__}. "
                            f"Budget stats: {budget.get_stats()}"
                        )
                        raise

                    if budget:
                        budget.record_retry()

                    # Calculate delay and retry
                    delay = policy.calculate_delay(attempt)
                    logger.warning(
                        f"Retrying {func.__name__} after {delay:.2f}s "
                        f"(attempt {attempt + 1}/{max_attempts}). "
                        f"Error: {type(e).__name__}: {e}"
                    )
                    await asyncio.sleep(delay)

            # Should never reach here, but just in case
            raise RetryExhaustedError(max_attempts, last_error)

        @functools.wraps(func)
        def sync_wrapper(*args, **kwargs):
            """Synchronous wrapper for non-async functions."""
            context = RetryContext(func.__name__, policy)

            if budget:
                budget.record_request()

            last_error = None

            for attempt in range(max_attempts):
                try:
                    result = func(*args, **kwargs)
                    if attempt > 0:
                        logger.info(
                            f"Retry succeeded for {func.__name__} "
                            f"on attempt {attempt + 1}/{max_attempts}"
                        )
                    return result

                except Exception as e:
                    last_error = e
                    context.record_attempt(e)

                    if not policy.is_retryable(e):
                        logger.warning(
                            f"Non-retryable error in {func.__name__}: {type(e).__name__}"
                        )
                        raise

                    if attempt >= max_attempts - 1:
                        logger.error(
                            f"Retry exhausted for {func.__name__} "
                            f"after {max_attempts} attempts"
                        )
                        raise RetryExhaustedError(max_attempts, e)

                    # Budget check mirrors the async path: bail out with
                    # the original error when no budget remains.
                    if budget and not budget.can_retry():
                        logger.warning(
                            f"Retry budget exhausted for {func.__name__}"
                        )
                        raise

                    if budget:
                        budget.record_retry()

                    delay = policy.calculate_delay(attempt)
                    logger.warning(
                        f"Retrying {func.__name__} after {delay:.2f}s "
                        f"(attempt {attempt + 1}/{max_attempts}). "
                        f"Error: {type(e).__name__}: {e}"
                    )
                    time.sleep(delay)

            raise RetryExhaustedError(max_attempts, last_error)

        # Return appropriate wrapper based on function type
        if asyncio.iscoroutinefunction(func):
            return async_wrapper
        return sync_wrapper

    return decorator
@@ -0,0 +1,6 @@
1
+ """Dory Sidecar - Lightweight health proxy for non-SDK apps."""
2
+
3
+ from dory.sidecar.server import SidecarServer
4
+ from dory.sidecar.main import run_sidecar
5
+
6
+ __all__ = ["SidecarServer", "run_sidecar"]
dory/sidecar/main.py ADDED
@@ -0,0 +1,75 @@
1
+ """Entry point for the Dory sidecar."""
2
+
3
+ import asyncio
4
+ import logging
5
+ import os
6
+ import signal
7
+ import sys
8
+
9
+ from dory.sidecar.server import SidecarServer, SidecarConfig
10
+
11
+
12
def setup_logging() -> None:
    """Configure root logging for the sidecar from environment variables.

    DORY_LOG_LEVEL picks the level (default INFO, case-insensitive);
    DORY_LOG_FORMAT selects "json" for structured output, anything else
    for plain text. A stdout handler is attached to the root logger.
    """
    level_name = os.getenv("DORY_LOG_LEVEL", "INFO").upper()
    format_choice = os.getenv("DORY_LOG_FORMAT", "text")

    if format_choice == "json":
        import json

        class JsonFormatter(logging.Formatter):
            def format(self, record):
                payload = {
                    "timestamp": self.formatTime(record),
                    "level": record.levelname,
                    "logger": record.name,
                    "message": record.getMessage(),
                }
                return json.dumps(payload)

        chosen_formatter: logging.Formatter = JsonFormatter()
    else:
        chosen_formatter = logging.Formatter(
            "%(asctime)s [%(levelname)s] %(name)s: %(message)s"
        )

    stream_handler = logging.StreamHandler(sys.stdout)
    stream_handler.setFormatter(chosen_formatter)

    root = logging.getLogger()
    root.addHandler(stream_handler)
    # Fall back to INFO when DORY_LOG_LEVEL is not a known level name.
    root.setLevel(getattr(logging, level_name, logging.INFO))
41
+
42
+
43
def run_sidecar() -> None:
    """Run the sidecar server (CLI entry point).

    Configures logging, builds a SidecarServer from environment-driven
    config, and drives it on a dedicated event loop until SIGTERM or
    SIGINT cancels the running tasks. Always stops the server and closes
    the loop before returning.
    """
    setup_logging()
    logger = logging.getLogger("dory.sidecar")

    # Configuration is sourced entirely from environment variables.
    config = SidecarConfig.from_env()
    server = SidecarServer(config)

    # Use a dedicated loop so shutdown and cleanup below are fully
    # under this function's control.
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)

    # Handle shutdown signals
    def handle_signal(sig):
        logger.info(f"Received signal {sig}, shutting down...")
        # Cancelling every task makes run_until_complete below unwind
        # with CancelledError, which is treated as a clean shutdown.
        for task in asyncio.all_tasks(loop):
            task.cancel()

    for sig in (signal.SIGTERM, signal.SIGINT):
        # Bind sig as a default argument so each handler logs the
        # specific signal that fired (avoids late-binding closure bug).
        loop.add_signal_handler(sig, lambda s=sig: handle_signal(s))

    try:
        loop.run_until_complete(server.run_forever())
    except asyncio.CancelledError:
        pass
    finally:
        # Give the server a chance to release resources before the
        # loop is closed for good.
        loop.run_until_complete(server.stop())
        loop.close()

    logger.info("Sidecar shutdown complete")
72
+
73
+
74
if __name__ == "__main__":
    # Allow running this module directly (python -m dory.sidecar.main).
    run_sidecar()