dory-sdk 2.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. dory/__init__.py +70 -0
  2. dory/auto_instrument.py +142 -0
  3. dory/cli/__init__.py +5 -0
  4. dory/cli/main.py +290 -0
  5. dory/cli/templates.py +333 -0
  6. dory/config/__init__.py +23 -0
  7. dory/config/defaults.py +50 -0
  8. dory/config/loader.py +361 -0
  9. dory/config/presets.py +325 -0
  10. dory/config/schema.py +152 -0
  11. dory/core/__init__.py +27 -0
  12. dory/core/app.py +404 -0
  13. dory/core/context.py +209 -0
  14. dory/core/lifecycle.py +214 -0
  15. dory/core/meta.py +121 -0
  16. dory/core/modes.py +479 -0
  17. dory/core/processor.py +654 -0
  18. dory/core/signals.py +122 -0
  19. dory/decorators.py +142 -0
  20. dory/errors/__init__.py +117 -0
  21. dory/errors/classification.py +362 -0
  22. dory/errors/codes.py +495 -0
  23. dory/health/__init__.py +10 -0
  24. dory/health/probes.py +210 -0
  25. dory/health/server.py +306 -0
  26. dory/k8s/__init__.py +11 -0
  27. dory/k8s/annotation_watcher.py +184 -0
  28. dory/k8s/client.py +251 -0
  29. dory/k8s/pod_metadata.py +182 -0
  30. dory/logging/__init__.py +9 -0
  31. dory/logging/logger.py +175 -0
  32. dory/metrics/__init__.py +7 -0
  33. dory/metrics/collector.py +301 -0
  34. dory/middleware/__init__.py +36 -0
  35. dory/middleware/connection_tracker.py +608 -0
  36. dory/middleware/request_id.py +321 -0
  37. dory/middleware/request_tracker.py +501 -0
  38. dory/migration/__init__.py +11 -0
  39. dory/migration/configmap.py +260 -0
  40. dory/migration/serialization.py +167 -0
  41. dory/migration/state_manager.py +301 -0
  42. dory/monitoring/__init__.py +23 -0
  43. dory/monitoring/opentelemetry.py +462 -0
  44. dory/py.typed +2 -0
  45. dory/recovery/__init__.py +60 -0
  46. dory/recovery/golden_image.py +480 -0
  47. dory/recovery/golden_snapshot.py +561 -0
  48. dory/recovery/golden_validator.py +518 -0
  49. dory/recovery/partial_recovery.py +479 -0
  50. dory/recovery/recovery_decision.py +242 -0
  51. dory/recovery/restart_detector.py +142 -0
  52. dory/recovery/state_validator.py +187 -0
  53. dory/resilience/__init__.py +45 -0
  54. dory/resilience/circuit_breaker.py +454 -0
  55. dory/resilience/retry.py +389 -0
  56. dory/sidecar/__init__.py +6 -0
  57. dory/sidecar/main.py +75 -0
  58. dory/sidecar/server.py +329 -0
  59. dory/simple.py +342 -0
  60. dory/types.py +75 -0
  61. dory/utils/__init__.py +25 -0
  62. dory/utils/errors.py +59 -0
  63. dory/utils/retry.py +115 -0
  64. dory/utils/timeout.py +80 -0
  65. dory_sdk-2.1.0.dist-info/METADATA +663 -0
  66. dory_sdk-2.1.0.dist-info/RECORD +69 -0
  67. dory_sdk-2.1.0.dist-info/WHEEL +5 -0
  68. dory_sdk-2.1.0.dist-info/entry_points.txt +3 -0
  69. dory_sdk-2.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,454 @@
1
+ """
2
+ Circuit breaker pattern implementation.
3
+
4
+ Prevents cascading failures by stopping requests to failing dependencies.
5
+
6
+ States:
7
+ - CLOSED: Normal operation, requests go through
8
+ - OPEN: Too many failures, requests fail fast
9
+ - HALF_OPEN: Testing if dependency recovered
10
+
11
+ Usage:
12
+ breaker = CircuitBreaker(name="database", failure_threshold=5)
13
+
14
+ try:
15
+ result = await breaker.call(db.query, args)
16
+ except CircuitOpenError:
17
+ # Circuit is open, fail fast
18
+ result = get_cached_data()
19
+ """
20
+
21
+ import asyncio
22
+ import logging
23
+ import time
24
+ from dataclasses import dataclass
25
+ from enum import Enum
26
+ from typing import Any, Callable, Optional
27
+
28
+ logger = logging.getLogger(__name__)
29
+
30
+
31
class CircuitState(Enum):
    """The three states a circuit breaker can be in."""

    # Normal operation: calls are passed through.
    CLOSED = "closed"

    # Dependency considered failing: calls are rejected.
    OPEN = "open"

    # Recovery is being tested with a limited number of calls.
    HALF_OPEN = "half_open"
37
+
38
+
39
class CircuitOpenError(Exception):
    """
    Error signalling that a circuit breaker refused to run a call.

    Attributes:
        circuit_name: Name of the circuit that rejected the call
        next_attempt_time: Timestamp (same scale as ``time.time()``) supplied
            by the raiser as the earliest time a new attempt may be made
    """

    def __init__(self, circuit_name: str, next_attempt_time: float):
        # Remaining wait is computed once, at construction, and clamped so a
        # deadline already in the past is reported as zero seconds.
        remaining = next_attempt_time - time.time()
        wait_seconds = remaining if remaining > 0 else 0

        message = (
            f"Circuit '{circuit_name}' is OPEN. "
            f"Next attempt in {wait_seconds:.1f}s"
        )
        super().__init__(message)

        self.circuit_name = circuit_name
        self.next_attempt_time = next_attempt_time
50
+
51
+
52
@dataclass
class CircuitBreakerConfig:
    """
    Settings bundle describing how one circuit breaker behaves.

    Attributes:
        name: Identifier of the circuit breaker
        failure_threshold: Failure count that opens the circuit (default: 5)
        success_threshold: Half-open successes required to close (default: 2)
        timeout_seconds: Wait, in seconds, before half-open is tried (default: 60)
        half_open_max_calls: Concurrent call limit while half-open (default: 1)
        on_state_change: Optional callable receiving the old and new state
            on a state transition
    """

    # Required: the only field without a default.
    name: str

    # Thresholds for opening and closing the circuit.
    failure_threshold: int = 5
    success_threshold: int = 2

    # Recovery timing and half-open concurrency.
    timeout_seconds: float = 60.0
    half_open_max_calls: int = 1

    # Transition hook; None disables notification.
    on_state_change: Optional[Callable[[CircuitState, CircuitState], None]] = None
72
+
73
+
74
class CircuitBreaker:
    """
    Circuit breaker for fault tolerance.

    Tracks failures and successes to determine when to open/close the circuit.
    When open, requests fail fast without executing.

    State machine:
        - CLOSED -> OPEN once ``failure_threshold`` failures accumulate
          (a success while CLOSED resets the failure count).
        - OPEN -> HALF_OPEN on the first call made at least
          ``timeout_seconds`` after the last recorded failure.
        - HALF_OPEN -> CLOSED after ``success_threshold`` successes.
        - HALF_OPEN -> OPEN on any failure.

    Example:
        breaker = CircuitBreaker(name="api", failure_threshold=5)

        async def call_api():
            return await breaker.call(api.get, "/users")

        # With error handling
        try:
            result = await breaker.call(risky_operation)
        except CircuitOpenError as e:
            logger.warning(f"Circuit open: {e}")
            result = get_fallback_data()
    """

    def __init__(
        self,
        name: str,
        failure_threshold: int = 5,
        success_threshold: int = 2,
        timeout_seconds: float = 60.0,
        half_open_max_calls: int = 1,
        on_state_change: Optional[Callable[[CircuitState, CircuitState], None]] = None,
    ):
        """
        Initialize circuit breaker.

        Args:
            name: Unique name for this circuit breaker
            failure_threshold: Failures before opening circuit
            success_threshold: Successes in half-open to close
            timeout_seconds: Wait time before trying half-open
            half_open_max_calls: Max concurrent calls in half-open
            on_state_change: Optional callback for state changes
        """
        self.config = CircuitBreakerConfig(
            name=name,
            failure_threshold=failure_threshold,
            success_threshold=success_threshold,
            timeout_seconds=timeout_seconds,
            half_open_max_calls=half_open_max_calls,
            on_state_change=on_state_change,
        )

        self._state = CircuitState.CLOSED
        self._failure_count = 0
        self._success_count = 0
        self._last_failure_time: Optional[float] = None
        self._half_open_calls = 0
        # Bumped on every transition into HALF_OPEN. A call remembers the
        # epoch in which it took a half-open slot, so a call left over from an
        # earlier half-open window cannot release a slot of the current one.
        self._half_open_epoch = 0
        self._lock = asyncio.Lock()

        logger.info(
            f"Circuit breaker '{name}' initialized: "
            f"failure_threshold={failure_threshold}, "
            f"timeout={timeout_seconds}s"
        )

    @property
    def state(self) -> CircuitState:
        """Get current circuit state."""
        return self._state

    @property
    def is_closed(self) -> bool:
        """Check if circuit is closed (normal operation)."""
        return self._state == CircuitState.CLOSED

    @property
    def is_open(self) -> bool:
        """Check if circuit is open (failing)."""
        return self._state == CircuitState.OPEN

    @property
    def is_half_open(self) -> bool:
        """Check if circuit is half-open (testing)."""
        return self._state == CircuitState.HALF_OPEN

    async def call(self, func: Callable, *args, **kwargs) -> Any:
        """
        Execute function through circuit breaker.

        Coroutine functions are awaited; any other callable is invoked
        directly (synchronously) and its return value is passed back as-is.

        Args:
            func: Function to execute (can be sync or async)
            *args: Positional arguments for function
            **kwargs: Keyword arguments for function

        Returns:
            Function result

        Raises:
            CircuitOpenError: If circuit is open, or if it is half-open and
                ``half_open_max_calls`` calls are already in flight
            Exception: Any exception from the function (recorded as a
                failure and re-raised unchanged)
        """
        # Epoch of the half-open window in which this call took a slot, or
        # None when no slot was taken (call admitted while CLOSED).
        slot_epoch: Optional[int] = None

        async with self._lock:
            await self._check_state_transition()

            # Fail fast if circuit is open
            if self._state == CircuitState.OPEN:
                next_attempt = (
                    self._last_failure_time + self.config.timeout_seconds
                    if self._last_failure_time is not None
                    else time.time()
                )
                raise CircuitOpenError(self.config.name, next_attempt)

            # Limit concurrent calls in half-open
            if self._state == CircuitState.HALF_OPEN:
                if self._half_open_calls >= self.config.half_open_max_calls:
                    raise CircuitOpenError(
                        self.config.name,
                        time.time() + self.config.timeout_seconds,
                    )
                self._half_open_calls += 1
                slot_epoch = self._half_open_epoch

        # Execute function outside the lock so calls can run concurrently.
        try:
            if asyncio.iscoroutinefunction(func):
                result = await func(*args, **kwargs)
            else:
                result = func(*args, **kwargs)

            await self._on_success()
            return result

        except Exception as e:
            # Only Exception subclasses count as failures; anything else
            # propagates without being recorded as success or failure.
            await self._on_failure(e)
            raise

        finally:
            # Release the slot only if THIS call took one. Deciding from the
            # state at completion time would let a call admitted while CLOSED
            # decrement the counter below zero if it finishes in HALF_OPEN.
            if slot_epoch is not None:
                await self._release_half_open_slot(slot_epoch)

    async def _release_half_open_slot(self, epoch: int) -> None:
        """
        Give back a half-open slot taken by ``call``.

        Args:
            epoch: Value of ``_half_open_epoch`` when the slot was taken. If a
                newer half-open window has started since, the counter was
                already reset and there is nothing to release.
        """
        async with self._lock:
            if epoch == self._half_open_epoch and self._half_open_calls > 0:
                self._half_open_calls -= 1

    async def _check_state_transition(self) -> None:
        """
        Check if circuit should transition to half-open.

        Must be called with ``self._lock`` held.
        """
        if self._state == CircuitState.OPEN and self._last_failure_time is not None:
            time_since_failure = time.time() - self._last_failure_time

            if time_since_failure >= self.config.timeout_seconds:
                logger.info(
                    f"Circuit '{self.config.name}' transitioning OPEN -> HALF_OPEN "
                    f"after {time_since_failure:.1f}s timeout"
                )
                await self._transition_to(CircuitState.HALF_OPEN)

    async def _on_success(self) -> None:
        """Handle successful execution."""
        async with self._lock:
            if self._state == CircuitState.HALF_OPEN:
                self._success_count += 1
                logger.debug(
                    f"Circuit '{self.config.name}' success in HALF_OPEN: "
                    f"{self._success_count}/{self.config.success_threshold}"
                )

                if self._success_count >= self.config.success_threshold:
                    logger.info(
                        f"Circuit '{self.config.name}' transitioning HALF_OPEN -> CLOSED "
                        f"after {self._success_count} successes"
                    )
                    await self._transition_to(CircuitState.CLOSED)

            elif self._state == CircuitState.CLOSED:
                # Reset failure count on success in closed state
                if self._failure_count > 0:
                    self._failure_count = 0

    async def _on_failure(self, error: Exception) -> None:
        """
        Handle failed execution.

        Args:
            error: Exception raised by the wrapped function
        """
        async with self._lock:
            self._failure_count += 1
            self._last_failure_time = time.time()

            logger.warning(
                f"Circuit '{self.config.name}' failure #{self._failure_count}: "
                f"{type(error).__name__}: {error}"
            )

            if self._state == CircuitState.HALF_OPEN:
                # Any failure in half-open reopens the circuit
                logger.warning(
                    f"Circuit '{self.config.name}' transitioning HALF_OPEN -> OPEN "
                    f"due to failure"
                )
                await self._transition_to(CircuitState.OPEN)

            elif (
                self._state == CircuitState.CLOSED
                and self._failure_count >= self.config.failure_threshold
            ):
                logger.error(
                    f"Circuit '{self.config.name}' transitioning CLOSED -> OPEN "
                    f"after {self._failure_count} failures"
                )
                await self._transition_to(CircuitState.OPEN)

    async def _transition_to(self, new_state: CircuitState) -> None:
        """
        Transition to new circuit state.

        Must be called with ``self._lock`` held. Exceptions raised by the
        ``on_state_change`` callback are logged and suppressed.

        Args:
            new_state: Target state
        """
        old_state = self._state
        self._state = new_state

        # Reset counters based on new state
        if new_state == CircuitState.CLOSED:
            self._failure_count = 0
            self._success_count = 0
        elif new_state == CircuitState.HALF_OPEN:
            self._success_count = 0
            self._half_open_calls = 0
            # New half-open window: slots taken in earlier windows are void.
            self._half_open_epoch += 1
        elif new_state == CircuitState.OPEN:
            self._success_count = 0

        # Call state change callback
        if self.config.on_state_change:
            try:
                self.config.on_state_change(old_state, new_state)
            except Exception as e:
                logger.warning(f"Error in on_state_change callback: {e}")

    def get_stats(self) -> dict:
        """
        Get circuit breaker statistics.

        Returns:
            Dictionary with current stats
        """
        return {
            "name": self.config.name,
            "state": self._state.value,
            "failure_count": self._failure_count,
            "success_count": self._success_count,
            "last_failure_time": self._last_failure_time,
            "half_open_calls": self._half_open_calls,
            "config": {
                "failure_threshold": self.config.failure_threshold,
                "success_threshold": self.config.success_threshold,
                "timeout_seconds": self.config.timeout_seconds,
            },
        }

    async def reset(self) -> None:
        """
        Manually reset circuit breaker to CLOSED state.

        Use with caution - typically for admin/testing purposes only.
        """
        async with self._lock:
            logger.warning(
                f"Circuit '{self.config.name}' manually reset to CLOSED "
                f"from {self._state.value}"
            )
            await self._transition_to(CircuitState.CLOSED)

    async def open(self) -> None:
        """
        Manually open circuit breaker.

        The open timeout is measured from the moment of this call.

        Use for planned maintenance or testing.
        """
        async with self._lock:
            logger.warning(
                f"Circuit '{self.config.name}' manually opened "
                f"from {self._state.value}"
            )
            # Stamp the time first so the breaker's fields are consistent by
            # the time the on_state_change callback runs.
            self._last_failure_time = time.time()
            await self._transition_to(CircuitState.OPEN)
350
+
351
+
352
class CircuitBreakerRegistry:
    """
    Global registry for circuit breakers.

    Allows managing multiple circuit breakers from a central location.

    Example:
        registry = CircuitBreakerRegistry()
        # register() is a coroutine and must be awaited
        await registry.register("database", failure_threshold=5)
        await registry.register("api", failure_threshold=3)

        # Use circuit breakers
        result = await registry.call("database", db.query, sql)

        # Get stats
        stats = registry.get_all_stats()
    """

    def __init__(self):
        self._breakers: dict[str, CircuitBreaker] = {}
        self._lock = asyncio.Lock()

    async def register(
        self,
        name: str,
        failure_threshold: int = 5,
        success_threshold: int = 2,
        timeout_seconds: float = 60.0,
        **kwargs,
    ) -> CircuitBreaker:
        """
        Register a new circuit breaker.

        If a breaker with this name already exists it is returned unchanged
        and the supplied settings are ignored (a warning is logged).

        Args:
            name: Unique circuit breaker name
            failure_threshold: Failures before opening
            success_threshold: Successes to close
            timeout_seconds: Timeout before half-open
            **kwargs: Additional CircuitBreaker arguments

        Returns:
            Registered CircuitBreaker instance
        """
        async with self._lock:
            if name in self._breakers:
                logger.warning(
                    f"Circuit breaker '{name}' already registered, returning existing"
                )
                return self._breakers[name]

            breaker = CircuitBreaker(
                name=name,
                failure_threshold=failure_threshold,
                success_threshold=success_threshold,
                timeout_seconds=timeout_seconds,
                **kwargs,
            )
            self._breakers[name] = breaker
            return breaker

    def get(self, name: str) -> Optional[CircuitBreaker]:
        """Get circuit breaker by name, or None if it is not registered."""
        return self._breakers.get(name)

    async def call(self, name: str, func: Callable, *args, **kwargs) -> Any:
        """
        Execute function through named circuit breaker.

        Args:
            name: Circuit breaker name
            func: Function to execute
            *args: Positional arguments
            **kwargs: Keyword arguments

        Returns:
            Function result

        Raises:
            ValueError: If circuit breaker not found
        """
        breaker = self.get(name)
        if breaker is None:
            raise ValueError(f"Circuit breaker '{name}' not registered")

        return await breaker.call(func, *args, **kwargs)

    def get_all_stats(self) -> dict:
        """Get stats for all registered circuit breakers, keyed by name."""
        return {name: breaker.get_stats() for name, breaker in self._breakers.items()}

    async def reset_all(self) -> None:
        """Reset all circuit breakers to CLOSED state."""
        # Iterate over a snapshot: each reset() is awaited, and a breaker
        # registered while this loop is suspended would otherwise raise
        # "dictionary changed size during iteration".
        for breaker in list(self._breakers.values()):
            await breaker.reset()
446
+
447
+
448
# Module-level registry instance, created once at import time
_global_registry = CircuitBreakerRegistry()


def get_global_registry() -> CircuitBreakerRegistry:
    """Return the module-level circuit breaker registry instance."""
    registry = _global_registry
    return registry