dory-sdk 2.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. dory/__init__.py +70 -0
  2. dory/auto_instrument.py +142 -0
  3. dory/cli/__init__.py +5 -0
  4. dory/cli/main.py +290 -0
  5. dory/cli/templates.py +333 -0
  6. dory/config/__init__.py +23 -0
  7. dory/config/defaults.py +50 -0
  8. dory/config/loader.py +361 -0
  9. dory/config/presets.py +325 -0
  10. dory/config/schema.py +152 -0
  11. dory/core/__init__.py +27 -0
  12. dory/core/app.py +404 -0
  13. dory/core/context.py +209 -0
  14. dory/core/lifecycle.py +214 -0
  15. dory/core/meta.py +121 -0
  16. dory/core/modes.py +479 -0
  17. dory/core/processor.py +654 -0
  18. dory/core/signals.py +122 -0
  19. dory/decorators.py +142 -0
  20. dory/errors/__init__.py +117 -0
  21. dory/errors/classification.py +362 -0
  22. dory/errors/codes.py +495 -0
  23. dory/health/__init__.py +10 -0
  24. dory/health/probes.py +210 -0
  25. dory/health/server.py +306 -0
  26. dory/k8s/__init__.py +11 -0
  27. dory/k8s/annotation_watcher.py +184 -0
  28. dory/k8s/client.py +251 -0
  29. dory/k8s/pod_metadata.py +182 -0
  30. dory/logging/__init__.py +9 -0
  31. dory/logging/logger.py +175 -0
  32. dory/metrics/__init__.py +7 -0
  33. dory/metrics/collector.py +301 -0
  34. dory/middleware/__init__.py +36 -0
  35. dory/middleware/connection_tracker.py +608 -0
  36. dory/middleware/request_id.py +321 -0
  37. dory/middleware/request_tracker.py +501 -0
  38. dory/migration/__init__.py +11 -0
  39. dory/migration/configmap.py +260 -0
  40. dory/migration/serialization.py +167 -0
  41. dory/migration/state_manager.py +301 -0
  42. dory/monitoring/__init__.py +23 -0
  43. dory/monitoring/opentelemetry.py +462 -0
  44. dory/py.typed +2 -0
  45. dory/recovery/__init__.py +60 -0
  46. dory/recovery/golden_image.py +480 -0
  47. dory/recovery/golden_snapshot.py +561 -0
  48. dory/recovery/golden_validator.py +518 -0
  49. dory/recovery/partial_recovery.py +479 -0
  50. dory/recovery/recovery_decision.py +242 -0
  51. dory/recovery/restart_detector.py +142 -0
  52. dory/recovery/state_validator.py +187 -0
  53. dory/resilience/__init__.py +45 -0
  54. dory/resilience/circuit_breaker.py +454 -0
  55. dory/resilience/retry.py +389 -0
  56. dory/sidecar/__init__.py +6 -0
  57. dory/sidecar/main.py +75 -0
  58. dory/sidecar/server.py +329 -0
  59. dory/simple.py +342 -0
  60. dory/types.py +75 -0
  61. dory/utils/__init__.py +25 -0
  62. dory/utils/errors.py +59 -0
  63. dory/utils/retry.py +115 -0
  64. dory/utils/timeout.py +80 -0
  65. dory_sdk-2.1.0.dist-info/METADATA +663 -0
  66. dory_sdk-2.1.0.dist-info/RECORD +69 -0
  67. dory_sdk-2.1.0.dist-info/WHEEL +5 -0
  68. dory_sdk-2.1.0.dist-info/entry_points.txt +3 -0
  69. dory_sdk-2.1.0.dist-info/top_level.txt +1 -0
dory/core/modes.py ADDED
@@ -0,0 +1,479 @@
1
+ """
2
+ Processing Modes
3
+
4
+ Implements degraded mode and processing mode management for graceful degradation.
5
+ Allows processors to continue operating with reduced functionality instead of failing.
6
+
7
+ Processing Modes:
8
+ - FULL: Normal operation with all features
9
+ - DEGRADED: Reduced functionality, core operations only
10
+ - MINIMAL: Bare minimum processing, essential operations only
11
+ - RECOVERY: Recovery mode after failure, limited operations
12
+ - UNHEALTHY: System unhealthy, should not process new requests
13
+ """
14
+
15
+ import asyncio
16
+ import logging
17
+ from dataclasses import dataclass, field
18
+ from datetime import datetime
19
+ from enum import Enum
20
+ from typing import Optional, Dict, Any, List, Callable, Set
21
+
22
+ logger = logging.getLogger(__name__)
23
+
24
+
25
class ProcessingMode(Enum):
    """Operating levels of a processor, listed from FULL down to UNHEALTHY."""

    # Normal operation, all features available.
    FULL = "full"
    # Reduced functionality, core operations only.
    DEGRADED = "degraded"
    # Bare minimum, essential operations only.
    MINIMAL = "minimal"
    # Recovery mode after failure.
    RECOVERY = "recovery"
    # System unhealthy, should not process.
    UNHEALTHY = "unhealthy"
34
+
35
+
36
class ModeTransitionReason(Enum):
    """Why a processing-mode transition happened."""

    # Manual mode change.
    MANUAL = "manual"
    # High error rate detected.
    ERROR_RATE = "error_rate"
    # Memory/CPU exhaustion.
    RESOURCE_EXHAUSTION = "resource_exhaustion"
    # External dependency failed.
    DEPENDENCY_FAILURE = "dependency_failure"
    # Circuit breaker opened.
    CIRCUIT_OPEN = "circuit_open"
    # Attempting recovery.
    RECOVERY_ATTEMPT = "recovery_attempt"
    # Recovery successful.
    RECOVERY_SUCCESS = "recovery_success"
    # Health check failed.
    HEALTH_CHECK_FAILED = "health_check_failed"
    # System startup.
    STARTUP = "startup"
    # System shutdown.
    SHUTDOWN = "shutdown"
48
+
49
+
50
@dataclass
class ModeTransition:
    """
    Record of a single switch from one processing mode to another.
    """

    # Mode that was active before the switch.
    from_mode: ProcessingMode
    # Mode that became active.
    to_mode: ProcessingMode
    # Why the switch happened.
    reason: ModeTransitionReason
    # Time of the switch, as a float supplied by whoever builds the record.
    timestamp: float
    # Free-form extra details; each instance gets its own dict.
    metadata: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Return a plain dict with the enum fields replaced by their values."""
        return dict(
            from_mode=self.from_mode.value,
            to_mode=self.to_mode.value,
            reason=self.reason.value,
            timestamp=self.timestamp,
            metadata=self.metadata,
        )
70
+
71
+
72
@dataclass
class ModeConfig:
    """
    Configuration for a processing mode.
    """

    # Mode this configuration belongs to.
    mode: ProcessingMode
    # Feature names allowed in this mode; the single entry "*" allows every
    # feature that is not explicitly disabled.
    enabled_features: Set[str]
    # Feature names switched off in this mode; these always win over
    # enabled_features.
    disabled_features: Set[str]
    # Concurrency cap for this mode (None = no cap configured).
    max_concurrent_requests: Optional[int] = None
    # Per-operation timeout for this mode (None = no timeout configured).
    timeout_seconds: Optional[float] = None
    # Only process high-priority requests.
    priority_only: bool = False
    # Human-readable description of the mode.
    description: str = ""

    def is_feature_enabled(self, feature: str) -> bool:
        """
        Check if a feature is enabled in this mode.

        A feature is enabled when it is not listed in ``disabled_features``
        and either appears in ``enabled_features`` or ``enabled_features``
        contains the ``"*"`` wildcard. An explicit disable always wins.

        Args:
            feature: Feature name

        Returns:
            True if enabled
        """
        if feature in self.disabled_features:
            return False
        return "*" in self.enabled_features or feature in self.enabled_features
88
+
89
+
90
class ModeManager:
    """
    Manages processing modes and transitions between them.

    Features:
    - Timer-driven auto-recovery that steps the mode back up towards FULL
    - Mode transition history (bounded)
    - Feature availability by mode
    - Mode-specific configuration (concurrency cap, timeout, priority-only)
    - Transition callbacks (sync or async)

    Usage:
        manager = ModeManager()

        # Configure modes
        manager.configure_mode(
            ProcessingMode.DEGRADED,
            enabled_features={"core_processing"},
            disabled_features={"analytics", "notifications"},
        )

        # Transition to degraded mode
        await manager.transition_to(
            ProcessingMode.DEGRADED,
            reason=ModeTransitionReason.ERROR_RATE
        )

        # Check feature availability
        if manager.is_feature_enabled("analytics"):
            # Do analytics
            pass
    """

    def __init__(
        self,
        initial_mode: ProcessingMode = ProcessingMode.FULL,
        auto_recovery: bool = True,
        recovery_check_interval: float = 60.0,
    ):
        """
        Initialize mode manager.

        Safe to call without a running event loop: no asyncio API is touched
        here. The auto-recovery task is only created from ``transition_to``.

        Args:
            initial_mode: Starting processing mode
            auto_recovery: Automatically try to recover to higher modes
            recovery_check_interval: Interval for recovery checks (seconds)
        """
        self._current_mode = initial_mode
        self._auto_recovery = auto_recovery
        self._recovery_check_interval = recovery_check_interval

        # Mode configurations
        self._mode_configs: Dict[ProcessingMode, ModeConfig] = {}
        self._initialize_default_configs()

        # Transition history (oldest first, capped at _max_history entries)
        self._transition_history: List[ModeTransition] = []
        self._max_history = 100

        # Callbacks
        self._on_transition_callbacks: List[Callable] = []

        # Recovery task
        self._recovery_task: Optional[asyncio.Task] = None

        # Metrics
        self._transition_count = 0
        self._mode_durations: Dict[ProcessingMode, float] = {
            mode: 0.0 for mode in ProcessingMode
        }
        self._last_transition_time = self._now()

        logger.info(f"ModeManager initialized: mode={initial_mode.value}")

    @staticmethod
    def _now() -> float:
        """
        Return the current monotonic time in seconds.

        Used instead of ``asyncio.get_event_loop().time()`` so the manager can
        be constructed outside a running event loop. The stock asyncio loops
        report ``time.monotonic()`` as well, so values stay comparable.
        """
        import time  # function-scope import: not part of the module's imports

        return time.monotonic()

    def _initialize_default_configs(self) -> None:
        """Initialize default mode configurations."""
        # FULL mode - all features enabled
        self._mode_configs[ProcessingMode.FULL] = ModeConfig(
            mode=ProcessingMode.FULL,
            enabled_features={"*"},  # All features
            disabled_features=set(),
            description="Normal operation with all features",
        )

        # DEGRADED mode - core features only
        self._mode_configs[ProcessingMode.DEGRADED] = ModeConfig(
            mode=ProcessingMode.DEGRADED,
            enabled_features={"core_processing", "state_persistence", "error_handling"},
            disabled_features={"analytics", "notifications", "background_jobs"},
            max_concurrent_requests=50,
            description="Reduced functionality, core operations only",
        )

        # MINIMAL mode - essential operations only
        self._mode_configs[ProcessingMode.MINIMAL] = ModeConfig(
            mode=ProcessingMode.MINIMAL,
            enabled_features={"core_processing", "error_handling"},
            disabled_features={"analytics", "notifications", "background_jobs", "state_persistence"},
            max_concurrent_requests=10,
            priority_only=True,
            description="Bare minimum processing, essential operations only",
        )

        # RECOVERY mode - recovery operations
        self._mode_configs[ProcessingMode.RECOVERY] = ModeConfig(
            mode=ProcessingMode.RECOVERY,
            enabled_features={"error_handling", "recovery"},
            # "*" here is a marker only: is_feature_enabled() compares names
            # literally in disabled_features, so the two enabled features
            # above remain available.
            disabled_features={"*"},
            max_concurrent_requests=1,
            description="Recovery mode after failure",
        )

        # UNHEALTHY mode - no processing
        self._mode_configs[ProcessingMode.UNHEALTHY] = ModeConfig(
            mode=ProcessingMode.UNHEALTHY,
            enabled_features=set(),
            disabled_features={"*"},
            max_concurrent_requests=0,
            description="System unhealthy, should not process",
        )

    def configure_mode(
        self,
        mode: ProcessingMode,
        enabled_features: Optional[Set[str]] = None,
        disabled_features: Optional[Set[str]] = None,
        max_concurrent_requests: Optional[int] = None,
        timeout_seconds: Optional[float] = None,
        priority_only: bool = False,
        description: str = "",
    ) -> None:
        """
        Configure a processing mode.

        When the mode already has a configuration, only the arguments that
        were supplied overwrite it; otherwise a new configuration is created.

        Args:
            mode: Mode to configure
            enabled_features: Enabled feature names (any iterable of strings;
                stored as a new set)
            disabled_features: Disabled feature names (any iterable of
                strings; stored as a new set)
            max_concurrent_requests: Max concurrent requests in this mode
            timeout_seconds: Timeout for operations in this mode
            priority_only: Only process high-priority requests
            description: Mode description
        """
        # Copy into sets so lists/tuples are accepted and later mutation of
        # the caller's collection cannot silently change the stored config.
        enabled = None if enabled_features is None else set(enabled_features)
        disabled = None if disabled_features is None else set(disabled_features)

        config = self._mode_configs.get(mode)
        if config is not None:
            # Update existing config
            if enabled is not None:
                config.enabled_features = enabled
            if disabled is not None:
                config.disabled_features = disabled
            if max_concurrent_requests is not None:
                config.max_concurrent_requests = max_concurrent_requests
            if timeout_seconds is not None:
                config.timeout_seconds = timeout_seconds
            # NOTE(review): with the current defaults a falsy value means
            # "leave unchanged", so priority_only cannot be switched back to
            # False (nor description cleared) through this method.
            if priority_only:
                config.priority_only = priority_only
            if description:
                config.description = description
        else:
            # Create new config
            config = ModeConfig(
                mode=mode,
                enabled_features=enabled or set(),
                disabled_features=disabled or set(),
                max_concurrent_requests=max_concurrent_requests,
                timeout_seconds=timeout_seconds,
                priority_only=priority_only,
                description=description,
            )
            self._mode_configs[mode] = config

        # Report what is actually configured, not just what was passed in.
        logger.info(f"Mode configured: {mode.value} with {len(config.enabled_features)} features")

    async def transition_to(
        self,
        target_mode: ProcessingMode,
        reason: ModeTransitionReason,
        metadata: Optional[Dict[str, Any]] = None,
    ) -> bool:
        """
        Transition to a new processing mode.

        Records the transition, updates duration/count metrics, invokes the
        registered callbacks (awaiting coroutine functions), and starts the
        auto-recovery task when entering DEGRADED, MINIMAL or RECOVERY.

        Args:
            target_mode: Target mode
            reason: Reason for transition
            metadata: Optional metadata about transition

        Returns:
            True if transition successful (also when already in target_mode)
        """
        if target_mode == self._current_mode:
            logger.debug(f"Already in {target_mode.value} mode")
            return True

        logger.info(
            f"Mode transition: {self._current_mode.value} -> {target_mode.value} "
            f"(reason: {reason.value})"
        )

        # Record transition; the timestamp is monotonic seconds, not wall-clock.
        current_time = self._now()
        transition = ModeTransition(
            from_mode=self._current_mode,
            to_mode=target_mode,
            reason=reason,
            timestamp=current_time,
            metadata=metadata or {},
        )

        # Credit the elapsed time to the mode being left.
        self._mode_durations[self._current_mode] += current_time - self._last_transition_time
        self._last_transition_time = current_time

        # Change mode
        self._current_mode = target_mode

        # Update history, dropping the oldest entries beyond the cap.
        self._transition_history.append(transition)
        overflow = len(self._transition_history) - self._max_history
        if overflow > 0:
            del self._transition_history[:overflow]

        # Update metrics
        self._transition_count += 1

        # Call transition callbacks; one failing callback must not block the
        # others or undo the transition.
        for callback in self._on_transition_callbacks:
            try:
                if asyncio.iscoroutinefunction(callback):
                    await callback(transition)
                else:
                    callback(transition)
            except Exception as e:
                # logger.exception keeps the traceback alongside the message.
                logger.exception(f"Transition callback failed: {e}")

        logger.info(f"Mode transition complete: now in {target_mode.value}")

        # Start auto-recovery if transitioning to degraded/minimal/recovery
        if self._auto_recovery and target_mode in (
            ProcessingMode.DEGRADED,
            ProcessingMode.MINIMAL,
            ProcessingMode.RECOVERY,
        ):
            self._start_auto_recovery()

        return True

    def get_current_mode(self) -> ProcessingMode:
        """Get current processing mode."""
        return self._current_mode

    def is_feature_enabled(self, feature: str) -> bool:
        """
        Check if a feature is enabled in current mode.

        An explicit entry in the mode's disabled_features always wins;
        otherwise the feature must be listed in enabled_features or be
        covered by the "*" wildcard there.

        Args:
            feature: Feature name

        Returns:
            True if enabled; False when the current mode has no configuration
        """
        config = self._mode_configs.get(self._current_mode)
        if config is None:
            return False

        if feature in config.disabled_features:
            return False

        return "*" in config.enabled_features or feature in config.enabled_features

    def get_mode_config(self, mode: Optional[ProcessingMode] = None) -> ModeConfig:
        """
        Get configuration for a mode.

        Args:
            mode: Mode to get config for (current mode if None)

        Returns:
            ModeConfig

        Raises:
            KeyError: If the mode has no configuration
        """
        mode = mode or self._current_mode
        return self._mode_configs[mode]

    def can_process_requests(self) -> bool:
        """Check if system can process requests in current mode."""
        return self._current_mode != ProcessingMode.UNHEALTHY

    def get_max_concurrent_requests(self) -> Optional[int]:
        """Get max concurrent requests for current mode (None if not set)."""
        config = self._mode_configs.get(self._current_mode)
        return config.max_concurrent_requests if config else None

    def on_transition(self, callback: Callable) -> None:
        """
        Register a callback for mode transitions.

        Args:
            callback: Callable that receives ModeTransition; coroutine
                functions are awaited, plain callables are called directly
        """
        self._on_transition_callbacks.append(callback)

    def get_transition_history(self, limit: Optional[int] = None) -> List[ModeTransition]:
        """
        Get mode transition history.

        Args:
            limit: Maximum number of transitions to return (None = all,
                0 = empty list)

        Returns:
            List of transitions (most recent first)
        """
        history = list(reversed(self._transition_history))
        # Compare against None so that limit=0 yields an empty list instead
        # of the full history.
        if limit is not None:
            history = history[:limit]
        return history

    def get_stats(self) -> Dict[str, Any]:
        """
        Get mode manager statistics.

        ``mode_durations`` only contains time credited at past transitions;
        the time spent in the current mode since the last transition is not
        included.

        Returns:
            Dictionary of statistics
        """
        # NOTE(review): "core" is not a feature name used by any default mode
        # config ("core_processing" is) - confirm which one is intended.
        probed_features = ["core", "analytics", "notifications"]
        return {
            "current_mode": self._current_mode.value,
            "transition_count": self._transition_count,
            "mode_durations": {
                mode.value: duration
                for mode, duration in self._mode_durations.items()
            },
            "auto_recovery_enabled": self._auto_recovery,
            "features_enabled": sum(
                1 for f in probed_features if self.is_feature_enabled(f)
            ),
        }

    def _start_auto_recovery(self) -> None:
        """Start automatic recovery task unless one is already running."""
        if self._recovery_task and not self._recovery_task.done():
            return

        # Requires a running event loop; only called from transition_to().
        self._recovery_task = asyncio.create_task(self._auto_recovery_loop())

    async def _auto_recovery_loop(self) -> None:
        """
        Automatic recovery loop to attempt mode upgrades.

        Every ``recovery_check_interval`` seconds the mode is moved one step
        up (RECOVERY -> MINIMAL -> DEGRADED -> FULL) until FULL is reached or
        auto-recovery is switched off. No health condition is evaluated here.
        """
        logger.info("Starting auto-recovery loop")

        # current mode -> (next mode up, reason recorded for that step)
        upgrade_steps = {
            ProcessingMode.RECOVERY: (
                ProcessingMode.MINIMAL,
                ModeTransitionReason.RECOVERY_ATTEMPT,
            ),
            ProcessingMode.MINIMAL: (
                ProcessingMode.DEGRADED,
                ModeTransitionReason.RECOVERY_ATTEMPT,
            ),
            ProcessingMode.DEGRADED: (
                ProcessingMode.FULL,
                ModeTransitionReason.RECOVERY_SUCCESS,
            ),
        }

        while self._auto_recovery and self._current_mode != ProcessingMode.FULL:
            await asyncio.sleep(self._recovery_check_interval)

            # The mode may have changed while sleeping, so look it up afresh.
            step = upgrade_steps.get(self._current_mode)
            if step is None:
                # FULL or UNHEALTHY: nothing to upgrade on this tick.
                continue

            target_mode, reason = step
            logger.info(
                f"Attempting recovery: {self._current_mode.name} -> {target_mode.name}"
            )
            await self.transition_to(target_mode, reason)

        logger.info("Auto-recovery loop stopped")

    async def stop_auto_recovery(self) -> None:
        """Stop automatic recovery and wait for the recovery task to end."""
        self._auto_recovery = False

        # Drop the reference first so no stale, cancelled task is kept around.
        task, self._recovery_task = self._recovery_task, None
        if task is None:
            return

        task.cancel()
        try:
            await task
        except asyncio.CancelledError:
            pass