dory-sdk 2.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dory/__init__.py +70 -0
- dory/auto_instrument.py +142 -0
- dory/cli/__init__.py +5 -0
- dory/cli/main.py +290 -0
- dory/cli/templates.py +333 -0
- dory/config/__init__.py +23 -0
- dory/config/defaults.py +50 -0
- dory/config/loader.py +361 -0
- dory/config/presets.py +325 -0
- dory/config/schema.py +152 -0
- dory/core/__init__.py +27 -0
- dory/core/app.py +404 -0
- dory/core/context.py +209 -0
- dory/core/lifecycle.py +214 -0
- dory/core/meta.py +121 -0
- dory/core/modes.py +479 -0
- dory/core/processor.py +654 -0
- dory/core/signals.py +122 -0
- dory/decorators.py +142 -0
- dory/errors/__init__.py +117 -0
- dory/errors/classification.py +362 -0
- dory/errors/codes.py +495 -0
- dory/health/__init__.py +10 -0
- dory/health/probes.py +210 -0
- dory/health/server.py +306 -0
- dory/k8s/__init__.py +11 -0
- dory/k8s/annotation_watcher.py +184 -0
- dory/k8s/client.py +251 -0
- dory/k8s/pod_metadata.py +182 -0
- dory/logging/__init__.py +9 -0
- dory/logging/logger.py +175 -0
- dory/metrics/__init__.py +7 -0
- dory/metrics/collector.py +301 -0
- dory/middleware/__init__.py +36 -0
- dory/middleware/connection_tracker.py +608 -0
- dory/middleware/request_id.py +321 -0
- dory/middleware/request_tracker.py +501 -0
- dory/migration/__init__.py +11 -0
- dory/migration/configmap.py +260 -0
- dory/migration/serialization.py +167 -0
- dory/migration/state_manager.py +301 -0
- dory/monitoring/__init__.py +23 -0
- dory/monitoring/opentelemetry.py +462 -0
- dory/py.typed +2 -0
- dory/recovery/__init__.py +60 -0
- dory/recovery/golden_image.py +480 -0
- dory/recovery/golden_snapshot.py +561 -0
- dory/recovery/golden_validator.py +518 -0
- dory/recovery/partial_recovery.py +479 -0
- dory/recovery/recovery_decision.py +242 -0
- dory/recovery/restart_detector.py +142 -0
- dory/recovery/state_validator.py +187 -0
- dory/resilience/__init__.py +45 -0
- dory/resilience/circuit_breaker.py +454 -0
- dory/resilience/retry.py +389 -0
- dory/sidecar/__init__.py +6 -0
- dory/sidecar/main.py +75 -0
- dory/sidecar/server.py +329 -0
- dory/simple.py +342 -0
- dory/types.py +75 -0
- dory/utils/__init__.py +25 -0
- dory/utils/errors.py +59 -0
- dory/utils/retry.py +115 -0
- dory/utils/timeout.py +80 -0
- dory_sdk-2.1.0.dist-info/METADATA +663 -0
- dory_sdk-2.1.0.dist-info/RECORD +69 -0
- dory_sdk-2.1.0.dist-info/WHEEL +5 -0
- dory_sdk-2.1.0.dist-info/entry_points.txt +3 -0
- dory_sdk-2.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,362 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Error classification system for intelligent recovery strategies.
|
|
3
|
+
|
|
4
|
+
Classifies exceptions into categories and recommends appropriate recovery actions.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import logging
|
|
8
|
+
from dataclasses import dataclass
|
|
9
|
+
from enum import Enum
|
|
10
|
+
from typing import Dict, Optional, Type
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class ErrorType(Enum):
    """Categories an exception can be classified into."""

    # Temporary condition; a retry is likely to succeed.
    TRANSIENT = "transient"

    # Logic error; retrying will not help.
    PERMANENT = "permanent"

    # Resource exhaustion.
    RESOURCE = "resource"

    # Failure of an external dependency.
    EXTERNAL = "external"

    # Application logic error.
    LOGIC = "logic"

    # Could not be classified.
    UNKNOWN = "unknown"
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class RecoveryAction(Enum):
    """Recovery actions that a classification can recommend."""

    # Retry with backoff.
    RETRY = "retry"

    # Route the call through a circuit breaker.
    CIRCUIT_BREAKER = "circuit_breaker"

    # Exponential backoff.
    BACKOFF = "backoff"

    # Scale resources.
    SCALE = "scale"

    # Reset to the golden image.
    GOLDEN_RESET = "golden_reset"

    # Enter degraded mode.
    DEGRADE = "degrade"

    # Alert an operator.
    ALERT = "alert"

    # Fail immediately.
    FAIL = "fail"

    # Log and continue.
    LOG = "log"
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
@dataclass
class ClassificationResult:
    """
    Outcome of classifying a single exception.

    Attributes:
        error_type: Category the exception was placed in
        recommended_action: Recovery action suggested for that category
        retryable: True when the error should be retried
        severity: One of "low", "medium", "high", "critical"
        details: Optional extra information about the classification
    """

    error_type: ErrorType
    recommended_action: RecoveryAction
    retryable: bool
    severity: str  # "low", "medium", "high", "critical"
    details: Optional[Dict] = None

    def __str__(self) -> str:
        # One-line, pipe-separated summary; `details` is intentionally omitted.
        parts = [
            f"Error: {self.error_type.value}",
            f"Action: {self.recommended_action.value}",
            f"Retryable: {self.retryable}",
            f"Severity: {self.severity}",
        ]
        return " | ".join(parts)
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
# Global error type registry
|
|
69
|
+
# Maps an exception class to the ErrorType it was registered with.
# Written by register_error_type(), emptied by clear_error_type_registry(),
# and consulted by ErrorClassifier before any name/message heuristics run.
_ERROR_TYPE_REGISTRY: Dict[Type[Exception], ErrorType] = {}
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def register_error_type(exception_class: Type[Exception], error_type: ErrorType):
    """
    Associate an exception class with an ErrorType in the global registry.

    A later registration for the same class replaces the earlier one.

    Args:
        exception_class: Exception class to map
        error_type: ErrorType to report for that class

    Example:
        register_error_type(MyCustomTimeout, ErrorType.TRANSIENT)
    """
    # Store first, then log, so the mapping is in place before anything else runs.
    _ERROR_TYPE_REGISTRY[exception_class] = error_type

    class_label = exception_class.__name__
    type_label = error_type.value
    logger.debug(f"Registered {class_label} as {type_label}")
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def clear_error_type_registry():
    """
    Remove every custom exception-to-ErrorType registration.

    The registry dict is emptied in place, so it keeps its identity.
    Handy in tests, or to fall back to the default heuristics.
    """
    _ERROR_TYPE_REGISTRY.clear()
    logger.debug("Cleared error type registry")
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
class ErrorClassifier:
    """
    Heuristic exception classifier.

    Maps an exception to an ErrorType using, in order: the custom registry
    (see register_error_type), the built-in ConnectionError/TimeoutError
    families, and substring patterns in the exception class name and message.
    The ErrorType then determines the recommended RecoveryAction, the
    retryable flag and the severity.

    Example:
        classifier = ErrorClassifier()

        try:
            await risky_operation()
        except Exception as e:
            result = classifier.classify(e)
            logger.info(f"Classification: {result}")

            if result.retryable:
                await retry_operation()
            elif result.recommended_action == RecoveryAction.CIRCUIT_BREAKER:
                await circuit_breaker.call(operation)
    """

    # Substrings searched for in the lower-cased exception class name.
    _TRANSIENT_NAME_PATTERNS = ("timeout", "connection", "network")
    _RESOURCE_NAME_PATTERNS = ("memory", "resource", "disk", "quota")
    _EXTERNAL_NAME_PATTERNS = ("http", "api", "client", "server")
    _LOGIC_NAME_PATTERNS = (
        "value",
        "type",
        "key",
        "attribute",
        "assertion",
        "notimplemented",
    )

    # Substrings searched for in the lower-cased exception message.
    _TRANSIENT_MESSAGE_PATTERNS = ("timeout", "connection refused")
    _RESOURCE_MESSAGE_PATTERNS = ("out of memory", "disk full", "quota exceeded")
    _PERMANENT_MESSAGE_PATTERNS = ("corrupt", "invalid state", "integrity")

    # HTTP status codes, compared against whole numbers found in the message.
    _SERVER_STATUS_CODES = frozenset({"500", "502", "503", "504", "429"})
    _CLIENT_STATUS_CODES = frozenset({"400", "401", "403", "404"})

    # Per-ErrorType lookup tables, built once instead of on every call.
    _ACTION_BY_TYPE = {
        ErrorType.TRANSIENT: RecoveryAction.RETRY,
        ErrorType.EXTERNAL: RecoveryAction.CIRCUIT_BREAKER,
        ErrorType.RESOURCE: RecoveryAction.BACKOFF,
        ErrorType.LOGIC: RecoveryAction.ALERT,
        ErrorType.PERMANENT: RecoveryAction.GOLDEN_RESET,
        ErrorType.UNKNOWN: RecoveryAction.LOG,
    }
    _RETRYABLE_TYPES = frozenset(
        {ErrorType.TRANSIENT, ErrorType.EXTERNAL, ErrorType.RESOURCE}
    )
    _SEVERITY_BY_TYPE = {
        ErrorType.TRANSIENT: "low",
        ErrorType.EXTERNAL: "medium",
        ErrorType.RESOURCE: "high",
        ErrorType.LOGIC: "high",
        ErrorType.PERMANENT: "critical",
        ErrorType.UNKNOWN: "medium",
    }

    def __init__(self):
        # Initialize built-in error mappings
        self._initialize_builtin_mappings()

    def _initialize_builtin_mappings(self):
        """
        Initialization hook called from __init__; currently a no-op.

        Classification is driven entirely by _determine_error_type() and the
        class-level pattern tables, so there is no per-instance state to set up.
        The method is kept so __init__ and any subclass override keep working.
        """

    @staticmethod
    def _contains_any(text: str, patterns) -> bool:
        """Return True if any of the given substrings occurs in text."""
        return any(pattern in text for pattern in patterns)

    @staticmethod
    def _numeric_tokens(text: str) -> set:
        """
        Return the maximal runs of ASCII digits in text as a set of strings.

        Comparing whole runs (rather than substrings) keeps "5000" from being
        mistaken for status code "500".
        """
        spaced = "".join(ch if "0" <= ch <= "9" else " " for ch in text)
        return set(spaced.split())

    def classify(self, error: Exception) -> ClassificationResult:
        """
        Classify an exception and recommend recovery strategy.

        Args:
            error: The exception to classify

        Returns:
            ClassificationResult with error type, recovery action, retryable
            flag, severity, and a details dict holding the exception class
            name ("exception_type") and its message ("message").
        """
        error_type = self._determine_error_type(error)

        result = ClassificationResult(
            error_type=error_type,
            recommended_action=self._recommend_action(error_type, error),
            retryable=self._is_retryable(error_type),
            severity=self._determine_severity(error_type, error),
            details={
                "exception_type": type(error).__name__,
                "message": str(error),
            },
        )

        # Lazy %-args: the result is only rendered when DEBUG is enabled.
        logger.debug("Classified %s: %s", type(error).__name__, result)
        return result

    def _determine_error_type(self, error: Exception) -> ErrorType:
        """
        Determine error type from exception.

        Checks run in this order and the first match wins: custom registry,
        transient, resource, external (refined by HTTP status code), logic,
        permanent; anything else is UNKNOWN.
        """
        # Custom registrations take precedence. The MRO starts with the exact
        # class and then lists its bases, so the most specific registration
        # wins and registering a base class also covers its subclasses.
        for klass in type(error).__mro__:
            registered = _ERROR_TYPE_REGISTRY.get(klass)
            if registered is not None:
                return registered

        name = type(error).__name__.lower()
        message = str(error).lower()
        contains_any = self._contains_any

        # Transient errors (network, timeout). The isinstance check catches
        # built-in subclasses whose names lack the patterns (BrokenPipeError).
        if isinstance(error, (ConnectionError, TimeoutError)) or contains_any(
            name, self._TRANSIENT_NAME_PATTERNS
        ):
            return ErrorType.TRANSIENT

        if contains_any(message, self._TRANSIENT_MESSAGE_PATTERNS):
            return ErrorType.TRANSIENT

        # Resource errors
        if contains_any(name, self._RESOURCE_NAME_PATTERNS):
            return ErrorType.RESOURCE

        if contains_any(message, self._RESOURCE_MESSAGE_PATTERNS):
            return ErrorType.RESOURCE

        # External dependency errors
        if contains_any(name, self._EXTERNAL_NAME_PATTERNS):
            codes = self._numeric_tokens(message)

            if codes & self._SERVER_STATUS_CODES:
                return ErrorType.EXTERNAL  # Server errors - use circuit breaker

            if codes & self._CLIENT_STATUS_CODES:
                return ErrorType.PERMANENT  # Client errors - don't retry

            return ErrorType.EXTERNAL

        # Logic errors (validation, type errors)
        if contains_any(name, self._LOGIC_NAME_PATTERNS):
            return ErrorType.LOGIC

        # Permanent errors (state corruption, etc.)
        if contains_any(message, self._PERMANENT_MESSAGE_PATTERNS):
            return ErrorType.PERMANENT

        return ErrorType.UNKNOWN

    def _recommend_action(
        self, error_type: ErrorType, error: Exception
    ) -> RecoveryAction:
        """
        Recommend recovery action based on error type.

        The exception itself is accepted for interface symmetry but is not
        consulted; unmapped types fall back to RecoveryAction.LOG.
        """
        return self._ACTION_BY_TYPE.get(error_type, RecoveryAction.LOG)

    def _is_retryable(self, error_type: ErrorType) -> bool:
        """Return True for TRANSIENT, EXTERNAL and RESOURCE errors."""
        return error_type in self._RETRYABLE_TYPES

    def _determine_severity(self, error_type: ErrorType, error: Exception) -> str:
        """
        Determine error severity.

        The exception itself is not consulted; unmapped types are "medium".
        """
        return self._SEVERITY_BY_TYPE.get(error_type, "medium")

    def classify_and_handle(self, error: Exception) -> ClassificationResult:
        """
        Classify error and automatically log appropriate message.

        The log level follows the severity: critical -> error,
        high -> warning, medium -> info, anything else -> debug. The
        recommended action is then logged at info level.

        Args:
            error: The exception to classify

        Returns:
            ClassificationResult
        """
        result = self.classify(error)

        log_message = f"{result.error_type.value.upper()} error: {type(error).__name__}: {error}"

        # Log based on severity
        log_by_severity = {
            "critical": logger.error,
            "high": logger.warning,
            "medium": logger.info,
        }
        log_by_severity.get(result.severity, logger.debug)(log_message)

        logger.info(f"Recommended action: {result.recommended_action.value}")

        return result
|
|
332
|
+
|
|
333
|
+
|
|
334
|
+
# Global classifier instance
|
|
335
|
+
# Shared module-level instance created at import time; classify_error()
# delegates to it so callers do not need to construct their own classifier.
_global_classifier = ErrorClassifier()
|
|
336
|
+
|
|
337
|
+
|
|
338
|
+
def classify_error(error: Exception) -> ClassificationResult:
    """
    Classify an exception using the shared module-level classifier.

    Args:
        error: Exception to classify

    Returns:
        ClassificationResult produced by the global ErrorClassifier
    """
    classification = _global_classifier.classify(error)
    return classification
|
|
349
|
+
|
|
350
|
+
|
|
351
|
+
def is_retryable(error: Exception) -> bool:
    """
    Report whether an exception is classified as worth retrying.

    Args:
        error: Exception to check

    Returns:
        The `retryable` flag of the exception's classification
    """
    return classify_error(error).retryable
|