dory-sdk 2.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69)
  1. dory/__init__.py +70 -0
  2. dory/auto_instrument.py +142 -0
  3. dory/cli/__init__.py +5 -0
  4. dory/cli/main.py +290 -0
  5. dory/cli/templates.py +333 -0
  6. dory/config/__init__.py +23 -0
  7. dory/config/defaults.py +50 -0
  8. dory/config/loader.py +361 -0
  9. dory/config/presets.py +325 -0
  10. dory/config/schema.py +152 -0
  11. dory/core/__init__.py +27 -0
  12. dory/core/app.py +404 -0
  13. dory/core/context.py +209 -0
  14. dory/core/lifecycle.py +214 -0
  15. dory/core/meta.py +121 -0
  16. dory/core/modes.py +479 -0
  17. dory/core/processor.py +654 -0
  18. dory/core/signals.py +122 -0
  19. dory/decorators.py +142 -0
  20. dory/errors/__init__.py +117 -0
  21. dory/errors/classification.py +362 -0
  22. dory/errors/codes.py +495 -0
  23. dory/health/__init__.py +10 -0
  24. dory/health/probes.py +210 -0
  25. dory/health/server.py +306 -0
  26. dory/k8s/__init__.py +11 -0
  27. dory/k8s/annotation_watcher.py +184 -0
  28. dory/k8s/client.py +251 -0
  29. dory/k8s/pod_metadata.py +182 -0
  30. dory/logging/__init__.py +9 -0
  31. dory/logging/logger.py +175 -0
  32. dory/metrics/__init__.py +7 -0
  33. dory/metrics/collector.py +301 -0
  34. dory/middleware/__init__.py +36 -0
  35. dory/middleware/connection_tracker.py +608 -0
  36. dory/middleware/request_id.py +321 -0
  37. dory/middleware/request_tracker.py +501 -0
  38. dory/migration/__init__.py +11 -0
  39. dory/migration/configmap.py +260 -0
  40. dory/migration/serialization.py +167 -0
  41. dory/migration/state_manager.py +301 -0
  42. dory/monitoring/__init__.py +23 -0
  43. dory/monitoring/opentelemetry.py +462 -0
  44. dory/py.typed +2 -0
  45. dory/recovery/__init__.py +60 -0
  46. dory/recovery/golden_image.py +480 -0
  47. dory/recovery/golden_snapshot.py +561 -0
  48. dory/recovery/golden_validator.py +518 -0
  49. dory/recovery/partial_recovery.py +479 -0
  50. dory/recovery/recovery_decision.py +242 -0
  51. dory/recovery/restart_detector.py +142 -0
  52. dory/recovery/state_validator.py +187 -0
  53. dory/resilience/__init__.py +45 -0
  54. dory/resilience/circuit_breaker.py +454 -0
  55. dory/resilience/retry.py +389 -0
  56. dory/sidecar/__init__.py +6 -0
  57. dory/sidecar/main.py +75 -0
  58. dory/sidecar/server.py +329 -0
  59. dory/simple.py +342 -0
  60. dory/types.py +75 -0
  61. dory/utils/__init__.py +25 -0
  62. dory/utils/errors.py +59 -0
  63. dory/utils/retry.py +115 -0
  64. dory/utils/timeout.py +80 -0
  65. dory_sdk-2.1.0.dist-info/METADATA +663 -0
  66. dory_sdk-2.1.0.dist-info/RECORD +69 -0
  67. dory_sdk-2.1.0.dist-info/WHEEL +5 -0
  68. dory_sdk-2.1.0.dist-info/entry_points.txt +3 -0
  69. dory_sdk-2.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,518 @@
1
+ """
2
+ Golden Snapshot Validator
3
+
4
+ Validates snapshots and state before/after capture and restoration.
5
+ Implements:
6
+ - Schema validation
7
+ - Dependency checks
8
+ - State integrity verification
9
+ - Reset validation
10
+ """
11
+
12
+ import asyncio
13
+ import logging
14
+ from dataclasses import dataclass
15
+ from enum import Enum
16
+ from typing import Any, Dict, List, Optional, Callable, Set
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
+
21
class ValidationSeverity(Enum):
    """Severity label attached to every validation finding."""

    # Each member's value is the lowercase string emitted when an issue is
    # serialised (see ValidationIssue.to_dict).
    INFO = "info"
    WARNING = "warning"
    # ERROR and CRITICAL are the severities that make a validation fail
    # outside of strict mode.
    ERROR = "error"
    CRITICAL = "critical"
27
+
28
+
29
@dataclass
class ValidationIssue:
    """
    One finding reported by a validation pass.
    """
    # Name of the field the finding refers to.
    field: str
    # How serious the finding is.
    severity: ValidationSeverity
    # Human-readable description of the problem.
    message: str
    # Short machine-readable identifier (e.g. "WRONG_TYPE").
    code: str
    # Offending value, when one is available.
    value: Optional[Any] = None

    def to_dict(self) -> Dict[str, Any]:
        """Return this issue as a plain dictionary.

        The severity is emitted as its string value, and ``value`` is
        stringified unless it is ``None``.
        """
        payload: Dict[str, Any] = {
            "field": self.field,
            "severity": self.severity.value,
            "message": self.message,
            "code": self.code,
        }
        payload["value"] = None if self.value is None else str(self.value)
        return payload
49
+
50
+
51
@dataclass
class ValidationResult:
    """
    Outcome of one validation run: a verdict plus the findings behind it.
    """
    # Overall verdict of the run.
    passed: bool
    # Every finding collected during the run.
    issues: List[ValidationIssue]
    # Per-severity tallies; supplied by whoever builds the result.
    warnings_count: int = 0
    errors_count: int = 0
    critical_count: int = 0

    def has_errors(self) -> bool:
        """Return True when at least one ERROR or CRITICAL issue was counted."""
        return any(
            count > 0 for count in (self.errors_count, self.critical_count)
        )

    def has_critical(self) -> bool:
        """Return True when at least one CRITICAL issue was counted."""
        return 0 < self.critical_count

    def to_dict(self) -> Dict[str, Any]:
        """Return the result, including serialised issues, as a plain dict."""
        serialised_issues = []
        for entry in self.issues:
            serialised_issues.append(entry.to_dict())
        return {
            "passed": self.passed,
            "issues": serialised_issues,
            "warnings_count": self.warnings_count,
            "errors_count": self.errors_count,
            "critical_count": self.critical_count,
        }
79
+
80
+
81
class GoldenValidator:
    """
    Validates golden snapshots and state data.

    Features:
    - Schema validation (required fields, types)
    - Dependency checking (fields that must accompany other fields)
    - Round-trip verification (state before capture vs. after restore)
    - Reset verification (pre/post reset validation)
    - Custom validators, per field or for the whole state; a validator may
      be a plain callable or an ``async def`` function

    ``strict_mode`` only influences :meth:`validate_state`; the other
    validation methods fail on ERROR/CRITICAL issues only.

    Usage:
        validator = GoldenValidator()

        # Define schema
        validator.define_schema({
            "required_fields": ["processor_id", "state_version"],
            "field_types": {
                "processor_id": str,
                "counter": int,
            }
        })

        # Validate state
        result = await validator.validate_state(state_data)
        if not result.passed:
            print(f"Validation failed: {result.errors_count} errors")
    """

    # Reset levels accepted by validate_pre_reset().
    _VALID_RESET_LEVELS = ("SOFT", "MODERATE", "FULL", "FACTORY")

    def __init__(
        self,
        strict_mode: bool = False,
        allow_unknown_fields: bool = True,
    ):
        """
        Initialize validator.

        Args:
            strict_mode: When True, validate_state() fails on any issue
                (warnings included), not only on errors.
            allow_unknown_fields: When False, validate_state() reports a
                warning for every field the schema does not mention.
        """
        self.strict_mode = strict_mode
        self.allow_unknown_fields = allow_unknown_fields

        # Schema definition
        self._required_fields: Set[str] = set()
        self._field_types: Dict[str, type] = {}
        self._field_validators: Dict[str, List[Callable]] = {}
        self._dependencies: Dict[str, List[str]] = {}

        # Custom validators
        self._custom_validators: List[Callable] = []

        logger.info(
            "GoldenValidator initialized: strict_mode=%s, "
            "allow_unknown_fields=%s",
            strict_mode,
            allow_unknown_fields,
        )

    # ------------------------------------------------------------------
    # Schema configuration
    # ------------------------------------------------------------------

    @staticmethod
    def _as_name_list(names: Any) -> List[str]:
        """Normalise a schema entry to a list of field names.

        ``None`` becomes an empty list and a bare string becomes a
        one-element list, so "abc" is never split into "a", "b", "c".
        """
        if names is None:
            return []
        if isinstance(names, str):
            return [names]
        return list(names)

    def define_schema(
        self,
        schema: Dict[str, Any],
    ) -> None:
        """
        Define validation schema, replacing any previously defined one.

        The schema is copied, so later changes to the caller's dict do not
        alter the validator.

        Args:
            schema: Schema definition containing:
                - required_fields: List of required field names
                - field_types: Dict of field_name -> type (anything
                  ``isinstance`` accepts, including a tuple of types)
                - dependencies: Dict of field_name -> list of fields that
                  must be present whenever field_name is present

        Example:
            validator.define_schema({
                "required_fields": ["processor_id", "state_version"],
                "field_types": {
                    "processor_id": str,
                    "counter": int,
                    "data": dict,
                },
                "dependencies": {
                    # If session_id present, session_state required
                    "session_id": ["session_state"],
                }
            })
        """
        self._required_fields = set(
            self._as_name_list(schema.get("required_fields"))
        )
        self._field_types = dict(schema.get("field_types") or {})
        self._dependencies = {
            field: self._as_name_list(deps)
            for field, deps in (schema.get("dependencies") or {}).items()
        }

        logger.info(
            "Schema defined: %d required fields, %d typed fields, "
            "%d dependencies",
            len(self._required_fields),
            len(self._field_types),
            len(self._dependencies),
        )

    def add_field_validator(
        self,
        field: str,
        validator: Callable[[Any], bool],
    ) -> None:
        """
        Add a custom validator for a specific field.

        Args:
            field: Field name
            validator: Function (sync or async) that receives the field
                value and returns a truthy result if it is valid

        Example:
            validator.add_field_validator(
                "counter",
                lambda value: value >= 0
            )
        """
        self._field_validators.setdefault(field, []).append(validator)

    def add_custom_validator(
        self,
        validator: Callable[[Dict[str, Any]], bool],
    ) -> None:
        """
        Add a custom validator for the entire state.

        Args:
            validator: Function (sync or async) that receives the state
                dict and returns a truthy result if it is valid

        Example:
            validator.add_custom_validator(
                lambda state: state.get("counter", 0) < state.get("max_value", 100)
            )
        """
        self._custom_validators.append(validator)

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _type_label(expected_type: Any) -> str:
        """Return a readable name for an ``isinstance`` classinfo value.

        Tuples of types and other objects without ``__name__`` are valid
        classinfo values, so ``expected_type.__name__`` cannot be used
        unconditionally.
        """
        if isinstance(expected_type, tuple):
            return " or ".join(
                GoldenValidator._type_label(member) for member in expected_type
            )
        return getattr(expected_type, "__name__", repr(expected_type))

    @staticmethod
    async def _call_validator(validator_fn: Callable, argument: Any) -> bool:
        """Invoke a validator and return its verdict as a bool.

        A coroutine returned by an ``async def`` validator is awaited;
        without this the coroutine object itself would be treated as a
        truthy "valid" verdict.
        """
        outcome = validator_fn(argument)
        if asyncio.iscoroutine(outcome):
            outcome = await outcome
        return bool(outcome)

    def _known_fields(self) -> Set[str]:
        """Return every field name the configured schema mentions."""
        known: Set[str] = set(self._required_fields)
        known.update(self._field_types)
        known.update(self._field_validators)
        for field, deps in self._dependencies.items():
            known.add(field)
            known.update(self._as_name_list(deps))
        return known

    def _build_result(
        self,
        issues: List[ValidationIssue],
        apply_strict_mode: bool = False,
    ) -> ValidationResult:
        """Tally issues by severity and derive the pass/fail verdict.

        Args:
            issues: All issues collected by the caller.
            apply_strict_mode: When True and the validator is in strict
                mode, any issue at all fails the validation.
        """
        warnings_count = sum(
            1 for issue in issues if issue.severity == ValidationSeverity.WARNING
        )
        errors_count = sum(
            1 for issue in issues if issue.severity == ValidationSeverity.ERROR
        )
        critical_count = sum(
            1 for issue in issues if issue.severity == ValidationSeverity.CRITICAL
        )

        if apply_strict_mode and self.strict_mode:
            passed = not issues
        else:
            passed = errors_count == 0 and critical_count == 0

        return ValidationResult(
            passed=passed,
            issues=issues,
            warnings_count=warnings_count,
            errors_count=errors_count,
            critical_count=critical_count,
        )

    def _check_required_fields(
        self, state_data: Dict[str, Any]
    ) -> List[ValidationIssue]:
        """Report every required field that is absent from the state."""
        # Sorted so the issue order does not depend on set iteration order.
        return [
            ValidationIssue(
                field=field,
                severity=ValidationSeverity.ERROR,
                message=f"Required field '{field}' is missing",
                code="MISSING_REQUIRED_FIELD",
            )
            for field in sorted(self._required_fields, key=str)
            if field not in state_data
        ]

    def _check_field_types(
        self, state_data: Dict[str, Any]
    ) -> List[ValidationIssue]:
        """Report present fields whose value is not of the declared type."""
        issues: List[ValidationIssue] = []
        for field, expected_type in self._field_types.items():
            if field not in state_data:
                continue
            value = state_data[field]
            if isinstance(value, expected_type):
                continue
            issues.append(ValidationIssue(
                field=field,
                severity=ValidationSeverity.ERROR,
                message=(
                    f"Field '{field}' has wrong type: expected "
                    f"{self._type_label(expected_type)}, "
                    f"got {type(value).__name__}"
                ),
                code="WRONG_TYPE",
                value=value,
            ))
        return issues

    def _check_dependencies(
        self, state_data: Dict[str, Any]
    ) -> List[ValidationIssue]:
        """Report dependencies missing for fields that are present."""
        issues: List[ValidationIssue] = []
        for field, deps in self._dependencies.items():
            if field not in state_data:
                continue
            for dep in self._as_name_list(deps):
                if dep not in state_data:
                    issues.append(ValidationIssue(
                        field=dep,
                        severity=ValidationSeverity.WARNING,
                        message=f"Field '{field}' requires '{dep}' but it's missing",
                        code="MISSING_DEPENDENCY",
                    ))
        return issues

    def _check_unknown_fields(
        self, state_data: Dict[str, Any]
    ) -> List[ValidationIssue]:
        """Report state fields that the schema does not mention at all."""
        known_fields = self._known_fields()
        return [
            ValidationIssue(
                field=field,
                severity=ValidationSeverity.WARNING,
                message=f"Unknown field '{field}' found",
                code="UNKNOWN_FIELD",
            )
            for field in state_data
            if field not in known_fields
        ]

    async def _run_field_validators(
        self, state_data: Dict[str, Any]
    ) -> List[ValidationIssue]:
        """Run per-field validators against the fields that are present."""
        issues: List[ValidationIssue] = []
        for field, validators in self._field_validators.items():
            if field not in state_data:
                continue
            value = state_data[field]
            for validator_fn in validators:
                try:
                    is_valid = await self._call_validator(validator_fn, value)
                except Exception as exc:
                    # A misbehaving validator is reported as an issue
                    # instead of aborting the whole validation run.
                    issues.append(ValidationIssue(
                        field=field,
                        severity=ValidationSeverity.ERROR,
                        message=f"Field '{field}' validation raised exception: {exc}",
                        code="VALIDATION_EXCEPTION",
                        value=value,
                    ))
                    continue
                if not is_valid:
                    issues.append(ValidationIssue(
                        field=field,
                        severity=ValidationSeverity.ERROR,
                        message=f"Field '{field}' failed custom validation",
                        code="CUSTOM_VALIDATION_FAILED",
                        value=value,
                    ))
        return issues

    async def _run_custom_validators(
        self, state_data: Dict[str, Any]
    ) -> List[ValidationIssue]:
        """Run whole-state validators."""
        issues: List[ValidationIssue] = []
        for validator_fn in self._custom_validators:
            try:
                is_valid = await self._call_validator(validator_fn, state_data)
            except Exception as exc:
                # Same policy as field validators: report, do not abort.
                issues.append(ValidationIssue(
                    field="__global__",
                    severity=ValidationSeverity.ERROR,
                    message=f"Custom validation raised exception: {exc}",
                    code="VALIDATION_EXCEPTION",
                ))
                continue
            if not is_valid:
                issues.append(ValidationIssue(
                    field="__global__",
                    severity=ValidationSeverity.ERROR,
                    message="State failed custom validation",
                    code="CUSTOM_STATE_VALIDATION_FAILED",
                ))
        return issues

    def _log_result(
        self, result: ValidationResult, context: Optional[str]
    ) -> None:
        """Log the outcome of validate_state(), tagged with the context."""
        prefix = f"Validation[{context}]" if context else "Validation"
        if result.passed:
            logger.info("%s passed (%d warnings)", prefix, result.warnings_count)
            return

        logger.warning(
            "%s failed: %d errors, %d critical, %d warnings",
            prefix,
            result.errors_count,
            result.critical_count,
            result.warnings_count,
        )
        failing = (ValidationSeverity.ERROR, ValidationSeverity.CRITICAL)
        for issue in result.issues:
            if issue.severity in failing:
                logger.error(
                    "  [%s] %s: %s",
                    issue.severity.value, issue.field, issue.message,
                )
            elif self.strict_mode:
                # In strict mode these issues also count against the
                # verdict, so show them as well.
                logger.warning(
                    "  [%s] %s: %s",
                    issue.severity.value, issue.field, issue.message,
                )

    # ------------------------------------------------------------------
    # Public validation API
    # ------------------------------------------------------------------

    async def validate_state(
        self,
        state_data: Dict[str, Any],
        context: Optional[str] = None,
    ) -> ValidationResult:
        """
        Validate state data against schema.

        Checks run in this order: required fields, field types,
        dependencies, unknown fields (only when ``allow_unknown_fields`` is
        False), per-field validators, whole-state validators.

        Args:
            state_data: State data to validate
            context: Context for validation (e.g., "pre_capture",
                "post_restore"); used to tag the log output

        Returns:
            ValidationResult with issues found. In strict mode the result
            fails on any issue; otherwise only on ERROR/CRITICAL issues.
        """
        issues: List[ValidationIssue] = []
        issues.extend(self._check_required_fields(state_data))
        issues.extend(self._check_field_types(state_data))
        issues.extend(self._check_dependencies(state_data))
        if not self.allow_unknown_fields:
            issues.extend(self._check_unknown_fields(state_data))
        issues.extend(await self._run_field_validators(state_data))
        issues.extend(await self._run_custom_validators(state_data))

        result = self._build_result(issues, apply_strict_mode=True)
        self._log_result(result, context)
        return result

    async def validate_pre_capture(
        self,
        state_data: Dict[str, Any],
    ) -> ValidationResult:
        """
        Validate state before capturing snapshot.

        Args:
            state_data: State data to capture

        Returns:
            ValidationResult
        """
        logger.info("Running pre-capture validation")
        return await self.validate_state(state_data, context="pre_capture")

    async def validate_post_capture(
        self,
        original_state: Dict[str, Any],
        restored_state: Dict[str, Any],
    ) -> ValidationResult:
        """
        Validate state after capturing and restoring snapshot (round-trip test).

        A lost key or a changed value is an ERROR; a key that only exists in
        the restored state is a WARNING. ``strict_mode`` is not applied
        here: the result fails only on ERROR/CRITICAL issues.

        Args:
            original_state: Original state before capture
            restored_state: State restored from snapshot

        Returns:
            ValidationResult
        """
        logger.info("Running post-capture validation (round-trip)")
        issues: List[ValidationIssue] = []

        original_keys = set(original_state)
        restored_keys = set(restored_state)

        # Sorted so the issue order does not depend on set iteration order.
        for key in sorted(original_keys - restored_keys, key=str):
            issues.append(ValidationIssue(
                field=key,
                severity=ValidationSeverity.ERROR,
                message=f"Key '{key}' was lost during snapshot round-trip",
                code="MISSING_KEY",
            ))

        for key in sorted(restored_keys - original_keys, key=str):
            issues.append(ValidationIssue(
                field=key,
                severity=ValidationSeverity.WARNING,
                message=f"Extra key '{key}' appeared during snapshot round-trip",
                code="EXTRA_KEY",
            ))

        # Walk the original dict so shared keys are compared in a stable
        # (insertion) order.
        for key in original_state:
            if key not in restored_keys:
                continue
            if original_state[key] != restored_state[key]:
                issues.append(ValidationIssue(
                    field=key,
                    severity=ValidationSeverity.ERROR,
                    message=f"Value for '{key}' changed during snapshot round-trip",
                    code="VALUE_MISMATCH",
                    value=f"{original_state[key]} -> {restored_state[key]}",
                ))

        return self._build_result(issues)

    async def validate_pre_reset(
        self,
        processor_id: str,
        reset_level: str,
    ) -> ValidationResult:
        """
        Validate before performing reset.

        Args:
            processor_id: Processor ID to reset; must be non-empty
            reset_level: Reset level (SOFT, MODERATE, FULL, FACTORY);
                compared case-sensitively

        Returns:
            ValidationResult
        """
        logger.info(
            "Running pre-reset validation: processor=%s, level=%s",
            processor_id,
            reset_level,
        )
        issues: List[ValidationIssue] = []

        if not processor_id:
            issues.append(ValidationIssue(
                field="processor_id",
                severity=ValidationSeverity.CRITICAL,
                message="Processor ID is empty",
                code="EMPTY_PROCESSOR_ID",
            ))

        if reset_level not in self._VALID_RESET_LEVELS:
            issues.append(ValidationIssue(
                field="reset_level",
                severity=ValidationSeverity.ERROR,
                message=f"Invalid reset level: {reset_level}",
                code="INVALID_RESET_LEVEL",
                value=reset_level,
            ))

        return self._build_result(issues)

    async def validate_post_reset(
        self,
        processor_id: str,
        reset_level: str,
        reset_successful: bool,
    ) -> ValidationResult:
        """
        Validate after performing reset.

        Args:
            processor_id: Processor ID that was reset (used for logging)
            reset_level: Reset level used (not inspected by this check)
            reset_successful: Whether reset was successful

        Returns:
            ValidationResult that passes when the reset succeeded, and
            otherwise carries a single CRITICAL "RESET_FAILED" issue
        """
        logger.info(
            "Running post-reset validation: processor=%s, success=%s",
            processor_id,
            reset_successful,
        )
        issues: List[ValidationIssue] = []

        if not reset_successful:
            issues.append(ValidationIssue(
                field="reset_result",
                severity=ValidationSeverity.CRITICAL,
                message="Reset operation failed",
                code="RESET_FAILED",
            ))

        # Counts and verdict are derived from the issue list, so they cannot
        # drift apart.
        return self._build_result(issues)