dory-processor-sdk 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. dory/__init__.py +101 -0
  2. dory/auth/__init__.py +10 -0
  3. dory/auth/oauth2.py +153 -0
  4. dory/auto_instrument.py +142 -0
  5. dory/cli/__init__.py +5 -0
  6. dory/cli/main.py +137 -0
  7. dory/cli/templates.py +123 -0
  8. dory/config/__init__.py +23 -0
  9. dory/config/defaults.py +24 -0
  10. dory/config/loader.py +430 -0
  11. dory/config/presets.py +73 -0
  12. dory/config/schema.py +84 -0
  13. dory/core/__init__.py +27 -0
  14. dory/core/app.py +434 -0
  15. dory/core/context.py +209 -0
  16. dory/core/lifecycle.py +214 -0
  17. dory/core/meta.py +121 -0
  18. dory/core/modes.py +479 -0
  19. dory/core/processor.py +564 -0
  20. dory/core/signals.py +122 -0
  21. dory/decorators.py +142 -0
  22. dory/edge/__init__.py +88 -0
  23. dory/edge/adaptive.py +644 -0
  24. dory/edge/detector.py +546 -0
  25. dory/edge/fencing.py +488 -0
  26. dory/edge/heartbeat.py +598 -0
  27. dory/edge/role.py +419 -0
  28. dory/errors/__init__.py +139 -0
  29. dory/errors/classification.py +362 -0
  30. dory/errors/codes.py +498 -0
  31. dory/geo/__init__.py +40 -0
  32. dory/geo/geolocalizer.py +1034 -0
  33. dory/health/__init__.py +12 -0
  34. dory/health/probes.py +210 -0
  35. dory/health/server.py +635 -0
  36. dory/k8s/__init__.py +80 -0
  37. dory/k8s/annotation_watcher.py +184 -0
  38. dory/k8s/client.py +251 -0
  39. dory/k8s/labels.py +505 -0
  40. dory/k8s/pod_metadata.py +182 -0
  41. dory/logging/__init__.py +9 -0
  42. dory/logging/logger.py +148 -0
  43. dory/metrics/__init__.py +7 -0
  44. dory/metrics/collector.py +301 -0
  45. dory/middleware/__init__.py +46 -0
  46. dory/middleware/connection_tracker.py +608 -0
  47. dory/middleware/request_id.py +325 -0
  48. dory/middleware/request_tracker.py +511 -0
  49. dory/migration/__init__.py +33 -0
  50. dory/migration/configmap.py +232 -0
  51. dory/migration/s3_store.py +594 -0
  52. dory/migration/serialization.py +135 -0
  53. dory/migration/state_manager.py +286 -0
  54. dory/migration/transfer.py +382 -0
  55. dory/monitoring/__init__.py +29 -0
  56. dory/monitoring/opentelemetry.py +489 -0
  57. dory/output/__init__.py +31 -0
  58. dory/output/envelope.py +137 -0
  59. dory/output/formatter.py +113 -0
  60. dory/output/rabbitmq.py +632 -0
  61. dory/output/routing.py +318 -0
  62. dory/output/validator.py +199 -0
  63. dory/py.typed +2 -0
  64. dory/recovery/__init__.py +60 -0
  65. dory/recovery/golden_image.py +487 -0
  66. dory/recovery/golden_snapshot.py +713 -0
  67. dory/recovery/golden_validator.py +518 -0
  68. dory/recovery/partial_recovery.py +482 -0
  69. dory/recovery/recovery_decision.py +242 -0
  70. dory/recovery/restart_detector.py +142 -0
  71. dory/recovery/state_validator.py +183 -0
  72. dory/resilience/__init__.py +45 -0
  73. dory/resilience/circuit_breaker.py +457 -0
  74. dory/resilience/retry.py +389 -0
  75. dory/simple.py +342 -0
  76. dory/types.py +68 -0
  77. dory/utils/__init__.py +31 -0
  78. dory/utils/errors.py +59 -0
  79. dory/utils/retry.py +115 -0
  80. dory/utils/timeout.py +80 -0
  81. dory_processor_sdk-0.0.1.dist-info/METADATA +424 -0
  82. dory_processor_sdk-0.0.1.dist-info/RECORD +86 -0
  83. dory_processor_sdk-0.0.1.dist-info/WHEEL +5 -0
  84. dory_processor_sdk-0.0.1.dist-info/entry_points.txt +2 -0
  85. dory_processor_sdk-0.0.1.dist-info/licenses/LICENSE +201 -0
  86. dory_processor_sdk-0.0.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,713 @@
1
+ """
2
+ Golden Snapshot Manager
3
+
4
+ Captures and manages golden snapshots of processor state to prevent
5
+ 100% data loss during resets. Implements:
6
+ - Snapshot capture with checksums
7
+ - Versioned snapshot storage using StateManager
8
+ - Snapshot validation
9
+ - Restoration from snapshots
10
+
11
+ Storage: Uses StateManager to persist snapshots to the same backend
12
+ as processor state (ConfigMap in production, local file in development).
13
+ This ensures snapshots survive pod restarts and node failures.
14
+ """
15
+
16
+ import asyncio
17
+ import hashlib
18
+ import json
19
+ import logging
20
+ import time
21
+ from dataclasses import dataclass, field, asdict
22
+ from enum import Enum
23
+ from typing import Any, Dict, Optional, List, Callable, TYPE_CHECKING
24
+ import gzip
25
+ import base64
26
+
27
+ if TYPE_CHECKING:
28
+ from dory.migration.state_manager import StateManager
29
+
30
+ logger = logging.getLogger(__name__)
31
+
32
+
33
class SnapshotStorageError(Exception):
    """Signals that persisting, loading, or locating a snapshot failed."""
36
+
37
+
38
class SnapshotValidationError(Exception):
    """Signals that snapshot content failed a validation check."""
41
+
42
+
43
class SnapshotFormat(Enum):
    """Identifiers for the on-disk encoding of a snapshot payload."""

    # Plain UTF-8 JSON text
    JSON = "json"
    # Gzip-compressed JSON
    JSON_GZ = "json.gz"
    # Opaque binary payload
    BINARY = "binary"
48
+
49
+
50
@dataclass
class SnapshotMetadata:
    """
    Descriptive record stored alongside each golden snapshot.

    Carries identity (snapshot and processor IDs), creation time, the
    state schema version, the payload checksum and size, encoding flags,
    restore bookkeeping, and free-form tags.
    """

    snapshot_id: str
    processor_id: str
    created_at: float  # epoch seconds, compared against time.time()
    state_version: str
    checksum: str
    size_bytes: int
    compressed: bool = False
    format: str = "json"
    validation_passed: bool = True
    restore_count: int = 0
    last_restored_at: Optional[float] = None
    tags: Dict[str, str] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Return the metadata as a plain dictionary (fields copied)."""
        as_mapping = asdict(self)
        return as_mapping

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "SnapshotMetadata":
        """Build a metadata instance from a dictionary of field values."""
        return cls(**data)

    def age_seconds(self) -> float:
        """Return how many seconds have elapsed since ``created_at``."""
        now = time.time()
        return now - self.created_at
82
+
83
+
84
@dataclass
class Snapshot:
    """
    A snapshot's metadata paired with the state payload it describes.
    """

    metadata: SnapshotMetadata
    state_data: Dict[str, Any]

    def to_dict(self) -> Dict[str, Any]:
        """Return a dictionary with ``metadata`` and ``state_data`` keys."""
        metadata_dict = self.metadata.to_dict()
        return {"metadata": metadata_dict, "state_data": self.state_data}

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "Snapshot":
        """Rebuild a snapshot from the dictionary form produced by ``to_dict``."""
        parsed_metadata = SnapshotMetadata.from_dict(data["metadata"])
        return cls(metadata=parsed_metadata, state_data=data["state_data"])
106
+
107
+
108
class GoldenSnapshotManager:
    """
    Manages golden snapshots of processor state.

    Features:
    - Automatic snapshot capture at key points
    - Checksum validation
    - Multiple snapshot versions
    - Compression support
    - Restore with validation
    - Snapshot lifecycle management
    - Persistent storage via StateManager (ConfigMap/S3/PVC)

    All snapshots of one processor are stored as a single StateManager
    entry (a list, newest first). An in-memory cache mirrors that entry
    and is only updated after a write to the StateManager has succeeded.

    Usage:
        from dory.migration.state_manager import StateManager

        state_manager = StateManager(backend="configmap")
        manager = GoldenSnapshotManager(state_manager=state_manager)

        # Capture snapshot
        snapshot = await manager.capture_snapshot(
            processor_id="my-processor",
            state_data={"key": "value"},
            tags={"version": "1.0"}
        )

        # List snapshots
        snapshots = await manager.list_snapshots(processor_id="my-processor")

        # Restore from snapshot
        state = await manager.restore_snapshot(snapshot.metadata.snapshot_id)
    """

    # Appended to the processor ID (as "<processor_id>-snapshot") to form
    # the StateManager key; despite the name it is used as a suffix.
    SNAPSHOT_KEY_PREFIX = "snapshot"

    def __init__(
        self,
        state_manager: "StateManager",
        max_snapshots_per_processor: int = 5,
        compression_enabled: bool = True,
        checksum_algorithm: str = "sha256",
        auto_cleanup: bool = True,
        on_capture: Optional[Callable] = None,
        on_restore: Optional[Callable] = None,
    ):
        """
        Initialize golden snapshot manager.

        Args:
            state_manager: StateManager instance for persistent storage
            max_snapshots_per_processor: Maximum snapshots per processor
            compression_enabled: Whether to compress snapshots
            checksum_algorithm: Algorithm for checksums (sha256, md5)
            auto_cleanup: Automatically cleanup old snapshots
            on_capture: Callback when snapshot is captured (sync or async)
            on_restore: Callback when snapshot is restored (sync or async)
        """
        self._state_manager = state_manager
        self.max_snapshots_per_processor = max_snapshots_per_processor
        self.compression_enabled = compression_enabled
        self.checksum_algorithm = checksum_algorithm
        self.auto_cleanup = auto_cleanup
        self.on_capture = on_capture
        self.on_restore = on_restore

        # In-memory cache of snapshot entries (loaded on first access)
        self._snapshot_cache: Dict[str, List[Dict[str, Any]]] = {}
        self._cache_lock = asyncio.Lock()

        # Metrics
        self._capture_count = 0
        self._restore_count = 0
        self._validation_failures = 0

        logger.info(
            f"GoldenSnapshotManager initialized: "
            f"compression={compression_enabled}, max_per_processor={max_snapshots_per_processor}"
        )

    async def capture_snapshot(
        self,
        processor_id: str,
        state_data: Dict[str, Any],
        state_version: str = "1.0",
        tags: Optional[Dict[str, str]] = None,
        validate_before_save: bool = True,
    ) -> Snapshot:
        """
        Capture a golden snapshot of processor state.

        Args:
            processor_id: ID of processor
            state_data: State data to snapshot
            state_version: Version of state schema
            tags: Optional tags for the snapshot
            validate_before_save: Validate state before saving

        Returns:
            Captured snapshot with metadata

        Raises:
            SnapshotValidationError: If validation fails
            SnapshotStorageError: If storage fails
        """
        logger.info(f"Capturing snapshot for processor {processor_id}")

        # Validate state data
        if validate_before_save and not self._validate_state_data(state_data):
            raise SnapshotValidationError("State data validation failed")

        snapshot_id = self._generate_snapshot_id(processor_id)

        # sort_keys gives a deterministic byte representation for the checksum
        state_json = json.dumps(state_data, sort_keys=True)

        # Compress if enabled
        if self.compression_enabled:
            state_bytes = gzip.compress(state_json.encode())
            compressed = True
            format_type = SnapshotFormat.JSON_GZ.value
        else:
            state_bytes = state_json.encode()
            compressed = False
            format_type = SnapshotFormat.JSON.value

        # The checksum covers the stored bytes (i.e. after compression)
        checksum = self._calculate_checksum(state_bytes)

        metadata = SnapshotMetadata(
            snapshot_id=snapshot_id,
            processor_id=processor_id,
            created_at=time.time(),
            state_version=state_version,
            checksum=checksum,
            size_bytes=len(state_bytes),
            compressed=compressed,
            format=format_type,
            tags=tags or {},
        )
        snapshot = Snapshot(metadata=metadata, state_data=state_data)

        # Save to storage
        try:
            await self._save_snapshot(snapshot, state_bytes)
        except Exception as e:
            logger.error(f"Failed to save snapshot: {e}")
            # Chain the cause so the backend traceback is not lost
            raise SnapshotStorageError(f"Failed to save snapshot: {e}") from e

        # Update metrics
        self._capture_count += 1

        # Cleanup old snapshots if auto_cleanup enabled
        if self.auto_cleanup:
            await self._cleanup_old_snapshots(processor_id)

        # Call capture callback (best effort; failures are only logged)
        if self.on_capture:
            await self._invoke_callback(self.on_capture, snapshot, "Capture")

        logger.info(
            f"Snapshot captured: id={snapshot_id}, size={len(state_bytes)} bytes, "
            f"compressed={compressed}, checksum={checksum[:8]}..."
        )

        return snapshot

    async def restore_snapshot(
        self,
        snapshot_id: str,
        validate_checksum: bool = True,
        update_metadata: bool = True,
    ) -> Dict[str, Any]:
        """
        Restore state from a snapshot.

        Args:
            snapshot_id: ID of snapshot to restore
            validate_checksum: Validate checksum before restoring
            update_metadata: Update metadata (restore count, timestamp)

        Returns:
            Restored state data

        Raises:
            SnapshotValidationError: If validation fails
            SnapshotStorageError: If snapshot not found or load fails
        """
        logger.info(f"Restoring snapshot {snapshot_id}")

        # Load snapshot
        try:
            snapshot, state_bytes = await self._load_snapshot(snapshot_id)
        except Exception as e:
            logger.error(f"Failed to load snapshot: {e}")
            # Chain the cause so the underlying failure stays visible
            raise SnapshotStorageError(f"Failed to load snapshot: {e}") from e

        # Validate checksum of the stored bytes against the recorded one
        if validate_checksum:
            calculated_checksum = self._calculate_checksum(state_bytes)
            if calculated_checksum != snapshot.metadata.checksum:
                self._validation_failures += 1
                raise SnapshotValidationError(
                    f"Checksum mismatch: expected {snapshot.metadata.checksum}, "
                    f"got {calculated_checksum}"
                )

        # Update metadata
        if update_metadata:
            snapshot.metadata.restore_count += 1
            snapshot.metadata.last_restored_at = time.time()
            await self._update_metadata(snapshot.metadata)

        # Update metrics
        self._restore_count += 1

        # Call restore callback (best effort; failures are only logged)
        if self.on_restore:
            await self._invoke_callback(self.on_restore, snapshot, "Restore")

        logger.info(
            f"Snapshot restored: id={snapshot_id}, "
            f"restore_count={snapshot.metadata.restore_count}"
        )

        return snapshot.state_data

    async def list_snapshots(
        self,
        processor_id: Optional[str] = None,
        limit: Optional[int] = None,
    ) -> List[SnapshotMetadata]:
        """
        List available snapshots.

        Args:
            processor_id: Filter by processor ID (required - StateManager requires processor ID)
            limit: Maximum number of snapshots to return

        Returns:
            List of snapshot metadata in storage order. Entries are inserted
            at the front on capture, so the most recently captured is first.

        Raises:
            ValueError: If processor_id is not provided
        """
        if processor_id is None:
            raise ValueError("processor_id is required for listing snapshots")

        # Load snapshots from StateManager
        snapshot_entries = await self._load_all_snapshots(processor_id)

        # Extract metadata; unparsable entries are skipped with a warning
        snapshots = []
        for entry in snapshot_entries:
            try:
                snapshots.append(SnapshotMetadata.from_dict(entry["metadata"]))
            except Exception as e:
                logger.warning(f"Failed to parse snapshot metadata: {e}")

        # No sorting happens here or in _load_all_snapshots: the newest-first
        # order is the storage order maintained by _save_snapshot.
        if limit is not None:
            snapshots = snapshots[:limit]

        return snapshots

    async def delete_snapshot(self, snapshot_id: str) -> bool:
        """
        Delete a snapshot.

        Args:
            snapshot_id: ID of snapshot to delete

        Returns:
            True if deleted successfully; False if the ID is malformed, the
            snapshot does not exist, or the storage operation failed
        """
        logger.info(f"Deleting snapshot {snapshot_id}")

        try:
            processor_id = self._extract_processor_id(snapshot_id)
            if processor_id is None:
                logger.error(f"Invalid snapshot ID format: {snapshot_id}")
                return False

            # Load all snapshots
            snapshots = await self._load_all_snapshots(processor_id)

            # Build a new list without the target (the cached list is untouched)
            remaining = [
                s for s in snapshots
                if s["metadata"]["snapshot_id"] != snapshot_id
            ]

            if len(remaining) == len(snapshots):
                logger.warning(f"Snapshot not found: {snapshot_id}")
                return False

            # Save back (or delete if no snapshots left)
            if remaining:
                await self._save_all_snapshots(processor_id, remaining)
            else:
                # No snapshots left, delete the entire state entry
                storage_key = self._get_storage_key(processor_id)
                await self._state_manager.delete_state(storage_key)
                # Clear cache
                self._snapshot_cache.pop(processor_id, None)

            logger.info(f"Snapshot deleted: {snapshot_id}")
            return True

        except Exception as e:
            # Deliberate best effort: failure is reported via the return value
            logger.error(f"Failed to delete snapshot {snapshot_id}: {e}")
            return False

    async def get_latest_snapshot(
        self,
        processor_id: str
    ) -> Optional[SnapshotMetadata]:
        """
        Get the latest snapshot for a processor.

        Args:
            processor_id: Processor ID

        Returns:
            Latest snapshot metadata, or None if no snapshots exist
        """
        snapshots = await self.list_snapshots(processor_id=processor_id, limit=1)
        return snapshots[0] if snapshots else None

    async def delete_all_snapshots(self, processor_id: str) -> int:
        """
        Delete all snapshots for a processor.

        Args:
            processor_id: Processor ID

        Returns:
            Number of snapshots deleted
        """
        logger.info(f"Deleting all snapshots for processor {processor_id}")

        # Get count before deletion
        snapshots = await self._load_all_snapshots(processor_id)
        count = len(snapshots)

        if count == 0:
            return 0

        # Delete the entire state entry
        storage_key = self._get_storage_key(processor_id)
        await self._state_manager.delete_state(storage_key)

        # Clear cache
        self._snapshot_cache.pop(processor_id, None)

        logger.info(f"Deleted {count} snapshots for processor {processor_id}")
        return count

    def clear_cache(self, processor_id: Optional[str] = None) -> None:
        """
        Clear the in-memory snapshot cache.

        Args:
            processor_id: Clear cache for specific processor, or all if None
        """
        # Compare against None explicitly so an empty-string ID does not
        # accidentally wipe the cache for every processor.
        if processor_id is not None:
            self._snapshot_cache.pop(processor_id, None)
        else:
            self._snapshot_cache.clear()

    def get_stats(self) -> Dict[str, Any]:
        """
        Get snapshot manager statistics.

        Returns:
            Dictionary of statistics
        """
        # _backend is a private attribute of the StateManager; fall back
        # gracefully instead of raising if it is not present.
        backend = getattr(self._state_manager, "_backend", "unknown")
        return {
            "storage_backend": str(backend),
            "capture_count": self._capture_count,
            "restore_count": self._restore_count,
            "validation_failures": self._validation_failures,
            "compression_enabled": self.compression_enabled,
            "max_snapshots_per_processor": self.max_snapshots_per_processor,
        }

    # Private methods

    async def _invoke_callback(
        self, callback: Callable, snapshot: Snapshot, label: str
    ) -> None:
        """
        Run a user callback, awaiting its result when it is awaitable.

        The callback is called first and the result inspected, so partials,
        lambdas and callable objects that return a coroutine are awaited too.
        Any exception is logged as a warning and suppressed.
        """
        try:
            result = callback(snapshot)
            if asyncio.iscoroutine(result) or asyncio.isfuture(result):
                await result
        except Exception as e:
            logger.warning(f"{label} callback failed: {e}")

    @staticmethod
    def _extract_processor_id(snapshot_id: str) -> Optional[str]:
        """
        Return the processor ID encoded in a snapshot ID, or None if malformed.

        Snapshot IDs have the form ``{processor_id}_{timestamp}``; splitting
        from the right keeps underscores inside the processor ID intact.
        """
        parts = snapshot_id.rsplit("_", 1)
        return parts[0] if len(parts) == 2 else None

    def _get_storage_key(self, processor_id: str) -> str:
        """Get the StateManager key for storing snapshots."""
        return f"{processor_id}-{self.SNAPSHOT_KEY_PREFIX}"

    def _generate_snapshot_id(self, processor_id: str) -> str:
        """
        Generate a snapshot ID of the form ``{processor_id}_{epoch_millis}``.

        NOTE: resolution is one millisecond, so two captures for the same
        processor within the same millisecond produce the same ID.
        """
        timestamp = int(time.time() * 1000)
        return f"{processor_id}_{timestamp}"

    def _calculate_checksum(self, data: bytes) -> str:
        """
        Calculate the hex digest of ``data`` with the configured algorithm.

        Raises:
            ValueError: If ``checksum_algorithm`` is not "sha256" or "md5"
        """
        if self.checksum_algorithm == "sha256":
            return hashlib.sha256(data).hexdigest()
        if self.checksum_algorithm == "md5":
            # Integrity check only, not a security use; the flag keeps md5
            # usable on builds that restrict it for security purposes.
            return hashlib.md5(data, usedforsecurity=False).hexdigest()
        raise ValueError(f"Unsupported checksum algorithm: {self.checksum_algorithm}")

    def _validate_state_data(self, state_data: Dict[str, Any]) -> bool:
        """
        Validate state data before capture.

        Args:
            state_data: State data to validate

        Returns:
            True if the data is a dict and JSON serializable
        """
        # Basic validation
        if not isinstance(state_data, dict):
            logger.error("State data must be a dictionary")
            return False

        # Check if serializable
        try:
            json.dumps(state_data)
        except (TypeError, ValueError) as e:
            logger.error(f"State data is not JSON serializable: {e}")
            return False

        return True

    async def _load_all_snapshots(self, processor_id: str) -> List[Dict[str, Any]]:
        """
        Load all snapshots for a processor from StateManager.

        The returned list is the cached object itself; callers must not
        mutate it in place and should build a new list to save instead.

        Returns:
            List of snapshot entries (each with 'metadata' and 'data_b64' keys)
        """
        async with self._cache_lock:
            # Check cache first
            if processor_id in self._snapshot_cache:
                return self._snapshot_cache[processor_id]

            storage_key = self._get_storage_key(processor_id)
            state = await self._state_manager.load_state(storage_key)

            if state is None:
                self._snapshot_cache[processor_id] = []
                return []

            snapshots = state.get("snapshots", [])
            self._snapshot_cache[processor_id] = snapshots
            return snapshots

    async def _save_all_snapshots(
        self, processor_id: str, snapshots: List[Dict[str, Any]]
    ) -> None:
        """
        Save all snapshots for a processor to StateManager.

        The cache is replaced only after the StateManager write returns, so
        a failed write leaves the cache reflecting the last persisted state.

        Args:
            processor_id: Processor ID
            snapshots: List of snapshot entries
        """
        storage_key = self._get_storage_key(processor_id)

        state = {
            "processor_id": processor_id,
            "snapshot_count": len(snapshots),
            "snapshots": snapshots,
        }

        await self._state_manager.save_state(storage_key, state)

        # Update cache
        async with self._cache_lock:
            self._snapshot_cache[processor_id] = snapshots

    async def _save_snapshot(self, snapshot: Snapshot, state_bytes: bytes) -> None:
        """Save snapshot to StateManager as the newest entry."""
        processor_id = snapshot.metadata.processor_id

        # Load existing snapshots
        existing = await self._load_all_snapshots(processor_id)

        # Create snapshot entry with base64-encoded data
        snapshot_entry = {
            "metadata": snapshot.metadata.to_dict(),
            "data_b64": base64.b64encode(state_bytes).decode("ascii"),
        }

        # Newest first. Build a new list rather than inserting into the
        # cached one, so a failed save cannot leave an unpersisted entry
        # behind in the cache.
        updated = [snapshot_entry, *existing]

        # Save back to StateManager
        await self._save_all_snapshots(processor_id, updated)

    async def _load_snapshot(self, snapshot_id: str) -> tuple[Snapshot, bytes]:
        """
        Load a snapshot and its stored bytes from StateManager.

        Returns:
            The decoded snapshot and the raw stored bytes (still compressed
            when the snapshot is compressed), for checksum verification

        Raises:
            SnapshotStorageError: If the ID is malformed or not found
        """
        processor_id = self._extract_processor_id(snapshot_id)
        if processor_id is None:
            raise SnapshotStorageError(f"Invalid snapshot ID format: {snapshot_id}")

        # Load all snapshots for this processor
        snapshots = await self._load_all_snapshots(processor_id)

        # Find the specific snapshot
        snapshot_entry = next(
            (
                entry for entry in snapshots
                if entry["metadata"]["snapshot_id"] == snapshot_id
            ),
            None,
        )
        if snapshot_entry is None:
            raise SnapshotStorageError(f"Snapshot not found: {snapshot_id}")

        # Decode data
        state_bytes = base64.b64decode(snapshot_entry["data_b64"])
        metadata = SnapshotMetadata.from_dict(snapshot_entry["metadata"])

        # Decompress if needed
        if metadata.compressed:
            state_json = gzip.decompress(state_bytes).decode()
        else:
            state_json = state_bytes.decode()

        # Parse JSON
        state_data = json.loads(state_json)

        return Snapshot(metadata=metadata, state_data=state_data), state_bytes

    async def _update_metadata(self, metadata: SnapshotMetadata) -> None:
        """Update snapshot metadata in StateManager."""
        processor_id = metadata.processor_id

        # Load all snapshots
        snapshots = await self._load_all_snapshots(processor_id)

        # Replace the matching entry in a copy, so the cached entries stay
        # untouched until the save below has succeeded.
        updated = list(snapshots)
        for index, entry in enumerate(updated):
            if entry["metadata"]["snapshot_id"] == metadata.snapshot_id:
                updated[index] = {**entry, "metadata": metadata.to_dict()}
                break

        # Save back
        await self._save_all_snapshots(processor_id, updated)

    async def _cleanup_old_snapshots(self, processor_id: str) -> int:
        """
        Cleanup old snapshots exceeding max limit.

        Args:
            processor_id: Processor ID

        Returns:
            Number of snapshots deleted
        """
        snapshots = await self._load_all_snapshots(processor_id)

        # Keep only max_snapshots_per_processor newest snapshots
        if len(snapshots) <= self.max_snapshots_per_processor:
            return 0

        # Count snapshots to delete
        deleted_count = len(snapshots) - self.max_snapshots_per_processor

        # Entries are stored newest first, so the head of the list is kept
        kept = snapshots[:self.max_snapshots_per_processor]

        # Save back
        await self._save_all_snapshots(processor_id, kept)

        if deleted_count > 0:
            logger.info(
                f"Cleaned up {deleted_count} old snapshots for processor {processor_id}"
            )

        return deleted_count