dory-processor-sdk 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. dory/__init__.py +101 -0
  2. dory/auth/__init__.py +10 -0
  3. dory/auth/oauth2.py +153 -0
  4. dory/auto_instrument.py +142 -0
  5. dory/cli/__init__.py +5 -0
  6. dory/cli/main.py +137 -0
  7. dory/cli/templates.py +123 -0
  8. dory/config/__init__.py +23 -0
  9. dory/config/defaults.py +24 -0
  10. dory/config/loader.py +430 -0
  11. dory/config/presets.py +73 -0
  12. dory/config/schema.py +84 -0
  13. dory/core/__init__.py +27 -0
  14. dory/core/app.py +434 -0
  15. dory/core/context.py +209 -0
  16. dory/core/lifecycle.py +214 -0
  17. dory/core/meta.py +121 -0
  18. dory/core/modes.py +479 -0
  19. dory/core/processor.py +564 -0
  20. dory/core/signals.py +122 -0
  21. dory/decorators.py +142 -0
  22. dory/edge/__init__.py +88 -0
  23. dory/edge/adaptive.py +644 -0
  24. dory/edge/detector.py +546 -0
  25. dory/edge/fencing.py +488 -0
  26. dory/edge/heartbeat.py +598 -0
  27. dory/edge/role.py +419 -0
  28. dory/errors/__init__.py +139 -0
  29. dory/errors/classification.py +362 -0
  30. dory/errors/codes.py +498 -0
  31. dory/geo/__init__.py +40 -0
  32. dory/geo/geolocalizer.py +1034 -0
  33. dory/health/__init__.py +12 -0
  34. dory/health/probes.py +210 -0
  35. dory/health/server.py +635 -0
  36. dory/k8s/__init__.py +80 -0
  37. dory/k8s/annotation_watcher.py +184 -0
  38. dory/k8s/client.py +251 -0
  39. dory/k8s/labels.py +505 -0
  40. dory/k8s/pod_metadata.py +182 -0
  41. dory/logging/__init__.py +9 -0
  42. dory/logging/logger.py +148 -0
  43. dory/metrics/__init__.py +7 -0
  44. dory/metrics/collector.py +301 -0
  45. dory/middleware/__init__.py +46 -0
  46. dory/middleware/connection_tracker.py +608 -0
  47. dory/middleware/request_id.py +325 -0
  48. dory/middleware/request_tracker.py +511 -0
  49. dory/migration/__init__.py +33 -0
  50. dory/migration/configmap.py +232 -0
  51. dory/migration/s3_store.py +594 -0
  52. dory/migration/serialization.py +135 -0
  53. dory/migration/state_manager.py +286 -0
  54. dory/migration/transfer.py +382 -0
  55. dory/monitoring/__init__.py +29 -0
  56. dory/monitoring/opentelemetry.py +489 -0
  57. dory/output/__init__.py +31 -0
  58. dory/output/envelope.py +137 -0
  59. dory/output/formatter.py +113 -0
  60. dory/output/rabbitmq.py +632 -0
  61. dory/output/routing.py +318 -0
  62. dory/output/validator.py +199 -0
  63. dory/py.typed +2 -0
  64. dory/recovery/__init__.py +60 -0
  65. dory/recovery/golden_image.py +487 -0
  66. dory/recovery/golden_snapshot.py +713 -0
  67. dory/recovery/golden_validator.py +518 -0
  68. dory/recovery/partial_recovery.py +482 -0
  69. dory/recovery/recovery_decision.py +242 -0
  70. dory/recovery/restart_detector.py +142 -0
  71. dory/recovery/state_validator.py +183 -0
  72. dory/resilience/__init__.py +45 -0
  73. dory/resilience/circuit_breaker.py +457 -0
  74. dory/resilience/retry.py +389 -0
  75. dory/simple.py +342 -0
  76. dory/types.py +68 -0
  77. dory/utils/__init__.py +31 -0
  78. dory/utils/errors.py +59 -0
  79. dory/utils/retry.py +115 -0
  80. dory/utils/timeout.py +80 -0
  81. dory_processor_sdk-0.0.1.dist-info/METADATA +424 -0
  82. dory_processor_sdk-0.0.1.dist-info/RECORD +86 -0
  83. dory_processor_sdk-0.0.1.dist-info/WHEEL +5 -0
  84. dory_processor_sdk-0.0.1.dist-info/entry_points.txt +2 -0
  85. dory_processor_sdk-0.0.1.dist-info/licenses/LICENSE +201 -0
  86. dory_processor_sdk-0.0.1.dist-info/top_level.txt +1 -0
dory/core/processor.py ADDED
@@ -0,0 +1,564 @@
1
+ """
2
+ BaseProcessor - Abstract base class for processor implementations.
3
+
4
+ Developers implement this class to create their processor applications.
5
+ The SDK handles all lifecycle, state management, and health concerns.
6
+
7
+ Auto-initialized components (if available):
8
+ - Circuit breakers (self.circuit_breakers)
9
+ - Error classifier (self.error_classifier)
10
+ - OpenTelemetry (self.otel)
11
+ - Request tracker (self.request_tracker)
12
+ - Request ID middleware (self.request_id_middleware)
13
+ - Connection tracker (self.connection_tracker)
14
+ - RabbitMQ publisher (self.publisher)
15
+
16
+ All handler methods are automatically instrumented via AutoInstrumentMeta.
17
+ """
18
+
19
+ import asyncio
20
+ import logging
21
+ from abc import ABC, abstractmethod
22
+ from typing import TYPE_CHECKING, AsyncIterator, Dict, Any, Optional
23
+
24
+ from dory.decorators import get_stateful_vars, set_stateful_vars
25
+ from dory.core.meta import AutoInstrumentMeta
26
+
27
+ if TYPE_CHECKING:
28
+ from dory.core.context import ExecutionContext
29
+ from dory.geo import GeoPoint
30
+
31
+ logger = logging.getLogger(__name__)
32
+
33
+
34
class BaseProcessor(ABC, metaclass=AutoInstrumentMeta):
    """
    Abstract base class for processor implementations.

    Required method:
    - run(): Main processing loop

    Optional methods (have sensible defaults):
    - startup(): Initialize resources (default: no-op)
    - shutdown(): Cleanup resources (default: no-op)
    - get_state(): Return state dict (default: returns @stateful vars or {})
    - restore_state(): Restore state (default: restores @stateful vars)

    Optional fault handling hooks:
    - on_state_restore_failed(): Handle state restore errors
    - on_rapid_restart_detected(): Handle restart loop
    - on_health_check_failed(): Handle health check errors
    - reset_caches(): Clean caches during golden image reset

    Usage:
        # Minimal implementation (just run method)
        class MyProcessor(BaseProcessor):
            counter = stateful(0)

            async def run(self):
                async for _ in self.run_loop(interval=1):
                    self.counter += 1

        # Full implementation
        class MyProcessor(BaseProcessor):
            async def startup(self):
                self.model = load_model()

            async def run(self):
                while not self.context.is_shutdown_requested():
                    process()

            async def shutdown(self):
                self.model.close()

            def get_state(self):
                return {"processed": self.count}

            async def restore_state(self, state):
                self.count = state.get("processed", 0)
    """

    # Optional: Define state schema for validation
    # Schema example: {'processed_count': int, 'last_frame_id': int}
    state_schema: dict[str, type] | None = None

    # Context is auto-injected by DoryApp (no need to accept in __init__)
    context: "ExecutionContext"

    # =========================================================================
    # SDK-Managed Components (auto-initialized with defaults)
    # Declared here for IDE autocomplete and type checking.
    # NOTE: these are annotations only. The attributes exist on an instance
    # only after _auto_initialize_components() has run with a context set.
    # =========================================================================
    error_classifier: Any
    circuit_breakers: Dict[str, Any]
    otel: Optional[Any]
    request_tracker: Optional[Any]
    request_id_middleware: Optional[Any]
    connection_tracker: Optional[Any]
    publisher: Optional[Any]

    def __init__(self, context: "ExecutionContext | None" = None):
        """
        Initialize processor with auto-initialization of SDK components.

        Args:
            context: ExecutionContext (optional - will be auto-injected if not provided)

        Note:
            You can override __init__ and call super().__init__(context) to get
            auto-initialization, or skip super() call to manually initialize.
        """
        if context is not None:
            self.context = context

        # Auto-initialize SDK components if context is available
        self._auto_initialize_components()

    # =========================================================================
    # Required Method
    # =========================================================================

    @abstractmethod
    async def run(self) -> None:
        """
        Main processing loop.

        Called after startup() and restore_state(). Must check
        context.is_shutdown_requested() periodically to exit gracefully.

        You can use self.run_loop() helper for cleaner code:

            async def run(self):
                async for _ in self.run_loop(interval=1):
                    self.counter += 1

        Or traditional while loop:

            async def run(self):
                while not self.context.is_shutdown_requested():
                    self.counter += 1
                    await asyncio.sleep(1)

        Raises:
            Any exception will cause pod crash
        """
        raise NotImplementedError

    # =========================================================================
    # Optional Lifecycle Methods (Override if needed)
    # =========================================================================

    async def startup(self) -> None:
        """
        Initialize processor resources (optional).

        Called once at pod startup after __init__ but before run().
        Override to load models, open connections, etc.

        Default: No-op
        """
        pass

    async def shutdown(self) -> None:
        """
        Cleanup processor resources (optional).

        Called on graceful shutdown (SIGTERM). Has max timeout
        (configurable via DORY_SHUTDOWN_TIMEOUT_SEC, default 30s).
        Override to close connections, flush buffers, etc.

        Default: No-op
        """
        pass

    def get_state(self) -> dict:
        """
        Return state to migrate to next pod (optional).

        Called during migration (must be fast, <1s). State must be
        JSON-serializable.

        Default: Returns all @stateful decorated attributes, or {} if none.

        Override for custom state:
            def get_state(self):
                return {"counter": self.counter, "data": self.data}
        """
        # Auto-collect @stateful decorated attributes; any falsy result
        # (empty dict, None) is normalised to an empty dict.
        return get_stateful_vars(self) or {}

    async def restore_state(self, state: dict) -> None:
        """
        Restore state from previous pod (optional).

        Called after startup() but before run() if state exists.

        Default: Restores all @stateful decorated attributes from state.

        Override for custom restoration:
            async def restore_state(self, state):
                self.counter = state.get("counter", 0)
        """
        # Auto-restore @stateful decorated attributes
        set_stateful_vars(self, state)

    # =========================================================================
    # Helper Methods
    # =========================================================================

    async def run_loop(
        self,
        interval: float = 1.0,
        check_migration: bool = True,
    ) -> AsyncIterator[int]:
        """
        Async iterator that yields until shutdown is requested.

        Simplifies the common pattern of checking shutdown in a loop.

        Args:
            interval: Sleep interval between iterations (seconds)
            check_migration: If True, log an info message after each
                iteration for which context.is_migration_imminent() is
                true. The loop itself keeps running; only a shutdown
                request ends it.

        Yields:
            Iteration count (0, 1, 2, ...)

        Usage:
            async def run(self):
                async for i in self.run_loop(interval=1):
                    self.counter += 1
                    print(f"Iteration {i}")

            # Equivalent to:
            async def run(self):
                i = 0
                while not self.context.is_shutdown_requested():
                    self.counter += 1
                    print(f"Iteration {i}")
                    i += 1
                    await asyncio.sleep(1)
        """
        iteration = 0
        while not self.context.is_shutdown_requested():
            # Hand control to the caller's loop body for this iteration.
            yield iteration
            iteration += 1

            # Check if migration is imminent (informational only)
            if check_migration and self.context.is_migration_imminent():
                self.context.logger().info(
                    f"Migration imminent, completing iteration {iteration}"
                )

            await asyncio.sleep(interval)

    def is_shutting_down(self) -> bool:
        """
        Convenience method to check if shutdown is requested.

        Returns:
            True if shutdown has been requested
        """
        return self.context.is_shutdown_requested()

    async def publish(
        self,
        event_type: str,
        location: "GeoPoint",
        payload: dict[str, Any],
        *,
        headers: dict[str, Any] | None = None,
        exchange: str | None = None,
    ) -> None:
        """Publish a single message with a geo location.

        The developer provides the event type, a
        :class:`~dory.geo.GeoPoint` (e.g. from
        :meth:`CameraGeolocalizer.estimate`), and a payload already in
        the unified envelope format. The SDK converts the coordinates
        to a geohash-based routing key and publishes the payload as-is
        (no additional envelope wrapping).

        Examples::

            loc = geo.estimate(box)
            await self.publish(
                "accident",
                location=loc,
                payload=MessageEnvelope(payload={...}).to_dict(),
            )

        Args:
            event_type: Event type for routing (e.g., "accident", "detection").
            location: A :class:`~dory.geo.GeoPoint` with ``.lat`` and
                ``.lng`` attributes.
            payload: Message payload in unified envelope format.
            headers: Optional AMQP message headers.
            exchange: Optional exchange override.

        Raises:
            RuntimeError: If publisher is not initialized (including the
                case where auto-initialization never ran because no
                context was available at construction time).
            ValueError: If event_type is empty or coordinates are out of range.
        """
        # getattr(): when __init__ ran without a context, auto-initialization
        # is skipped and the ``publisher`` attribute is never assigned.
        # Treat that the same as "publisher is None" so callers get the
        # documented RuntimeError instead of an AttributeError.
        publisher = getattr(self, "publisher", None)
        if publisher is None:
            raise RuntimeError(
                "RabbitMQ publisher not initialized. "
                "Set DORY_RABBITMQ_OAUTH2_TOKEN_URL and OAuth2 credentials to enable."
            )

        # Imported lazily so the routing module is only needed when publishing.
        from dory.output.routing import build_routing_key_from_geo

        routing_key = build_routing_key_from_geo(
            event_type, location.lat, location.lng
        )
        logger.info(
            "Publishing message event_type=%s lat=%.6f lon=%.6f routing_key=%s",
            event_type,
            location.lat,
            location.lng,
            routing_key,
        )

        # raw=True: the payload is passed through without extra wrapping.
        await publisher.publish(
            routing_key=routing_key,
            data=payload,
            exchange=exchange,
            headers=headers,
            raw=True,
        )

    # =========================================================================
    # Optional Fault Handling Hooks
    # =========================================================================

    async def on_state_restore_failed(self, error: Exception) -> bool:
        """
        Called if state restore fails.

        Override to attempt recovery (e.g., fetch from external backup).
        Return True to start with golden image, False to exit and crash.

        Args:
            error: Exception from restore_state() or validation

        Returns:
            True to continue with golden image, False to exit
        """
        return True  # Default: continue with golden image

    async def on_rapid_restart_detected(self, restart_count: int) -> bool:
        """
        Called if restart loop detected (3+ restarts in 5 minutes).

        Override to attempt recovery (e.g., reinitialize state, reset
        connections). Return True to continue, False to trigger golden reset.

        Args:
            restart_count: Number of restarts detected

        Returns:
            True to continue, False to force golden reset
        """
        return True  # Default: continue (SDK will start golden)

    async def on_health_check_failed(self, error: Exception) -> bool:
        """
        Called if health check fails.

        Override to attempt recovery (e.g., reconnect to external services).
        Return True to retry health check, False to fail.

        Args:
            error: Exception from health check

        Returns:
            True to retry, False to fail
        """
        return False  # Default: fail health check

    def reset_caches(self) -> None:
        """
        Called during golden image reset.

        Override to clear any in-memory caches, buffers, or temporary
        state that should not persist through a golden reset.
        """
        pass  # Default: no caches to reset

    # =========================================================================
    # Auto-Initialization
    # =========================================================================

    def _auto_initialize_components(self) -> None:
        """
        Auto-initialize SDK components with sensible defaults.

        Called automatically during __init__. Does nothing (beyond a debug
        log) when no context is set on the instance. Each component is
        optional — if its package is not installed, initialization is
        silently skipped.
        """
        if getattr(self, "context", None) is None:
            logger.debug("Context not available, skipping auto-initialization")
            return

        self._init_error_classifier()
        self._init_circuit_breakers()
        self._init_opentelemetry()
        self._init_request_tracking()
        self._init_request_id()
        self._init_connection_tracking()
        self._init_publisher()

        logger.debug("Auto-initialization complete")

    def _init_error_classifier(self) -> None:
        """Initialize error classifier (None if dory.errors is unavailable)."""
        try:
            from dory.errors import ErrorClassifier
            self.error_classifier = ErrorClassifier()
        except ImportError:
            self.error_classifier = None

    def _init_circuit_breakers(self) -> None:
        """Create default circuit breakers for common services.

        Leaves ``circuit_breakers`` as an empty dict when dory.resilience
        cannot be imported.
        """
        self.circuit_breakers = {}

        try:
            from dory.resilience import CircuitBreaker
        except ImportError:
            return

        for name in ("database", "external_api", "cache"):
            self.circuit_breakers[name] = CircuitBreaker(
                name=name,
                failure_threshold=5,
                success_threshold=2,
                timeout_seconds=30.0,
                half_open_max_calls=3,
            )

    def _init_opentelemetry(self) -> None:
        """Initialize OpenTelemetry with defaults.

        Uses DORY_APP_VERSION env var (injected by orchestrator from DB) for
        service.version. Falls back to "1.0.0" if not set.
        """
        self.otel = None

        try:
            import os
            from dory.monitoring import OpenTelemetryManager
            service_version = os.environ.get("DORY_APP_VERSION", "1.0.0")
            self.otel = OpenTelemetryManager(
                service_name="dory-app",
                service_version=service_version,
                environment="production",
                console_export=True,
            )
            self.otel.initialize()
        except ImportError:
            # Monitoring extras not installed: telemetry stays disabled.
            pass
        except Exception as e:
            # Best effort: telemetry problems must not stop the processor.
            logger.warning("Failed to initialize OpenTelemetry: %s", e)

    def _init_request_tracking(self) -> None:
        """Initialize request tracking."""
        self.request_tracker = None

        try:
            from dory.middleware import RequestTracker
            self.request_tracker = RequestTracker(
                max_history=1000,
                enable_history=True,
            )
        except ImportError:
            pass
        except Exception as e:
            logger.warning("Failed to initialize request tracking: %s", e)

    def _init_request_id(self) -> None:
        """Initialize request ID middleware."""
        self.request_id_middleware = None

        try:
            from dory.middleware import RequestIdMiddleware
            self.request_id_middleware = RequestIdMiddleware(
                header_name="X-Request-ID",
                log_request_id=True,
            )
        except ImportError:
            pass
        except Exception as e:
            logger.warning("Failed to initialize request ID middleware: %s", e)

    def _init_connection_tracking(self) -> None:
        """Initialize connection tracking."""
        self.connection_tracker = None

        try:
            from dory.middleware import ConnectionTracker
            self.connection_tracker = ConnectionTracker()
        except ImportError:
            pass
        except Exception as e:
            logger.warning("Failed to initialize connection tracking: %s", e)

    def _init_publisher(self) -> None:
        """Initialize OAuth2-authenticated RabbitMQ publisher.

        The publisher is created when DORY_RABBITMQ_OAUTH2_TOKEN_URL is set.
        All configuration is read from environment variables. On any
        failure the publisher is left as None and a warning is logged.
        """
        self.publisher = None

        import os
        token_url = os.environ.get("DORY_RABBITMQ_OAUTH2_TOKEN_URL", "")
        if not token_url:
            # Publishing is opt-in: no token URL means no publisher.
            return

        try:
            from dory.output.rabbitmq import RabbitMQPublisher, PublisherConfig
        except ImportError:
            return

        try:
            from dory.output.formatter import JSONFormatter
            from dory.auth.oauth2 import OAuth2TokenProvider

            publisher_config = PublisherConfig(
                url="",  # populated by url_provider
                exchange=os.environ.get("DORY_RABBITMQ_EXCHANGE", "dory.output"),
                exchange_type="topic",
                durable=True,
            )

            token_provider = OAuth2TokenProvider(
                token_url=token_url,
                client_id=os.environ.get("DORY_RABBITMQ_OAUTH2_CLIENT_ID", ""),
                client_secret=os.environ.get("DORY_RABBITMQ_OAUTH2_CLIENT_SECRET", ""),
                scopes=["rabbitmq/write:all"],
            )
            _host = os.environ.get("DORY_RABBITMQ_HOST", "")
            # NOTE(review): the port is fixed at 5671 even when
            # DORY_RABBITMQ_TLS_ENABLED is "false" — confirm this is intended.
            _port = 5671
            _vhost = os.environ.get("DORY_RABBITMQ_VHOST", "/")
            _tls = os.environ.get("DORY_RABBITMQ_TLS_ENABLED", "true").lower() == "true"

            # Connection settings are bound as defaults so the provider can
            # be awaited with no arguments.
            async def url_provider(
                _tp=token_provider, _h=_host, _p=_port, _v=_vhost, _t=_tls
            ):
                return await _tp.build_amqp_url(
                    host=_h, port=_p, vhost=_v, tls=_t
                )

            self.publisher = RabbitMQPublisher(
                config=publisher_config,
                formatter=JSONFormatter(),
                url_provider=url_provider,
            )
            logger.info("RabbitMQ publisher initialized (OAuth2)")

        except Exception as e:
            # Best effort: a broken publisher setup must not prevent startup.
            logger.warning("Failed to initialize RabbitMQ publisher: %s", e)
            self.publisher = None
dory/core/signals.py ADDED
@@ -0,0 +1,122 @@
1
+ """
2
+ SignalHandler - Handles OS signals for graceful shutdown.
3
+
4
+ Captures SIGTERM, SIGINT, and SIGUSR1 and triggers appropriate
5
+ actions in the SDK.
6
+ """
7
+
8
+ import asyncio
9
+ import logging
10
+ import signal
11
+ import sys
12
+ from typing import Callable, Awaitable
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
+ class SignalHandler:
18
+ """
19
+ Handles OS signals for graceful shutdown.
20
+
21
+ Signals handled:
22
+ SIGTERM: Graceful shutdown (from Kubelet)
23
+ SIGINT: Graceful shutdown (Ctrl+C for local testing)
24
+ SIGUSR1: Trigger state snapshot (for debugging)
25
+ """
26
+
27
+ def __init__(self):
28
+ self._shutdown_callback: Callable[[], Awaitable[None]] | None = None
29
+ self._snapshot_callback: Callable[[], Awaitable[None]] | None = None
30
+ self._loop: asyncio.AbstractEventLoop | None = None
31
+ self._shutdown_triggered = False
32
+
33
+ def setup(
34
+ self,
35
+ shutdown_callback: Callable[[], Awaitable[None]],
36
+ snapshot_callback: Callable[[], Awaitable[None]] | None = None,
37
+ ) -> None:
38
+ """
39
+ Setup signal handlers.
40
+
41
+ Args:
42
+ shutdown_callback: Async callback for graceful shutdown
43
+ snapshot_callback: Optional async callback for state snapshot
44
+ """
45
+ self._shutdown_callback = shutdown_callback
46
+ self._snapshot_callback = snapshot_callback
47
+ self._loop = asyncio.get_event_loop()
48
+
49
+ # Register signal handlers
50
+ if sys.platform != "win32":
51
+ # Unix signals
52
+ self._loop.add_signal_handler(
53
+ signal.SIGTERM,
54
+ self._handle_shutdown_signal,
55
+ "SIGTERM",
56
+ )
57
+ self._loop.add_signal_handler(
58
+ signal.SIGINT,
59
+ self._handle_shutdown_signal,
60
+ "SIGINT",
61
+ )
62
+ self._loop.add_signal_handler(
63
+ signal.SIGUSR1,
64
+ self._handle_snapshot_signal,
65
+ )
66
+ logger.debug("Signal handlers registered (Unix)")
67
+ else:
68
+ # Windows - limited signal support
69
+ signal.signal(signal.SIGTERM, self._handle_shutdown_signal_sync)
70
+ signal.signal(signal.SIGINT, self._handle_shutdown_signal_sync)
71
+ logger.debug("Signal handlers registered (Windows)")
72
+
73
+ def _handle_shutdown_signal(self, sig_name: str) -> None:
74
+ """Handle SIGTERM/SIGINT asynchronously."""
75
+ if self._shutdown_triggered:
76
+ logger.warning(f"Received {sig_name} but shutdown already in progress")
77
+ return
78
+
79
+ self._shutdown_triggered = True
80
+ logger.info(f"Received {sig_name}, initiating graceful shutdown")
81
+
82
+ if self._shutdown_callback and self._loop:
83
+ asyncio.ensure_future(
84
+ self._shutdown_callback(),
85
+ loop=self._loop,
86
+ )
87
+
88
+ def _handle_shutdown_signal_sync(self, signum: int, frame) -> None:
89
+ """Handle signal synchronously (Windows compatibility)."""
90
+ sig_name = signal.Signals(signum).name
91
+ self._handle_shutdown_signal(sig_name)
92
+
93
+ def _handle_snapshot_signal(self) -> None:
94
+ """Handle SIGUSR1 for debug state snapshot."""
95
+ logger.info("Received SIGUSR1, triggering state snapshot")
96
+
97
+ if self._snapshot_callback and self._loop:
98
+ asyncio.ensure_future(
99
+ self._snapshot_callback(),
100
+ loop=self._loop,
101
+ )
102
+
103
+ def remove_handlers(self) -> None:
104
+ """Remove signal handlers during shutdown."""
105
+ if self._loop and sys.platform != "win32":
106
+ try:
107
+ self._loop.remove_signal_handler(signal.SIGTERM)
108
+ self._loop.remove_signal_handler(signal.SIGINT)
109
+ self._loop.remove_signal_handler(signal.SIGUSR1)
110
+ logger.debug("Signal handlers removed")
111
+ except (ValueError, RuntimeError):
112
+ # Handler not registered or loop closed
113
+ pass
114
+
115
+ @property
116
+ def shutdown_triggered(self) -> bool:
117
+ """Check if shutdown has been triggered."""
118
+ return self._shutdown_triggered
119
+
120
+ def reset(self) -> None:
121
+ """Reset shutdown state (for testing)."""
122
+ self._shutdown_triggered = False