kryten-robot 0.6.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,383 @@
1
+ """Graceful shutdown coordination for Kryten.
2
+
3
+ This module provides the ShutdownHandler that orchestrates clean termination
4
+ of all Kryten components in the correct order: publisher → connector → NATS → logging.
5
+
6
+ The handler enforces timeouts, tracks shutdown state, handles idempotent shutdown
7
+ requests, and returns metrics about the shutdown operation.
8
+
9
+ Examples:
10
+ Basic shutdown:
11
+ >>> handler = ShutdownHandler(publisher, connector, nats, logger)
12
+ >>> result = await handler.shutdown()
13
+ >>> if result.clean_exit:
14
+ ... print("Clean shutdown complete")
15
+
16
+ With timeout:
17
+ >>> try:
18
+ ... result = await asyncio.wait_for(handler.shutdown(), timeout=30.0)
19
+ ... except asyncio.TimeoutError:
20
+ ... logger.critical("Shutdown timeout - forcing exit")
21
+
22
+ As async context manager:
23
+ >>> async with ShutdownHandler(publisher, connector, nats, logger) as handler:
24
+ ... # Application runs
25
+ ... pass
26
+ >>> # Automatic shutdown on exit
27
+
28
+ Note:
29
+ Shutdown is idempotent - calling shutdown() multiple times is safe.
30
+ The first call executes the shutdown sequence, subsequent calls wait
31
+ for the same result.
32
+ """
33
+
34
+ import asyncio
35
+ import logging
36
+ import time
37
+ from dataclasses import dataclass, field
38
+ from enum import Enum
39
+
40
+ from .cytube_connector import CytubeConnector
41
+ from .event_publisher import EventPublisher
42
+ from .nats_client import NatsClient
43
+
44
+
45
class ShutdownPhase(Enum):
    """Named stages of the shutdown sequence.

    The handler records the stage it is currently in so that log output and
    debugging sessions can tell how far a shutdown has progressed.
    """

    # No shutdown has been requested yet.
    IDLE = "idle"
    # A shutdown request was accepted and the sequence has started.
    INITIATED = "initiated"
    # The event publisher is being stopped.
    DRAINING = "draining"
    # The CyTube connector and the NATS client are being disconnected.
    DISCONNECTING = "disconnecting"
    # Log handlers are being flushed.
    FINALIZING = "finalizing"
    # The sequence ran to its end.
    COMPLETE = "complete"
56
+
57
+
58
@dataclass
class ShutdownResult:
    """Outcome and metrics of one shutdown run.

    Returned by the shutdown handler so operators can see whether the
    process terminated cleanly and where the time went.

    Attributes:
        clean_exit: True when the shutdown finished without recording errors.
        duration: Total shutdown time in seconds.
        events_processed: Count of events processed before shutdown.
        errors: Human-readable messages for every problem encountered;
            empty when nothing went wrong.
        phase_timings: Seconds spent per shutdown phase, keyed by phase name.

    Examples:
        >>> result = await handler.shutdown()
        >>> print(f"Clean: {result.clean_exit}, Duration: {result.duration:.2f}s")
        >>> if result.errors:
        ...     print(f"Errors: {result.errors}")
    """

    clean_exit: bool
    duration: float
    events_processed: int
    # default_factory gives each instance its own containers (no sharing).
    errors: list[str] = field(default_factory=list)
    phase_timings: dict = field(default_factory=dict)
83
+
84
+
85
class ShutdownHandler:
    """Coordinates graceful shutdown of Kryten components.

    Ensures clean termination by stopping components in reverse startup order:
    1. EventPublisher - Stop accepting new events, complete current batch
    2. CytubeConnector - Disconnect from CyTube server
    3. NatsClient - Drain pending messages, close connection
    4. Logging - Flush all log handlers

    The handler enforces timeouts at both component and total shutdown level,
    tracks shutdown state, and provides idempotent shutdown semantics: the
    sequence runs in a single shared task and every caller of ``shutdown()``
    receives that task's result.

    Args:
        publisher: EventPublisher instance to stop first.
        connector: CytubeConnector instance to disconnect second.
        nats_client: NatsClient instance to drain and disconnect third.
        logger: Logger instance for shutdown progress tracking.
        timeout: Total shutdown timeout in seconds (default: 30.0).
        component_timeout: Individual component timeout in seconds (default: 10.0).

    Examples:
        >>> handler = ShutdownHandler(publisher, connector, nats, logger)
        >>> result = await handler.shutdown()
        >>> if not result.clean_exit:
        ...     logger.error("Forced shutdown", extra={"errors": result.errors})
    """

    def __init__(
        self,
        publisher: EventPublisher,
        connector: CytubeConnector,
        nats_client: NatsClient,
        logger: logging.Logger,
        timeout: float = 30.0,
        component_timeout: float = 10.0,
    ):
        """Initialize shutdown handler with components and timeouts."""
        self._publisher = publisher
        self._connector = connector
        self._nats_client = nats_client
        self._logger = logger
        self._timeout = timeout
        self._component_timeout = component_timeout

        self._phase = ShutdownPhase.IDLE
        # The single task that runs the sequence; shared by all callers.
        self._shutdown_task: asyncio.Task | None = None
        # Cached outcome, set once the shared task finishes or times out.
        self._shutdown_result: ShutdownResult | None = None
        self._shutdown_lock = asyncio.Lock()

    @property
    def is_shutting_down(self) -> bool:
        """Check if shutdown is currently in progress.

        Returns:
            True once the shutdown sequence has started running and no final
            result has been recorded yet. False before the sequence starts,
            after it completes, and after it was abandoned because the total
            timeout expired.

        Examples:
            >>> handler.is_shutting_down
            False
        """
        # A cached result means the run is over, even when a timeout left
        # the phase marker stuck at the step that was interrupted.
        if self._shutdown_result is not None:
            return False
        return self._phase not in (ShutdownPhase.IDLE, ShutdownPhase.COMPLETE)

    async def shutdown(self) -> ShutdownResult:
        """Execute graceful shutdown sequence.

        Stops all components in reverse startup order with timeout enforcement.
        This method is idempotent - the sequence runs once in a shared task;
        concurrent calls wait for that task and later calls return the cached
        result.

        Exceeding the total timeout does not raise: it is reported through a
        result with ``clean_exit=False`` and a timeout message in ``errors``.

        Returns:
            ShutdownResult with metrics and status.

        Raises:
            asyncio.CancelledError: If the shared shutdown task is cancelled,
                e.g. because a caller awaiting this method was cancelled.

        Examples:
            >>> result = await handler.shutdown()
            >>> assert result.clean_exit
            >>> assert result.duration < 30.0
        """
        async with self._shutdown_lock:
            if self._shutdown_result is not None:
                self._logger.debug("Shutdown already complete, returning cached result")
                return self._shutdown_result

            if self._shutdown_task is None:
                # First shutdown request - create the shared task. Timeout
                # handling lives inside the task so every waiter sees the
                # same outcome, including the timeout result.
                self._shutdown_task = asyncio.create_task(self._shutdown_with_timeout())
            else:
                self._logger.debug("Shutdown in progress, waiting for completion")
            task = self._shutdown_task

        # Await outside the lock so other callers can join the same task.
        return await task

    async def _shutdown_with_timeout(self) -> ShutdownResult:
        """Run the shutdown sequence under the total timeout and cache the result."""
        try:
            result = await asyncio.wait_for(self._execute_shutdown(), timeout=self._timeout)
        except asyncio.TimeoutError:
            # asyncio.TimeoutError is an alias of the builtin TimeoutError on
            # Python 3.11+ and a distinct class before that; naming it
            # explicitly catches wait_for timeouts on every version.
            self._logger.critical(
                "Shutdown timeout exceeded - forcing termination",
                extra={"timeout": self._timeout, "phase": self._phase.value},
            )
            result = ShutdownResult(
                clean_exit=False,
                duration=self._timeout,
                events_processed=self._publisher.stats.get("events_published", 0),
                errors=[f"Shutdown timeout exceeded ({self._timeout}s)"],
            )
        self._shutdown_result = result
        return result

    async def _stop_component(self, stop, label: str, success_message: str) -> list[str]:
        """Await one component's stop call under the per-component timeout.

        Args:
            stop: Zero-argument callable returning the awaitable that stops
                the component. It is invoked inside the guarded region so a
                failure to even start the call is recorded, not propagated.
            label: Prefix for error messages, e.g. "Publisher stop".
            success_message: Message logged at INFO level on success.

        Returns:
            Empty list on success, otherwise a one-element list holding the
            error message that was logged.
        """
        try:
            await asyncio.wait_for(stop(), timeout=self._component_timeout)
            self._logger.info(success_message)
        except asyncio.TimeoutError:
            error = f"{label} timeout ({self._component_timeout}s)"
            self._logger.warning(error)
            return [error]
        except Exception as e:
            # Best effort: one failing component must not prevent the
            # remaining components from being stopped.
            error = f"{label} error: {e}"
            self._logger.error(error, exc_info=True)
            return [error]
        return []

    async def _execute_shutdown(self) -> ShutdownResult:
        """Execute the actual shutdown sequence.

        Internal method that performs the shutdown steps. Called once, from
        the shared shutdown task, so only one execution happens even with
        concurrent shutdown() calls.

        Returns:
            ShutdownResult describing the run. Component failures and
            unexpected exceptions are reported in ``errors`` rather than
            raised.
        """
        # Monotonic clock: durations must not be skewed by wall-clock changes.
        start_time = time.monotonic()
        errors: list[str] = []
        phase_timings: dict = {}

        try:
            # Phase 1: Initiate shutdown
            self._phase = ShutdownPhase.INITIATED
            self._logger.info("Graceful shutdown initiated")
            phase_start = time.monotonic()

            events_processed = self._publisher.stats.get("events_published", 0)

            phase_timings["initiated"] = time.monotonic() - phase_start

            # Phase 2: Stop publisher (drain in-flight events)
            self._phase = ShutdownPhase.DRAINING
            self._logger.info("Stopping event publisher")
            phase_start = time.monotonic()

            errors.extend(
                await self._stop_component(
                    lambda: self._publisher.stop(),
                    "Publisher stop",
                    "Event publisher stopped cleanly",
                )
            )

            phase_timings["draining"] = time.monotonic() - phase_start

            # Phase 3: Disconnect connector and NATS
            self._phase = ShutdownPhase.DISCONNECTING
            phase_start = time.monotonic()

            self._logger.info("Disconnecting CyTube connector")
            errors.extend(
                await self._stop_component(
                    lambda: self._connector.disconnect(),
                    "Connector disconnect",
                    "CyTube connector disconnected cleanly",
                )
            )

            # Disconnect NATS (includes drain)
            self._logger.info("Disconnecting NATS client")
            errors.extend(
                await self._stop_component(
                    lambda: self._nats_client.disconnect(),
                    "NATS disconnect",
                    "NATS client disconnected cleanly",
                )
            )

            phase_timings["disconnecting"] = time.monotonic() - phase_start

            # Phase 4: Flush logging
            self._phase = ShutdownPhase.FINALIZING
            self._logger.info("Flushing log handlers")
            phase_start = time.monotonic()

            flush_errors = self._flush_logging()
            if flush_errors:
                errors.extend(flush_errors)
                self._logger.error(f"Log flush errors: {flush_errors}")
            else:
                self._logger.info("Log handlers flushed cleanly")

            phase_timings["finalizing"] = time.monotonic() - phase_start

            # Phase 5: Complete
            self._phase = ShutdownPhase.COMPLETE
            duration = time.monotonic() - start_time
            clean_exit = not errors

            self._logger.info(
                "Graceful shutdown complete",
                extra={
                    "clean_exit": clean_exit,
                    "duration": duration,
                    "events_processed": events_processed,
                    "error_count": len(errors),
                },
            )

            return ShutdownResult(
                clean_exit=clean_exit,
                duration=duration,
                events_processed=events_processed,
                errors=errors,
                phase_timings=phase_timings,
            )

        except Exception as e:
            # Unexpected error during shutdown. Cancellation is not caught
            # here (CancelledError is not an Exception subclass on Python
            # 3.8+), so the total timeout can still interrupt the sequence.
            duration = time.monotonic() - start_time
            error = f"Unexpected shutdown error: {e}"
            self._logger.critical(error, exc_info=True)
            errors.append(error)

            return ShutdownResult(
                clean_exit=False,
                duration=duration,
                events_processed=self._publisher.stats.get("events_published", 0),
                errors=errors,
                phase_timings=phase_timings,
            )

    def _flush_logging(self) -> list[str]:
        """Flush all handlers attached to the root logger.

        Ensures buffered log messages are written before process exit.
        Handlers are flushed but deliberately not closed: they might be used
        by other loggers, or be needed for final cleanup messages; closing is
        left to Python's own logging shutdown.

        Returns:
            List of error messages if any handlers fail to flush.
        """
        import sys  # local import: only needed on the failure path below

        errors: list[str] = []

        # Iterate over a copy so the handler list may change underneath us.
        for handler in list(logging.getLogger().handlers):
            try:
                handler.flush()
            except Exception as e:
                # The logging system itself may be broken at this point, so
                # report on stderr instead of through a logger.
                print(
                    f"Error flushing log handler {handler}: {e}",
                    file=sys.stderr,
                    flush=True,
                )
                errors.append(f"Log flush error: {e}")

        return errors

    async def __aenter__(self) -> "ShutdownHandler":
        """Enter async context manager.

        Returns:
            Self for use in async with statement.

        Examples:
            >>> async with ShutdownHandler(pub, conn, nats, log) as handler:
            ...     # Application runs
            ...     pass
        """
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb) -> bool:
        """Exit async context manager with automatic shutdown.

        Performs graceful shutdown on context exit unless a shutdown is
        already running or has already produced a result. Does not suppress
        exceptions.

        Returns:
            False to propagate any exception.
        """
        if not self.is_shutting_down and self._shutdown_result is None:
            await self.shutdown()
        return False