kryten-robot 0.6.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kryten/CONFIG.md +504 -0
- kryten/__init__.py +127 -0
- kryten/__main__.py +882 -0
- kryten/application_state.py +98 -0
- kryten/audit_logger.py +237 -0
- kryten/command_subscriber.py +341 -0
- kryten/config.example.json +35 -0
- kryten/config.py +510 -0
- kryten/connection_watchdog.py +209 -0
- kryten/correlation.py +241 -0
- kryten/cytube_connector.py +754 -0
- kryten/cytube_event_sender.py +1476 -0
- kryten/errors.py +161 -0
- kryten/event_publisher.py +416 -0
- kryten/health_monitor.py +482 -0
- kryten/lifecycle_events.py +274 -0
- kryten/logging_config.py +314 -0
- kryten/nats_client.py +468 -0
- kryten/raw_event.py +165 -0
- kryten/service_registry.py +371 -0
- kryten/shutdown_handler.py +383 -0
- kryten/socket_io.py +903 -0
- kryten/state_manager.py +711 -0
- kryten/state_query_handler.py +698 -0
- kryten/state_updater.py +314 -0
- kryten/stats_tracker.py +108 -0
- kryten/subject_builder.py +330 -0
- kryten_robot-0.6.9.dist-info/METADATA +469 -0
- kryten_robot-0.6.9.dist-info/RECORD +32 -0
- kryten_robot-0.6.9.dist-info/WHEEL +4 -0
- kryten_robot-0.6.9.dist-info/entry_points.txt +3 -0
- kryten_robot-0.6.9.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,383 @@
|
|
|
1
|
+
"""Graceful shutdown coordination for Kryten.
|
|
2
|
+
|
|
3
|
+
This module provides the ShutdownHandler that orchestrates clean termination
|
|
4
|
+
of all Kryten components in the correct order: publisher → connector → NATS → logging.
|
|
5
|
+
|
|
6
|
+
The handler enforces timeouts, tracks shutdown state, handles idempotent shutdown
|
|
7
|
+
requests, and returns metrics about the shutdown operation.
|
|
8
|
+
|
|
9
|
+
Examples:
|
|
10
|
+
Basic shutdown:
|
|
11
|
+
>>> handler = ShutdownHandler(publisher, connector, nats, logger)
|
|
12
|
+
>>> result = await handler.shutdown()
|
|
13
|
+
>>> if result.clean_exit:
|
|
14
|
+
... print("Clean shutdown complete")
|
|
15
|
+
|
|
16
|
+
With timeout:
|
|
17
|
+
>>> try:
|
|
18
|
+
... result = await asyncio.wait_for(handler.shutdown(), timeout=30.0)
|
|
19
|
+
... except asyncio.TimeoutError:
|
|
20
|
+
... logger.critical("Shutdown timeout - forcing exit")
|
|
21
|
+
|
|
22
|
+
As async context manager:
|
|
23
|
+
>>> async with ShutdownHandler(publisher, connector, nats, logger) as handler:
|
|
24
|
+
... # Application runs
|
|
25
|
+
... pass
|
|
26
|
+
>>> # Automatic shutdown on exit
|
|
27
|
+
|
|
28
|
+
Note:
|
|
29
|
+
Shutdown is idempotent - calling shutdown() multiple times is safe.
|
|
30
|
+
The first call executes the shutdown sequence, subsequent calls wait
|
|
31
|
+
for the same result.
|
|
32
|
+
"""
|
|
33
|
+
|
|
34
|
+
import asyncio
|
|
35
|
+
import logging
|
|
36
|
+
import time
|
|
37
|
+
from dataclasses import dataclass, field
|
|
38
|
+
from enum import Enum
|
|
39
|
+
|
|
40
|
+
from .cytube_connector import CytubeConnector
|
|
41
|
+
from .event_publisher import EventPublisher
|
|
42
|
+
from .nats_client import NatsClient
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class ShutdownPhase(Enum):
    """Stages of the shutdown sequence, listed in the order they occur.

    The current stage is recorded while shutdown runs so that log output
    and debugging can show how far the sequence progressed.
    """

    # No shutdown has been requested yet.
    IDLE = "idle"
    # Shutdown was requested and the sequence has started.
    INITIATED = "initiated"
    # The event publisher is being stopped.
    DRAINING = "draining"
    # The CyTube connector and the NATS client are being disconnected.
    DISCONNECTING = "disconnecting"
    # Log handlers are being flushed.
    FINALIZING = "finalizing"
    # The sequence ran to the end.
    COMPLETE = "complete"
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
@dataclass
class ShutdownResult:
    """Outcome and metrics of one shutdown run.

    Returned by the shutdown handler so operators and callers can see
    whether termination was graceful and where the time went.

    Attributes:
        clean_exit: True when every component shut down gracefully within
            its timeout (i.e. no errors were recorded).
        duration: Total seconds spent shutting down.
        events_processed: Number of events processed before shutdown.
        errors: Error messages collected during shutdown; empty on a
            clean exit.
        phase_timings: Seconds spent in each shutdown phase, keyed by
            phase name.

    Examples:
        >>> result = await handler.shutdown()
        >>> print(f"Clean: {result.clean_exit}, Duration: {result.duration:.2f}s")
        >>> if result.errors:
        ...     print(f"Errors: {result.errors}")
    """

    # Required fields come first; the two containers default to fresh,
    # per-instance objects via default_factory.
    clean_exit: bool
    duration: float
    events_processed: int
    errors: list[str] = field(default_factory=list)
    phase_timings: dict = field(default_factory=dict)
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
class ShutdownHandler:
    """Coordinates graceful shutdown of Kryten components.

    Ensures clean termination by stopping components in reverse startup order:
        1. EventPublisher - Stop accepting new events, complete current batch
        2. CytubeConnector - Disconnect from CyTube server
        3. NatsClient - Drain pending messages, close connection
        4. Logging - Flush all log handlers

    Every component stop is bounded by ``component_timeout`` and the whole
    sequence by ``timeout``. Failures of individual components are recorded
    in the result and do not prevent the remaining components from being
    stopped.

    Args:
        publisher: EventPublisher instance to stop first.
        connector: CytubeConnector instance to disconnect second.
        nats_client: NatsClient instance to drain and disconnect third.
        logger: Logger instance for shutdown progress tracking.
        timeout: Total shutdown timeout in seconds (default: 30.0).
        component_timeout: Individual component timeout in seconds (default: 10.0).

    Examples:
        >>> handler = ShutdownHandler(publisher, connector, nats, logger)
        >>> result = await handler.shutdown()
        >>> if not result.clean_exit:
        ...     logger.error("Forced shutdown", extra={"errors": result.errors})
    """

    def __init__(
        self,
        publisher: EventPublisher,
        connector: CytubeConnector,
        nats_client: NatsClient,
        logger: logging.Logger,
        timeout: float = 30.0,
        component_timeout: float = 10.0,
    ):
        """Initialize shutdown handler with components and timeouts."""
        self._publisher = publisher
        self._connector = connector
        self._nats_client = nats_client
        self._logger = logger
        self._timeout = timeout
        self._component_timeout = component_timeout

        self._phase = ShutdownPhase.IDLE
        self._shutdown_task: asyncio.Task | None = None
        self._shutdown_result: ShutdownResult | None = None
        self._shutdown_lock = asyncio.Lock()

    @property
    def is_shutting_down(self) -> bool:
        """Check if shutdown is currently in progress.

        Returns:
            True if the phase is anything other than IDLE or COMPLETE,
            i.e. shutdown has been initiated but has not run to the end.

        Examples:
            >>> handler.is_shutting_down
            False
            >>> asyncio.create_task(handler.shutdown())
            >>> handler.is_shutting_down
            True
        """
        return self._phase not in (ShutdownPhase.IDLE, ShutdownPhase.COMPLETE)

    async def shutdown(self) -> ShutdownResult:
        """Execute graceful shutdown sequence.

        Stops all components in reverse startup order with timeout
        enforcement. The method is idempotent: the lock is held for the
        whole sequence, so concurrent callers queue on it and then receive
        the cached result of the first run.

        A total-timeout is not raised to the caller. It is logged at
        CRITICAL level and reported as a result with ``clean_exit=False``.

        Returns:
            ShutdownResult with metrics and status.

        Examples:
            >>> result = await handler.shutdown()
            >>> assert result.clean_exit
            >>> assert result.duration < 30.0
        """
        async with self._shutdown_lock:
            if self._shutdown_result is not None:
                self._logger.debug("Shutdown already complete, returning cached result")
                return self._shutdown_result

            if self._shutdown_task is not None:
                self._logger.debug("Shutdown in progress, waiting for completion")
                return await self._shutdown_task

            # First shutdown request - create task
            self._shutdown_task = asyncio.create_task(self._execute_shutdown())

            try:
                result = await asyncio.wait_for(self._shutdown_task, timeout=self._timeout)
            except asyncio.TimeoutError:
                # asyncio.TimeoutError is the builtin TimeoutError on 3.11+,
                # but a distinct class before that; naming it explicitly
                # keeps this handler effective on every supported version.
                self._logger.critical(
                    "Shutdown timeout exceeded - forcing termination",
                    extra={"timeout": self._timeout, "phase": self._phase.value}
                )
                result = ShutdownResult(
                    clean_exit=False,
                    duration=self._timeout,
                    events_processed=self._publisher.stats.get("events_published", 0),
                    errors=[f"Shutdown timeout exceeded ({self._timeout}s)"],
                )

            self._shutdown_result = result
            return result

    async def _stop_component(
        self,
        operation,
        success_message: str,
        failure_label: str,
        errors: list[str],
    ) -> None:
        """Run one component stop under the component timeout.

        Args:
            operation: Zero-argument callable returning an awaitable. It is
                invoked inside the ``try`` so that a failure while creating
                the awaitable is recorded like any other component error.
            success_message: Logged at INFO level when the stop succeeds.
            failure_label: Prefix for the timeout/error message.
            errors: List the failure message is appended to (mutated).
        """
        try:
            await asyncio.wait_for(operation(), timeout=self._component_timeout)
            self._logger.info(success_message)
        except asyncio.TimeoutError:
            error = f"{failure_label} timeout ({self._component_timeout}s)"
            self._logger.warning(error)
            errors.append(error)
        except Exception as e:
            error = f"{failure_label} error: {e}"
            self._logger.error(error, exc_info=True)
            errors.append(error)

    async def _execute_shutdown(self) -> ShutdownResult:
        """Execute the actual shutdown sequence.

        Internal method that performs the shutdown steps. Called by
        shutdown() so that only one execution happens even with concurrent
        calls.

        Returns:
            ShutdownResult describing the run. ``clean_exit`` is True only
            when no error message was recorded.
        """
        # Monotonic clock: durations must not be affected by wall-clock
        # adjustments that happen while shutdown is running.
        start_time = time.monotonic()
        errors: list[str] = []
        phase_timings: dict = {}
        # Pre-initialised so the failure path below can report a value even
        # if reading the publisher stats is what failed.
        events_processed = 0

        try:
            # Phase 1: Initiate shutdown
            self._phase = ShutdownPhase.INITIATED
            self._logger.info("Graceful shutdown initiated")
            phase_start = time.monotonic()

            events_processed = self._publisher.stats.get("events_published", 0)

            phase_timings["initiated"] = time.monotonic() - phase_start

            # Phase 2: Stop publisher (drain in-flight events)
            self._phase = ShutdownPhase.DRAINING
            self._logger.info("Stopping event publisher")
            phase_start = time.monotonic()

            await self._stop_component(
                lambda: self._publisher.stop(),
                "Event publisher stopped cleanly",
                "Publisher stop",
                errors,
            )

            phase_timings["draining"] = time.monotonic() - phase_start

            # Phase 3: Disconnect connector and NATS
            self._phase = ShutdownPhase.DISCONNECTING
            phase_start = time.monotonic()

            self._logger.info("Disconnecting CyTube connector")
            await self._stop_component(
                lambda: self._connector.disconnect(),
                "CyTube connector disconnected cleanly",
                "Connector disconnect",
                errors,
            )

            # Disconnect NATS (includes drain)
            self._logger.info("Disconnecting NATS client")
            await self._stop_component(
                lambda: self._nats_client.disconnect(),
                "NATS client disconnected cleanly",
                "NATS disconnect",
                errors,
            )

            phase_timings["disconnecting"] = time.monotonic() - phase_start

            # Phase 4: Flush logging
            self._phase = ShutdownPhase.FINALIZING
            self._logger.info("Flushing log handlers")
            phase_start = time.monotonic()

            flush_errors = self._flush_logging()
            if flush_errors:
                errors.extend(flush_errors)
                self._logger.error(f"Log flush errors: {flush_errors}")
            else:
                self._logger.info("Log handlers flushed cleanly")

            phase_timings["finalizing"] = time.monotonic() - phase_start

            # Phase 5: Complete
            self._phase = ShutdownPhase.COMPLETE
            duration = time.monotonic() - start_time
            clean_exit = not errors

            self._logger.info(
                "Graceful shutdown complete",
                extra={
                    "clean_exit": clean_exit,
                    "duration": duration,
                    "events_processed": events_processed,
                    "error_count": len(errors),
                }
            )

            return ShutdownResult(
                clean_exit=clean_exit,
                duration=duration,
                events_processed=events_processed,
                errors=errors,
                phase_timings=phase_timings,
            )

        except Exception as e:
            # Unexpected error during shutdown
            duration = time.monotonic() - start_time
            error = f"Unexpected shutdown error: {e}"
            self._logger.critical(error, exc_info=True)
            errors.append(error)

            # Reuse the count captured at initiation instead of touching the
            # publisher again: a second failing read here would escape this
            # handler and no result would be returned at all.
            return ShutdownResult(
                clean_exit=False,
                duration=duration,
                events_processed=events_processed,
                errors=errors,
                phase_timings=phase_timings,
            )

    def _flush_logging(self) -> list[str]:
        """Flush every handler attached to the root logger.

        Ensures buffered log messages are written before process exit.
        Handlers are flushed only, never closed: they may still be needed
        for final messages, and closing is left to interpreter shutdown.

        Returns:
            List of error messages, one per handler that failed to flush
            (empty when every flush succeeded).
        """
        import sys  # local import: only needed on the failure path below

        root_logger = logging.getLogger()
        errors = []

        for handler in root_logger.handlers[:]:  # Copy list to avoid modification during iteration
            try:
                handler.flush()
            except Exception as e:
                # The logging system itself may be broken here, so report
                # directly on stderr rather than through a logger.
                error_msg = f"Log flush error: {e}"
                print(
                    f"Error flushing log handler {handler}: {e}",
                    file=sys.stderr,
                    flush=True,
                )
                errors.append(error_msg)

        return errors

    async def __aenter__(self) -> "ShutdownHandler":
        """Enter async context manager.

        Returns:
            Self for use in async with statement.

        Examples:
            >>> async with ShutdownHandler(pub, conn, nats, log) as handler:
            ...     # Application runs
            ...     pass
        """
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb) -> bool:
        """Exit async context manager with automatic shutdown.

        Runs shutdown() on context exit unless a shutdown is already in
        progress or a result has already been recorded. Does not suppress
        exceptions.

        Returns:
            False to propagate any exception.
        """
        if not self.is_shutting_down and self._shutdown_result is None:
            await self.shutdown()
        return False
|