kryten-robot 0.6.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kryten/CONFIG.md +504 -0
- kryten/__init__.py +127 -0
- kryten/__main__.py +882 -0
- kryten/application_state.py +98 -0
- kryten/audit_logger.py +237 -0
- kryten/command_subscriber.py +341 -0
- kryten/config.example.json +35 -0
- kryten/config.py +510 -0
- kryten/connection_watchdog.py +209 -0
- kryten/correlation.py +241 -0
- kryten/cytube_connector.py +754 -0
- kryten/cytube_event_sender.py +1476 -0
- kryten/errors.py +161 -0
- kryten/event_publisher.py +416 -0
- kryten/health_monitor.py +482 -0
- kryten/lifecycle_events.py +274 -0
- kryten/logging_config.py +314 -0
- kryten/nats_client.py +468 -0
- kryten/raw_event.py +165 -0
- kryten/service_registry.py +371 -0
- kryten/shutdown_handler.py +383 -0
- kryten/socket_io.py +903 -0
- kryten/state_manager.py +711 -0
- kryten/state_query_handler.py +698 -0
- kryten/state_updater.py +314 -0
- kryten/stats_tracker.py +108 -0
- kryten/subject_builder.py +330 -0
- kryten_robot-0.6.9.dist-info/METADATA +469 -0
- kryten_robot-0.6.9.dist-info/RECORD +32 -0
- kryten_robot-0.6.9.dist-info/WHEEL +4 -0
- kryten_robot-0.6.9.dist-info/entry_points.txt +3 -0
- kryten_robot-0.6.9.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,371 @@
|
|
|
1
|
+
"""Service Registry - Track and monitor Kryten microservices.
|
|
2
|
+
|
|
3
|
+
This module provides service discovery and health monitoring for the Kryten
|
|
4
|
+
ecosystem. It subscribes to lifecycle events from all services and maintains
|
|
5
|
+
an inventory of active services with their heartbeat status.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import asyncio
|
|
9
|
+
import json
|
|
10
|
+
import logging
|
|
11
|
+
from collections.abc import Callable
|
|
12
|
+
from dataclasses import dataclass, field
|
|
13
|
+
from datetime import UTC, datetime
|
|
14
|
+
from typing import Any
|
|
15
|
+
|
|
16
|
+
from .nats_client import NatsClient
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@dataclass
|
|
20
|
+
class ServiceInfo:
|
|
21
|
+
"""Information about a registered service.
|
|
22
|
+
|
|
23
|
+
Attributes:
|
|
24
|
+
name: Service name (e.g., "userstats", "moderator")
|
|
25
|
+
version: Service version string
|
|
26
|
+
hostname: Hostname where service is running
|
|
27
|
+
first_seen: Timestamp when service was first discovered
|
|
28
|
+
last_heartbeat: Timestamp of most recent heartbeat
|
|
29
|
+
last_startup: Timestamp of most recent startup event
|
|
30
|
+
heartbeat_count: Total number of heartbeats received
|
|
31
|
+
metadata: Additional service-specific metadata
|
|
32
|
+
"""
|
|
33
|
+
name: str
|
|
34
|
+
version: str
|
|
35
|
+
hostname: str
|
|
36
|
+
first_seen: datetime
|
|
37
|
+
last_heartbeat: datetime
|
|
38
|
+
last_startup: datetime
|
|
39
|
+
heartbeat_count: int = 0
|
|
40
|
+
metadata: dict[str, Any] = field(default_factory=dict)
|
|
41
|
+
|
|
42
|
+
@property
|
|
43
|
+
def seconds_since_heartbeat(self) -> float:
|
|
44
|
+
"""Calculate seconds since last heartbeat."""
|
|
45
|
+
return (datetime.now(UTC) - self.last_heartbeat).total_seconds()
|
|
46
|
+
|
|
47
|
+
@property
|
|
48
|
+
def is_stale(self) -> bool:
|
|
49
|
+
"""Check if service appears offline (no heartbeat in 90 seconds)."""
|
|
50
|
+
return self.seconds_since_heartbeat > 90
|
|
51
|
+
|
|
52
|
+
def to_dict(self) -> dict[str, Any]:
|
|
53
|
+
"""Convert to dictionary for serialization."""
|
|
54
|
+
return {
|
|
55
|
+
"name": self.name,
|
|
56
|
+
"version": self.version,
|
|
57
|
+
"hostname": self.hostname,
|
|
58
|
+
"first_seen": self.first_seen.isoformat(),
|
|
59
|
+
"last_heartbeat": self.last_heartbeat.isoformat(),
|
|
60
|
+
"last_startup": self.last_startup.isoformat(),
|
|
61
|
+
"heartbeat_count": self.heartbeat_count,
|
|
62
|
+
"seconds_since_heartbeat": self.seconds_since_heartbeat,
|
|
63
|
+
"is_stale": self.is_stale,
|
|
64
|
+
"metadata": self.metadata,
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
class ServiceRegistry:
    """Monitor and track Kryten microservices.

    Subscribes to lifecycle events from all services and maintains a registry
    of active services with their health status.

    Subscriptions:
        - kryten.lifecycle.*.startup - Service startup notifications
        - kryten.lifecycle.*.heartbeat - Service heartbeat events
        - kryten.lifecycle.*.shutdown - Service shutdown notifications

    Attributes:
        nats_client: NATS client for subscriptions
        logger: Logger instance
        services: Dictionary of registered services by name

    Examples:
        >>> registry = ServiceRegistry(nats_client, logger)
        >>> await registry.start()
        >>> services = registry.get_active_services()
        >>> await registry.stop()
    """

    def __init__(
        self,
        nats_client: NatsClient,
        logger: logging.Logger,
    ):
        """Initialize service registry.

        Args:
            nats_client: NATS client for event subscriptions
            logger: Logger for structured output
        """
        self._nats = nats_client
        self._logger = logger
        self._running = False

        # Service tracking
        self._services: dict[str, ServiceInfo] = {}
        # Serialises registry mutations performed by the event handlers.
        self._lock = asyncio.Lock()

        # Subscriptions (objects returned by the NATS client; each is expected
        # to expose an awaitable ``unsubscribe()``).
        self._startup_sub = None
        self._heartbeat_sub = None
        self._shutdown_sub = None

        # Callbacks for service events
        self._on_service_registered: Callable[[ServiceInfo], None] | None = None
        self._on_service_heartbeat: Callable[[ServiceInfo], None] | None = None
        self._on_service_shutdown: Callable[[str], None] | None = None

    @property
    def is_running(self) -> bool:
        """Check if registry is running."""
        return self._running

    @property
    def service_count(self) -> int:
        """Get count of registered services."""
        return len(self._services)

    def on_service_registered(self, callback: Callable[[ServiceInfo], None]) -> None:
        """Register callback for when new service is discovered.

        Only one callback is kept; a later call replaces the earlier one.

        Args:
            callback: Function to call with ServiceInfo when service registers
        """
        self._on_service_registered = callback

    def on_service_heartbeat(self, callback: Callable[[ServiceInfo], None]) -> None:
        """Register callback for service heartbeat events.

        Only one callback is kept; a later call replaces the earlier one.

        Args:
            callback: Function to call with ServiceInfo on each heartbeat
        """
        self._on_service_heartbeat = callback

    def on_service_shutdown(self, callback: Callable[[str], None]) -> None:
        """Register callback for service shutdown events.

        Only one callback is kept; a later call replaces the earlier one.

        Args:
            callback: Function to call with service name on shutdown
        """
        self._on_service_shutdown = callback

    async def start(self) -> None:
        """Start service registry and subscribe to lifecycle events.

        Calling this while already running only logs a warning.

        Raises:
            Exception: Whatever the NATS client raised while subscribing. Any
                subscription created before the failure is released first, so
                a failed start does not leave handlers attached.
        """
        if self._running:
            self._logger.warning("Service registry already running")
            return

        self._running = True

        try:
            # Subscribe to startup events from all services
            self._startup_sub = await self._nats.subscribe_request_reply(
                "kryten.lifecycle.*.startup",
                callback=self._handle_startup,
            )
            self._logger.info("Subscribed to kryten.lifecycle.*.startup")

            # Subscribe to heartbeat events from all services
            self._heartbeat_sub = await self._nats.subscribe_request_reply(
                "kryten.lifecycle.*.heartbeat",
                callback=self._handle_heartbeat,
            )
            self._logger.info("Subscribed to kryten.lifecycle.*.heartbeat")

            # Subscribe to shutdown events from all services
            self._shutdown_sub = await self._nats.subscribe_request_reply(
                "kryten.lifecycle.*.shutdown",
                callback=self._handle_shutdown,
            )
            self._logger.info("Subscribed to kryten.lifecycle.*.shutdown")

            self._logger.info("Service registry started")

        except Exception as e:
            self._logger.error(f"Failed to start service registry: {e}", exc_info=True)
            self._running = False
            # stop() is a no-op once _running is False, so release whatever was
            # subscribed before the failure here or it would leak.
            await self._unsubscribe_all()
            raise

    async def stop(self) -> None:
        """Stop service registry and unsubscribe from events.

        Does nothing when the registry is not running. Errors raised while
        unsubscribing are logged as warnings and otherwise ignored.
        """
        if not self._running:
            return

        self._running = False

        # Unsubscribe from all events
        await self._unsubscribe_all()

        self._logger.info("Service registry stopped")

    async def _unsubscribe_all(self) -> None:
        """Release every held subscription, logging (not raising) failures."""
        held = (self._startup_sub, self._heartbeat_sub, self._shutdown_sub)
        self._startup_sub = None
        self._heartbeat_sub = None
        self._shutdown_sub = None

        for sub in held:
            if not sub:
                continue
            try:
                await sub.unsubscribe()
            except Exception as e:
                self._logger.warning(f"Error unsubscribing: {e}")

    def _event_timestamp(self, data: dict[str, Any]) -> datetime:
        """Return a timezone-aware timestamp for a lifecycle event payload.

        Falls back to the current UTC time when the payload has no
        ``timestamp``, or one that cannot be parsed, so a bad timestamp does
        not cause the whole event to be discarded. A parsed value without a
        UTC offset is tagged as UTC, because a naive datetime cannot later be
        compared with ``datetime.now(UTC)``.
        """
        raw = data.get("timestamp")
        if raw is None:
            return datetime.now(UTC)

        try:
            parsed = datetime.fromisoformat(raw)
        except (TypeError, ValueError):
            self._logger.warning(
                f"Invalid lifecycle event timestamp {raw!r}; using receive time"
            )
            return datetime.now(UTC)

        if parsed.utcoffset() is None:
            # NOTE(review): assumes offset-less timestamps are UTC -- confirm
            # against the lifecycle event publishers.
            parsed = parsed.replace(tzinfo=UTC)
        return parsed

    async def _handle_startup(self, msg) -> None:
        """Handle service startup event.

        Registers a previously unknown service (firing the registered
        callback) or refreshes the record of a service that restarted.
        Events without a ``service`` field are ignored. All errors are logged
        and swallowed so a bad message cannot break the subscription.
        """
        try:
            data = json.loads(msg.data.decode('utf-8'))
            service_name = data.get("service")

            if not service_name:
                return

            # Extract service information
            version = data.get("version", "unknown")
            hostname = data.get("hostname", "unknown")
            timestamp = self._event_timestamp(data)

            async with self._lock:
                is_new = service_name not in self._services

                if is_new:
                    # New service discovered
                    service_info = ServiceInfo(
                        name=service_name,
                        version=version,
                        hostname=hostname,
                        first_seen=timestamp,
                        last_heartbeat=timestamp,
                        last_startup=timestamp,
                        metadata=data,
                    )
                    self._services[service_name] = service_info
                    self._logger.info(
                        f"Service registered: {service_name} v{version} on {hostname}"
                    )

                    # Trigger callback
                    if self._on_service_registered:
                        try:
                            self._on_service_registered(service_info)
                        except Exception as e:
                            self._logger.error(f"Error in service registered callback: {e}")
                else:
                    # Service restarted: keep first_seen and heartbeat_count,
                    # refresh everything the startup event carries.
                    service_info = self._services[service_name]
                    service_info.version = version
                    service_info.hostname = hostname
                    service_info.last_startup = timestamp
                    service_info.last_heartbeat = timestamp
                    service_info.metadata = data
                    self._logger.info(
                        f"Service restarted: {service_name} v{version} on {hostname}"
                    )

        except json.JSONDecodeError as e:
            self._logger.error(f"Invalid startup event JSON: {e}")
        except Exception as e:
            self._logger.error(f"Error handling startup event: {e}", exc_info=True)

    async def _handle_heartbeat(self, msg) -> None:
        """Handle service heartbeat event.

        Updates the heartbeat time and counter of a known service and fires
        the heartbeat callback. A heartbeat from an unknown service is only
        logged as a warning; it does not register the service.
        """
        try:
            data = json.loads(msg.data.decode('utf-8'))
            service_name = data.get("service")

            if not service_name:
                return

            timestamp = self._event_timestamp(data)

            async with self._lock:
                if service_name in self._services:
                    service_info = self._services[service_name]
                    service_info.last_heartbeat = timestamp
                    service_info.heartbeat_count += 1

                    self._logger.debug(
                        f"Heartbeat from {service_name} "
                        f"(count: {service_info.heartbeat_count})"
                    )

                    # Trigger callback
                    if self._on_service_heartbeat:
                        try:
                            self._on_service_heartbeat(service_info)
                        except Exception as e:
                            self._logger.error(f"Error in heartbeat callback: {e}")
                else:
                    # Heartbeat from unknown service - log warning
                    self._logger.warning(
                        f"Heartbeat from unregistered service: {service_name} "
                        "(may have missed startup event)"
                    )

        except json.JSONDecodeError as e:
            self._logger.error(f"Invalid heartbeat event JSON: {e}")
        except Exception as e:
            self._logger.error(f"Error handling heartbeat event: {e}", exc_info=True)

    async def _handle_shutdown(self, msg) -> None:
        """Handle service shutdown event.

        Removes a known service from the registry and fires the shutdown
        callback with its name. Shutdown events for unknown services are
        ignored.
        """
        try:
            data = json.loads(msg.data.decode('utf-8'))
            service_name = data.get("service")
            reason = data.get("reason", "Unknown")

            if not service_name:
                return

            async with self._lock:
                if service_name in self._services:
                    del self._services[service_name]
                    self._logger.info(f"Service shutdown: {service_name} ({reason})")

                    # Trigger callback
                    if self._on_service_shutdown:
                        try:
                            self._on_service_shutdown(service_name)
                        except Exception as e:
                            self._logger.error(f"Error in shutdown callback: {e}")

        except json.JSONDecodeError as e:
            self._logger.error(f"Invalid shutdown event JSON: {e}")
        except Exception as e:
            self._logger.error(f"Error handling shutdown event: {e}", exc_info=True)

    def get_service(self, name: str) -> ServiceInfo | None:
        """Get information about a specific service.

        Args:
            name: Service name

        Returns:
            ServiceInfo if service is registered, None otherwise
        """
        return self._services.get(name)

    def get_all_services(self) -> list[ServiceInfo]:
        """Get information about all registered services.

        Returns:
            List of ServiceInfo objects for all services
        """
        return list(self._services.values())

    def get_active_services(self) -> list[ServiceInfo]:
        """Get only active services (not stale).

        Returns:
            List of ServiceInfo objects for services with recent heartbeats
        """
        return [s for s in self._services.values() if not s.is_stale]

    def get_stale_services(self) -> list[ServiceInfo]:
        """Get services that appear offline (stale heartbeats).

        Returns:
            List of ServiceInfo objects for services with stale heartbeats
        """
        return [s for s in self._services.values() if s.is_stale]
|
|
369
|
+
|
|
370
|
+
|
|
371
|
+
# Public API of this module: the registry and the per-service record it stores.
__all__ = ["ServiceRegistry", "ServiceInfo"]
|