kryten-robot 0.6.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kryten/CONFIG.md +504 -0
- kryten/__init__.py +127 -0
- kryten/__main__.py +882 -0
- kryten/application_state.py +98 -0
- kryten/audit_logger.py +237 -0
- kryten/command_subscriber.py +341 -0
- kryten/config.example.json +35 -0
- kryten/config.py +510 -0
- kryten/connection_watchdog.py +209 -0
- kryten/correlation.py +241 -0
- kryten/cytube_connector.py +754 -0
- kryten/cytube_event_sender.py +1476 -0
- kryten/errors.py +161 -0
- kryten/event_publisher.py +416 -0
- kryten/health_monitor.py +482 -0
- kryten/lifecycle_events.py +274 -0
- kryten/logging_config.py +314 -0
- kryten/nats_client.py +468 -0
- kryten/raw_event.py +165 -0
- kryten/service_registry.py +371 -0
- kryten/shutdown_handler.py +383 -0
- kryten/socket_io.py +903 -0
- kryten/state_manager.py +711 -0
- kryten/state_query_handler.py +698 -0
- kryten/state_updater.py +314 -0
- kryten/stats_tracker.py +108 -0
- kryten/subject_builder.py +330 -0
- kryten_robot-0.6.9.dist-info/METADATA +469 -0
- kryten_robot-0.6.9.dist-info/RECORD +32 -0
- kryten_robot-0.6.9.dist-info/WHEEL +4 -0
- kryten_robot-0.6.9.dist-info/entry_points.txt +3 -0
- kryten_robot-0.6.9.dist-info/licenses/LICENSE +21 -0
kryten/errors.py
ADDED
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
"""Kryten Connection Error Hierarchy.
|
|
2
|
+
|
|
3
|
+
This module defines exception classes for connection, authentication, protocol,
|
|
4
|
+
and send failures, standardizing error handling across the Kryten connector.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from typing import Final
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class ConnectionError(Exception):  # noqa: A001 (shadows builtin but intentional)
    """Root of the Kryten connection error hierarchy.

    Every Kryten-specific connection failure derives from this class, so a
    single ``except ConnectionError`` handler covers all of them.

    This class deliberately shadows Python's built-in ``ConnectionError``
    in any module that imports it. It derives directly from ``Exception``,
    not from ``OSError``, so a handler for this class does not catch the
    built-in of the same name.

    Args:
        message: Human-readable error description.

    Examples:
        >>> try:
        ...     await connector.connect()
        ... except ConnectionError as e:
        ...     logger.error(f"Connection failed: {e}")

    Note:
        Most connection errors are transient and may succeed on retry.
        Inspect the concrete subclass to choose a retry strategy.
    """
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class AuthenticationError(ConnectionError):
    """Credentials were rejected or the login sequence failed.

    Usually points at a configuration problem that needs manual
    intervention rather than a transient network condition.

    Args:
        message: Human-readable error description.

    Examples:
        >>> if not logged_in:
        ...     raise AuthenticationError('Invalid CyTube password')

    Note:
        **Not recoverable by retry**. Requires a credential update or
        account verification.
    """
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class NotConnectedError(ConnectionError):
    """An operation was attempted without an active connection.

    Raised when emit, recv, or similar operations are attempted before
    connect() has been called or after the connection has been closed.

    Args:
        message: Human-readable error description.

    Examples:
        >>> if not self.socket:
        ...     raise NotConnectedError('Must call connect() first')

    Note:
        **Recoverable**. Establish a connection with connect() and then
        retry the operation.
    """
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
class SendError(ConnectionError):
    """A message or data payload could not be sent.

    Raised when transmission fails because of network issues, buffer
    overflow, or other transport problems.

    Args:
        message: Human-readable error description.

    Examples:
        >>> try:
        ...     await websocket.send(data)
        ... except websockets.ConnectionClosed as e:
        ...     raise SendError('Failed to send message') from e

    Note:
        **May be recoverable**. Check whether the connection is still
        active before choosing a retry strategy.
    """
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
class ProtocolError(ConnectionError):
    """The platform protocol was violated.

    Raised when Socket.IO or CyTube protocol expectations are not met,
    for example malformed frames, unexpected event sequences, or invalid
    handshake responses.

    Args:
        message: Human-readable error description.

    Examples:
        >>> if response != '3probe':
        ...     raise ProtocolError(f'Invalid probe response: {response}')

    Note:
        **Usually not recoverable**. Indicates a client/server version
        mismatch or a protocol implementation bug.
    """
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
class PingTimeout(ConnectionError):
    """The heartbeat timed out.

    Raised when the server does not answer a ping within the configured
    timeout period, which suggests the connection is dead or the server
    is unresponsive.

    Args:
        message: Human-readable error description.

    Examples:
        >>> if not pong_received:
        ...     raise PingTimeout('Server did not respond to ping')

    Note:
        **Recoverable by reconnection**. The network may be temporarily
        disrupted; attempt to reconnect with backoff.
    """
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
class SocketIOError(ConnectionError):
    """General Socket.IO transport-layer error.

    Catch-all for transport problems that no more specific exception type
    covers, such as websocket errors, handshake failures, or framing
    problems.

    Args:
        message: Human-readable error description.

    Examples:
        >>> try:
        ...     config = await get_handshake()
        ... except InvalidHandshake as e:
        ...     raise SocketIOError('Handshake failed') from e

    Note:
        **May be recoverable**. Inspect the ``__cause__`` attribute to
        decide whether a retry is appropriate for the underlying error.
    """
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
# Public API of this module: the base error first, then its subclasses.
__all__: Final[list[str]] = [
    "ConnectionError",
    "AuthenticationError",
    "NotConnectedError",
    "SendError",
    "ProtocolError",
    "PingTimeout",
    "SocketIOError",
]
|
|
kryten/event_publisher.py
ADDED
|
@@ -0,0 +1,416 @@
|
|
|
1
|
+
"""NATS Event Publisher for CyTube Events.
|
|
2
|
+
|
|
3
|
+
This module provides the EventPublisher component that bridges the CytubeConnector
|
|
4
|
+
event stream with NATS publishing, consuming raw events and publishing them to
|
|
5
|
+
appropriate NATS subjects.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import asyncio
|
|
9
|
+
import logging
|
|
10
|
+
import time
|
|
11
|
+
from collections.abc import Callable
|
|
12
|
+
from typing import Any
|
|
13
|
+
|
|
14
|
+
from .cytube_connector import CytubeConnector
|
|
15
|
+
from .nats_client import NatsClient
|
|
16
|
+
from .raw_event import RawEvent
|
|
17
|
+
from .stats_tracker import StatsTracker
|
|
18
|
+
from .subject_builder import build_event_subject
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class EventPublisher:
    """Bridge between a CytubeConnector event stream and NATS publishing.

    Consumes ``(event_name, payload)`` pairs from the connector's
    ``recv_events()`` async iterator, wraps each one in a ``RawEvent``,
    derives a NATS subject for it and publishes it with retry and
    exponential backoff.

    Attributes:
        connector: CytubeConnector providing the event stream.
        nats_client: NatsClient used for publishing to NATS.
        logger: Logger for structured output.
        batch_size: Requested batch size. It is only stored and logged;
            events are published one at a time.
        retry_attempts: Number of retries after the first failed publish.
        retry_delay: Initial backoff delay in seconds (doubled per retry).
        is_running: Whether the run loop is currently active.
        stats: Publishing statistics.

    Examples:
        >>> connector = CytubeConnector(config, logger)
        >>> nats_client = NatsClient(nats_config, logger)
        >>> publisher = EventPublisher(connector, nats_client, logger)
        >>>
        >>> await connector.connect()
        >>> await nats_client.connect()
        >>> task = asyncio.create_task(publisher.run())
        >>> # Publisher now bridging events
        >>> await publisher.stop()
    """

    def __init__(
        self,
        connector: CytubeConnector,
        nats_client: NatsClient,
        logger: logging.Logger,
        batch_size: int = 1,
        retry_attempts: int = 3,
        retry_delay: float = 1.0,
    ) -> None:
        """Initialize event publisher.

        Args:
            connector: CytubeConnector instance providing event stream.
            nats_client: NatsClient instance for NATS publishing.
            logger: Logger for structured logging.
            batch_size: Events per batch (currently only 1 supported).
            retry_attempts: Number of retry attempts for transient failures.
            retry_delay: Initial delay between retries in seconds.
        """
        self.connector = connector
        self.nats_client = nats_client
        self.logger = logger
        self.batch_size = batch_size
        self.retry_attempts = retry_attempts
        self.retry_delay = retry_delay

        # State
        self._running = False
        self._stop_requested = False

        # Kick detection callback
        self._on_kicked: Callable[[], None] | None = None

        # Statistics
        self._events_received = 0
        self._events_published = 0
        self._publish_errors = 0
        self._total_publish_time = 0.0

        # Log throttling for noisy events
        self._media_update_count = 0
        self._media_update_publish_count = 0
        self._media_update_log_interval = 20  # Log every N occurrences

        # Rate tracking
        self._stats_tracker = StatsTracker()

    def on_kicked(self, callback: Callable[[], None]) -> None:
        """Register callback to be called when a kick event is received.

        Args:
            callback: Zero-argument function invoked synchronously from the
                run loop when a ``kick`` event arrives. It is expected to
                trigger graceful shutdown.

        Examples:
            >>> def handle_kick():
            ...     app_state.shutdown_event.set()
            >>> publisher.on_kicked(handle_kick)
        """
        self._on_kicked = callback

    @property
    def is_running(self) -> bool:
        """Check if publisher is actively running.

        Returns:
            True if publisher run loop is active, False otherwise.

        Examples:
            >>> publisher = EventPublisher(connector, nats, logger)
            >>> publisher.is_running
            False
            >>> # After starting run()
            >>> publisher.is_running
            True
        """
        return self._running

    @property
    def stats(self) -> dict[str, Any]:
        """Get publishing statistics.

        Returns:
            Dictionary with events_received, events_published,
            publish_errors, average_publish_time_ms, success_rate,
            rate_1min, rate_5min, last_event_time and last_event_type.
            The average and the success rate are 0.0 until at least one
            event has been published / received respectively.

        Examples:
            >>> stats = publisher.stats
            >>> print(f"Published: {stats['events_published']}")
            >>> print(f"Success rate: {stats['success_rate']:.1%}")
            >>> print(f"Rate (1m): {stats['rate_1min']:.2f}/sec")
        """
        avg_time_ms = 0.0
        if self._events_published > 0:
            avg_time_ms = (self._total_publish_time / self._events_published) * 1000

        success_rate = 0.0
        if self._events_received > 0:
            success_rate = self._events_published / self._events_received

        # Get rate information from StatsTracker
        last_time, last_type = self._stats_tracker.get_last()

        return {
            "events_received": self._events_received,
            "events_published": self._events_published,
            "publish_errors": self._publish_errors,
            "average_publish_time_ms": avg_time_ms,
            "success_rate": success_rate,
            "rate_1min": self._stats_tracker.get_rate(60),
            "rate_5min": self._stats_tracker.get_rate(300),
            "last_event_time": last_time,
            "last_event_type": last_type,
        }

    async def run(self) -> None:
        """Start publishing events from connector to NATS.

        Runs until stop() is called or an unrecoverable error occurs.
        Consumes events from connector's recv_events() iterator and
        publishes each to NATS with appropriate subject routing. Calling
        run() while it is already active logs a warning and returns.

        Raises:
            asyncio.CancelledError: If task is cancelled.
            Exception: If connector or NATS encounters unrecoverable error.

        Examples:
            >>> task = asyncio.create_task(publisher.run())
            >>> await asyncio.sleep(10)
            >>> await publisher.stop()
        """
        if self._running:
            self.logger.warning("Publisher already running, ignoring run() call")
            return

        self._running = True
        self._stop_requested = False

        self.logger.info(
            "Event publisher started",
            extra={
                "batch_size": self.batch_size,
                "retry_attempts": self.retry_attempts,
            },
        )

        try:
            async for event_name, payload in self.connector.recv_events():
                # The stop flag is only observed when an event arrives.
                if self._stop_requested:
                    self.logger.info("Stop requested, finishing current batch")
                    break

                self._events_received += 1

                # A kick triggers the shutdown callback but is still
                # published below so downstream consumers see it.
                if event_name == "kick":
                    self._handle_kick(payload)

                raw_event = RawEvent(
                    event_name=event_name,
                    payload=payload,
                    channel=self.connector.config.channel,
                    domain=self.connector.config.domain,
                )

                self._log_received(event_name, payload, raw_event)

                # Build NATS subject
                try:
                    subject = build_event_subject(raw_event)
                except ValueError as e:
                    # Log the problematic event and skip it
                    self.logger.warning(
                        f"Skipping event with invalid name: {e}",
                        extra={
                            "event_name": event_name,
                            "raw_event_name": event_name,
                            "payload_preview": str(payload)[:200],
                            "correlation_id": raw_event.correlation_id,
                        },
                    )
                    continue

                # Publish to NATS with retry
                await self._publish_with_retry(subject, raw_event)

        except asyncio.CancelledError:
            self.logger.info("Publisher cancelled")
            raise

        except Exception as e:
            self.logger.error(
                f"Publisher failed with error: {e}",
                extra={"error": str(e), "type": type(e).__name__},
            )
            raise

        finally:
            self._running = False
            self.logger.info(
                "Event publisher stopped",
                extra=self.stats,
            )

    async def stop(self, timeout: float = 5.0) -> None:
        """Gracefully stop the publisher.

        Sets the stop flag and polls until the run loop has exited or
        ``timeout`` seconds have elapsed. The run loop only checks the
        flag when the connector yields another event, so an idle stream
        may not stop within the timeout; a warning is logged in that case.
        Safe to call multiple times.

        Args:
            timeout: Maximum number of seconds to wait for the run loop
                to exit.

        Examples:
            >>> await publisher.stop()
            >>> assert not publisher.is_running
        """
        if not self._running:
            self.logger.debug("Publisher not running, stop() is a no-op")
            return

        self.logger.info("Requesting publisher stop")
        self._stop_requested = True

        # Monotonic clock: wall-clock adjustments must not shorten or
        # extend the wait.
        deadline = time.monotonic() + timeout
        while self._running and time.monotonic() < deadline:
            await asyncio.sleep(0.1)

        if self._running:
            self.logger.warning("Publisher did not stop within timeout")

    def _should_log_throttled(self, count: int) -> bool:
        """Return True for the 1st, (N+1)th, (2N+1)th ... occurrence."""
        interval = self._media_update_log_interval
        # (count - 1) % N == 0 matches count % N == 1 for N > 1 and also
        # behaves sensibly (log everything) for N == 1.
        return interval <= 1 or (count - 1) % interval == 0

    def _handle_kick(self, payload: Any) -> None:
        """Log a kick event and invoke the registered kick callback."""
        # Tolerate payloads that are not mappings so that a malformed
        # kick cannot abort the run loop before the event is published.
        details = payload if hasattr(payload, "get") else {}
        kicked_user = details.get("name", "")
        reason = details.get("reason", "No reason given")
        self.logger.warning(
            f"Received kick event: user={kicked_user}, reason={reason}",
            extra={"kicked_user": kicked_user, "reason": reason},
        )
        # If we have a kick callback, trigger it to initiate shutdown
        if self._on_kicked:
            self.logger.warning("Bot was kicked from channel, initiating graceful shutdown")
            self._on_kicked()

    def _log_received(self, event_name: str, payload: Any, raw_event: RawEvent) -> None:
        """Log receipt of an event; errors include payload, mediaUpdate is throttled."""
        if event_name == "errorMsg":
            self.logger.error(
                f"Received CyTube error: {payload}",
                extra={
                    "event_name": event_name,
                    "error_payload": payload,
                    "correlation_id": raw_event.correlation_id,
                },
            )
            return

        if event_name == "queueFail":
            self.logger.error(
                f"Queue failed: {payload}",
                extra={
                    "event_name": event_name,
                    "error_payload": payload,
                    "correlation_id": raw_event.correlation_id,
                },
            )
            return

        if event_name == "mediaUpdate":
            # Throttle logging for noisy events like mediaUpdate
            self._media_update_count += 1
            if self._should_log_throttled(self._media_update_count):
                self.logger.info(
                    f"Received event: {event_name} (#{self._media_update_count}, logging every {self._media_update_log_interval})",
                    extra={
                        "event_name": event_name,
                        "correlation_id": raw_event.correlation_id,
                        "count": self._media_update_count,
                    },
                )
            return

        self.logger.info(
            f"Received event: {event_name}",
            extra={
                "event_name": event_name,
                "correlation_id": raw_event.correlation_id,
            },
        )

    async def _publish_with_retry(self, subject: str, event: RawEvent) -> None:
        """Publish event to NATS with retry logic.

        The event is serialized once. A serialization failure is counted
        as a publish error and not retried. A failed publish is retried up
        to ``retry_attempts`` times with exponential backoff starting at
        ``retry_delay`` seconds. Failures are logged and counted; this
        method does not raise for them.

        Args:
            subject: NATS subject string.
            event: RawEvent to publish.
        """
        # Serialize outside the retry loop: the result is the same for
        # every attempt, and presumably a serialization failure is not
        # transient, so backing off and retrying it would only stall the
        # event stream.
        try:
            event_bytes = event.to_bytes()
        except Exception as e:
            self._publish_errors += 1
            self.logger.error(
                "Event serialization failed, not retrying",
                extra={
                    "subject": subject,
                    "event_name": event.event_name,
                    "correlation_id": event.correlation_id,
                    "error": str(e),
                },
            )
            return

        # A negative setting still gets one attempt.
        max_retries = max(self.retry_attempts, 0)
        last_error: Exception | None = None

        for attempt in range(1, max_retries + 2):
            started = time.perf_counter()
            try:
                # Only the publish call is retried; bookkeeping below must
                # not cause an already-delivered event to be republished.
                await self.nats_client.publish(subject, event_bytes)
            except Exception as e:
                last_error = e
                if attempt > max_retries:
                    break

                # Retry with exponential backoff
                delay = self.retry_delay * (2 ** (attempt - 1))
                self.logger.warning(
                    f"Publish failed, retrying in {delay}s (attempt {attempt}/{max_retries})",
                    extra={
                        "subject": subject,
                        "event_name": event.event_name,
                        "error": str(e),
                    },
                )
                await asyncio.sleep(delay)
                continue

            # Track timing
            elapsed = time.perf_counter() - started
            self._total_publish_time += elapsed

            # Update stats
            self._events_published += 1
            self._stats_tracker.record(event.event_name)

            # Log success (throttled for noisy events like mediaUpdate)
            if event.event_name == "mediaUpdate":
                self._media_update_publish_count += 1
                if self._should_log_throttled(self._media_update_publish_count):
                    self.logger.info(
                        f"Published event '{event.event_name}' to NATS subject: {subject} (#{self._media_update_publish_count}, logging every {self._media_update_log_interval})",
                        extra={
                            "subject": subject,
                            "event_name": event.event_name,
                            "correlation_id": event.correlation_id,
                            "size": len(event_bytes),
                            "elapsed_ms": elapsed * 1000,
                            "count": self._media_update_publish_count,
                        },
                    )
            else:
                self.logger.info(
                    f"Published event '{event.event_name}' to NATS subject: {subject}",
                    extra={
                        "subject": subject,
                        "event_name": event.event_name,
                        "correlation_id": event.correlation_id,
                        "size": len(event_bytes),
                        "elapsed_ms": elapsed * 1000,
                    },
                )

            return  # Success!

        # All retries exhausted
        self._publish_errors += 1
        self.logger.error(
            "Publish failed permanently after retries",
            extra={
                "subject": subject,
                "event_name": event.event_name,
                "correlation_id": event.correlation_id,
                "attempts": max_retries + 1,
                "error": str(last_error),
            },
        )
|
|
414
|
+
|
|
415
|
+
|
|
416
|
+
# Public API of this module.
__all__ = ["EventPublisher"]
|