kstlib 0.0.1a0__py3-none-any.whl → 1.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kstlib/__init__.py +266 -1
- kstlib/__main__.py +16 -0
- kstlib/alerts/__init__.py +110 -0
- kstlib/alerts/channels/__init__.py +36 -0
- kstlib/alerts/channels/base.py +197 -0
- kstlib/alerts/channels/email.py +227 -0
- kstlib/alerts/channels/slack.py +389 -0
- kstlib/alerts/exceptions.py +72 -0
- kstlib/alerts/manager.py +651 -0
- kstlib/alerts/models.py +142 -0
- kstlib/alerts/throttle.py +263 -0
- kstlib/auth/__init__.py +139 -0
- kstlib/auth/callback.py +399 -0
- kstlib/auth/config.py +502 -0
- kstlib/auth/errors.py +127 -0
- kstlib/auth/models.py +316 -0
- kstlib/auth/providers/__init__.py +14 -0
- kstlib/auth/providers/base.py +393 -0
- kstlib/auth/providers/oauth2.py +645 -0
- kstlib/auth/providers/oidc.py +821 -0
- kstlib/auth/session.py +338 -0
- kstlib/auth/token.py +482 -0
- kstlib/cache/__init__.py +50 -0
- kstlib/cache/decorator.py +261 -0
- kstlib/cache/strategies.py +516 -0
- kstlib/cli/__init__.py +8 -0
- kstlib/cli/app.py +195 -0
- kstlib/cli/commands/__init__.py +5 -0
- kstlib/cli/commands/auth/__init__.py +39 -0
- kstlib/cli/commands/auth/common.py +122 -0
- kstlib/cli/commands/auth/login.py +325 -0
- kstlib/cli/commands/auth/logout.py +74 -0
- kstlib/cli/commands/auth/providers.py +57 -0
- kstlib/cli/commands/auth/status.py +291 -0
- kstlib/cli/commands/auth/token.py +199 -0
- kstlib/cli/commands/auth/whoami.py +106 -0
- kstlib/cli/commands/config.py +89 -0
- kstlib/cli/commands/ops/__init__.py +39 -0
- kstlib/cli/commands/ops/attach.py +49 -0
- kstlib/cli/commands/ops/common.py +269 -0
- kstlib/cli/commands/ops/list_sessions.py +252 -0
- kstlib/cli/commands/ops/logs.py +49 -0
- kstlib/cli/commands/ops/start.py +98 -0
- kstlib/cli/commands/ops/status.py +138 -0
- kstlib/cli/commands/ops/stop.py +60 -0
- kstlib/cli/commands/rapi/__init__.py +60 -0
- kstlib/cli/commands/rapi/call.py +341 -0
- kstlib/cli/commands/rapi/list.py +99 -0
- kstlib/cli/commands/rapi/show.py +206 -0
- kstlib/cli/commands/secrets/__init__.py +35 -0
- kstlib/cli/commands/secrets/common.py +425 -0
- kstlib/cli/commands/secrets/decrypt.py +88 -0
- kstlib/cli/commands/secrets/doctor.py +743 -0
- kstlib/cli/commands/secrets/encrypt.py +242 -0
- kstlib/cli/commands/secrets/shred.py +96 -0
- kstlib/cli/common.py +86 -0
- kstlib/config/__init__.py +76 -0
- kstlib/config/exceptions.py +110 -0
- kstlib/config/export.py +225 -0
- kstlib/config/loader.py +963 -0
- kstlib/config/sops.py +287 -0
- kstlib/db/__init__.py +54 -0
- kstlib/db/aiosqlcipher.py +137 -0
- kstlib/db/cipher.py +112 -0
- kstlib/db/database.py +367 -0
- kstlib/db/exceptions.py +25 -0
- kstlib/db/pool.py +302 -0
- kstlib/helpers/__init__.py +35 -0
- kstlib/helpers/exceptions.py +11 -0
- kstlib/helpers/time_trigger.py +396 -0
- kstlib/kstlib.conf.yml +890 -0
- kstlib/limits.py +963 -0
- kstlib/logging/__init__.py +108 -0
- kstlib/logging/manager.py +633 -0
- kstlib/mail/__init__.py +42 -0
- kstlib/mail/builder.py +626 -0
- kstlib/mail/exceptions.py +27 -0
- kstlib/mail/filesystem.py +248 -0
- kstlib/mail/transport.py +224 -0
- kstlib/mail/transports/__init__.py +19 -0
- kstlib/mail/transports/gmail.py +268 -0
- kstlib/mail/transports/resend.py +324 -0
- kstlib/mail/transports/smtp.py +326 -0
- kstlib/meta.py +72 -0
- kstlib/metrics/__init__.py +88 -0
- kstlib/metrics/decorators.py +1090 -0
- kstlib/metrics/exceptions.py +14 -0
- kstlib/monitoring/__init__.py +116 -0
- kstlib/monitoring/_styles.py +163 -0
- kstlib/monitoring/cell.py +57 -0
- kstlib/monitoring/config.py +424 -0
- kstlib/monitoring/delivery.py +579 -0
- kstlib/monitoring/exceptions.py +63 -0
- kstlib/monitoring/image.py +220 -0
- kstlib/monitoring/kv.py +79 -0
- kstlib/monitoring/list.py +69 -0
- kstlib/monitoring/metric.py +88 -0
- kstlib/monitoring/monitoring.py +341 -0
- kstlib/monitoring/renderer.py +139 -0
- kstlib/monitoring/service.py +392 -0
- kstlib/monitoring/table.py +129 -0
- kstlib/monitoring/types.py +56 -0
- kstlib/ops/__init__.py +86 -0
- kstlib/ops/base.py +148 -0
- kstlib/ops/container.py +577 -0
- kstlib/ops/exceptions.py +209 -0
- kstlib/ops/manager.py +407 -0
- kstlib/ops/models.py +176 -0
- kstlib/ops/tmux.py +372 -0
- kstlib/ops/validators.py +287 -0
- kstlib/py.typed +0 -0
- kstlib/rapi/__init__.py +118 -0
- kstlib/rapi/client.py +875 -0
- kstlib/rapi/config.py +861 -0
- kstlib/rapi/credentials.py +887 -0
- kstlib/rapi/exceptions.py +213 -0
- kstlib/resilience/__init__.py +101 -0
- kstlib/resilience/circuit_breaker.py +440 -0
- kstlib/resilience/exceptions.py +95 -0
- kstlib/resilience/heartbeat.py +491 -0
- kstlib/resilience/rate_limiter.py +506 -0
- kstlib/resilience/shutdown.py +417 -0
- kstlib/resilience/watchdog.py +637 -0
- kstlib/secrets/__init__.py +29 -0
- kstlib/secrets/exceptions.py +19 -0
- kstlib/secrets/models.py +62 -0
- kstlib/secrets/providers/__init__.py +79 -0
- kstlib/secrets/providers/base.py +58 -0
- kstlib/secrets/providers/environment.py +66 -0
- kstlib/secrets/providers/keyring.py +107 -0
- kstlib/secrets/providers/kms.py +223 -0
- kstlib/secrets/providers/kwargs.py +101 -0
- kstlib/secrets/providers/sops.py +209 -0
- kstlib/secrets/resolver.py +221 -0
- kstlib/secrets/sensitive.py +130 -0
- kstlib/secure/__init__.py +23 -0
- kstlib/secure/fs.py +194 -0
- kstlib/secure/permissions.py +70 -0
- kstlib/ssl.py +347 -0
- kstlib/ui/__init__.py +23 -0
- kstlib/ui/exceptions.py +26 -0
- kstlib/ui/panels.py +484 -0
- kstlib/ui/spinner.py +864 -0
- kstlib/ui/tables.py +382 -0
- kstlib/utils/__init__.py +48 -0
- kstlib/utils/dict.py +36 -0
- kstlib/utils/formatting.py +338 -0
- kstlib/utils/http_trace.py +237 -0
- kstlib/utils/lazy.py +49 -0
- kstlib/utils/secure_delete.py +205 -0
- kstlib/utils/serialization.py +247 -0
- kstlib/utils/text.py +56 -0
- kstlib/utils/validators.py +124 -0
- kstlib/websocket/__init__.py +97 -0
- kstlib/websocket/exceptions.py +214 -0
- kstlib/websocket/manager.py +1102 -0
- kstlib/websocket/models.py +361 -0
- kstlib-1.0.1.dist-info/METADATA +201 -0
- kstlib-1.0.1.dist-info/RECORD +163 -0
- {kstlib-0.0.1a0.dist-info → kstlib-1.0.1.dist-info}/WHEEL +1 -1
- kstlib-1.0.1.dist-info/entry_points.txt +2 -0
- kstlib-1.0.1.dist-info/licenses/LICENSE.md +9 -0
- kstlib-0.0.1a0.dist-info/METADATA +0 -29
- kstlib-0.0.1a0.dist-info/RECORD +0 -6
- kstlib-0.0.1a0.dist-info/licenses/LICENSE.md +0 -5
- {kstlib-0.0.1a0.dist-info → kstlib-1.0.1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,491 @@
|
|
|
1
|
+
"""Heartbeat mechanism for process liveness signaling."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
import contextlib
|
|
7
|
+
import json
|
|
8
|
+
import logging
|
|
9
|
+
import os
|
|
10
|
+
import socket
|
|
11
|
+
import threading
|
|
12
|
+
from collections.abc import Awaitable, Callable, Mapping
|
|
13
|
+
from dataclasses import dataclass, field
|
|
14
|
+
from datetime import datetime, timezone
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
from typing import TYPE_CHECKING, Any, Protocol, runtime_checkable
|
|
17
|
+
|
|
18
|
+
from typing_extensions import Self
|
|
19
|
+
|
|
20
|
+
from kstlib.limits import (
|
|
21
|
+
HARD_MAX_HEARTBEAT_INTERVAL,
|
|
22
|
+
HARD_MIN_HEARTBEAT_INTERVAL,
|
|
23
|
+
clamp_with_limits,
|
|
24
|
+
get_resilience_limits,
|
|
25
|
+
)
|
|
26
|
+
from kstlib.resilience.exceptions import HeartbeatError
|
|
27
|
+
|
|
28
|
+
log = logging.getLogger(__name__)
|
|
29
|
+
|
|
30
|
+
if TYPE_CHECKING:
|
|
31
|
+
import types
|
|
32
|
+
|
|
33
|
+
# Type aliases for callbacks
|
|
34
|
+
OnAlertCallback = Callable[[str, str, Mapping[str, Any]], Awaitable[None] | None]
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@runtime_checkable
class HeartbeatTarget(Protocol):
    """Structural type for components whose liveness a Heartbeat can watch.

    An object qualifies simply by exposing an `is_dead` attribute or
    property; no inheritance is required. Because the protocol is
    runtime-checkable, `isinstance(obj, HeartbeatTarget)` verifies that the
    member is present.

    Examples:
        >>> class MyWebSocket:  # doctest: +SKIP
        ...     @property
        ...     def is_dead(self) -> bool:
        ...         return not self.connected
    """

    @property
    def is_dead(self) -> bool:
        """Report whether the target has died and needs a restart."""
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
@dataclass(frozen=True, slots=True)
|
|
58
|
+
class HeartbeatState:
|
|
59
|
+
"""Represents the state written to the heartbeat file.
|
|
60
|
+
|
|
61
|
+
Attributes:
|
|
62
|
+
timestamp: Last heartbeat time (ISO 8601 UTC).
|
|
63
|
+
pid: Process ID.
|
|
64
|
+
hostname: Machine hostname.
|
|
65
|
+
metadata: Optional application-specific data.
|
|
66
|
+
|
|
67
|
+
Examples:
|
|
68
|
+
>>> state = HeartbeatState(
|
|
69
|
+
... timestamp="2026-01-12T10:00:00+00:00",
|
|
70
|
+
... pid=1234,
|
|
71
|
+
... hostname="myhost",
|
|
72
|
+
... )
|
|
73
|
+
>>> state.pid
|
|
74
|
+
1234
|
|
75
|
+
"""
|
|
76
|
+
|
|
77
|
+
timestamp: str
|
|
78
|
+
pid: int
|
|
79
|
+
hostname: str
|
|
80
|
+
metadata: dict[str, Any] = field(default_factory=dict)
|
|
81
|
+
|
|
82
|
+
def to_dict(self) -> dict[str, Any]:
|
|
83
|
+
"""Serialize to JSON-compatible dictionary.
|
|
84
|
+
|
|
85
|
+
Returns:
|
|
86
|
+
Dictionary representation of the heartbeat state.
|
|
87
|
+
"""
|
|
88
|
+
return {
|
|
89
|
+
"timestamp": self.timestamp,
|
|
90
|
+
"pid": self.pid,
|
|
91
|
+
"hostname": self.hostname,
|
|
92
|
+
"metadata": self.metadata,
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
@classmethod
|
|
96
|
+
def from_dict(cls, data: dict[str, Any]) -> HeartbeatState:
|
|
97
|
+
"""Deserialize from dictionary.
|
|
98
|
+
|
|
99
|
+
Args:
|
|
100
|
+
data: Dictionary with heartbeat state fields.
|
|
101
|
+
|
|
102
|
+
Returns:
|
|
103
|
+
HeartbeatState instance.
|
|
104
|
+
|
|
105
|
+
Raises:
|
|
106
|
+
KeyError: If required fields are missing.
|
|
107
|
+
"""
|
|
108
|
+
return cls(
|
|
109
|
+
timestamp=data["timestamp"],
|
|
110
|
+
pid=data["pid"],
|
|
111
|
+
hostname=data["hostname"],
|
|
112
|
+
metadata=data.get("metadata", {}),
|
|
113
|
+
)
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
class Heartbeat:
    """Periodic signal to indicate the process is alive.

    Writes a `HeartbeatState` snapshot (timestamp, PID, hostname, metadata)
    to a JSON state file at a configurable interval. The heartbeat can be
    driven by a daemon thread (`start`/`stop` or `with`) or by an asyncio
    task (`astart`/`astop` or `async with`).

    The two drivers differ in a few ways:

    * The thread loop waits one full interval before its first beat; the
      asyncio loop beats immediately when it starts.
    * The thread loop cannot await, so a coroutine returned by `on_beat` or
      `on_target_dead` is closed without being run.
    * `on_alert` is only invoked by the asyncio loop.

    Args:
        state_file: Path to the heartbeat state file. If None, no file is written
            (useful when using on_beat callback for state management).
        interval: Seconds between heartbeats. If None the configured default is
            used; otherwise the value is clamped to the hard limits.
        on_missed_beat: Callback invoked when a beat write fails.
        on_alert: Callback for alerting (channel, message, context).
        target: Optional object with `is_dead` property to monitor.
        on_target_dead: Callback invoked when target is detected as dead.
        on_beat: Callback invoked after each successful beat. Can be sync or async.
            Use this to delegate state writing to an external component.
        metadata: Optional dict included in each heartbeat.

    Examples:
        Sync context manager:

        >>> with Heartbeat("/tmp/bot.heartbeat") as hb:  # doctest: +SKIP
        ...     do_work()

        Async context manager:

        >>> async with Heartbeat("/tmp/bot.heartbeat") as hb:  # doctest: +SKIP
        ...     await do_async_work()

        Check if a process is alive:

        >>> Heartbeat.is_alive("/tmp/bot.heartbeat", max_age_seconds=30)  # doctest: +SKIP
        True

        Monitor a WebSocket:

        >>> hb = Heartbeat(  # doctest: +SKIP
        ...     "/tmp/bot.heartbeat",
        ...     target=ws_manager,
        ...     on_target_dead=lambda: restart_ws(),
        ... )
    """

    def __init__(
        self,
        state_file: str | Path | None = None,
        *,
        interval: float | None = None,
        on_missed_beat: Callable[[Exception], None] | None = None,
        on_alert: OnAlertCallback | None = None,
        target: HeartbeatTarget | None = None,
        on_target_dead: Callable[[], Awaitable[None] | None] | None = None,
        on_beat: Callable[[], Awaitable[None] | None] | None = None,
        metadata: dict[str, Any] | None = None,
    ) -> None:
        """Initialize heartbeat.

        Args:
            state_file: Path to the heartbeat state file. If None, no file is written.
            interval: Seconds between heartbeats. Uses config default if None.
            on_missed_beat: Callback invoked when a beat write fails.
            on_alert: Callback for alerting (channel, message, context).
            target: Optional object with `is_dead` property to monitor.
            on_target_dead: Callback invoked when target is detected as dead.
            on_beat: Callback invoked after each successful beat.
            metadata: Optional dict included in each heartbeat.
        """
        self._state_file = Path(state_file) if state_file else None
        self._on_missed_beat = on_missed_beat
        self._on_alert = on_alert
        self._target = target
        self._on_target_dead = on_target_dead
        self._on_beat = on_beat
        self._metadata = metadata or {}

        # Load interval from config if not provided, or clamp user value
        limits = get_resilience_limits()
        self._interval = (
            limits.heartbeat_interval
            if interval is None
            else clamp_with_limits(interval, HARD_MIN_HEARTBEAT_INTERVAL, HARD_MAX_HEARTBEAT_INTERVAL)
        )

        # Threading state
        self._running = False
        self._thread: threading.Thread | None = None
        self._lock = threading.Lock()
        self._stop_event = threading.Event()
        self._shutdown_requested = False

        # Async state
        self._async_task: asyncio.Task[None] | None = None

    @property
    def interval(self) -> float:
        """Return the heartbeat interval in seconds."""
        return self._interval

    @property
    def state_file(self) -> Path | None:
        """Return the path to the state file, or None if not configured."""
        return self._state_file

    @property
    def is_shutdown(self) -> bool:
        """Check if shutdown has been requested."""
        return self._shutdown_requested

    @property
    def target(self) -> HeartbeatTarget | None:
        """Return the monitored target, if any."""
        return self._target

    def shutdown(self) -> None:
        """Signal shutdown and stop gracefully.

        Sets the shutdown flag which can be checked by external code
        to know that we're shutting down intentionally. The flag is not
        reset by a later `start`/`astart`.
        """
        log.info("Heartbeat shutdown requested")
        self._shutdown_requested = True
        self.stop()

    async def ashutdown(self) -> None:
        """Signal shutdown and stop gracefully (async version)."""
        log.info("Heartbeat shutdown requested")
        self._shutdown_requested = True
        await self.astop()

    def start(self) -> None:
        """Start the heartbeat background thread.

        The thread is a daemon thread, so it never keeps the interpreter alive.

        Raises:
            HeartbeatError: If heartbeat is already running.
        """
        with self._lock:
            if self._running:
                raise HeartbeatError("Heartbeat is already running")
            self._running = True
            self._stop_event.clear()
            self._thread = threading.Thread(target=self._run_loop, daemon=True)
            self._thread.start()

    def stop(self) -> None:
        """Stop the heartbeat and clean up.

        Safe to call multiple times or if not started. It may also be called
        from a callback running on the heartbeat thread itself.
        """
        with self._lock:
            if not self._running:
                return
            self._running = False
            self._stop_event.set()

        thread = self._thread
        if thread is not None:
            # Thread.join() raises RuntimeError when a thread joins itself,
            # which happens if stop()/shutdown() is called from a callback on
            # the heartbeat thread. The loop exits on its own once the stop
            # event is set, so the join can be skipped in that case.
            if thread is not threading.current_thread():
                thread.join(timeout=self._interval + 1.0)
            self._thread = None

    def beat(self) -> None:
        """Write a heartbeat immediately (manual trigger).

        If state_file is configured, writes to file.
        If on_beat callback is configured, it will be invoked by the loop (not here).

        The state is written to a sibling temp file (`<name>.tmp`) and then
        moved over the state file, so readers never see a partial write.

        Raises:
            HeartbeatError: If state file is configured and cannot be written.
            TypeError: If the metadata cannot be serialized by `json.dumps`.
        """
        state_path = self._state_file
        # Skip file write if no state_file configured
        if state_path is None:
            return

        state = HeartbeatState(
            timestamp=datetime.now(timezone.utc).isoformat(),
            pid=os.getpid(),
            hostname=socket.gethostname(),
            metadata=self._metadata,
        )
        payload = json.dumps(state.to_dict(), indent=2)
        try:
            # Ensure parent directory exists with proper permissions
            state_path.parent.mkdir(parents=True, exist_ok=True, mode=0o755)
            # Append ".tmp" instead of replacing the suffix: with_suffix()
            # would map "bot.heartbeat" and "bot.json" to the same "bot.tmp"
            # and would make a "*.tmp" state file its own temp file.
            temp_file = state_path.with_name(f"{state_path.name}.tmp")
            temp_file.write_text(payload, encoding="utf-8")
            temp_file.replace(state_path)
        except OSError as exc:
            raise HeartbeatError(f"Failed to write heartbeat: {exc}") from exc

    def _invoke_callback_sync(
        self,
        callback: Callable[[], Awaitable[None] | None] | None,
    ) -> None:
        """Invoke a callback from the heartbeat thread without ever raising."""
        if callback is None:
            return
        try:
            result = callback()
            if asyncio.iscoroutine(result):
                # This thread cannot await; close the coroutine so Python
                # does not warn that it was never awaited.
                result.close()
        except Exception as exc:  # pylint: disable=broad-exception-caught
            log.warning("Callback failed: %s", exc)

    def _run_loop(self) -> None:
        """Background thread loop that writes heartbeats and checks target.

        Each iteration first waits `interval` seconds on the stop event, so
        the first beat is written one interval after `start`.
        """
        while not self._stop_event.wait(timeout=self._interval):
            if self._shutdown_requested:
                break
            try:
                self.beat()
            except Exception as exc:  # pylint: disable=broad-exception-caught
                # Log like the async loop does, so a failing beat is visible
                # even when no on_missed_beat callback is registered.
                log.warning("Heartbeat beat failed: %s", exc)
                if self._on_missed_beat is not None:
                    with contextlib.suppress(Exception):
                        self._on_missed_beat(exc)
            else:
                # Invoke on_beat callback after successful beat
                self._invoke_callback_sync(self._on_beat)

            # Check target if provided (sync version cannot use async callbacks)
            if self._target is not None and self._target.is_dead and self._on_target_dead is not None:
                self._invoke_callback_sync(self._on_target_dead)

    async def astart(self) -> None:
        """Start the heartbeat using asyncio (async version).

        Raises:
            HeartbeatError: If heartbeat is already running.
        """
        with self._lock:
            if self._running:
                raise HeartbeatError("Heartbeat is already running")
            self._running = True

        self._async_task = asyncio.create_task(self._async_loop())

    async def astop(self) -> None:
        """Stop the heartbeat (async version).

        Cancels the heartbeat task and waits for it to finish.
        Safe to call multiple times or if not started.
        """
        with self._lock:
            if not self._running:
                return
            self._running = False

        task = self._async_task
        if task is not None:
            task.cancel()
            with contextlib.suppress(asyncio.CancelledError):
                await task
            self._async_task = None

    async def _invoke_callback_async(
        self,
        callback: Callable[[], Awaitable[None] | None] | None,
    ) -> None:
        """Invoke a callback that may be sync or async, logging any failure."""
        if callback is None:
            return
        try:
            result = callback()
            if asyncio.iscoroutine(result):
                await result
        except Exception as exc:  # pylint: disable=broad-exception-caught
            log.warning("Callback failed: %s", exc)

    async def _check_target_async(self) -> None:
        """Check target health and invoke callbacks if dead."""
        if self._target is None or not self._target.is_dead:
            return

        # Send alert if callback provided
        if self._on_alert is not None:
            with contextlib.suppress(Exception):
                alert_result = self._on_alert(
                    "heartbeat",
                    "Target is dead, triggering recovery",
                    {"target": str(type(self._target).__name__)},
                )
                if asyncio.iscoroutine(alert_result):
                    await alert_result

        # Invoke on_target_dead callback
        await self._invoke_callback_async(self._on_target_dead)

    async def _async_loop(self) -> None:
        """Async loop that writes heartbeats and monitors target.

        Beats immediately, then sleeps `interval` seconds between iterations.
        """
        log.debug("Heartbeat async loop started (interval=%.1fs)", self._interval)
        loop = asyncio.get_running_loop()
        while self._running and not self._shutdown_requested:
            try:
                # Run beat in executor to avoid blocking
                await loop.run_in_executor(None, self.beat)
                # Invoke on_beat callback after successful beat
                if self._on_beat is not None:
                    log.debug("Invoking on_beat callback")
                    await self._invoke_callback_async(self._on_beat)
            except Exception as exc:  # pylint: disable=broad-exception-caught
                log.warning("Heartbeat beat failed: %s", exc)
                if self._on_missed_beat is not None:
                    with contextlib.suppress(Exception):
                        self._on_missed_beat(exc)

            await self._check_target_async()
            await asyncio.sleep(self._interval)

    @staticmethod
    def read_state(state_file: str | Path) -> HeartbeatState | None:
        """Read and parse an existing heartbeat state file.

        Args:
            state_file: Path to heartbeat file.

        Returns:
            HeartbeatState if file exists and is valid, None otherwise. A file
            that is missing, unreadable, not valid UTF-8 JSON, not a JSON
            object, or lacking required fields is treated as invalid.

        Examples:
            >>> state = Heartbeat.read_state("/tmp/bot.heartbeat")  # doctest: +SKIP
            >>> if state:  # doctest: +SKIP
            ...     print(f"Last beat: {state.timestamp}")
        """
        try:
            # A missing file raises FileNotFoundError (an OSError), so no
            # separate exists() check is needed. ValueError covers both
            # json.JSONDecodeError and UnicodeDecodeError.
            data = json.loads(Path(state_file).read_text(encoding="utf-8"))
        except (OSError, ValueError):
            return None

        # Valid JSON that is not an object (list, string, number...) cannot
        # describe a heartbeat.
        if not isinstance(data, dict):
            return None

        try:
            return HeartbeatState.from_dict(data)
        except (KeyError, TypeError):
            return None

    @staticmethod
    def is_alive(state_file: str | Path, max_age_seconds: float = 30.0) -> bool:
        """Check if a process is alive based on its heartbeat.

        Args:
            state_file: Path to heartbeat file.
            max_age_seconds: Maximum age before considering process dead.

        Returns:
            True if heartbeat exists and is recent enough. False if the state
            cannot be read or its timestamp cannot be parsed or compared with
            the current UTC time (for example a timestamp without timezone).

        Examples:
            >>> Heartbeat.is_alive("/tmp/bot.heartbeat", max_age_seconds=30)  # doctest: +SKIP
            True
        """
        state = Heartbeat.read_state(state_file)
        if state is None:
            return False
        try:
            beat_time = datetime.fromisoformat(state.timestamp)
            age = (datetime.now(timezone.utc) - beat_time).total_seconds()
            return age <= max_age_seconds
        except (ValueError, TypeError):
            return False

    def __enter__(self) -> Self:
        """Enter sync context manager."""
        self.start()
        return self

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        exc_tb: types.TracebackType | None,
    ) -> None:
        """Exit sync context manager."""
        self.stop()

    async def __aenter__(self) -> Self:
        """Enter async context manager."""
        await self.astart()
        return self

    async def __aexit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        exc_tb: types.TracebackType | None,
    ) -> None:
        """Exit async context manager."""
        await self.astop()
|
|
489
|
+
|
|
490
|
+
|
|
491
|
+
__all__ = ["Heartbeat", "HeartbeatState", "HeartbeatTarget", "OnAlertCallback"]
|