kstlib 0.0.1a0__py3-none-any.whl → 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (166)
  1. kstlib/__init__.py +266 -1
  2. kstlib/__main__.py +16 -0
  3. kstlib/alerts/__init__.py +110 -0
  4. kstlib/alerts/channels/__init__.py +36 -0
  5. kstlib/alerts/channels/base.py +197 -0
  6. kstlib/alerts/channels/email.py +227 -0
  7. kstlib/alerts/channels/slack.py +389 -0
  8. kstlib/alerts/exceptions.py +72 -0
  9. kstlib/alerts/manager.py +651 -0
  10. kstlib/alerts/models.py +142 -0
  11. kstlib/alerts/throttle.py +263 -0
  12. kstlib/auth/__init__.py +139 -0
  13. kstlib/auth/callback.py +399 -0
  14. kstlib/auth/config.py +502 -0
  15. kstlib/auth/errors.py +127 -0
  16. kstlib/auth/models.py +316 -0
  17. kstlib/auth/providers/__init__.py +14 -0
  18. kstlib/auth/providers/base.py +393 -0
  19. kstlib/auth/providers/oauth2.py +645 -0
  20. kstlib/auth/providers/oidc.py +821 -0
  21. kstlib/auth/session.py +338 -0
  22. kstlib/auth/token.py +482 -0
  23. kstlib/cache/__init__.py +50 -0
  24. kstlib/cache/decorator.py +261 -0
  25. kstlib/cache/strategies.py +516 -0
  26. kstlib/cli/__init__.py +8 -0
  27. kstlib/cli/app.py +195 -0
  28. kstlib/cli/commands/__init__.py +5 -0
  29. kstlib/cli/commands/auth/__init__.py +39 -0
  30. kstlib/cli/commands/auth/common.py +122 -0
  31. kstlib/cli/commands/auth/login.py +325 -0
  32. kstlib/cli/commands/auth/logout.py +74 -0
  33. kstlib/cli/commands/auth/providers.py +57 -0
  34. kstlib/cli/commands/auth/status.py +291 -0
  35. kstlib/cli/commands/auth/token.py +199 -0
  36. kstlib/cli/commands/auth/whoami.py +106 -0
  37. kstlib/cli/commands/config.py +89 -0
  38. kstlib/cli/commands/ops/__init__.py +39 -0
  39. kstlib/cli/commands/ops/attach.py +49 -0
  40. kstlib/cli/commands/ops/common.py +269 -0
  41. kstlib/cli/commands/ops/list_sessions.py +252 -0
  42. kstlib/cli/commands/ops/logs.py +49 -0
  43. kstlib/cli/commands/ops/start.py +98 -0
  44. kstlib/cli/commands/ops/status.py +138 -0
  45. kstlib/cli/commands/ops/stop.py +60 -0
  46. kstlib/cli/commands/rapi/__init__.py +60 -0
  47. kstlib/cli/commands/rapi/call.py +341 -0
  48. kstlib/cli/commands/rapi/list.py +99 -0
  49. kstlib/cli/commands/rapi/show.py +206 -0
  50. kstlib/cli/commands/secrets/__init__.py +35 -0
  51. kstlib/cli/commands/secrets/common.py +425 -0
  52. kstlib/cli/commands/secrets/decrypt.py +88 -0
  53. kstlib/cli/commands/secrets/doctor.py +743 -0
  54. kstlib/cli/commands/secrets/encrypt.py +242 -0
  55. kstlib/cli/commands/secrets/shred.py +96 -0
  56. kstlib/cli/common.py +86 -0
  57. kstlib/config/__init__.py +76 -0
  58. kstlib/config/exceptions.py +110 -0
  59. kstlib/config/export.py +225 -0
  60. kstlib/config/loader.py +963 -0
  61. kstlib/config/sops.py +287 -0
  62. kstlib/db/__init__.py +54 -0
  63. kstlib/db/aiosqlcipher.py +137 -0
  64. kstlib/db/cipher.py +112 -0
  65. kstlib/db/database.py +367 -0
  66. kstlib/db/exceptions.py +25 -0
  67. kstlib/db/pool.py +302 -0
  68. kstlib/helpers/__init__.py +35 -0
  69. kstlib/helpers/exceptions.py +11 -0
  70. kstlib/helpers/time_trigger.py +396 -0
  71. kstlib/kstlib.conf.yml +890 -0
  72. kstlib/limits.py +963 -0
  73. kstlib/logging/__init__.py +108 -0
  74. kstlib/logging/manager.py +633 -0
  75. kstlib/mail/__init__.py +42 -0
  76. kstlib/mail/builder.py +626 -0
  77. kstlib/mail/exceptions.py +27 -0
  78. kstlib/mail/filesystem.py +248 -0
  79. kstlib/mail/transport.py +224 -0
  80. kstlib/mail/transports/__init__.py +19 -0
  81. kstlib/mail/transports/gmail.py +268 -0
  82. kstlib/mail/transports/resend.py +324 -0
  83. kstlib/mail/transports/smtp.py +326 -0
  84. kstlib/meta.py +72 -0
  85. kstlib/metrics/__init__.py +88 -0
  86. kstlib/metrics/decorators.py +1090 -0
  87. kstlib/metrics/exceptions.py +14 -0
  88. kstlib/monitoring/__init__.py +116 -0
  89. kstlib/monitoring/_styles.py +163 -0
  90. kstlib/monitoring/cell.py +57 -0
  91. kstlib/monitoring/config.py +424 -0
  92. kstlib/monitoring/delivery.py +579 -0
  93. kstlib/monitoring/exceptions.py +63 -0
  94. kstlib/monitoring/image.py +220 -0
  95. kstlib/monitoring/kv.py +79 -0
  96. kstlib/monitoring/list.py +69 -0
  97. kstlib/monitoring/metric.py +88 -0
  98. kstlib/monitoring/monitoring.py +341 -0
  99. kstlib/monitoring/renderer.py +139 -0
  100. kstlib/monitoring/service.py +392 -0
  101. kstlib/monitoring/table.py +129 -0
  102. kstlib/monitoring/types.py +56 -0
  103. kstlib/ops/__init__.py +86 -0
  104. kstlib/ops/base.py +148 -0
  105. kstlib/ops/container.py +577 -0
  106. kstlib/ops/exceptions.py +209 -0
  107. kstlib/ops/manager.py +407 -0
  108. kstlib/ops/models.py +176 -0
  109. kstlib/ops/tmux.py +372 -0
  110. kstlib/ops/validators.py +287 -0
  111. kstlib/py.typed +0 -0
  112. kstlib/rapi/__init__.py +118 -0
  113. kstlib/rapi/client.py +875 -0
  114. kstlib/rapi/config.py +861 -0
  115. kstlib/rapi/credentials.py +887 -0
  116. kstlib/rapi/exceptions.py +213 -0
  117. kstlib/resilience/__init__.py +101 -0
  118. kstlib/resilience/circuit_breaker.py +440 -0
  119. kstlib/resilience/exceptions.py +95 -0
  120. kstlib/resilience/heartbeat.py +491 -0
  121. kstlib/resilience/rate_limiter.py +506 -0
  122. kstlib/resilience/shutdown.py +417 -0
  123. kstlib/resilience/watchdog.py +637 -0
  124. kstlib/secrets/__init__.py +29 -0
  125. kstlib/secrets/exceptions.py +19 -0
  126. kstlib/secrets/models.py +62 -0
  127. kstlib/secrets/providers/__init__.py +79 -0
  128. kstlib/secrets/providers/base.py +58 -0
  129. kstlib/secrets/providers/environment.py +66 -0
  130. kstlib/secrets/providers/keyring.py +107 -0
  131. kstlib/secrets/providers/kms.py +223 -0
  132. kstlib/secrets/providers/kwargs.py +101 -0
  133. kstlib/secrets/providers/sops.py +209 -0
  134. kstlib/secrets/resolver.py +221 -0
  135. kstlib/secrets/sensitive.py +130 -0
  136. kstlib/secure/__init__.py +23 -0
  137. kstlib/secure/fs.py +194 -0
  138. kstlib/secure/permissions.py +70 -0
  139. kstlib/ssl.py +347 -0
  140. kstlib/ui/__init__.py +23 -0
  141. kstlib/ui/exceptions.py +26 -0
  142. kstlib/ui/panels.py +484 -0
  143. kstlib/ui/spinner.py +864 -0
  144. kstlib/ui/tables.py +382 -0
  145. kstlib/utils/__init__.py +48 -0
  146. kstlib/utils/dict.py +36 -0
  147. kstlib/utils/formatting.py +338 -0
  148. kstlib/utils/http_trace.py +237 -0
  149. kstlib/utils/lazy.py +49 -0
  150. kstlib/utils/secure_delete.py +205 -0
  151. kstlib/utils/serialization.py +247 -0
  152. kstlib/utils/text.py +56 -0
  153. kstlib/utils/validators.py +124 -0
  154. kstlib/websocket/__init__.py +97 -0
  155. kstlib/websocket/exceptions.py +214 -0
  156. kstlib/websocket/manager.py +1102 -0
  157. kstlib/websocket/models.py +361 -0
  158. kstlib-1.0.1.dist-info/METADATA +201 -0
  159. kstlib-1.0.1.dist-info/RECORD +163 -0
  160. {kstlib-0.0.1a0.dist-info → kstlib-1.0.1.dist-info}/WHEEL +1 -1
  161. kstlib-1.0.1.dist-info/entry_points.txt +2 -0
  162. kstlib-1.0.1.dist-info/licenses/LICENSE.md +9 -0
  163. kstlib-0.0.1a0.dist-info/METADATA +0 -29
  164. kstlib-0.0.1a0.dist-info/RECORD +0 -6
  165. kstlib-0.0.1a0.dist-info/licenses/LICENSE.md +0 -5
  166. {kstlib-0.0.1a0.dist-info → kstlib-1.0.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,637 @@
1
+ """Watchdog for detecting thread/process freezes and hangs.
2
+
3
+ Provides configurable timeout monitoring for long-running operations,
4
+ with automatic callback invocation when activity stops.
5
+
6
+ Examples:
7
+ Basic usage with callback:
8
+
9
+ >>> def on_freeze(): # doctest: +SKIP
10
+ ... print("Thread frozen!")
11
+ >>> watchdog = Watchdog(timeout=30, on_timeout=on_freeze) # doctest: +SKIP
12
+ >>> watchdog.start() # doctest: +SKIP
13
+ >>> while running: # doctest: +SKIP
14
+ ... watchdog.ping() # Reset timer
15
+ ... do_work()
16
+ >>> watchdog.stop() # doctest: +SKIP
17
+
18
+ As context manager:
19
+
20
+ >>> with Watchdog(timeout=30) as wd: # doctest: +SKIP
21
+ ... for item in items:
22
+ ... wd.ping()
23
+ ... process(item)
24
+ """
25
+
26
+ from __future__ import annotations
27
+
28
+ import asyncio
29
+ import contextlib
30
+ import inspect
31
+ import json
32
+ import logging
33
+ import threading
34
+ import time
35
+ from collections.abc import Awaitable, Callable, Mapping
36
+ from dataclasses import dataclass
37
+ from datetime import datetime, timezone
38
+ from pathlib import Path
39
+ from typing import Any
40
+
41
+ from typing_extensions import Self
42
+
43
+ from kstlib.limits import (
44
+ DEFAULT_WATCHDOG_TIMEOUT,
45
+ HARD_MAX_WATCHDOG_TIMEOUT,
46
+ HARD_MIN_WATCHDOG_TIMEOUT,
47
+ clamp_with_limits,
48
+ get_resilience_limits,
49
+ )
50
+ from kstlib.resilience.exceptions import WatchdogTimeoutError
51
+
52
+ log = logging.getLogger(__name__)
53
+
54
+ # Type alias for alert callback
55
+ OnAlertCallback = Callable[[str, str, Mapping[str, Any]], Awaitable[None] | None]
56
+
57
+
58
@dataclass
class WatchdogStats:
    """Counters and timestamps collected while a watchdog is monitoring.

    Attributes:
        pings_total: How many times a ping has been recorded.
        timeouts_triggered: How many timeout events have been recorded.
        last_ping_time: ``time.monotonic()`` reading of the latest ping
            (also refreshed when the watchdog starts), or None.
        start_time: ``time.monotonic()`` reading taken at start, or None.

    Examples:
        >>> stats = WatchdogStats()
        >>> stats.record_ping()
        >>> stats.pings_total
        1
    """

    pings_total: int = 0
    timeouts_triggered: int = 0
    last_ping_time: float | None = None
    start_time: float | None = None

    def record_ping(self) -> None:
        """Count one ping and remember when it happened."""
        self.pings_total = self.pings_total + 1
        self.last_ping_time = time.monotonic()

    def record_timeout(self) -> None:
        """Count one timeout event."""
        self.timeouts_triggered = self.timeouts_triggered + 1

    def record_start(self) -> None:
        """Stamp the start time and treat the start as the latest activity."""
        self.start_time = time.monotonic()
        self.last_ping_time = time.monotonic()

    @property
    def uptime(self) -> float:
        """Seconds elapsed since the recorded start (0.0 if never started)."""
        started_at = self.start_time
        return 0.0 if started_at is None else time.monotonic() - started_at
100
+
101
+
102
class Watchdog:
    """Monitor thread/process health and detect freezes or hangs.

    Implements a watchdog timer that must be periodically "pinged" to
    prevent timeout. If no ping is received within the timeout period,
    the on_timeout callback is invoked once (until :meth:`reset` or a
    restart clears the triggered flag).

    A watchdog built with :meth:`from_state_file` does not rely on pings;
    it checks the ``timestamp`` stored in a heartbeat JSON file instead.

    Args:
        timeout: Seconds of inactivity before triggering timeout.
            If None, the configured resilience limit is used, falling
            back to ``DEFAULT_WATCHDOG_TIMEOUT``.
        on_timeout: Callback invoked when timeout is detected.
            Can be sync or async function.
        on_alert: Callback invoked as ``(channel, message, context)`` when
            a monitored heartbeat state file goes stale.
        name: Optional identifier for logging and monitoring.

    Examples:
        Basic usage:

        >>> watchdog = Watchdog(timeout=30)
        >>> watchdog.timeout
        30

        With callback:

        >>> def alert():
        ...     print("Watchdog triggered!")
        >>> wd = Watchdog(timeout=10, on_timeout=alert, name="worker")
        >>> wd.name
        'worker'

        As context manager:

        >>> with Watchdog(timeout=30) as wd:  # doctest: +SKIP
        ...     wd.ping()
        ...     do_work()
    """

    def __init__(
        self,
        *,
        timeout: float | None = None,
        on_timeout: Callable[[], None] | Callable[[], Awaitable[None]] | None = None,
        on_alert: OnAlertCallback | None = None,
        name: str | None = None,
    ) -> None:
        """Initialize watchdog.

        Args:
            timeout: Seconds before timeout triggers. Clamped between
                ``HARD_MIN_WATCHDOG_TIMEOUT`` and ``HARD_MAX_WATCHDOG_TIMEOUT``.
            on_timeout: Callback for timeout events (sync or async).
            on_alert: Callback for alerting (channel, message, context).
            name: Optional identifier.
        """
        # Load config defaults if needed; any failure to read the limits is
        # deliberately tolerated so a broken config cannot prevent monitoring.
        if timeout is None:
            try:
                limits = get_resilience_limits()
                timeout = limits.watchdog_timeout
            except Exception:
                timeout = DEFAULT_WATCHDOG_TIMEOUT

        self._timeout = clamp_with_limits(timeout, HARD_MIN_WATCHDOG_TIMEOUT, HARD_MAX_WATCHDOG_TIMEOUT)
        self._on_timeout = on_timeout
        self._on_alert = on_alert
        self._name = name
        self._stats = WatchdogStats()

        # State
        self._last_ping = time.monotonic()
        self._running = False
        self._triggered = False
        self._shutdown_requested = False
        self._lock = threading.Lock()
        self._stop_event = threading.Event()
        self._thread: threading.Thread | None = None
        self._async_task: asyncio.Task[None] | None = None
        self._callback_task: asyncio.Task[None] | None = None

        # State file monitoring (when using from_state_file)
        self._state_file: Path | None = None
        self._max_age: float = 30.0

    @property
    def timeout(self) -> float:
        """Timeout duration in seconds (after clamping)."""
        return self._timeout

    @property
    def name(self) -> str | None:
        """Watchdog identifier."""
        return self._name

    @property
    def stats(self) -> WatchdogStats:
        """Statistics for this watchdog."""
        return self._stats

    @property
    def is_running(self) -> bool:
        """Return True if watchdog is actively monitoring."""
        return self._running

    @property
    def is_triggered(self) -> bool:
        """Return True if timeout has been triggered."""
        return self._triggered

    @property
    def seconds_since_ping(self) -> float:
        """Return seconds since last ping."""
        with self._lock:
            return time.monotonic() - self._last_ping

    @property
    def is_shutdown(self) -> bool:
        """Check if shutdown has been requested."""
        return self._shutdown_requested

    @property
    def state_file(self) -> Path | None:
        """Return the state file path if monitoring a heartbeat file."""
        return self._state_file

    @classmethod
    def from_state_file(
        cls,
        state_file: str | Path,
        *,
        check_interval: float | None = None,
        max_age: float = 30.0,
        on_timeout: Callable[[], None] | Callable[[], Awaitable[None]] | None = None,
        on_alert: OnAlertCallback | None = None,
        name: str | None = None,
    ) -> Self:
        """Create a watchdog that monitors a heartbeat state file.

        Instead of requiring periodic ping() calls, this watchdog checks
        if a heartbeat state file is being updated regularly.

        Args:
            state_file: Path to the heartbeat JSON state file. The file is
                expected to hold a JSON object with an ISO-8601 ``timestamp``.
            check_interval: Value passed to the constructor as ``timeout``
                (defaults to max_age/2). Note that the monitor loops poll
                every ``min(1.0, timeout / 4)`` seconds, so this is an
                upper-bound hint rather than the exact polling period.
            max_age: Maximum age in seconds before triggering timeout (default: 30s).
            on_timeout: Callback for timeout events.
            on_alert: Callback for alerting (channel, message, context).
            name: Optional identifier.

        Returns:
            Configured Watchdog instance.

        Examples:
            >>> wd = Watchdog.from_state_file(  # doctest: +SKIP
            ...     "/tmp/bot.heartbeat",
            ...     max_age=30.0,  # Trigger if no heartbeat for 30 seconds
            ...     on_timeout=restart_bot,
            ... )
            >>> await wd.astart()  # doctest: +SKIP
        """
        interval = check_interval if check_interval is not None else max_age / 2
        instance = cls(
            timeout=interval,
            on_timeout=on_timeout,
            on_alert=on_alert,
            name=name or f"state_file_watcher:{state_file}",
        )
        instance._state_file = Path(state_file)
        instance._max_age = max_age
        return instance

    def shutdown(self) -> None:
        """Signal shutdown and stop gracefully."""
        log.info("Watchdog shutdown requested")
        self._shutdown_requested = True
        self.stop()

    async def ashutdown(self) -> None:
        """Signal shutdown and stop gracefully (async version)."""
        log.info("Watchdog shutdown requested")
        self._shutdown_requested = True
        await self.astop()

    def ping(self) -> None:
        """Reset the watchdog timer.

        Call this periodically to indicate the monitored code is still alive.
        Must be called more frequently than the timeout interval.

        Examples:
            >>> watchdog = Watchdog(timeout=30)
            >>> watchdog.ping()  # Reset timer
        """
        with self._lock:
            self._last_ping = time.monotonic()
            self._stats.record_ping()

    async def aping(self) -> None:
        """Async version of ping().

        Examples:
            >>> import asyncio
            >>> async def example():
            ...     watchdog = Watchdog(timeout=30)
            ...     await watchdog.aping()
            >>> asyncio.run(example())
        """
        self.ping()

    def start(self) -> None:
        """Start watchdog monitoring in a background thread.

        Raises:
            RuntimeError: If watchdog is already running.

        Examples:
            >>> watchdog = Watchdog(timeout=30)
            >>> watchdog.start()
            >>> watchdog.is_running
            True
            >>> watchdog.stop()
        """
        with self._lock:
            if self._running:
                raise RuntimeError("Watchdog is already running")

            self._running = True
            self._triggered = False
            self._stop_event.clear()
            self._last_ping = time.monotonic()
            self._stats.record_start()

        # Daemon thread: monitoring must never keep the interpreter alive.
        self._thread = threading.Thread(target=self._monitor_loop, daemon=True)
        self._thread.start()

    def stop(self) -> None:
        """Stop watchdog monitoring.

        Safe to call multiple times or when not running. Also safe to call
        from the monitor thread itself (e.g. from an ``on_timeout``
        callback): in that case the thread is not joined.

        Examples:
            >>> watchdog = Watchdog(timeout=30)
            >>> watchdog.start()
            >>> watchdog.stop()
            >>> watchdog.is_running
            False
        """
        with self._lock:
            if not self._running:
                return
            self._running = False

        self._stop_event.set()

        thread = self._thread
        if thread is not None:
            # Thread.join() raises RuntimeError when a thread joins itself;
            # the stop event alone is enough to end the loop in that case.
            if thread is not threading.current_thread():
                thread.join(timeout=1.0)
            self._thread = None

    async def astart(self) -> None:
        """Start watchdog monitoring asynchronously.

        Raises:
            RuntimeError: If watchdog is already running.
        """
        with self._lock:
            if self._running:
                raise RuntimeError("Watchdog is already running")

            self._running = True
            self._triggered = False
            self._stop_event.clear()
            self._last_ping = time.monotonic()
            self._stats.record_start()

        self._async_task = asyncio.create_task(self._async_monitor_loop())

    async def astop(self) -> None:
        """Stop watchdog monitoring asynchronously.

        Safe to call multiple times or when not running.
        """
        with self._lock:
            if not self._running:
                return
            self._running = False

        self._stop_event.set()

        if self._async_task is not None:
            self._async_task.cancel()
            with contextlib.suppress(asyncio.CancelledError):
                await self._async_task
            self._async_task = None

    def reset(self) -> None:
        """Reset watchdog state without stopping.

        Clears triggered flag and resets timer.
        """
        with self._lock:
            self._last_ping = time.monotonic()
            self._triggered = False

    def _monitor_loop(self) -> None:
        """Background thread monitoring loop."""
        check_interval = min(1.0, self._timeout / 4)

        # Event.wait() returns True once stop() sets the event, ending the loop.
        while not self._stop_event.wait(timeout=check_interval):
            if self._shutdown_requested:
                break
            self._check_timeout()

    async def _async_monitor_loop(self) -> None:
        """Async monitoring loop."""
        check_interval = min(1.0, self._timeout / 4)

        while self._running and not self._shutdown_requested:
            await asyncio.sleep(check_interval)
            await self._async_check_timeout()

    def _run_sync(self, result: object) -> None:
        """Drive a coroutine returned by a callback invoked from sync code.

        Non-coroutine results are ignored. If an event loop is running in
        the current thread the coroutine is scheduled on it, otherwise it
        is executed to completion with ``asyncio.run``.
        """
        if not inspect.iscoroutine(result):
            return
        try:
            loop = asyncio.get_running_loop()
        except RuntimeError:
            asyncio.run(result)
        else:
            self._callback_task = loop.create_task(result)

    def _send_stale_alert(self) -> object:
        """Call ``on_alert`` for a stale state file and return its result."""
        if self._on_alert is None:
            return None
        return self._on_alert(
            "watchdog",
            f"Heartbeat state file is stale: {self._state_file}",
            {"state_file": str(self._state_file), "max_age": self._max_age},
        )

    def _check_timeout(self) -> None:
        """Check for timeout and invoke callback if needed."""
        # If monitoring a state file, check that instead of ping time
        if self._state_file is not None:
            self._check_state_file_sync()
            return

        with self._lock:
            if self._triggered:
                return

            elapsed = time.monotonic() - self._last_ping
            if elapsed < self._timeout:
                return

            self._triggered = True
            self._stats.record_timeout()

        # Invoke callback outside lock - suppress errors to prevent watchdog crash
        if self._on_timeout is not None:
            with contextlib.suppress(Exception):
                self._run_sync(self._on_timeout())

    def _check_state_file_sync(self) -> None:
        """Check heartbeat state file (sync version).

        Mirrors the async variant: on the first stale observation the
        alert callback and then the timeout callback are invoked; a fresh
        heartbeat clears the triggered flag so a later outage fires again.
        """
        if self._state_file is None:
            return

        is_alive = self._is_state_file_alive()

        with self._lock:
            if is_alive:
                # Reset triggered state if heartbeat is back
                self._triggered = False
                return

            if self._triggered:
                return

            self._triggered = True
            self._stats.record_timeout()

        # Invoke callbacks outside lock - errors are suppressed so a faulty
        # callback cannot kill the monitor thread.
        if self._on_alert is not None:
            with contextlib.suppress(Exception):
                self._run_sync(self._send_stale_alert())

        if self._on_timeout is not None:
            with contextlib.suppress(Exception):
                self._run_sync(self._on_timeout())

    def _is_state_file_alive(self) -> bool:
        """Check if heartbeat state file is recent enough.

        Returns:
            True only when the file can be read, holds a JSON object with
            a truthy ``timestamp`` parseable by ``datetime.fromisoformat``,
            and that timestamp is at most ``max_age`` seconds old. Every
            failure mode (missing/unreadable file, malformed JSON, non-object
            document, bad or timezone-naive timestamp) yields False.
        """
        state_file = self._state_file
        if state_file is None:
            return False
        try:
            # A missing file surfaces as FileNotFoundError (an OSError), so
            # no separate exists() check - and no race with a writer - is needed.
            data = json.loads(state_file.read_text())
            if not isinstance(data, dict):
                # Valid JSON that is not an object (list, number, ...) has no
                # timestamp field; treat it as a dead heartbeat, not a crash.
                return False
            timestamp = data.get("timestamp")
            if not timestamp:
                return False
            beat_time = datetime.fromisoformat(timestamp)
            # Subtracting a naive timestamp from an aware "now" raises
            # TypeError, which is reported as "not alive" below.
            age = (datetime.now(timezone.utc) - beat_time).total_seconds()
            return age <= self._max_age
        except (json.JSONDecodeError, KeyError, OSError, ValueError, TypeError):
            return False

    async def _async_check_timeout(self) -> None:
        """Async version of timeout check."""
        # If monitoring a state file, check that instead of ping time
        if self._state_file is not None:
            await self._check_state_file_async()
            return

        with self._lock:
            if self._triggered:
                return

            elapsed = time.monotonic() - self._last_ping
            if elapsed < self._timeout:
                return

            self._triggered = True
            self._stats.record_timeout()

        # Invoke callback outside lock - suppress errors to prevent watchdog crash
        if self._on_timeout is not None:
            with contextlib.suppress(Exception):
                result = self._on_timeout()
                if inspect.iscoroutine(result):
                    await result

    async def _check_state_file_async(self) -> None:
        """Check heartbeat state file (async version)."""
        if self._state_file is None:
            return

        # Run file check in executor to avoid blocking
        loop = asyncio.get_running_loop()
        is_alive = await loop.run_in_executor(None, self._is_state_file_alive)

        with self._lock:
            if is_alive:
                # Reset triggered state if heartbeat is back
                self._triggered = False
                return

            if self._triggered:
                return

            self._triggered = True
            self._stats.record_timeout()

        # Send alert if callback provided
        if self._on_alert is not None:
            with contextlib.suppress(Exception):
                alert_result = self._send_stale_alert()
                if asyncio.iscoroutine(alert_result):
                    await alert_result

        # Invoke timeout callback outside lock
        if self._on_timeout is not None:
            with contextlib.suppress(Exception):
                result = self._on_timeout()
                if inspect.iscoroutine(result):
                    await result

    def __enter__(self) -> Self:
        """Enter context manager, starting watchdog."""
        self.start()
        return self

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        exc_tb: object,
    ) -> None:
        """Exit context manager, stopping watchdog."""
        self.stop()

    async def __aenter__(self) -> Self:
        """Enter async context manager, starting watchdog."""
        await self.astart()
        return self

    async def __aexit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        exc_tb: object,
    ) -> None:
        """Exit async context manager, stopping watchdog."""
        await self.astop()

    def __repr__(self) -> str:
        """Return string representation."""
        name_part = f", name={self._name!r}" if self._name else ""
        status = "running" if self._running else "stopped"
        return f"Watchdog(timeout={self._timeout}, status={status}{name_part})"
588
+
589
+
590
def watchdog_context(
    timeout: float | None = None,
    on_timeout: Callable[[], None] | Callable[[], Awaitable[None]] | None = None,
    *,
    raise_on_timeout: bool = False,
    name: str | None = None,
) -> Watchdog:
    """Create a watchdog context for monitoring code blocks.

    This is a convenience function that creates a Watchdog instance.
    Use with 'with' statement for automatic start/stop.

    Args:
        timeout: Seconds before timeout triggers. None lets Watchdog pick
            its configured default.
        on_timeout: Optional callback for timeout events.
        raise_on_timeout: If True and no ``on_timeout`` is given, install a
            callback that raises WatchdogTimeoutError on timeout. The
            error reports the watchdog's effective (clamped) timeout.
            NOTE(review): Watchdog invokes callbacks under
            ``contextlib.suppress(Exception)``, so this exception does not
            appear to reach the monitored code - confirm intended behavior;
            ``Watchdog.is_triggered`` is the reliable signal.
        name: Optional identifier.

    Returns:
        Watchdog instance for use as context manager.

    Examples:
        >>> with watchdog_context(timeout=30) as wd:  # doctest: +SKIP
        ...     for item in items:
        ...         wd.ping()
        ...         process(item)
    """
    if not raise_on_timeout or on_timeout is not None:
        return Watchdog(timeout=timeout, on_timeout=on_timeout, name=name)

    def raise_timeout() -> None:
        # Read the timeout from the instance so the message and payload
        # reflect the value actually in force (never "None"), including
        # config defaults and clamping applied by Watchdog.
        effective = watchdog.timeout
        raise WatchdogTimeoutError(
            f"Watchdog timeout after {effective}s",
            seconds_inactive=effective,
        )

    # The closure resolves `watchdog` lazily; callbacks only fire after
    # start(), by which time the name below is bound.
    watchdog = Watchdog(timeout=timeout, on_timeout=raise_timeout, name=name)
    return watchdog
630
+
631
+
632
+ __all__ = [
633
+ "OnAlertCallback",
634
+ "Watchdog",
635
+ "WatchdogStats",
636
+ "watchdog_context",
637
+ ]
@@ -0,0 +1,29 @@
1
+ """Secrets subsystem public exports.
2
+
3
+ The secrets package exposes factories and models that orchestrate credential
4
+ resolution across multiple providers such as kwargs, configuration files,
5
+ keyring backends, and SOPS encrypted payloads.
6
+ """
7
+
8
+ from kstlib.secrets.exceptions import (
9
+ SecretDecryptionError,
10
+ SecretError,
11
+ SecretNotFoundError,
12
+ )
13
+ from kstlib.secrets.models import SecretRecord, SecretRequest, SecretSource
14
+ from kstlib.secrets.resolver import SecretResolver, get_secret_resolver, resolve_secret
15
+ from kstlib.secrets.sensitive import CachePurgeProtocol, sensitive
16
+
17
+ __all__ = [
18
+ "CachePurgeProtocol",
19
+ "SecretDecryptionError",
20
+ "SecretError",
21
+ "SecretNotFoundError",
22
+ "SecretRecord",
23
+ "SecretRequest",
24
+ "SecretResolver",
25
+ "SecretSource",
26
+ "get_secret_resolver",
27
+ "resolve_secret",
28
+ "sensitive",
29
+ ]
@@ -0,0 +1,19 @@
1
+ """Custom exceptions raised by the secrets subsystem."""
2
+
3
+ __all__ = [
4
+ "SecretDecryptionError",
5
+ "SecretError",
6
+ "SecretNotFoundError",
7
+ ]
8
+
9
+
10
class SecretError(RuntimeError):
    """Root of the secrets exception hierarchy.

    Catch this type to handle any error raised by the secrets subsystem.
    """
12
+
13
+
14
class SecretNotFoundError(SecretError):
    """Signals that none of the providers could supply the requested secret."""
16
+
17
+
18
class SecretDecryptionError(SecretError):
    """Signals that decrypting a secret payload failed."""