procler 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83):
  1. procler/__init__.py +3 -0
  2. procler/__main__.py +6 -0
  3. procler/api/__init__.py +5 -0
  4. procler/api/app.py +261 -0
  5. procler/api/deps.py +21 -0
  6. procler/api/routes/__init__.py +5 -0
  7. procler/api/routes/config.py +290 -0
  8. procler/api/routes/groups.py +62 -0
  9. procler/api/routes/logs.py +43 -0
  10. procler/api/routes/processes.py +185 -0
  11. procler/api/routes/recipes.py +69 -0
  12. procler/api/routes/snippets.py +134 -0
  13. procler/api/routes/ws.py +459 -0
  14. procler/cli.py +1478 -0
  15. procler/config/__init__.py +65 -0
  16. procler/config/changelog.py +148 -0
  17. procler/config/loader.py +256 -0
  18. procler/config/schema.py +315 -0
  19. procler/core/__init__.py +54 -0
  20. procler/core/context_base.py +117 -0
  21. procler/core/context_docker.py +384 -0
  22. procler/core/context_local.py +287 -0
  23. procler/core/daemon_detector.py +325 -0
  24. procler/core/events.py +74 -0
  25. procler/core/groups.py +419 -0
  26. procler/core/health.py +280 -0
  27. procler/core/log_tailer.py +262 -0
  28. procler/core/process_manager.py +1277 -0
  29. procler/core/recipes.py +330 -0
  30. procler/core/snippets.py +231 -0
  31. procler/core/variable_substitution.py +65 -0
  32. procler/db.py +96 -0
  33. procler/logging.py +41 -0
  34. procler/models.py +130 -0
  35. procler/py.typed +0 -0
  36. procler/settings.py +29 -0
  37. procler/static/assets/AboutView-BwZnsfpW.js +4 -0
  38. procler/static/assets/AboutView-UHbxWXcS.css +1 -0
  39. procler/static/assets/Code-HTS-H1S6.js +74 -0
  40. procler/static/assets/ConfigView-CGJcmp9G.css +1 -0
  41. procler/static/assets/ConfigView-aVtbRDf8.js +1 -0
  42. procler/static/assets/DashboardView-C5jw9Nsd.css +1 -0
  43. procler/static/assets/DashboardView-Dab7Cu9v.js +1 -0
  44. procler/static/assets/DataTable-z39TOAa4.js +746 -0
  45. procler/static/assets/DescriptionsItem-B2E8YbqJ.js +74 -0
  46. procler/static/assets/Divider-Dk-6aD2Y.js +42 -0
  47. procler/static/assets/Empty-MuygEHZM.js +24 -0
  48. procler/static/assets/Grid-CZ9QVKAT.js +1 -0
  49. procler/static/assets/GroupsView-BALG7i1X.js +1 -0
  50. procler/static/assets/GroupsView-gXAI1CVC.css +1 -0
  51. procler/static/assets/Input-e0xaxoWE.js +259 -0
  52. procler/static/assets/PhArrowsClockwise.vue-DqDg31az.js +1 -0
  53. procler/static/assets/PhCheckCircle.vue-Fwj9sh9m.js +1 -0
  54. procler/static/assets/PhEye.vue-JcPHciC2.js +1 -0
  55. procler/static/assets/PhPlay.vue-CZm7Gy3u.js +1 -0
  56. procler/static/assets/PhPlus.vue-yTWqKlSh.js +1 -0
  57. procler/static/assets/PhStop.vue-DxsqwIki.js +1 -0
  58. procler/static/assets/PhTrash.vue-DcqQbN1_.js +125 -0
  59. procler/static/assets/PhXCircle.vue-BXWmrabV.js +1 -0
  60. procler/static/assets/ProcessDetailView-DDbtIWq9.css +1 -0
  61. procler/static/assets/ProcessDetailView-DPtdNV-q.js +1 -0
  62. procler/static/assets/ProcessesView-B3a6Umur.js +1 -0
  63. procler/static/assets/ProcessesView-goLmghbJ.css +1 -0
  64. procler/static/assets/RecipesView-D2VxdneD.js +166 -0
  65. procler/static/assets/RecipesView-DXnFDCK4.css +1 -0
  66. procler/static/assets/Select-BBR17AHq.js +317 -0
  67. procler/static/assets/SnippetsView-B3a9q3AI.css +1 -0
  68. procler/static/assets/SnippetsView-DBCB2yGq.js +1 -0
  69. procler/static/assets/Spin-BXTjvFUk.js +90 -0
  70. procler/static/assets/Tag-Bh_qV63A.js +71 -0
  71. procler/static/assets/changelog-KkTT4H9-.js +1 -0
  72. procler/static/assets/groups-Zu-_v8ey.js +1 -0
  73. procler/static/assets/index-BsN-YMXq.css +1 -0
  74. procler/static/assets/index-BzW1XhyH.js +1282 -0
  75. procler/static/assets/procler-DOrSB1Vj.js +1 -0
  76. procler/static/assets/recipes-1w5SseGb.js +1 -0
  77. procler/static/index.html +17 -0
  78. procler/static/procler.png +0 -0
  79. procler-0.2.0.dist-info/METADATA +545 -0
  80. procler-0.2.0.dist-info/RECORD +83 -0
  81. procler-0.2.0.dist-info/WHEEL +4 -0
  82. procler-0.2.0.dist-info/entry_points.txt +2 -0
  83. procler-0.2.0.dist-info/licenses/LICENSE +21 -0
procler/core/groups.py ADDED
@@ -0,0 +1,419 @@
1
+ """Process group management."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ from typing import Any
7
+
8
+ from ..config import DependencyCondition, get_config
9
+ from .health import get_health_checker
10
+ from .process_manager import get_process_manager
11
+
12
+
13
class GroupManager:
    """Manages process groups with ordered operations and dependency support.

    Every public method returns a result dict with a ``"success"`` flag.
    Successful results carry their payload under ``"data"``; failures carry an
    ``"error"`` message and, where available, ``"error_code"`` and
    ``"suggestion"``.
    """

    def __init__(self):
        self._process_manager = get_process_manager()
        self._health_checker = get_health_checker()

    # ------------------------------------------------------------------
    # Result builders shared by the public operations
    # ------------------------------------------------------------------

    @staticmethod
    def _group_not_found(name: str) -> dict[str, Any]:
        """Build the error result used when ``name`` is not a configured group."""
        return {
            "success": False,
            "error": f"Group '{name}' not found",
            "error_code": "group_not_found",
            "suggestion": "Run 'procler group list' to see available groups",
        }

    @staticmethod
    def _process_not_in_config(proc_name: str) -> dict[str, Any]:
        """Build the per-process failure entry for a group member missing from config."""
        return {
            "process": proc_name,
            "success": False,
            "error": f"Process '{proc_name}' not defined in config",
            "error_code": "process_not_in_config",
            "suggestion": "Add process definition to .procler/config.yaml",
        }

    @staticmethod
    def _describe_group(name: str, group) -> dict[str, Any]:
        """Serialise one group definition for list/get responses."""
        return {
            "name": name,
            "description": group.description,
            "processes": group.processes,
            "stop_order": group.get_stop_order(),
        }

    @staticmethod
    def _operation_entry(proc_name: str, result: dict[str, Any]) -> dict[str, Any]:
        """Condense a process-manager start/stop result into a per-process entry."""
        # ``or {}`` also covers results that carry an explicit ``"data": None``.
        data = result.get("data") or {}
        return {
            "process": proc_name,
            "success": result["success"],
            "status": data.get("status"),
            "error": result.get("error"),
        }

    # ------------------------------------------------------------------
    # Read-only queries
    # ------------------------------------------------------------------

    def list_groups(self) -> dict[str, Any]:
        """List all defined groups."""
        config = get_config()
        groups_data = [self._describe_group(name, group) for name, group in config.groups.items()]
        return {
            "success": True,
            "data": {
                "groups": groups_data,
                "count": len(groups_data),
            },
        }

    def get_group(self, name: str) -> dict[str, Any]:
        """Get a specific group by name, or a ``group_not_found`` error result."""
        config = get_config()
        if name not in config.groups:
            return self._group_not_found(name)
        return {
            "success": True,
            "data": {"group": self._describe_group(name, config.groups[name])},
        }

    # ------------------------------------------------------------------
    # Group operations
    # ------------------------------------------------------------------

    async def start_group(
        self,
        name: str,
        respect_dependencies: bool = True,
        dependency_timeout: float = 60.0,
    ) -> dict[str, Any]:
        """
        Start all processes in a group in order.

        A process that is missing from the config, or whose dependencies are
        not satisfied, is reported as failed and skipped; the remaining
        processes are still attempted.

        Args:
            name: Group name
            respect_dependencies: If True, wait for process dependencies before starting
            dependency_timeout: Max seconds to wait for each dependency

        Returns:
            Result dict whose ``"success"`` is True only if every process in
            the group succeeded; per-process entries are in ``data["results"]``.
        """
        config = get_config()
        if name not in config.groups:
            return self._group_not_found(name)

        group = config.groups[name]
        results: list[dict[str, Any]] = []

        for proc_name in group.processes:
            if proc_name not in config.processes:
                results.append(self._process_not_in_config(proc_name))
                continue

            proc_def = config.processes[proc_name]

            # Check dependencies before starting
            if respect_dependencies and proc_def.depends_on:
                dep_result = await self._wait_for_dependencies(proc_name, proc_def, config, dependency_timeout)
                if not dep_result["success"]:
                    results.append(
                        {
                            "process": proc_name,
                            "success": False,
                            "error": dep_result["error"],
                            "dependency_failures": dep_result.get("failures", []),
                        }
                    )
                    continue

            # Ensure process exists in runtime DB
            await self._ensure_process_in_db(proc_name, proc_def)

            result = await self._process_manager.start(proc_name)

            if proc_def.healthcheck:
                self._health_checker.register_process(proc_name, proc_def.healthcheck)
                # Awaited directly instead of wrapped in a fire-and-forget
                # asyncio.create_task(): an unreferenced task may be
                # garbage-collected before it runs and would hide any error.
                await self._health_checker.start_checking(proc_name, proc_def.healthcheck)

            results.append(self._operation_entry(proc_name, result))

        return {
            "success": all(entry["success"] for entry in results),
            "data": {
                "group": name,
                "action": "started",
                "results": results,
            },
        }

    async def _wait_for_dependencies(
        self,
        proc_name: str,
        proc_def,
        config,
        timeout: float,
    ) -> dict[str, Any]:
        """Check every dependency of a process and collect the ones not ready.

        All dependencies are evaluated (no short-circuit), so the returned
        ``"failures"`` list names every unsatisfied dependency.

        Args:
            proc_name: Process whose dependencies are being checked (used in the error text)
            proc_def: Its config definition; must provide ``get_dependencies()``
            config: Loaded config exposing a ``processes`` mapping
            timeout: Max seconds to wait for each ``healthy`` dependency
        """
        failures = []
        for dep in proc_def.get_dependencies():
            failure = await self._check_dependency(dep, config, timeout)
            if failure is not None:
                failures.append(failure)

        if failures:
            return {
                "success": False,
                "error": f"Dependencies not satisfied for '{proc_name}'",
                "failures": failures,
            }
        return {"success": True}

    async def _check_dependency(self, dep, config, timeout: float) -> dict[str, Any] | None:
        """Return a failure entry for one dependency, or None when it is satisfied."""
        dep_name = dep.name

        if dep_name not in config.processes:
            return {
                "dependency": dep_name,
                "error": "Dependency not defined in config",
            }

        dep_def = config.processes[dep_name]

        status_result = await self._process_manager.status(dep_name)
        if not status_result["success"]:
            return {
                "dependency": dep_name,
                "error": f"Could not get status: {status_result.get('error')}",
            }

        process_info = (status_result.get("data") or {}).get("process") or {}
        proc_status = process_info.get("status")

        # A dependency that is not running fails immediately; only the
        # 'healthy' condition below actually waits.
        if proc_status != "running":
            return {
                "dependency": dep_name,
                "error": f"Dependency not running (status: {proc_status})",
                "condition": dep.condition.value,
            }

        if dep.condition != DependencyCondition.HEALTHY:
            return None

        if not dep_def.healthcheck:
            return {
                "dependency": dep_name,
                "error": "Dependency requires healthy condition but has no healthcheck",
            }

        is_healthy = await self._health_checker.wait_for_healthy(dep_name, dep_def.healthcheck, timeout)
        if not is_healthy:
            return {
                "dependency": dep_name,
                "error": f"Dependency not healthy after {timeout}s",
                "condition": "healthy",
            }
        return None

    async def stop_group(self, name: str) -> dict[str, Any]:
        """Stop all processes in a group, following the group's stop order.

        Health checking for each process is stopped before the process itself.

        Returns:
            Result dict whose ``"success"`` is True only if every process was
            stopped successfully; per-process entries are in ``data["results"]``.
        """
        config = get_config()
        if name not in config.groups:
            return self._group_not_found(name)

        group = config.groups[name]
        results: list[dict[str, Any]] = []

        for proc_name in group.get_stop_order():
            if proc_name not in config.processes:
                results.append(self._process_not_in_config(proc_name))
                continue

            # Stop health checking first
            await self._health_checker.stop_checking(proc_name)

            # Ensure process exists in runtime DB
            await self._ensure_process_in_db(proc_name, config.processes[proc_name])

            result = await self._process_manager.stop(proc_name)
            results.append(self._operation_entry(proc_name, result))

        return {
            "success": all(entry["success"] for entry in results),
            "data": {
                "group": name,
                "action": "stopped",
                "results": results,
            },
        }

    async def status_group(self, name: str) -> dict[str, Any]:
        """Get status of all processes in a group, including health information.

        The overall result is successful whenever the group exists; problems
        with individual processes are reported inside their own entry.
        """
        config = get_config()
        if name not in config.groups:
            return self._group_not_found(name)

        group = config.groups[name]
        statuses = []

        for proc_name in group.processes:
            if proc_name not in config.processes:
                statuses.append(
                    {
                        "process": proc_name,
                        "status": "not_defined",
                        "error": "Not defined in config",
                    }
                )
                continue

            proc_def = config.processes[proc_name]

            # Ensure process exists in runtime DB
            await self._ensure_process_in_db(proc_name, proc_def)

            result = await self._process_manager.status(proc_name)
            if not result["success"]:
                statuses.append(
                    {
                        "process": proc_name,
                        "status": "unknown",
                        "error": result.get("error"),
                    }
                )
                continue

            proc_data = result["data"]["process"]
            status_entry = {
                "process": proc_name,
                "status": proc_data.get("status", "unknown"),
                "pid": proc_data.get("pid"),
                "uptime_seconds": proc_data.get("uptime_seconds"),
                "linux_state": proc_data.get("linux_state"),
            }

            # Include health status if process has health check
            if proc_def.healthcheck:
                status_entry["health"] = self._health_checker.to_dict(proc_name)

            # Include dependency info
            if proc_def.depends_on:
                status_entry["depends_on"] = [
                    {"name": d.name, "condition": d.condition.value} for d in proc_def.get_dependencies()
                ]

            statuses.append(status_entry)

        return {
            "success": True,
            "data": {
                "group": name,
                "description": group.description,
                "statuses": statuses,
            },
        }

    async def _ensure_process_in_db(self, name: str, proc_def) -> None:
        """Ensure a process from config exists in the runtime database.

        Does nothing when a row with this name already exists; otherwise a new
        ``Process`` row is created from the config definition.
        """
        # Imported here rather than at module level, as in the original code.
        from datetime import datetime

        from sqler.query import SQLerField as F

        from ..db import init_database
        from ..models import Process

        init_database()

        if Process.query().filter(F("name") == name).all():
            return

        # One timestamp for both fields so a freshly created row has
        # created_at == updated_at.
        now = datetime.now().isoformat()
        process = Process(
            name=name,
            command=proc_def.command,
            context_type=proc_def.context.value,
            container_name=proc_def.container,
            cwd=proc_def.cwd,
            tags=proc_def.tags or None,
            created_at=now,
            updated_at=now,
        )
        process.save()
402
+
403
+
404
# Process-wide GroupManager instance; built on first request.
_group_manager: GroupManager | None = None


def get_group_manager() -> GroupManager:
    """Return the shared GroupManager, creating it on the first call."""
    global _group_manager
    if _group_manager is not None:
        return _group_manager
    _group_manager = GroupManager()
    return _group_manager
414
+
415
+
416
def reset_group_manager() -> None:
    """Forget the cached GroupManager so the next lookup builds a new one (for testing)."""
    global _group_manager
    _group_manager = None
procler/core/health.py ADDED
@@ -0,0 +1,280 @@
1
+ """Health check management for processes."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ from collections.abc import Callable
7
+ from dataclasses import dataclass
8
+ from datetime import datetime
9
+ from enum import Enum
10
+ from typing import Any
11
+
12
+ from ..config import HealthCheckDef
13
+ from .context_local import get_local_context
14
+
15
+
16
class HealthStatus(str, Enum):
    """Possible health states of a monitored process.

    The enum mixes in ``str``, so each member compares equal to its
    lowercase string value.
    """

    # No check result has been recorded yet.
    UNKNOWN = "unknown"
    # Still inside the start_period grace window.
    STARTING = "starting"
    # Health checks are passing.
    HEALTHY = "healthy"
    # Health checks have failed.
    UNHEALTHY = "unhealthy"
    # The process is not running.
    DEAD = "dead"
24
+
25
+
26
@dataclass
class HealthState:
    """Mutable record of the most recent health information for one process."""

    # Current health status; the only required field.
    status: HealthStatus
    # When the last check ran, or None if none has run yet.
    last_check: datetime | None = None
    # Number of failed checks in a row since the last pass.
    consecutive_failures: int = 0
    # Message from the most recent failure, if any.
    last_error: str | None = None
    # Total number of checks recorded.
    check_count: int = 0
35
+
36
+
37
class HealthChecker:
    """
    Manages health checks for processes.

    Runs health check commands at intervals and tracks pass/fail state.
    Supports start_period grace period and configurable retries.
    """

    def __init__(self):
        # Latest known state per process name.
        self._health_states: dict[str, HealthState] = {}
        # Background polling task per process name.
        self._check_tasks: dict[str, asyncio.Task] = {}
        # Status-change listeners per process name.
        self._callbacks: dict[str, list[Callable[[str, HealthStatus], None]]] = {}
        self._local_context = get_local_context()

    def get_health(self, process_name: str) -> HealthState:
        """Get current health state for a process.

        Unregistered processes yield a fresh state with status UNKNOWN; that
        placeholder is not stored.
        """
        state = self._health_states.get(process_name)
        if state is None:
            return HealthState(status=HealthStatus.UNKNOWN)
        return state

    def register_process(
        self,
        process_name: str,
        healthcheck: HealthCheckDef,
        on_status_change: Callable[[str, HealthStatus], None] | None = None,
    ) -> None:
        """
        Register a process for health checking.

        Any previous state for the process is replaced by a fresh STARTING
        state. Callbacks accumulate across repeated registrations.

        Args:
            process_name: Name of the process
            healthcheck: Health check definition from config (not used by this method itself)
            on_status_change: Optional callback for status changes
        """
        self._health_states[process_name] = HealthState(
            status=HealthStatus.STARTING,
            last_check=None,
            consecutive_failures=0,
        )

        if on_status_change:
            self._callbacks.setdefault(process_name, []).append(on_status_change)

    async def _cancel_check_task(self, process_name: str) -> None:
        """Cancel the polling task of a process, if any, and wait for it to finish."""
        # Remove the entry before awaiting so that a task registered while we
        # wait is never deleted by mistake afterwards.
        task = self._check_tasks.pop(process_name, None)
        if task is None:
            return
        task.cancel()
        try:
            await task
        except asyncio.CancelledError:
            pass

    async def start_checking(
        self,
        process_name: str,
        healthcheck: HealthCheckDef,
    ) -> None:
        """Start the health check loop for a process, replacing any running loop."""
        await self._cancel_check_task(process_name)
        # The task is stored in _check_tasks, which keeps a reference to it.
        self._check_tasks[process_name] = asyncio.create_task(self._health_check_loop(process_name, healthcheck))

    async def stop_checking(self, process_name: str) -> None:
        """Stop health checking for a process and mark its state as DEAD."""
        await self._cancel_check_task(process_name)

        if process_name in self._health_states:
            self._health_states[process_name].status = HealthStatus.DEAD

    async def _health_check_loop(
        self,
        process_name: str,
        healthcheck: HealthCheckDef,
    ) -> None:
        """Main health check loop for a process.

        Sleeps for the start period, then runs the check command every
        interval until cancelled or until the process has no registered state.
        """
        start_period = healthcheck.get_start_period_seconds()
        interval = healthcheck.get_interval_seconds()
        timeout = healthcheck.get_timeout_seconds()
        retries = healthcheck.retries

        # Wait for start period
        if start_period > 0:
            await asyncio.sleep(start_period)

        # Update status to unknown (ready to check)
        if process_name in self._health_states:
            self._health_states[process_name].status = HealthStatus.UNKNOWN

        while True:
            try:
                result = await self._local_context.exec_command(
                    healthcheck.test,
                    timeout=timeout,
                )

                state = self._health_states.get(process_name)
                if not state:
                    # State was dropped (e.g. by reset()); nothing left to update.
                    break

                state.last_check = datetime.now()
                state.check_count += 1
                old_status = state.status

                if result.exit_code == 0:
                    state.consecutive_failures = 0
                    state.last_error = None
                    state.status = HealthStatus.HEALTHY
                else:
                    state.consecutive_failures += 1
                    state.last_error = result.stderr or f"Exit code: {result.exit_code}"

                    # Only flip to UNHEALTHY once the retry budget is used up.
                    if state.consecutive_failures >= retries:
                        state.status = HealthStatus.UNHEALTHY

                if old_status != state.status:
                    self._notify_status_change(process_name, state.status)

                await asyncio.sleep(interval)

            except asyncio.CancelledError:
                break
            except Exception as e:
                # Record the error but keep checking
                if process_name in self._health_states:
                    self._health_states[process_name].last_error = str(e)
                await asyncio.sleep(interval)

    def _notify_status_change(self, process_name: str, status: HealthStatus) -> None:
        """Notify all callbacks of a status change."""
        # Iterate over a copy so a callback may register further callbacks.
        for callback in list(self._callbacks.get(process_name, [])):
            try:
                callback(process_name, status)
            except Exception:
                pass  # Don't let callback errors break health checking

    async def run_single_check(
        self,
        process_name: str,
        healthcheck: HealthCheckDef,
    ) -> dict[str, Any]:
        """
        Run a single health check and return result.

        Useful for manual health check triggers. Tracked state is not updated.

        Returns:
            Dict with ``success`` (exit code was 0), ``exit_code``, ``stdout``,
            ``stderr`` and an ISO-format ``timestamp``.
        """
        result = await self._local_context.exec_command(
            healthcheck.test,
            timeout=healthcheck.get_timeout_seconds(),
        )

        return {
            "success": result.exit_code == 0,
            "exit_code": result.exit_code,
            "stdout": result.stdout,
            "stderr": result.stderr,
            "timestamp": datetime.now().isoformat(),
        }

    async def wait_for_healthy(
        self,
        process_name: str,
        healthcheck: HealthCheckDef,
        timeout: float = 60.0,
    ) -> bool:
        """
        Wait until a process becomes healthy or timeout.

        The check command is polled at most once per second (or faster if the
        configured interval is shorter). A non-positive timeout returns False
        without running any check.

        Args:
            process_name: Name of the process
            healthcheck: Health check definition
            timeout: Maximum time to wait in seconds

        Returns:
            True if healthy, False if timeout or unhealthy
        """
        # Use the loop's monotonic clock: wall-clock time can jump (NTP, DST)
        # and would make the timeout fire early or late.
        loop = asyncio.get_running_loop()
        deadline = loop.time() + timeout
        check_interval = min(healthcheck.get_interval_seconds(), 1.0)

        while True:
            if loop.time() >= deadline:
                return False

            result = await self.run_single_check(process_name, healthcheck)
            if result["success"]:
                self._mark_healthy(process_name)
                return True

            # Never sleep past the deadline.
            remaining = deadline - loop.time()
            await asyncio.sleep(max(0.0, min(check_interval, remaining)))

    def _mark_healthy(self, process_name: str) -> None:
        """Set a registered process to HEALTHY, notifying callbacks on change."""
        state = self._health_states.get(process_name)
        if state is None:
            return
        previous = state.status
        state.status = HealthStatus.HEALTHY
        # Without this, the periodic loop would later see HEALTHY -> HEALTHY
        # and listeners would never hear about the transition.
        if previous != HealthStatus.HEALTHY:
            self._notify_status_change(process_name, HealthStatus.HEALTHY)

    def to_dict(self, process_name: str) -> dict[str, Any]:
        """Get health state as a dictionary."""
        state = self.get_health(process_name)
        return {
            "status": state.status.value,
            "last_check": state.last_check.isoformat() if state.last_check else None,
            "consecutive_failures": state.consecutive_failures,
            "last_error": state.last_error,
            "check_count": state.check_count,
        }

    def reset(self) -> None:
        """Reset all health state (for testing)."""
        # Cancel all tasks - they'll clean up on their own
        for task in self._check_tasks.values():
            if not task.done():
                task.cancel()
        self._check_tasks.clear()
        self._health_states.clear()
        self._callbacks.clear()
261
+
262
+
263
# Process-wide HealthChecker instance; built on first request.
_health_checker: HealthChecker | None = None


def get_health_checker() -> HealthChecker:
    """Return the shared HealthChecker, creating it on the first call."""
    global _health_checker
    if _health_checker is not None:
        return _health_checker
    _health_checker = HealthChecker()
    return _health_checker
273
+
274
+
275
def reset_health_checker() -> None:
    """Reset and discard the shared HealthChecker (for testing).

    The existing instance, if any, has its ``reset()`` called before the
    module-level reference is cleared.
    """
    global _health_checker
    current = _health_checker
    if current:
        current.reset()
    _health_checker = None