singleserver 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,489 @@
1
+ """
2
+ Process management for subprocess lifecycle, health monitoring, and restarts.
3
+ """
4
+
5
+ from __future__ import annotations
6
+
7
+ import logging
8
+ import os
9
+ import signal
10
+ import subprocess
11
+ import threading
12
+ import time
13
+ from collections.abc import Callable
14
+ from dataclasses import dataclass, field
15
+ from enum import Enum
16
+ from pathlib import Path
17
+ from types import TracebackType
18
+ from typing import IO
19
+
20
+ logger = logging.getLogger(__name__)
21
+
22
+
23
class ProcessState(Enum):
    """Lifecycle states a managed process can be in."""

    # No child is being managed.
    STOPPED = "stopped"
    # Child spawned; waiting for it to be considered healthy.
    STARTING = "starting"
    # Child is alive and considered healthy.
    RUNNING = "running"
    # Child is alive but its health check is failing or raising.
    UNHEALTHY = "unhealthy"
    # Shutdown has been requested and is in progress.
    STOPPING = "stopping"
    # Child failed and will not be (or could not be) restarted.
    FAILED = "failed"
32
+
33
+
34
@dataclass
class ProcessOptions:
    """Bundle of settings describing how a managed process is run.

    A ``cwd`` supplied as a plain string is converted to a ``Path`` after
    initialisation; every other field is stored exactly as given.
    """

    # --- Command and environment ---
    # Argument vector of the program to launch.
    command: list[str] = field(default_factory=list)
    # Extra environment variables, or None for no additions.
    env: dict[str, str] | None = None
    # Working directory for the child, or None for the default.
    cwd: str | Path | None = None

    # --- Output handling ---
    # "inherit", "null", path, or file object
    stdout: str | Path | IO | None = None
    # "inherit", "null", "stdout", path, or file object
    stderr: str | Path | IO | None = None

    # --- Health and lifecycle ---
    # Zero-argument callable returning True when the process is healthy.
    health_check: Callable[[], bool] | None = None
    # Seconds between health checks.
    health_check_interval: float = 5.0
    # Seconds allowed for the process to become healthy after launch.
    startup_timeout: float = 30.0

    # --- Restart behavior ---
    restart_on_failure: bool = True
    max_restarts: int = 3
    # Seconds to wait before each restart attempt.
    restart_delay: float = 1.0
    # Reset restart count after this many seconds of stability
    restart_window: float = 60.0

    # --- Shutdown ---
    # Seconds to wait for a graceful exit.
    shutdown_timeout: float = 10.0
    # Signal used to request a graceful exit.
    shutdown_signal: int = signal.SIGTERM

    def __post_init__(self) -> None:
        """Normalise a string ``cwd`` into a ``Path``."""
        working_dir = self.cwd
        if isinstance(working_dir, str):
            self.cwd = Path(working_dir)
65
+
66
+
67
class ProcessOwner:
    """
    Manages a subprocess when we're the lock owner.

    Handles:
    - Starting the subprocess with proper environment and output handling
    - Health monitoring in a background thread
    - Automatic restarts on failure (configurable)
    - Graceful shutdown with timeout

    ``start()`` launches a daemon monitor thread that keeps supervising the
    child across restarts until ``stop()`` is called or the restart budget
    is exhausted. The ``on_state_change`` callback may therefore be invoked
    from either the caller's thread or the monitor thread.
    """

    def __init__(
        self,
        command: list[str],
        env: dict[str, str] | None = None,
        cwd: str | Path | None = None,
        stdout: str | Path | IO | None = None,
        stderr: str | Path | IO | None = None,
        health_check: Callable[[], bool] | None = None,
        health_check_interval: float = 5.0,
        startup_timeout: float = 30.0,
        restart_on_failure: bool = True,
        max_restarts: int = 3,
        restart_delay: float = 1.0,
        restart_window: float = 60.0,
        shutdown_timeout: float = 10.0,
        shutdown_signal: int = signal.SIGTERM,
        on_state_change: Callable[[ProcessState], None] | None = None,
    ):
        """
        Initialize the process owner.

        Args:
            command: Command to run as a list of strings.
            env: Additional environment variables (merged with current env).
            cwd: Working directory for the process.
            stdout: Where to send stdout ("inherit", "null", path, or file object).
            stderr: Where to send stderr ("inherit", "null", "stdout", path, or file object).
            health_check: Callable that returns True if the process is healthy.
            health_check_interval: Seconds between health checks.
            startup_timeout: Max seconds to wait for process to become healthy.
            restart_on_failure: Whether to restart the process if it dies.
            max_restarts: Maximum restart attempts before giving up.
            restart_delay: Seconds to wait between restart attempts.
            restart_window: Seconds of stability after which restart count resets.
            shutdown_timeout: Seconds to wait for graceful shutdown before SIGKILL.
            shutdown_signal: Signal to send for graceful shutdown.
            on_state_change: Callback when process state changes.
        """
        self.options = ProcessOptions(
            command=command,
            env=env,
            cwd=cwd,
            stdout=stdout,
            stderr=stderr,
            health_check=health_check,
            health_check_interval=health_check_interval,
            startup_timeout=startup_timeout,
            restart_on_failure=restart_on_failure,
            max_restarts=max_restarts,
            restart_delay=restart_delay,
            restart_window=restart_window,
            shutdown_timeout=shutdown_timeout,
            shutdown_signal=shutdown_signal,
        )

        self._process: subprocess.Popen | None = None
        self._state = ProcessState.STOPPED
        self._state_lock = threading.Lock()
        self._monitor_thread: threading.Thread | None = None
        self._stop_event = threading.Event()
        self._restart_count = 0
        self._last_restart_time: float | None = None
        self._last_healthy_time: float | None = None
        self._on_state_change = on_state_change
        # Log files opened by us (as opposed to file objects handed in by the
        # caller, which we never close).
        self._stdout_file: IO | None = None
        self._stderr_file: IO | None = None

    @property
    def state(self) -> ProcessState:
        """Current state of the process."""
        with self._state_lock:
            return self._state

    @property
    def pid(self) -> int | None:
        """PID of the most recently spawned process, or None if none is held."""
        proc = self._process
        if proc:
            return proc.pid
        return None

    @property
    def restart_count(self) -> int:
        """Number of restart attempts counted against the restart budget."""
        return self._restart_count

    def _set_state(self, state: ProcessState) -> None:
        """Update state and notify the callback if the state actually changed."""
        with self._state_lock:
            old_state = self._state
            self._state = state

        # The callback runs outside the lock so it may read ``state`` itself.
        if old_state != state:
            logger.debug(f"Process state changed: {old_state.value} -> {state.value}")
            if self._on_state_change:
                try:
                    self._on_state_change(state)
                except Exception as e:
                    logger.warning(f"Error in state change callback: {e}")

    def _build_env(self) -> dict[str, str]:
        """Build the subprocess environment: current env plus configured overrides."""
        env = os.environ.copy()
        if self.options.env:
            env.update(self.options.env)
        return env

    def _get_stdout(self) -> int | IO | None:
        """Translate the ``stdout`` option into a ``subprocess.Popen`` argument.

        A path is opened in append mode and remembered so it can be closed later.
        """
        stdout = self.options.stdout

        if stdout is None or stdout == "inherit":
            return None
        if stdout == "null":
            return subprocess.DEVNULL
        if isinstance(stdout, (str, Path)):
            self._stdout_file = open(stdout, "a")
            return self._stdout_file
        return stdout

    def _get_stderr(self) -> int | IO | None:
        """Translate the ``stderr`` option into a ``subprocess.Popen`` argument.

        A path is opened in append mode and remembered so it can be closed later.
        """
        stderr = self.options.stderr

        if stderr is None or stderr == "inherit":
            return None
        if stderr == "null":
            return subprocess.DEVNULL
        if stderr == "stdout":
            return subprocess.STDOUT
        if isinstance(stderr, (str, Path)):
            self._stderr_file = open(stderr, "a")
            return self._stderr_file
        return stderr

    def _close_log_files(self) -> None:
        """Close any log files we opened (best effort)."""
        for attr in ("_stdout_file", "_stderr_file"):
            handle = getattr(self, attr)
            if handle is None:
                continue
            # Clear the reference first so a concurrent caller does not try to
            # close the same handle again.
            setattr(self, attr, None)
            try:
                handle.close()
            except Exception as e:
                logger.debug(f"Error closing log file: {e}")

    def _spawn(self) -> subprocess.Popen:
        """Launch the configured command and return the new ``Popen`` object."""
        try:
            return subprocess.Popen(
                self.options.command,
                env=self._build_env(),
                cwd=self.options.cwd,
                stdout=self._get_stdout(),
                stderr=self._get_stderr(),
                # Start in new process group for clean shutdown
                start_new_session=True,
            )
        except Exception:
            # Do not leak log files opened for a process that never started.
            self._close_log_files()
            raise

    def start(self) -> None:
        """
        Start the subprocess.

        Raises:
            RuntimeError: If the process is already running.
            subprocess.SubprocessError: If the process fails to start.
        """
        current = self.state
        if current not in (ProcessState.STOPPED, ProcessState.FAILED):
            raise RuntimeError(f"Cannot start process in state {current.value}")

        self._stop_event.clear()
        self._set_state(ProcessState.STARTING)

        proc: subprocess.Popen | None = None
        try:
            proc = self._spawn()
            self._process = proc

            logger.info(
                f"Started process with PID {proc.pid}: {' '.join(self.options.command)}"
            )

            # Start the monitor thread
            self._monitor_thread = threading.Thread(
                target=self._monitor_loop,
                daemon=True,
                name=f"process-monitor-{proc.pid}",
            )
            self._monitor_thread.start()

        except Exception:
            # If the child was spawned but the monitor could not be started,
            # do not leave an unsupervised process behind.
            if proc is not None:
                self._terminate(proc)
            self._set_state(ProcessState.FAILED)
            self._close_log_files()
            raise

    def _wait_for_healthy(self) -> bool:
        """
        Wait for the process to become healthy.

        Returns:
            True if the process became healthy, False if timeout, failure,
            or a stop was requested while waiting.
        """
        check = self.options.health_check

        if not check:
            # No health check configured, assume healthy if process is running.
            # Brief, interruptible delay to let the process initialize.
            if self._stop_event.wait(0.1):
                return False
            proc = self._process
            if proc and proc.poll() is None:
                self._set_state(ProcessState.RUNNING)
                self._last_healthy_time = time.time()
                return True
            return False

        # Monotonic clock: the deadline must not move with wall-clock changes.
        deadline = time.monotonic() + self.options.startup_timeout
        check_interval = min(0.5, self.options.health_check_interval)

        while time.monotonic() < deadline:
            if self._stop_event.is_set():
                return False

            # Check if process is still running
            proc = self._process
            if proc and proc.poll() is not None:
                logger.warning(f"Process exited during startup with code {proc.returncode}")
                return False

            try:
                healthy = check()
            except Exception as e:
                logger.debug(f"Health check failed: {e}")
                healthy = False

            if healthy:
                self._set_state(ProcessState.RUNNING)
                self._last_healthy_time = time.time()
                logger.info("Process is healthy")
                return True

            # Interruptible sleep so stop() is honoured promptly.
            if self._stop_event.wait(check_interval):
                return False

        logger.warning("Process did not become healthy within timeout")
        return False

    def _check_health(self) -> None:
        """Run one health check and move between RUNNING and UNHEALTHY accordingly."""
        check = self.options.health_check
        if not check:
            return

        try:
            healthy = check()
        except Exception as e:
            if self.state == ProcessState.RUNNING:
                self._set_state(ProcessState.UNHEALTHY)
            logger.warning(f"Health check error: {e}")
            return

        if healthy:
            self._last_healthy_time = time.time()
            if self.state == ProcessState.UNHEALTHY:
                self._set_state(ProcessState.RUNNING)
        elif self.state == ProcessState.RUNNING:
            self._set_state(ProcessState.UNHEALTHY)
            logger.warning("Process became unhealthy")

    def _maybe_reset_restart_count(self) -> None:
        """Reset the restart budget once the process has been stable long enough.

        "Stable" means: currently RUNNING and at least ``restart_window``
        seconds have elapsed since the last restart attempt.
        """
        if self._restart_count == 0 or self._last_restart_time is None:
            return
        if self.state != ProcessState.RUNNING:
            return
        if time.time() - self._last_restart_time >= self.options.restart_window:
            self._restart_count = 0
            logger.info("Process stable, reset restart count")

    def _monitor_loop(self) -> None:
        """Background thread that monitors process health and handles restarts."""
        # First, wait for the process to become healthy; on failure try to
        # recover, and give up supervising only if recovery is impossible.
        if not self._wait_for_healthy() and not self._handle_failure():
            return

        # Now monitor ongoing health
        while not self._stop_event.is_set():
            if self._stop_event.wait(self.options.health_check_interval):
                break

            # Check if process is still running
            proc = self._process
            if proc and proc.poll() is not None:
                logger.warning(f"Process exited unexpectedly with code {proc.returncode}")
                if not self._handle_failure():
                    return
                # Restart succeeded: keep supervising the new process.
                continue

            # Run health check if configured
            self._check_health()

            # Reset restart count if process has been stable
            self._maybe_reset_restart_count()

    def _handle_failure(self) -> bool:
        """Handle process failure, restarting it if allowed.

        Retries until a restarted process becomes healthy, the restart budget
        is exhausted, restarts are disabled, or a stop is requested.

        Returns:
            True if a restarted process is running and healthy; False if
            supervision should end (state set to FAILED, or a stop is in
            progress and ``stop()`` owns the final state transition).
        """
        while True:
            if self._stop_event.is_set():
                # A requested stop is not a failure; stop() tears things down.
                return False

            # Make sure the failed instance is really gone before replacing
            # it, otherwise it would be orphaned when _process is overwritten.
            proc = self._process
            if proc and proc.poll() is None:
                self._terminate(proc)
            self._close_log_files()

            if not self.options.restart_on_failure:
                self._set_state(ProcessState.FAILED)
                logger.info("Process failed, restart disabled")
                return False

            if self._restart_count >= self.options.max_restarts:
                self._set_state(ProcessState.FAILED)
                logger.error(f"Process failed after {self._restart_count} restart attempts")
                return False

            # Attempt restart
            self._restart_count += 1
            self._last_restart_time = time.time()
            logger.info(f"Attempting restart {self._restart_count}/{self.options.max_restarts}")

            # Interruptible delay: returns True as soon as stop() is called.
            if self._stop_event.wait(self.options.restart_delay):
                return False

            self._set_state(ProcessState.STARTING)
            try:
                new_proc = self._spawn()
            except Exception as e:
                logger.error(f"Failed to restart process: {e}")
                continue  # counts as a used attempt; try again if budget allows

            self._process = new_proc
            logger.info(f"Restarted process with PID {new_proc.pid}")

            if self._stop_event.is_set():
                # stop() raced with the spawn and may already have finished;
                # terminate the new child ourselves so it is not leaked.
                self._terminate(new_proc)
                return False

            if self._wait_for_healthy():
                return True
            # Not healthy: loop around, which stops it and retries.

    def _terminate(self, proc: subprocess.Popen) -> None:
        """Stop ``proc``'s process group: graceful signal first, then SIGKILL."""
        # Never signal a process that has already been reaped: its PID may
        # have been recycled for an unrelated process.
        if proc.poll() is not None:
            return

        # Try graceful shutdown first
        try:
            os.killpg(os.getpgid(proc.pid), self.options.shutdown_signal)
        except (ProcessLookupError, PermissionError):
            # Process already gone
            return

        # Wait for graceful shutdown
        try:
            proc.wait(timeout=self.options.shutdown_timeout)
            logger.info(f"Process stopped gracefully with code {proc.returncode}")
            return
        except subprocess.TimeoutExpired:
            pass

        # Force kill
        logger.warning("Graceful shutdown timeout, sending SIGKILL")
        try:
            os.killpg(os.getpgid(proc.pid), signal.SIGKILL)
            proc.wait(timeout=5)
        except (ProcessLookupError, PermissionError, subprocess.TimeoutExpired):
            pass

    def _do_stop(self) -> None:
        """Actually stop the process (internal, no state management)."""
        proc = self._process
        if not proc:
            return
        self._terminate(proc)

    def stop(self) -> None:
        """
        Stop the subprocess.

        Attempts graceful shutdown first, then SIGKILL if timeout expires.
        Does nothing if the process is already STOPPED or FAILED.
        """
        if self.state in (ProcessState.STOPPED, ProcessState.FAILED):
            return

        self._set_state(ProcessState.STOPPING)
        self._stop_event.set()

        # Wait for monitor thread to stop. Skip the join when stop() is called
        # from the monitor thread itself (e.g. inside the state callback),
        # since a thread cannot join itself.
        monitor = self._monitor_thread
        if (
            monitor is not None
            and monitor is not threading.current_thread()
            and monitor.is_alive()
        ):
            monitor.join(timeout=1)

        self._do_stop()
        self._close_log_files()
        self._set_state(ProcessState.STOPPED)
        self._process = None

    def wait(self, timeout: float | None = None) -> int | None:
        """
        Wait for the process to exit.

        Args:
            timeout: Maximum seconds to wait, or None for indefinite.

        Returns:
            Process return code, or None if timeout or no process is held.
        """
        proc = self._process
        if not proc:
            return None

        try:
            return proc.wait(timeout=timeout)
        except subprocess.TimeoutExpired:
            return None

    def is_running(self) -> bool:
        """Check if the process is currently running (STARTING, RUNNING or UNHEALTHY)."""
        return self.state in (
            ProcessState.STARTING,
            ProcessState.RUNNING,
            ProcessState.UNHEALTHY,
        )

    def __enter__(self) -> ProcessOwner:
        self.start()
        return self

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        exc_tb: TracebackType | None,
    ) -> None:
        self.stop()