claude-mpm 4.2.28__py3-none-any.whl → 4.2.32__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- claude_mpm/VERSION +1 -1
- claude_mpm/agents/templates/agentic-coder-optimizer.json +233 -0
- claude_mpm/agents/templates/agentic-coder-optimizer.md +44 -0
- claude_mpm/agents/templates/agentic_coder_optimizer.json +20 -4
- claude_mpm/services/cli/unified_dashboard_manager.py +46 -84
- claude_mpm/services/monitor/daemon.py +53 -163
- claude_mpm/services/monitor/daemon_manager.py +739 -0
- {claude_mpm-4.2.28.dist-info → claude_mpm-4.2.32.dist-info}/METADATA +1 -1
- {claude_mpm-4.2.28.dist-info → claude_mpm-4.2.32.dist-info}/RECORD +13 -10
- {claude_mpm-4.2.28.dist-info → claude_mpm-4.2.32.dist-info}/WHEEL +0 -0
- {claude_mpm-4.2.28.dist-info → claude_mpm-4.2.32.dist-info}/entry_points.txt +0 -0
- {claude_mpm-4.2.28.dist-info → claude_mpm-4.2.32.dist-info}/licenses/LICENSE +0 -0
- {claude_mpm-4.2.28.dist-info → claude_mpm-4.2.32.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,739 @@
|
|
1
|
+
"""
|
2
|
+
Unified Daemon Manager Service
|
3
|
+
==============================
|
4
|
+
|
5
|
+
WHY: This service consolidates ALL daemon lifecycle operations into a single place,
|
6
|
+
eliminating duplicate code and race conditions from having daemon management logic
|
7
|
+
scattered across multiple files.
|
8
|
+
|
9
|
+
DESIGN DECISIONS:
|
10
|
+
- Single source of truth for all daemon operations
|
11
|
+
- Robust port cleanup with retry logic
|
12
|
+
- Thread-safe operations with proper locking
|
13
|
+
- Comprehensive error handling and recovery
|
14
|
+
- Supports both foreground and background/daemon modes
|
15
|
+
- Manages PID files, port conflicts, and process lifecycle
|
16
|
+
|
17
|
+
This replaces duplicate logic that was in:
|
18
|
+
- UnifiedMonitorDaemon._cleanup_port_conflicts()
|
19
|
+
- UnifiedDashboardManager._cleanup_port_conflicts()
|
20
|
+
- Various daemon startup/stop logic spread across files
|
21
|
+
"""
|
22
|
+
|
23
|
+
import os
|
24
|
+
import signal
|
25
|
+
import socket
|
26
|
+
import subprocess
|
27
|
+
import sys
|
28
|
+
import tempfile
|
29
|
+
import threading
|
30
|
+
import time
|
31
|
+
from pathlib import Path
|
32
|
+
from typing import Optional, Tuple
|
33
|
+
|
34
|
+
from ...core.logging_config import get_logger
|
35
|
+
|
36
|
+
|
37
|
+
class DaemonManager:
|
38
|
+
"""Centralized manager for all daemon lifecycle operations.
|
39
|
+
|
40
|
+
This is the SINGLE source of truth for:
|
41
|
+
- Port conflict resolution
|
42
|
+
- Process cleanup
|
43
|
+
- Daemon startup/stop
|
44
|
+
- PID file management
|
45
|
+
- Service detection
|
46
|
+
"""
|
47
|
+
|
48
|
+
# Class-level lock for thread safety.
# It must be re-entrant: start_daemon() calls stop_daemon() and
# cleanup_port_conflicts() while holding it, and stop_daemon() calls
# cleanup_port_conflicts() while holding it. Each of those acquires the lock
# again, which would self-deadlock with a plain threading.Lock.
_lock = threading.RLock()
|
50
|
+
|
51
|
+
def __init__(
    self,
    port: int = 8765,
    host: str = "localhost",
    pid_file: Optional[str] = None,
    log_file: Optional[str] = None,
):
    """Create a manager bound to one host/port pair.

    Args:
        port: Port number for the daemon.
        host: Host to bind to.
        pid_file: Path to the PID file; a falsy value selects the default
            returned by ``_get_default_pid_file()``.
        log_file: Path to the daemon log file; a falsy value selects the
            default returned by ``_get_default_log_file()``.
    """
    self.port = port
    self.host = host
    self.logger = get_logger(__name__)

    # Explicit paths win; otherwise fall back to the per-project defaults.
    self.pid_file = Path(pid_file) if pid_file else self._get_default_pid_file()

    if log_file:
        self.log_file = Path(log_file)
    else:
        self.log_file = self._get_default_log_file()

    # Filled in by daemonize(); used to pass startup status to the parent.
    self.startup_status_file = None
|
80
|
+
|
81
|
+
def _get_default_pid_file(self) -> Path:
    """Return ``<cwd>/.claude-mpm/monitor-daemon.pid``.

    The ``.claude-mpm`` directory is created in the current working
    directory if it does not exist yet; the PID file itself is not created.
    """
    state_dir = Path.cwd() / ".claude-mpm"
    state_dir.mkdir(exist_ok=True)
    return state_dir / "monitor-daemon.pid"
|
87
|
+
|
88
|
+
def _get_default_log_file(self) -> Path:
    """Return ``<cwd>/.claude-mpm/monitor-daemon.log``.

    The ``.claude-mpm`` directory is created in the current working
    directory if it does not exist yet; the log file itself is not created.
    """
    state_dir = Path.cwd() / ".claude-mpm"
    state_dir.mkdir(exist_ok=True)
    return state_dir / "monitor-daemon.log"
|
94
|
+
|
95
|
+
def cleanup_port_conflicts(self, max_retries: int = 3) -> bool:
    """Free the daemon port, killing whatever holds it if necessary.

    Each attempt first checks whether the port can be bound; if not it asks
    ``_kill_processes_on_port()`` to clear it, waits, and checks again.

    Args:
        max_retries: Maximum number of cleanup attempts.

    Returns:
        True as soon as the port is found to be available, False once all
        attempts are used up (including when ``max_retries`` is 0).
    """
    with self._lock:
        last_attempt = max_retries - 1

        for attempt in range(max_retries):
            is_retry = attempt > 0
            if is_retry:
                self.logger.info(
                    f"Port cleanup attempt {attempt + 1}/{max_retries}"
                )

            # Nothing to do when the port can already be bound.
            if self._is_port_available():
                self.logger.debug(f"Port {self.port} is available")
                return True

            self.logger.info(f"Port {self.port} is in use, attempting cleanup")

            if self._kill_processes_on_port():
                # Give the OS time to release the port; retries wait longer.
                settle_seconds = 3 if is_retry else 2
                time.sleep(settle_seconds)

                if self._is_port_available():
                    self.logger.info(f"Port {self.port} successfully cleaned up")
                    return True

            # Back off before the next attempt, but not after the last one.
            if attempt < last_attempt:
                time.sleep(3)

        self.logger.error(
            f"Failed to clean up port {self.port} after {max_retries} attempts"
        )
        return False
|
139
|
+
|
140
|
+
def _is_port_available(self) -> bool:
    """Check whether ``(self.host, self.port)`` can be bound right now.

    A throw-away TCP socket with ``SO_REUSEADDR`` set is bound to the
    address. The socket is always closed, including when the bind fails, so
    repeated probes of a busy port do not leak file descriptors.

    Returns:
        True if the bind succeeded, False if any ``OSError`` was raised
        (address in use, unresolvable host, permission denied, ...).
    """
    try:
        # The context manager closes the probe socket on every path.
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as probe:
            probe.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
            probe.bind((self.host, self.port))
        return True
    except OSError:
        return False
|
154
|
+
|
155
|
+
def _kill_processes_on_port(self) -> bool:
    """Try each kill strategy in order until one reports success.

    The order is ``_kill_using_lsof``, then ``_kill_using_pid_file``, then
    ``_kill_claude_mpm_processes``. A later strategy only runs when every
    earlier one returned a falsy value.

    Returns:
        True if some strategy returned a truthy value, False if none did or
        if an exception was raised (the exception is logged).
    """
    strategy_names = (
        "_kill_using_lsof",  # most reliable, tried first
        "_kill_using_pid_file",  # fall back to our known PID file
        "_kill_claude_mpm_processes",  # last resort: scan the process list
    )
    try:
        for strategy_name in strategy_names:
            if getattr(self, strategy_name)():
                return True
        return False
    except Exception as e:
        self.logger.error(f"Error killing processes on port: {e}")
        return False
|
179
|
+
|
180
|
+
def _kill_using_lsof(self) -> bool:
    """Find the processes on ``self.port`` with ``lsof`` and terminate them.

    Only processes whose command name (from ``ps -o comm=``) contains
    "python" or "claude" are signalled: SIGTERM first, then SIGKILL if the
    process still exists one second later. The first process that matches
    neither name stops the scan.

    Returns:
        True if lsof is missing, reported nothing, or every listed PID was
        handled; False if a non-Python/Claude process holds the port or an
        unexpected error occurred.
    """
    try:
        lsof = subprocess.run(
            ["lsof", "-ti", f":{self.port}"],
            capture_output=True,
            text=True,
            check=False,
        )

        listing = lsof.stdout.strip()
        if lsof.returncode != 0 or not listing:
            self.logger.debug(f"No processes found using port {self.port}")
            return True

        pids = listing.split("\n")
        self.logger.info(f"Found processes using port {self.port}: {pids}")

        for pid_str in pids:
            try:
                pid = int(pid_str.strip())

                # Ask ps for the command name so we only touch our own kind.
                ps = subprocess.run(
                    ["ps", "-p", str(pid), "-o", "comm="],
                    capture_output=True,
                    text=True,
                    check=False,
                )
                process_name = ps.stdout.strip().lower()

                if "python" not in process_name and "claude" not in process_name:
                    self.logger.warning(
                        f"Process {pid} ({process_name}) is not a Claude MPM process"
                    )
                    return False

                self.logger.info(f"Killing Python/Claude process {pid}")
                os.kill(pid, signal.SIGTERM)

                # Allow a moment for a graceful shutdown.
                time.sleep(1)

                try:
                    # Signal 0 only probes for existence.
                    os.kill(pid, 0)
                    self.logger.warning(
                        f"Process {pid} didn't terminate, force killing"
                    )
                    os.kill(pid, signal.SIGKILL)
                    time.sleep(0.5)
                except ProcessLookupError:
                    # Already gone after SIGTERM.
                    pass

            except (ValueError, ProcessLookupError) as e:
                self.logger.debug(f"Error handling PID {pid_str}: {e}")
                continue

        return True

    except FileNotFoundError:
        # A required executable (normally lsof) is not installed.
        self.logger.debug("lsof not available, using alternative methods")
        return True
    except Exception as e:
        self.logger.error(f"Error using lsof: {e}")
        return False
|
248
|
+
|
249
|
+
def _kill_using_pid_file(self) -> bool:
    """Terminate the process recorded in ``self.pid_file``.

    The process receives SIGTERM and, if it still exists one second later,
    SIGKILL. The PID file is removed afterwards.

    A non-positive PID is never signalled: ``os.kill`` treats 0 as "my own
    process group", -1 as "every process I may signal" and other negative
    values as process groups, so a corrupt PID file could otherwise take
    down unrelated processes. Such a file is removed and reported as an
    error.

    Returns:
        True if there is no PID file, or the recorded process was
        terminated or was already gone; False if the PID file is invalid or
        an unexpected error occurred.
    """
    try:
        if not self.pid_file.exists():
            return True

        with open(self.pid_file) as f:
            pid = int(f.read().strip())

        if pid <= 0:
            # Refuse group/broadcast kill semantics and drop the bad file.
            self.logger.error(f"Ignoring invalid PID {pid} in PID file")
            self.pid_file.unlink(missing_ok=True)
            return False

        self.logger.info(f"Found PID {pid} in PID file")

        try:
            os.kill(pid, signal.SIGTERM)
            time.sleep(1)

            try:
                # Signal 0 only probes for existence.
                os.kill(pid, 0)
                os.kill(pid, signal.SIGKILL)
                time.sleep(0.5)
            except ProcessLookupError:
                # Exited after SIGTERM.
                pass
        except ProcessLookupError:
            # Process doesn't exist; only the stale PID file needs removing.
            pass

        self.pid_file.unlink(missing_ok=True)
        return True

    except Exception as e:
        self.logger.error(f"Error killing process from PID file: {e}")
        return False
|
289
|
+
|
290
|
+
def _kill_claude_mpm_processes(self) -> bool:
    """Send SIGTERM to processes that look like claude-mpm monitors.

    ``ps aux`` output is scanned for lines containing both "claude" and
    "monitor" (case-insensitive); the second whitespace-separated field is
    taken as the PID. The calling process is always skipped, because the
    command running this cleanup usually matches the same filter and would
    otherwise terminate itself.

    Returns:
        True if the scan completed (whether or not anything was killed),
        False if ``ps`` failed or an unexpected error occurred.
    """
    try:
        result = subprocess.run(
            ["ps", "aux"], capture_output=True, text=True, check=False
        )
        if result.returncode != 0:
            return False

        own_pid = os.getpid()
        killed_any = False

        for line in result.stdout.strip().split("\n"):
            lowered = line.lower()
            if "claude" not in lowered or "monitor" not in lowered:
                continue

            parts = line.split()
            if len(parts) <= 1:
                continue

            try:
                pid = int(parts[1])
                if pid == own_pid:
                    # Never signal ourselves.
                    continue

                self.logger.info(f"Killing claude-mpm monitor process {pid}")
                os.kill(pid, signal.SIGTERM)
                killed_any = True
                time.sleep(0.5)
            except (ValueError, ProcessLookupError):
                # Header line / non-numeric field, or the process just exited.
                continue
            except PermissionError:
                # ps aux lists other users' processes too; one we may not
                # signal must not abort the rest of the scan.
                self.logger.debug(f"Not permitted to signal process {parts[1]}")
                continue

        if killed_any:
            time.sleep(1)  # Give processes time to exit

        return True

    except Exception as e:
        self.logger.error(f"Error killing claude-mpm processes: {e}")
        return False
|
329
|
+
|
330
|
+
def is_our_service(self) -> Tuple[bool, Optional[int]]:
    """Check if the service on the port is our claude-mpm monitor.

    Two checks are made in order:

    1. If the PID file exists, the recorded process must exist and its
       command name (``ps -o comm=``) must contain "python". A PID file that
       is unparsable or names a missing process is removed.
    2. Otherwise ``GET http://<host>:<port>/health`` must return 200 with a
       JSON body whose text contains "claude" or "mpm".

    Returns:
        ``(True, pid)`` if the service looks like ours (``pid`` may be None
        when found via the health endpoint and the PID lookup fails),
        otherwise ``(False, None)``.
    """
    try:
        if self.pid_file.exists():
            try:
                with open(self.pid_file) as f:
                    pid = int(f.read().strip())

                # Signal 0 raises ProcessLookupError if the process is gone.
                os.kill(pid, 0)

                process_info = subprocess.run(
                    ["ps", "-p", str(pid), "-o", "comm="],
                    capture_output=True,
                    text=True,
                    check=False,
                )
                if "python" in process_info.stdout.lower():
                    return True, pid

            except (ValueError, ProcessLookupError, subprocess.CalledProcessError):
                # Unparsable PID file or no such process: the file is stale.
                self.pid_file.unlink(missing_ok=True)

        # Fall back to probing the health endpoint. This is best-effort, so
        # failures (requests not installed, connection refused, non-JSON
        # body, ...) are logged and treated as "not ours". Only Exception is
        # caught so KeyboardInterrupt/SystemExit still propagate.
        try:
            import requests

            response = requests.get(
                f"http://{self.host}:{self.port}/health", timeout=2
            )
            if response.status_code == 200:
                payload = str(response.json()).lower()
                if "claude" in payload or "mpm" in payload:
                    # It's likely our service, try to find PID
                    return True, self._find_service_pid()
        except Exception as e:
            self.logger.debug(f"Health check on port {self.port} failed: {e}")

        return False, None

    except Exception as e:
        self.logger.error(f"Error checking service ownership: {e}")
        return False, None
|
387
|
+
|
388
|
+
def _find_service_pid(self) -> Optional[int]:
    """Find PID of service on our port using lsof.

    Runs ``lsof -ti :<port>`` and returns the first PID it prints.

    Returns:
        The PID as an int, or None if lsof is unavailable, fails, prints
        nothing, or prints something that is not an integer.
    """
    try:
        result = subprocess.run(
            ["lsof", "-ti", f":{self.port}"],
            capture_output=True,
            text=True,
            check=False,
        )
        listing = result.stdout.strip()
        if result.returncode == 0 and listing:
            return int(listing.split("\n")[0].strip())
    except (OSError, ValueError, subprocess.SubprocessError) as e:
        # OSError: lsof missing or not executable; ValueError: non-numeric
        # output. Anything else (e.g. KeyboardInterrupt) is not swallowed.
        self.logger.debug(f"Could not determine service PID via lsof: {e}")

    return None
|
408
|
+
|
409
|
+
def start_daemon(self, force_restart: bool = False) -> bool:
    """Start the daemon, cleaning up any previous instance first.

    Args:
        force_restart: Stop and restart the daemon if it is already running.
            When False, an already-running daemon counts as success.

    Returns:
        True if the daemon is already running (without ``force_restart``),
        otherwise the result of ``daemonize()``; False if the existing
        daemon could not be stopped or the port could not be freed.
    """
    # NOTE(review): stop_daemon() and cleanup_port_conflicts() are called
    # while self._lock is held and each acquires self._lock itself, so the
    # lock has to be re-entrant for this method to make progress.
    with self._lock:
        already_running = self.is_running()

        if already_running and not force_restart:
            pid = self.get_pid()
            self.logger.info(f"Daemon already running with PID {pid}")
            return True

        if already_running:
            self.logger.info("Force restarting daemon")
            stopped = self.stop_daemon()
            if not stopped:
                self.logger.error("Failed to stop existing daemon")
                return False

            # Let the old instance finish releasing its resources.
            time.sleep(2)

        port_is_free = self.cleanup_port_conflicts()
        if not port_is_free:
            self.logger.error(f"Cannot start daemon - port {self.port} is in use")
            return False

        return self.daemonize()
|
442
|
+
|
443
|
+
def daemonize(self) -> bool:
    """Daemonize the current process using the double-fork technique.

    The calling process becomes the "parent": it forks, then waits for the
    daemon to report its startup status through a temporary status file.
    The first child detaches (``chdir("/")``, ``setsid``, ``umask(0)``),
    forks again and exits. The grandchild is the daemon: it writes the PID
    file, redirects the standard streams, installs signal handlers and
    reports success.

    Returns:
        In the parent: the result of ``_parent_wait_for_startup()``, or
        False if the first fork (or its preparation) raised ``OSError``.
        In the daemon (grandchild): True; the caller is then responsible
        for running the actual service. The intermediate child never
        returns.
    """
    try:
        # Clean up asyncio event loops before forking
        self._cleanup_event_loops()

        # Status file the daemon uses to tell the parent how startup went.
        with tempfile.NamedTemporaryFile(
            mode="w", delete=False, suffix=".status"
        ) as f:
            self.startup_status_file = f.name
            f.write("starting")

        # First fork
        pid = os.fork()
        if pid > 0:
            # Parent process - wait for child to confirm startup
            return self._parent_wait_for_startup(pid)

    except OSError as e:
        self.logger.error(f"First fork failed: {e}")
        return False

    # ---- first child from here on ----

    # Pin relative paths to the original working directory before leaving
    # it; otherwise chdir("/") would silently relocate them under "/".
    self.pid_file = Path(self.pid_file).absolute()
    self.log_file = Path(self.log_file).absolute()

    # Decouple from parent
    os.chdir("/")
    os.setsid()
    # NOTE(review): umask(0) means files the daemon creates (PID file, log
    # file) get no permission bits masked - confirm this is intended.
    os.umask(0)

    try:
        # Second fork
        pid = os.fork()
        if pid > 0:
            # First child exits. os._exit() is used instead of sys.exit()
            # so this forked copy does not unwind the parent's call stack
            # (finally blocks, context managers, atexit handlers).
            os._exit(0)
    except OSError as e:
        self.logger.error(f"Second fork failed: {e}")
        self._report_startup_error(f"Second fork failed: {e}")
        os._exit(1)

    # ---- grandchild: the actual daemon ----

    try:
        self.write_pid_file()
    except Exception as e:
        # Tell the waiting parent right away instead of letting it run into
        # its startup timeout.
        self._report_startup_error(f"Failed to write PID file: {e}")
        os._exit(1)

    self._redirect_streams()
    self._setup_signal_handlers()

    self.logger.info(f"Daemon process started with PID {os.getpid()}")

    # Report successful startup
    self._report_startup_success()

    # Note: Daemon process continues running
    # Caller is responsible for running the actual service
    return True
|
507
|
+
|
508
|
+
def stop_daemon(self, timeout: int = 10) -> bool:
    """Stop the daemon recorded in the PID file.

    SIGTERM is sent first; if the process still exists after ``timeout``
    seconds it is sent SIGKILL. The PID file is removed once the process is
    gone or has been force-killed. When no PID is recorded, a port cleanup
    is still attempted.

    Args:
        timeout: Maximum number of seconds to wait for a graceful exit.

    Returns:
        True if there was nothing to stop or the stop sequence completed,
        False if an unexpected error occurred.
    """
    # NOTE(review): cleanup_port_conflicts() is called below while
    # self._lock is held and acquires self._lock itself, so the lock has to
    # be re-entrant for that branch to make progress.
    with self._lock:
        try:
            pid = self.get_pid()
            if not pid:
                self.logger.info("No daemon PID found")
                # Something else may still hold the port.
                self.cleanup_port_conflicts()
                return True

            self.logger.info(f"Stopping daemon with PID {pid}")

            # Ask politely first.
            try:
                os.kill(pid, signal.SIGTERM)
            except ProcessLookupError:
                # Nothing to stop; only the stale PID file remains.
                self.cleanup_pid_file()
                return True

            # Poll until the process disappears or the timeout elapses.
            began = time.time()
            while time.time() - began < timeout:
                try:
                    os.kill(pid, 0)  # existence probe only
                except ProcessLookupError:
                    self.cleanup_pid_file()
                    return True
                time.sleep(0.5)

            self.logger.warning("Daemon didn't stop gracefully, force killing")
            try:
                os.kill(pid, signal.SIGKILL)
                time.sleep(1)
            except ProcessLookupError:
                # Exited between the last probe and the SIGKILL.
                pass

            self.cleanup_pid_file()
            return True

        except Exception as e:
            self.logger.error(f"Error stopping daemon: {e}")
            return False
|
561
|
+
|
562
|
+
def is_running(self) -> bool:
    """Check if the daemon recorded in the PID file is running.

    Existence is probed with ``os.kill(pid, 0)``. If the process is gone,
    the stale PID file is removed.

    Returns:
        True if a process with the recorded PID exists (including one we
        are not permitted to signal), False if there is no recorded PID or
        the process does not exist.
    """
    pid = self.get_pid()
    if not pid:
        return False

    try:
        # Signal 0 performs error checking only; nothing is delivered.
        os.kill(pid, 0)
    except ProcessLookupError:
        # Process doesn't exist
        self.cleanup_pid_file()
        return False
    except PermissionError:
        # The process exists but belongs to someone else. That still means
        # "running", and must not surface as an unhandled exception.
        return True

    return True
|
581
|
+
|
582
|
+
def get_pid(self) -> Optional[int]:
    """Get daemon PID from PID file.

    Non-positive values are rejected: callers pass the result to
    ``os.kill``, where 0 and negative numbers address process groups (or
    every process) rather than a single process.

    Returns:
        The PID as a positive int, or None if the PID file is missing,
        unreadable, unparsable, or holds a non-positive number.
    """
    try:
        if not self.pid_file.exists():
            return None

        with open(self.pid_file) as f:
            pid = int(f.read().strip())

    except Exception as e:
        self.logger.error(f"Error reading PID file: {e}")
        return None

    if pid <= 0:
        self.logger.error(f"Ignoring invalid PID {pid} in PID file")
        return None

    return pid
|
598
|
+
|
599
|
+
def write_pid_file(self):
    """Record the current process ID in ``self.pid_file``.

    Missing parent directories are created. Any failure is logged and then
    re-raised to the caller.
    """
    current_pid = str(os.getpid())
    try:
        self.pid_file.parent.mkdir(parents=True, exist_ok=True)
        with open(self.pid_file, "w") as handle:
            handle.write(current_pid)
        self.logger.debug(f"PID file written: {self.pid_file}")
    except Exception as e:
        self.logger.error(f"Error writing PID file: {e}")
        raise
|
609
|
+
|
610
|
+
def cleanup_pid_file(self):
    """Delete ``self.pid_file`` if present.

    A missing file is not an error. Any other failure is logged and
    swallowed.
    """
    try:
        self.pid_file.unlink(missing_ok=True)
    except Exception as e:
        self.logger.error(f"Error removing PID file: {e}")
        return

    try:
        self.logger.debug("PID file removed")
    except Exception as e:
        self.logger.error(f"Error removing PID file: {e}")
|
617
|
+
|
618
|
+
def _cleanup_event_loops(self):
    """Tear down the current asyncio event loop ahead of a fork.

    If ``asyncio.get_event_loop()`` yields an open loop, its tasks are
    cancelled, the loop is stopped if running, unset as the current loop
    and closed. ``RuntimeError`` (e.g. no event loop) is ignored; any other
    error is logged at debug level. Nothing is returned.
    """
    try:
        import asyncio

        try:
            current_loop = asyncio.get_event_loop()
            if current_loop and not current_loop.is_closed():
                # Cancel whatever is still scheduled on the loop.
                for pending_task in asyncio.all_tasks(current_loop):
                    pending_task.cancel()

                if current_loop.is_running():
                    current_loop.stop()

                # Detach first, then close.
                asyncio.set_event_loop(None)
                current_loop.close()
        except RuntimeError:
            # No event loop
            pass

    except Exception as e:
        self.logger.debug(f"Error cleaning up event loops: {e}")
|
644
|
+
|
645
|
+
def _redirect_streams(self):
    """Point the standard streams away from the terminal.

    stdout and stderr are flushed, stdin is redirected to ``/dev/null``, and
    stdout/stderr are redirected (at file-descriptor level) to
    ``self.log_file`` opened in append mode. Missing parent directories of
    the log file are created. Any failure is logged and swallowed.
    """
    try:
        for stream in (sys.stdout, sys.stderr):
            stream.flush()

        # stdin <- /dev/null
        with open("/dev/null") as devnull:
            os.dup2(devnull.fileno(), sys.stdin.fileno())

        # stdout, stderr -> log file
        self.log_file.parent.mkdir(parents=True, exist_ok=True)
        with open(self.log_file, "a") as sink:
            for stream in (sys.stdout, sys.stderr):
                os.dup2(sink.fileno(), stream.fileno())

    except Exception as e:
        self.logger.error(f"Error redirecting streams: {e}")
|
663
|
+
|
664
|
+
def _setup_signal_handlers(self):
    """Install one shutdown handler for SIGTERM and SIGINT.

    On either signal the handler logs the signal number, removes the PID
    file and exits the process with status 0.
    """

    def shutdown(signum, frame):
        self.logger.info(f"Received signal {signum}, shutting down")
        self.cleanup_pid_file()
        sys.exit(0)

    for handled_signal in (signal.SIGTERM, signal.SIGINT):
        signal.signal(handled_signal, shutdown)
|
674
|
+
|
675
|
+
def _parent_wait_for_startup(self, child_pid: int, timeout: float = 10.0) -> bool:
    """Parent process waits for child to confirm startup.

    The status file named by ``self.startup_status_file`` is polled every
    0.1 s until it reads ``success`` or ``error:<message>``, or until
    ``timeout`` seconds have passed.

    Whatever the outcome, the status file is removed and the first child is
    reaped (non-blocking), so neither a stray temp file nor a zombie process
    is left behind.

    Args:
        child_pid: PID of the first forked child.
        timeout: Maximum number of seconds to wait.

    Returns:
        True if the daemon reported ``success``; False on a reported error,
        on timeout, or on an unexpected exception.
    """
    started = False
    try:
        start_time = time.time()

        while time.time() - start_time < timeout:
            status_file = self.startup_status_file
            if not status_file or not Path(status_file).exists():
                time.sleep(0.1)
                continue

            try:
                with open(status_file) as f:
                    status = f.read().strip()
            except OSError:
                # The file may be mid-rewrite; try again on the next round.
                status = ""

            if status == "success":
                started = True
                break

            if status.startswith("error:"):
                error_msg = status[6:]
                self.logger.error(f"Daemon startup failed: {error_msg}")
                break

            time.sleep(0.1)
        else:
            # Loop ended without a verdict from the daemon.
            self.logger.error("Daemon startup timed out")

    except Exception as e:
        self.logger.error(f"Error waiting for daemon startup: {e}")
        started = False

    finally:
        # Always drop the temp status file, including on timeout.
        if self.startup_status_file:
            try:
                Path(self.startup_status_file).unlink(missing_ok=True)
            except OSError:
                pass

        # Reap the first child so it does not linger as a zombie. WNOHANG
        # keeps this from blocking if the child has not exited yet.
        try:
            os.waitpid(child_pid, os.WNOHANG)
        except OSError:
            # ChildProcessError: not our child, or already reaped.
            pass

    return started
|
722
|
+
|
723
|
+
def _report_startup_success(self):
    """Write ``success`` into the startup status file for the parent.

    Nothing happens when no status file is configured or the file no
    longer exists. A write failure is logged and swallowed.
    """
    status_file = self.startup_status_file
    if not status_file or not Path(status_file).exists():
        return

    try:
        with open(status_file, "w") as handle:
            handle.write("success")
    except Exception as e:
        self.logger.error(f"Error reporting startup success: {e}")
|
731
|
+
|
732
|
+
def _report_startup_error(self, error: str):
    """Write ``error:<error>`` into the startup status file for the parent.

    Nothing happens when no status file is configured or the file no
    longer exists. A write failure is logged and swallowed.

    Args:
        error: Message describing why startup failed.
    """
    status_file = self.startup_status_file
    if not status_file or not Path(status_file).exists():
        return

    try:
        with open(status_file, "w") as handle:
            handle.write(f"error:{error}")
    except Exception as e:
        self.logger.error(f"Error reporting startup error: {e}")
|