claude-mpm 4.2.29__py3-none-any.whl → 4.2.32__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- claude_mpm/VERSION +1 -1
- claude_mpm/services/cli/unified_dashboard_manager.py +39 -97
- claude_mpm/services/monitor/daemon.py +53 -163
- claude_mpm/services/monitor/daemon_manager.py +739 -0
- {claude_mpm-4.2.29.dist-info → claude_mpm-4.2.32.dist-info}/METADATA +1 -1
- {claude_mpm-4.2.29.dist-info → claude_mpm-4.2.32.dist-info}/RECORD +10 -9
- {claude_mpm-4.2.29.dist-info → claude_mpm-4.2.32.dist-info}/WHEEL +0 -0
- {claude_mpm-4.2.29.dist-info → claude_mpm-4.2.32.dist-info}/entry_points.txt +0 -0
- {claude_mpm-4.2.29.dist-info → claude_mpm-4.2.32.dist-info}/licenses/LICENSE +0 -0
- {claude_mpm-4.2.29.dist-info → claude_mpm-4.2.32.dist-info}/top_level.txt +0 -0
claude_mpm/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
4.2.
|
1
|
+
4.2.32
|
@@ -25,6 +25,7 @@ import requests
|
|
25
25
|
|
26
26
|
from ...core.logging_config import get_logger
|
27
27
|
from ...services.monitor.daemon import UnifiedMonitorDaemon
|
28
|
+
from ...services.monitor.daemon_manager import DaemonManager
|
28
29
|
from ...services.port_manager import PortManager
|
29
30
|
|
30
31
|
|
@@ -99,8 +100,9 @@ class UnifiedDashboardManager(IUnifiedDashboardManager):
|
|
99
100
|
host="localhost", port=port, daemon_mode=background
|
100
101
|
)
|
101
102
|
|
102
|
-
#
|
103
|
-
|
103
|
+
# Use daemon manager to check service ownership
|
104
|
+
daemon_mgr = DaemonManager(port=port, host="localhost")
|
105
|
+
is_ours, pid = daemon_mgr.is_our_service()
|
104
106
|
|
105
107
|
if is_ours and not force_restart:
|
106
108
|
# Our service is already running, just open browser if needed
|
@@ -115,12 +117,16 @@ class UnifiedDashboardManager(IUnifiedDashboardManager):
|
|
115
117
|
self.logger.info(
|
116
118
|
f"Force restarting our dashboard on port {port} (PID: {pid})"
|
117
119
|
)
|
118
|
-
#
|
119
|
-
|
120
|
+
# Use daemon manager for cleanup
|
121
|
+
daemon_mgr = DaemonManager(port=port, host="localhost")
|
122
|
+
daemon_mgr.cleanup_port_conflicts()
|
120
123
|
elif self.is_dashboard_running(port) and not force_restart:
|
121
124
|
# Different service is using the port - try to clean it up
|
122
|
-
self.logger.warning(
|
123
|
-
|
125
|
+
self.logger.warning(
|
126
|
+
f"Port {port} is in use by a different service, attempting cleanup"
|
127
|
+
)
|
128
|
+
daemon_mgr = DaemonManager(port=port, host="localhost")
|
129
|
+
daemon_mgr.cleanup_port_conflicts()
|
124
130
|
# Brief pause to ensure cleanup is complete
|
125
131
|
time.sleep(1)
|
126
132
|
|
@@ -130,39 +136,49 @@ class UnifiedDashboardManager(IUnifiedDashboardManager):
|
|
130
136
|
|
131
137
|
if background:
|
132
138
|
# Always try to clean up first before starting
|
133
|
-
self.logger.info(
|
134
|
-
|
139
|
+
self.logger.info(
|
140
|
+
f"Pre-emptively cleaning up port {port} before starting daemon"
|
141
|
+
)
|
142
|
+
daemon_mgr = DaemonManager(port=port, host="localhost")
|
143
|
+
if not daemon_mgr.cleanup_port_conflicts():
|
135
144
|
self.logger.error(f"Failed to clean up port {port}, cannot proceed")
|
136
145
|
return False, False
|
137
|
-
|
146
|
+
|
138
147
|
# Try to start daemon with retry on port conflicts
|
139
148
|
max_retries = 3
|
140
149
|
retry_count = 0
|
141
150
|
success = False
|
142
|
-
|
151
|
+
|
143
152
|
while retry_count < max_retries and not success:
|
144
153
|
if retry_count > 0:
|
145
|
-
self.logger.info(
|
146
|
-
|
154
|
+
self.logger.info(
|
155
|
+
f"Retry {retry_count}/{max_retries}: Cleaning up port {port}"
|
156
|
+
)
|
157
|
+
daemon_mgr = DaemonManager(port=port, host="localhost")
|
158
|
+
if not daemon_mgr.cleanup_port_conflicts():
|
147
159
|
self.logger.error(f"Cleanup failed on retry {retry_count}")
|
148
160
|
break
|
149
161
|
time.sleep(3) # Longer wait for cleanup to complete
|
150
|
-
|
162
|
+
|
151
163
|
# Start daemon in background mode with force restart if needed
|
152
164
|
success = daemon.start(force_restart=True) # Always force restart
|
153
|
-
|
165
|
+
|
154
166
|
if not success and retry_count < max_retries - 1:
|
155
167
|
# Check if it's a port conflict
|
156
168
|
if not self.port_manager.is_port_available(port):
|
157
|
-
self.logger.warning(
|
169
|
+
self.logger.warning(
|
170
|
+
f"Port {port} still in use, will retry cleanup"
|
171
|
+
)
|
158
172
|
retry_count += 1
|
159
173
|
else:
|
160
174
|
# Different kind of failure, don't retry
|
161
|
-
self.logger.error(
|
175
|
+
self.logger.error(
|
176
|
+
"Daemon start failed for reason other than port conflict"
|
177
|
+
)
|
162
178
|
break
|
163
179
|
else:
|
164
180
|
break
|
165
|
-
|
181
|
+
|
166
182
|
if success:
|
167
183
|
with self._lock:
|
168
184
|
self._background_daemons[port] = daemon
|
@@ -350,91 +366,17 @@ class UnifiedDashboardManager(IUnifiedDashboardManager):
|
|
350
366
|
def _cleanup_port_conflicts(self, port: int) -> bool:
|
351
367
|
"""
|
352
368
|
Try to clean up any processes using our port.
|
353
|
-
|
369
|
+
|
370
|
+
Delegates to the consolidated DaemonManager for consistent behavior.
|
371
|
+
|
354
372
|
Args:
|
355
373
|
port: Port to clean up
|
356
|
-
|
374
|
+
|
357
375
|
Returns:
|
358
376
|
True if cleanup was successful or not needed
|
359
377
|
"""
|
360
|
-
|
361
|
-
|
362
|
-
import signal
|
363
|
-
import time
|
364
|
-
|
365
|
-
# Find processes using the port
|
366
|
-
result = subprocess.run(
|
367
|
-
["lsof", "-ti", f":{port}"],
|
368
|
-
capture_output=True,
|
369
|
-
text=True
|
370
|
-
)
|
371
|
-
|
372
|
-
if result.returncode == 0 and result.stdout.strip():
|
373
|
-
pids = result.stdout.strip().split('\n')
|
374
|
-
self.logger.info(f"Found processes using port {port}: {pids}")
|
375
|
-
|
376
|
-
for pid_str in pids:
|
377
|
-
try:
|
378
|
-
pid = int(pid_str.strip())
|
379
|
-
# Try graceful termination first
|
380
|
-
import os
|
381
|
-
os.kill(pid, signal.SIGTERM)
|
382
|
-
self.logger.info(f"Sent SIGTERM to process {pid}")
|
383
|
-
except (ValueError, ProcessLookupError) as e:
|
384
|
-
self.logger.debug(f"Could not terminate process {pid_str}: {e}")
|
385
|
-
continue
|
386
|
-
|
387
|
-
# Give processes time to shut down gracefully
|
388
|
-
time.sleep(3)
|
389
|
-
|
390
|
-
# Check if port is still in use and force kill if needed
|
391
|
-
result = subprocess.run(
|
392
|
-
["lsof", "-ti", f":{port}"],
|
393
|
-
capture_output=True,
|
394
|
-
text=True
|
395
|
-
)
|
396
|
-
|
397
|
-
if result.returncode == 0 and result.stdout.strip():
|
398
|
-
remaining_pids = result.stdout.strip().split('\n')
|
399
|
-
self.logger.warning(f"Processes still using port {port}: {remaining_pids}, force killing")
|
400
|
-
|
401
|
-
for pid_str in remaining_pids:
|
402
|
-
try:
|
403
|
-
pid = int(pid_str.strip())
|
404
|
-
os.kill(pid, signal.SIGKILL)
|
405
|
-
self.logger.info(f"Force killed process {pid}")
|
406
|
-
except (ValueError, ProcessLookupError) as e:
|
407
|
-
self.logger.debug(f"Could not force kill process {pid_str}: {e}")
|
408
|
-
continue
|
409
|
-
|
410
|
-
# Longer pause after force kill to ensure port is fully released
|
411
|
-
time.sleep(5)
|
412
|
-
|
413
|
-
# Final verification that port is actually free
|
414
|
-
final_check = subprocess.run(
|
415
|
-
["lsof", "-ti", f":{port}"],
|
416
|
-
capture_output=True,
|
417
|
-
text=True
|
418
|
-
)
|
419
|
-
|
420
|
-
if final_check.returncode == 0 and final_check.stdout.strip():
|
421
|
-
self.logger.error(f"Failed to clean up port {port} - processes still running: {final_check.stdout.strip()}")
|
422
|
-
return False
|
423
|
-
|
424
|
-
self.logger.info(f"Successfully cleaned up processes on port {port}")
|
425
|
-
return True
|
426
|
-
else:
|
427
|
-
self.logger.debug(f"No processes found using port {port}")
|
428
|
-
return True
|
429
|
-
|
430
|
-
except FileNotFoundError:
|
431
|
-
# lsof not available, try alternative approach
|
432
|
-
self.logger.debug("lsof not available, skipping port cleanup")
|
433
|
-
return True
|
434
|
-
except Exception as e:
|
435
|
-
self.logger.warning(f"Error during port cleanup: {e}")
|
436
|
-
# Continue anyway - the port check will catch actual conflicts
|
437
|
-
return True
|
378
|
+
daemon_mgr = DaemonManager(port=port, host="localhost")
|
379
|
+
return daemon_mgr.cleanup_port_conflicts()
|
438
380
|
|
439
381
|
def start_server(
|
440
382
|
self, port: Optional[int] = None, timeout: int = 30, force_restart: bool = True
|
@@ -24,6 +24,7 @@ from typing import Optional
|
|
24
24
|
|
25
25
|
from ...core.logging_config import get_logger
|
26
26
|
from ..hook_installer_service import HookInstallerService
|
27
|
+
from .daemon_manager import DaemonManager
|
27
28
|
from .management.health import HealthMonitor
|
28
29
|
from .management.lifecycle import DaemonLifecycle
|
29
30
|
from .server import UnifiedMonitorServer
|
@@ -58,7 +59,15 @@ class UnifiedMonitorDaemon:
|
|
58
59
|
self.daemon_mode = daemon_mode
|
59
60
|
self.logger = get_logger(__name__)
|
60
61
|
|
61
|
-
#
|
62
|
+
# Use new consolidated DaemonManager for all daemon operations
|
63
|
+
self.daemon_manager = DaemonManager(
|
64
|
+
port=port,
|
65
|
+
host=host,
|
66
|
+
pid_file=pid_file or self._get_default_pid_file(),
|
67
|
+
log_file=log_file,
|
68
|
+
)
|
69
|
+
|
70
|
+
# Keep lifecycle for backward compatibility (delegates to daemon_manager)
|
62
71
|
self.lifecycle = DaemonLifecycle(
|
63
72
|
pid_file=pid_file or self._get_default_pid_file(),
|
64
73
|
log_file=log_file,
|
@@ -104,84 +113,13 @@ class UnifiedMonitorDaemon:
|
|
104
113
|
|
105
114
|
def _cleanup_port_conflicts(self) -> bool:
|
106
115
|
"""Try to clean up any processes using our port.
|
107
|
-
|
116
|
+
|
117
|
+
Delegates to the consolidated DaemonManager for consistent behavior.
|
118
|
+
|
108
119
|
Returns:
|
109
120
|
True if cleanup was successful, False otherwise
|
110
121
|
"""
|
111
|
-
|
112
|
-
# Find process using the port
|
113
|
-
import subprocess
|
114
|
-
result = subprocess.run(
|
115
|
-
["lsof", "-ti", f":{self.port}"],
|
116
|
-
capture_output=True,
|
117
|
-
text=True
|
118
|
-
)
|
119
|
-
|
120
|
-
if result.returncode == 0 and result.stdout.strip():
|
121
|
-
pids = result.stdout.strip().split('\n')
|
122
|
-
for pid_str in pids:
|
123
|
-
try:
|
124
|
-
pid = int(pid_str.strip())
|
125
|
-
self.logger.info(f"Found process {pid} using port {self.port}")
|
126
|
-
|
127
|
-
# Check if it's a claude-mpm process
|
128
|
-
process_info = subprocess.run(
|
129
|
-
["ps", "-p", str(pid), "-o", "comm="],
|
130
|
-
capture_output=True,
|
131
|
-
text=True
|
132
|
-
)
|
133
|
-
|
134
|
-
if "python" in process_info.stdout.lower() or "claude" in process_info.stdout.lower():
|
135
|
-
self.logger.info(f"Killing process {pid} (appears to be Python/Claude related)")
|
136
|
-
os.kill(pid, signal.SIGTERM)
|
137
|
-
time.sleep(1)
|
138
|
-
|
139
|
-
# Check if still alive
|
140
|
-
try:
|
141
|
-
os.kill(pid, 0)
|
142
|
-
# Still alive, force kill
|
143
|
-
self.logger.warning(f"Process {pid} didn't terminate, force killing")
|
144
|
-
os.kill(pid, signal.SIGKILL)
|
145
|
-
time.sleep(1)
|
146
|
-
except ProcessLookupError:
|
147
|
-
pass
|
148
|
-
else:
|
149
|
-
self.logger.warning(f"Process {pid} is not a Claude MPM process: {process_info.stdout}")
|
150
|
-
return False
|
151
|
-
except (ValueError, ProcessLookupError) as e:
|
152
|
-
self.logger.debug(f"Error handling PID {pid_str}: {e}")
|
153
|
-
continue
|
154
|
-
|
155
|
-
return True
|
156
|
-
|
157
|
-
except FileNotFoundError:
|
158
|
-
# lsof not available, try alternative method
|
159
|
-
self.logger.debug("lsof not available, using alternative cleanup")
|
160
|
-
|
161
|
-
# Check if there's an orphaned service we can identify
|
162
|
-
is_ours, pid = self.lifecycle.is_our_service(self.host)
|
163
|
-
if is_ours and pid:
|
164
|
-
try:
|
165
|
-
self.logger.info(f"Killing orphaned Claude MPM service (PID: {pid})")
|
166
|
-
os.kill(pid, signal.SIGTERM)
|
167
|
-
time.sleep(1)
|
168
|
-
|
169
|
-
# Check if still alive
|
170
|
-
try:
|
171
|
-
os.kill(pid, 0)
|
172
|
-
os.kill(pid, signal.SIGKILL)
|
173
|
-
time.sleep(1)
|
174
|
-
except ProcessLookupError:
|
175
|
-
pass
|
176
|
-
|
177
|
-
return True
|
178
|
-
except Exception as e:
|
179
|
-
self.logger.error(f"Failed to kill process: {e}")
|
180
|
-
|
181
|
-
except Exception as e:
|
182
|
-
self.logger.error(f"Error during port cleanup: {e}")
|
183
|
-
|
184
|
-
return False
|
122
|
+
return self.daemon_manager.cleanup_port_conflicts()
|
185
123
|
|
186
124
|
def _start_daemon(self, force_restart: bool = False) -> bool:
|
187
125
|
"""Start as background daemon process.
|
@@ -191,108 +129,60 @@ class UnifiedMonitorDaemon:
|
|
191
129
|
"""
|
192
130
|
self.logger.info("Starting unified monitor daemon in background mode")
|
193
131
|
|
194
|
-
#
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
132
|
+
# Always use daemon manager for cleanup first
|
133
|
+
# This ensures consistent behavior and prevents race conditions
|
134
|
+
if force_restart:
|
135
|
+
self.logger.info(
|
136
|
+
"Force restart requested, cleaning up any existing processes..."
|
137
|
+
)
|
138
|
+
if not self.daemon_manager.cleanup_port_conflicts(max_retries=3):
|
139
|
+
self.logger.error(f"Failed to clean up port {self.port}")
|
140
|
+
return False
|
141
|
+
# Wait for port to be fully released
|
142
|
+
time.sleep(2)
|
204
143
|
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
# Stop the existing daemon
|
210
|
-
if self.lifecycle.stop_daemon():
|
211
|
-
# Wait a moment for port to be released
|
212
|
-
time.sleep(2)
|
213
|
-
else:
|
214
|
-
self.logger.error("Failed to stop existing daemon for restart")
|
215
|
-
return False
|
216
|
-
else:
|
217
|
-
self.logger.warning(
|
218
|
-
f"Port {self.port} is in use by another service (PID: {existing_pid}). Cannot force restart."
|
219
|
-
)
|
220
|
-
self.logger.info(
|
221
|
-
"To restart the claude-mpm monitor, first stop the other service or use a different port."
|
222
|
-
)
|
223
|
-
return False
|
224
|
-
else:
|
144
|
+
# Check if already running via daemon manager
|
145
|
+
if self.daemon_manager.is_running():
|
146
|
+
existing_pid = self.daemon_manager.get_pid()
|
147
|
+
if not force_restart:
|
225
148
|
self.logger.warning(f"Daemon already running with PID {existing_pid}")
|
226
149
|
return False
|
150
|
+
# Force restart was already handled above
|
227
151
|
|
228
|
-
# Check for
|
229
|
-
|
230
|
-
|
231
|
-
|
152
|
+
# Check for our service on the port
|
153
|
+
is_ours, pid = self.daemon_manager.is_our_service()
|
154
|
+
if is_ours and pid and not force_restart:
|
155
|
+
self.logger.warning(
|
156
|
+
f"Our service already running on port {self.port} (PID: {pid})"
|
232
157
|
)
|
233
|
-
|
234
|
-
if is_ours and pid:
|
235
|
-
self.logger.info(
|
236
|
-
f"Found orphaned claude-mpm monitor service (PID: {pid}), force restarting"
|
237
|
-
)
|
238
|
-
# Try to kill the orphaned process
|
239
|
-
try:
|
240
|
-
os.kill(pid, signal.SIGTERM)
|
241
|
-
# Wait for it to exit
|
242
|
-
for _ in range(10):
|
243
|
-
try:
|
244
|
-
os.kill(pid, 0) # Check if still exists
|
245
|
-
time.sleep(0.5)
|
246
|
-
except ProcessLookupError:
|
247
|
-
break
|
248
|
-
else:
|
249
|
-
# Force kill if still running
|
250
|
-
os.kill(pid, signal.SIGKILL)
|
251
|
-
time.sleep(1)
|
252
|
-
except Exception as e:
|
253
|
-
self.logger.error(f"Failed to kill orphaned process: {e}")
|
254
|
-
return False
|
255
|
-
|
256
|
-
# Check port availability and clean up if needed
|
257
|
-
port_available, error_msg = self.lifecycle.verify_port_available(self.host)
|
258
|
-
if not port_available:
|
259
|
-
self.logger.warning(f"Port {self.port} is not available: {error_msg}")
|
260
|
-
|
261
|
-
# Try to identify and kill any process using the port
|
262
|
-
self.logger.info("Attempting to clean up processes on port...")
|
263
|
-
cleaned = self._cleanup_port_conflicts()
|
264
|
-
|
265
|
-
if cleaned:
|
266
|
-
# Wait longer for port to be released to avoid race conditions
|
267
|
-
time.sleep(3)
|
268
|
-
# Check again
|
269
|
-
port_available, error_msg = self.lifecycle.verify_port_available(self.host)
|
270
|
-
|
271
|
-
if not port_available:
|
272
|
-
self.logger.error(f"Port {self.port} is still not available after cleanup: {error_msg}")
|
273
|
-
print(f"Error: {error_msg}", file=sys.stderr)
|
274
|
-
print(f"Try 'claude-mpm monitor stop' or use --force flag", file=sys.stderr)
|
275
|
-
return False
|
158
|
+
return False
|
276
159
|
|
277
160
|
# Wait for any pre-warming threads to complete before forking
|
278
161
|
self._wait_for_prewarm_completion()
|
279
162
|
|
280
|
-
#
|
281
|
-
|
163
|
+
# Use daemon manager's daemonize which includes cleanup
|
164
|
+
self.daemon_manager.startup_status_file = None # Reset status file
|
165
|
+
success = self.daemon_manager.daemonize()
|
282
166
|
if not success:
|
283
167
|
return False
|
284
168
|
|
169
|
+
# We're now in the daemon process
|
170
|
+
# Update our PID references
|
171
|
+
self.lifecycle.pid_file = self.daemon_manager.pid_file
|
172
|
+
|
285
173
|
# Start the server in daemon mode
|
286
|
-
# This will run in the child process
|
287
174
|
try:
|
288
175
|
result = self._run_server()
|
289
176
|
if not result:
|
290
177
|
# Report failure before exiting
|
291
|
-
self.
|
178
|
+
self.daemon_manager._report_startup_error("Failed to start server")
|
179
|
+
else:
|
180
|
+
# Report success
|
181
|
+
self.daemon_manager._report_startup_success()
|
292
182
|
return result
|
293
183
|
except Exception as e:
|
294
184
|
# Report any exceptions during startup
|
295
|
-
self.
|
185
|
+
self.daemon_manager._report_startup_error(f"Server startup exception: {e}")
|
296
186
|
raise
|
297
187
|
|
298
188
|
def _start_foreground(self, force_restart: bool = False) -> bool:
|
@@ -302,12 +192,12 @@ class UnifiedMonitorDaemon:
|
|
302
192
|
force_restart: If True, restart existing service if it's ours
|
303
193
|
"""
|
304
194
|
self.logger.info(f"Starting unified monitor daemon on {self.host}:{self.port}")
|
305
|
-
|
306
|
-
#
|
195
|
+
|
196
|
+
# Use daemon manager for consistent port cleanup
|
307
197
|
# This helps with race conditions where old processes haven't fully released the port
|
308
198
|
if force_restart:
|
309
199
|
self.logger.info("Force restart requested, cleaning up port conflicts...")
|
310
|
-
self.
|
200
|
+
self.daemon_manager.cleanup_port_conflicts(max_retries=2)
|
311
201
|
time.sleep(1) # Brief pause to ensure port is released
|
312
202
|
|
313
203
|
# Check if already running (check PID file even in foreground mode)
|
@@ -319,7 +209,7 @@ class UnifiedMonitorDaemon:
|
|
319
209
|
self.logger.debug(
|
320
210
|
f"Checking if existing daemon (PID: {existing_pid}) is our service..."
|
321
211
|
)
|
322
|
-
is_ours, detected_pid = self.
|
212
|
+
is_ours, detected_pid = self.daemon_manager.is_our_service()
|
323
213
|
|
324
214
|
if is_ours:
|
325
215
|
self.logger.info(
|
@@ -351,7 +241,7 @@ class UnifiedMonitorDaemon:
|
|
351
241
|
self.logger.debug(
|
352
242
|
"No PID file found, checking for orphaned claude-mpm service..."
|
353
243
|
)
|
354
|
-
is_ours, pid = self.
|
244
|
+
is_ours, pid = self.daemon_manager.is_our_service()
|
355
245
|
if is_ours and pid:
|
356
246
|
self.logger.info(
|
357
247
|
f"Found orphaned claude-mpm monitor service (PID: {pid}), force restarting"
|
@@ -0,0 +1,739 @@
|
|
1
|
+
"""
|
2
|
+
Unified Daemon Manager Service
|
3
|
+
==============================
|
4
|
+
|
5
|
+
WHY: This service consolidates ALL daemon lifecycle operations into a single place,
|
6
|
+
eliminating duplicate code and race conditions from having daemon management logic
|
7
|
+
scattered across multiple files.
|
8
|
+
|
9
|
+
DESIGN DECISIONS:
|
10
|
+
- Single source of truth for all daemon operations
|
11
|
+
- Robust port cleanup with retry logic
|
12
|
+
- Thread-safe operations with proper locking
|
13
|
+
- Comprehensive error handling and recovery
|
14
|
+
- Supports both foreground and background/daemon modes
|
15
|
+
- Manages PID files, port conflicts, and process lifecycle
|
16
|
+
|
17
|
+
This replaces duplicate logic that was in:
|
18
|
+
- UnifiedMonitorDaemon._cleanup_port_conflicts()
|
19
|
+
- UnifiedDashboardManager._cleanup_port_conflicts()
|
20
|
+
- Various daemon startup/stop logic spread across files
|
21
|
+
"""
|
22
|
+
|
23
|
+
import os
|
24
|
+
import signal
|
25
|
+
import socket
|
26
|
+
import subprocess
|
27
|
+
import sys
|
28
|
+
import tempfile
|
29
|
+
import threading
|
30
|
+
import time
|
31
|
+
from pathlib import Path
|
32
|
+
from typing import Optional, Tuple
|
33
|
+
|
34
|
+
from ...core.logging_config import get_logger
|
35
|
+
|
36
|
+
|
37
|
+
class DaemonManager:
|
38
|
+
"""Centralized manager for all daemon lifecycle operations.
|
39
|
+
|
40
|
+
This is the SINGLE source of truth for:
|
41
|
+
- Port conflict resolution
|
42
|
+
- Process cleanup
|
43
|
+
- Daemon startup/stop
|
44
|
+
- PID file management
|
45
|
+
- Service detection
|
46
|
+
"""
|
47
|
+
|
48
|
+
# Class-level lock for thread safety
|
49
|
+
_lock = threading.Lock()
|
50
|
+
|
51
|
+
def __init__(
|
52
|
+
self,
|
53
|
+
port: int = 8765,
|
54
|
+
host: str = "localhost",
|
55
|
+
pid_file: Optional[str] = None,
|
56
|
+
log_file: Optional[str] = None,
|
57
|
+
):
|
58
|
+
"""Initialize the daemon manager.
|
59
|
+
|
60
|
+
Args:
|
61
|
+
port: Port number for the daemon
|
62
|
+
host: Host to bind to
|
63
|
+
pid_file: Path to PID file (uses default if None)
|
64
|
+
log_file: Path to log file for daemon mode
|
65
|
+
"""
|
66
|
+
self.port = port
|
67
|
+
self.host = host
|
68
|
+
self.logger = get_logger(__name__)
|
69
|
+
|
70
|
+
# Set up paths
|
71
|
+
if pid_file:
|
72
|
+
self.pid_file = Path(pid_file)
|
73
|
+
else:
|
74
|
+
self.pid_file = self._get_default_pid_file()
|
75
|
+
|
76
|
+
self.log_file = Path(log_file) if log_file else self._get_default_log_file()
|
77
|
+
|
78
|
+
# Startup status communication
|
79
|
+
self.startup_status_file = None
|
80
|
+
|
81
|
+
def _get_default_pid_file(self) -> Path:
|
82
|
+
"""Get default PID file path."""
|
83
|
+
project_root = Path.cwd()
|
84
|
+
claude_mpm_dir = project_root / ".claude-mpm"
|
85
|
+
claude_mpm_dir.mkdir(exist_ok=True)
|
86
|
+
return claude_mpm_dir / "monitor-daemon.pid"
|
87
|
+
|
88
|
+
def _get_default_log_file(self) -> Path:
|
89
|
+
"""Get default log file path."""
|
90
|
+
project_root = Path.cwd()
|
91
|
+
claude_mpm_dir = project_root / ".claude-mpm"
|
92
|
+
claude_mpm_dir.mkdir(exist_ok=True)
|
93
|
+
return claude_mpm_dir / "monitor-daemon.log"
|
94
|
+
|
95
|
+
def cleanup_port_conflicts(self, max_retries: int = 3) -> bool:
|
96
|
+
"""Clean up any processes using the daemon port.
|
97
|
+
|
98
|
+
This is the SINGLE implementation for port cleanup, replacing
|
99
|
+
duplicate logic in multiple files.
|
100
|
+
|
101
|
+
Args:
|
102
|
+
max_retries: Maximum number of cleanup attempts
|
103
|
+
|
104
|
+
Returns:
|
105
|
+
True if port is available after cleanup, False otherwise
|
106
|
+
"""
|
107
|
+
with self._lock:
|
108
|
+
for attempt in range(max_retries):
|
109
|
+
if attempt > 0:
|
110
|
+
self.logger.info(
|
111
|
+
f"Port cleanup attempt {attempt + 1}/{max_retries}"
|
112
|
+
)
|
113
|
+
|
114
|
+
# First check if port is actually in use
|
115
|
+
if self._is_port_available():
|
116
|
+
self.logger.debug(f"Port {self.port} is available")
|
117
|
+
return True
|
118
|
+
|
119
|
+
self.logger.info(f"Port {self.port} is in use, attempting cleanup")
|
120
|
+
|
121
|
+
# Try to find and kill processes using the port
|
122
|
+
if self._kill_processes_on_port():
|
123
|
+
# Wait for port to be released
|
124
|
+
time.sleep(2 if attempt == 0 else 3)
|
125
|
+
|
126
|
+
# Verify port is now free
|
127
|
+
if self._is_port_available():
|
128
|
+
self.logger.info(f"Port {self.port} successfully cleaned up")
|
129
|
+
return True
|
130
|
+
|
131
|
+
if attempt < max_retries - 1:
|
132
|
+
# Wait longer between attempts
|
133
|
+
time.sleep(3)
|
134
|
+
|
135
|
+
self.logger.error(
|
136
|
+
f"Failed to clean up port {self.port} after {max_retries} attempts"
|
137
|
+
)
|
138
|
+
return False
|
139
|
+
|
140
|
+
def _is_port_available(self) -> bool:
|
141
|
+
"""Check if the port is available for binding.
|
142
|
+
|
143
|
+
Returns:
|
144
|
+
True if port is available, False otherwise
|
145
|
+
"""
|
146
|
+
try:
|
147
|
+
test_sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
|
148
|
+
test_sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
|
149
|
+
test_sock.bind((self.host, self.port))
|
150
|
+
test_sock.close()
|
151
|
+
return True
|
152
|
+
except OSError:
|
153
|
+
return False
|
154
|
+
|
155
|
+
def _kill_processes_on_port(self) -> bool:
|
156
|
+
"""Kill processes using the daemon port.
|
157
|
+
|
158
|
+
Returns:
|
159
|
+
True if processes were killed or none found, False on error
|
160
|
+
"""
|
161
|
+
try:
|
162
|
+
# Try using lsof first (most reliable)
|
163
|
+
if self._kill_using_lsof():
|
164
|
+
return True
|
165
|
+
|
166
|
+
# Fallback to checking our known PID file
|
167
|
+
if self._kill_using_pid_file():
|
168
|
+
return True
|
169
|
+
|
170
|
+
# Try to identify claude-mpm processes
|
171
|
+
if self._kill_claude_mpm_processes():
|
172
|
+
return True
|
173
|
+
|
174
|
+
return False
|
175
|
+
|
176
|
+
except Exception as e:
|
177
|
+
self.logger.error(f"Error killing processes on port: {e}")
|
178
|
+
return False
|
179
|
+
|
180
|
+
def _kill_using_lsof(self) -> bool:
|
181
|
+
"""Kill processes using lsof to find them.
|
182
|
+
|
183
|
+
Returns:
|
184
|
+
True if successful or lsof not available, False on error
|
185
|
+
"""
|
186
|
+
try:
|
187
|
+
# Find processes using the port
|
188
|
+
result = subprocess.run(
|
189
|
+
["lsof", "-ti", f":{self.port}"], capture_output=True, text=True, check=False
|
190
|
+
)
|
191
|
+
|
192
|
+
if result.returncode != 0 or not result.stdout.strip():
|
193
|
+
self.logger.debug(f"No processes found using port {self.port}")
|
194
|
+
return True
|
195
|
+
|
196
|
+
pids = result.stdout.strip().split("\n")
|
197
|
+
self.logger.info(f"Found processes using port {self.port}: {pids}")
|
198
|
+
|
199
|
+
# Kill each process
|
200
|
+
for pid_str in pids:
|
201
|
+
try:
|
202
|
+
pid = int(pid_str.strip())
|
203
|
+
|
204
|
+
# Check if it's a Python/Claude process
|
205
|
+
process_info = subprocess.run(
|
206
|
+
["ps", "-p", str(pid), "-o", "comm="],
|
207
|
+
capture_output=True,
|
208
|
+
text=True, check=False,
|
209
|
+
)
|
210
|
+
|
211
|
+
process_name = process_info.stdout.strip().lower()
|
212
|
+
if "python" in process_name or "claude" in process_name:
|
213
|
+
self.logger.info(f"Killing Python/Claude process {pid}")
|
214
|
+
os.kill(pid, signal.SIGTERM)
|
215
|
+
|
216
|
+
# Wait briefly for graceful shutdown
|
217
|
+
time.sleep(1)
|
218
|
+
|
219
|
+
# Check if still alive and force kill if needed
|
220
|
+
try:
|
221
|
+
os.kill(pid, 0) # Check if process exists
|
222
|
+
self.logger.warning(
|
223
|
+
f"Process {pid} didn't terminate, force killing"
|
224
|
+
)
|
225
|
+
os.kill(pid, signal.SIGKILL)
|
226
|
+
time.sleep(0.5)
|
227
|
+
except ProcessLookupError:
|
228
|
+
pass # Process already dead
|
229
|
+
else:
|
230
|
+
self.logger.warning(
|
231
|
+
f"Process {pid} ({process_name}) is not a Claude MPM process"
|
232
|
+
)
|
233
|
+
return False
|
234
|
+
|
235
|
+
except (ValueError, ProcessLookupError) as e:
|
236
|
+
self.logger.debug(f"Error handling PID {pid_str}: {e}")
|
237
|
+
continue
|
238
|
+
|
239
|
+
return True
|
240
|
+
|
241
|
+
except FileNotFoundError:
|
242
|
+
# lsof not available
|
243
|
+
self.logger.debug("lsof not available, using alternative methods")
|
244
|
+
return True
|
245
|
+
except Exception as e:
|
246
|
+
self.logger.error(f"Error using lsof: {e}")
|
247
|
+
return False
|
248
|
+
|
249
|
+
def _kill_using_pid_file(self) -> bool:
|
250
|
+
"""Kill process using PID file.
|
251
|
+
|
252
|
+
Returns:
|
253
|
+
True if successful or no PID file, False on error
|
254
|
+
"""
|
255
|
+
try:
|
256
|
+
if not self.pid_file.exists():
|
257
|
+
return True
|
258
|
+
|
259
|
+
with open(self.pid_file) as f:
|
260
|
+
pid = int(f.read().strip())
|
261
|
+
|
262
|
+
self.logger.info(f"Found PID {pid} in PID file")
|
263
|
+
|
264
|
+
# Kill the process
|
265
|
+
try:
|
266
|
+
os.kill(pid, signal.SIGTERM)
|
267
|
+
time.sleep(1)
|
268
|
+
|
269
|
+
# Check if still alive
|
270
|
+
try:
|
271
|
+
os.kill(pid, 0)
|
272
|
+
os.kill(pid, signal.SIGKILL)
|
273
|
+
time.sleep(0.5)
|
274
|
+
except ProcessLookupError:
|
275
|
+
pass
|
276
|
+
|
277
|
+
# Remove PID file
|
278
|
+
self.pid_file.unlink(missing_ok=True)
|
279
|
+
return True
|
280
|
+
|
281
|
+
except ProcessLookupError:
|
282
|
+
# Process doesn't exist, just remove PID file
|
283
|
+
self.pid_file.unlink(missing_ok=True)
|
284
|
+
return True
|
285
|
+
|
286
|
+
except Exception as e:
|
287
|
+
self.logger.error(f"Error killing process from PID file: {e}")
|
288
|
+
return False
|
289
|
+
|
290
|
+
def _kill_claude_mpm_processes(self) -> bool:
|
291
|
+
"""Kill any claude-mpm monitor processes.
|
292
|
+
|
293
|
+
Returns:
|
294
|
+
True if successful, False on error
|
295
|
+
"""
|
296
|
+
try:
|
297
|
+
# Look for claude-mpm monitor processes
|
298
|
+
result = subprocess.run(["ps", "aux"], capture_output=True, text=True, check=False)
|
299
|
+
|
300
|
+
if result.returncode != 0:
|
301
|
+
return False
|
302
|
+
|
303
|
+
lines = result.stdout.strip().split("\n")
|
304
|
+
killed_any = False
|
305
|
+
|
306
|
+
for line in lines:
|
307
|
+
if "claude" in line.lower() and "monitor" in line.lower():
|
308
|
+
parts = line.split()
|
309
|
+
if len(parts) > 1:
|
310
|
+
try:
|
311
|
+
pid = int(parts[1])
|
312
|
+
self.logger.info(
|
313
|
+
f"Killing claude-mpm monitor process {pid}"
|
314
|
+
)
|
315
|
+
os.kill(pid, signal.SIGTERM)
|
316
|
+
killed_any = True
|
317
|
+
time.sleep(0.5)
|
318
|
+
except (ValueError, ProcessLookupError):
|
319
|
+
continue
|
320
|
+
|
321
|
+
if killed_any:
|
322
|
+
time.sleep(1) # Give processes time to exit
|
323
|
+
|
324
|
+
return True
|
325
|
+
|
326
|
+
except Exception as e:
|
327
|
+
self.logger.error(f"Error killing claude-mpm processes: {e}")
|
328
|
+
return False
|
329
|
+
|
330
|
+
def is_our_service(self) -> Tuple[bool, Optional[int]]:
    """Check if the service on the port is our claude-mpm monitor.

    Fast path: read the PID file, confirm the process exists and is a
    Python process. Fallback: probe the ``/health`` HTTP endpoint and
    look for "claude"/"mpm" markers in the response body.

    Returns:
        Tuple of (is_ours, pid) where is_ours is True if it's our service
    """
    try:
        # First check PID file
        if self.pid_file.exists():
            try:
                with open(self.pid_file) as f:
                    pid = int(f.read().strip())

                # Verify process exists (signal 0 = existence check only).
                # NOTE: may raise PermissionError for a foreign process;
                # that falls through to the outer handler -> (False, None).
                os.kill(pid, 0)

                # Check if it's a Python process
                process_info = subprocess.run(
                    ["ps", "-p", str(pid), "-o", "comm="],
                    capture_output=True,
                    text=True,
                    check=False,
                )

                if "python" in process_info.stdout.lower():
                    return True, pid

            except (ValueError, ProcessLookupError, subprocess.CalledProcessError):
                # PID file exists but process doesn't or isn't Python;
                # drop the stale PID file so future checks are clean.
                self.pid_file.unlink(missing_ok=True)

        # Check if service responds to our health endpoint
        try:
            import requests

            response = requests.get(
                f"http://{self.host}:{self.port}/health", timeout=2
            )

            if response.status_code == 200:
                # Try to get service info
                try:
                    data = response.json()
                    if "claude" in str(data).lower() or "mpm" in str(data).lower():
                        # It's likely our service, try to find PID
                        pid = self._find_service_pid()
                        return True, pid
                except Exception:
                    # Health response wasn't JSON or didn't identify us.
                    # (Was a bare "except:"; narrowed so SystemExit /
                    # KeyboardInterrupt are no longer swallowed.)
                    pass

        except Exception:
            # requests unavailable or health endpoint unreachable -
            # not our service as far as we can tell.
            pass

        return False, None

    except Exception as e:
        self.logger.error(f"Error checking service ownership: {e}")
        return False, None
|
387
|
+
|
388
|
+
def _find_service_pid(self) -> Optional[int]:
    """Find PID of the process listening on our port using lsof.

    Returns:
        PID if found, None otherwise (including when ``lsof`` is not
        installed or its output cannot be parsed)
    """
    try:
        result = subprocess.run(
            ["lsof", "-ti", f":{self.port}"],
            capture_output=True,
            text=True,
            check=False,
        )

        if result.returncode == 0 and result.stdout.strip():
            # "lsof -t" prints one PID per line; take the first match.
            pids = result.stdout.strip().split("\n")
            if pids:
                return int(pids[0].strip())

    except (OSError, ValueError, subprocess.SubprocessError):
        # Was a bare "except:"; narrowed to the failures this lookup can
        # actually hit: lsof missing (FileNotFoundError/OSError), spawn
        # failure, or non-numeric output. Best-effort lookup -> None.
        pass

    return None
|
408
|
+
|
409
|
+
def start_daemon(self, force_restart: bool = False) -> bool:
    """Start the daemon with automatic cleanup and retry.

    Args:
        force_restart: Force restart even if already running

    Returns:
        True if daemon started successfully
    """
    with self._lock:
        if self.is_running():
            # Already up: either report success or tear it down first.
            if not force_restart:
                existing_pid = self.get_pid()
                self.logger.info(f"Daemon already running with PID {existing_pid}")
                return True

            self.logger.info("Force restarting daemon")
            if not self.stop_daemon():
                self.logger.error("Failed to stop existing daemon")
                return False

            # Give the old process a moment to release its resources.
            time.sleep(2)

        # Make sure nothing else is holding our port before forking.
        if not self.cleanup_port_conflicts():
            self.logger.error(f"Cannot start daemon - port {self.port} is in use")
            return False

        # Detach into the background.
        return self.daemonize()
|
442
|
+
|
443
|
+
def daemonize(self) -> bool:
    """Daemonize the current process via the classic double-fork.

    The first fork lets the parent return while the child detaches;
    setsid() makes the child a session leader with no controlling TTY;
    the second fork guarantees the daemon can never reacquire one.
    A temp "status" file is used as a handshake so the parent can
    report whether the daemon actually came up.

    Returns:
        True if successful (in parent), doesn't return in child
    """
    try:
        # Clean up asyncio event loops before forking - a live loop's
        # file descriptors must not be duplicated into the child.
        self._cleanup_event_loops()

        # Create status file for communication (parent polls it in
        # _parent_wait_for_startup; child writes success/error to it).
        with tempfile.NamedTemporaryFile(
            mode="w", delete=False, suffix=".status"
        ) as f:
            self.startup_status_file = f.name
            f.write("starting")

        # First fork
        pid = os.fork()
        if pid > 0:
            # Parent process - wait for child to confirm startup
            return self._parent_wait_for_startup(pid)

    except OSError as e:
        self.logger.error(f"First fork failed: {e}")
        return False

    # Child process continues...

    # Decouple from parent: chdir so we don't pin a mount point,
    # setsid to drop the controlling terminal, umask(0) so file
    # permissions are governed only by explicit modes.
    os.chdir("/")
    os.setsid()
    os.umask(0)

    try:
        # Second fork
        pid = os.fork()
        if pid > 0:
            # First child exits
            sys.exit(0)
    except OSError as e:
        self.logger.error(f"Second fork failed: {e}")
        self._report_startup_error(f"Second fork failed: {e}")
        sys.exit(1)

    # Grandchild process - the actual daemon

    # Write PID file
    self.write_pid_file()

    # Redirect streams (stdin -> /dev/null, stdout/stderr -> log file)
    self._redirect_streams()

    # Setup signal handlers for graceful SIGTERM/SIGINT shutdown
    self._setup_signal_handlers()

    self.logger.info(f"Daemon process started with PID {os.getpid()}")

    # Report successful startup (unblocks the waiting parent)
    self._report_startup_success()

    # Note: Daemon process continues running
    # Caller is responsible for running the actual service
    return True
|
507
|
+
|
508
|
+
def stop_daemon(self, timeout: int = 10) -> bool:
    """Stop the daemon process.

    Args:
        timeout: Maximum time to wait for daemon to stop

    Returns:
        True if stopped successfully
    """
    with self._lock:
        try:
            daemon_pid = self.get_pid()
            if not daemon_pid:
                self.logger.info("No daemon PID found")
                # Still try to clean up port
                self.cleanup_port_conflicts()
                return True

            self.logger.info(f"Stopping daemon with PID {daemon_pid}")

            # Ask for a graceful shutdown first.
            try:
                os.kill(daemon_pid, signal.SIGTERM)
            except ProcessLookupError:
                # Already gone - just drop the stale PID file.
                self.cleanup_pid_file()
                return True

            # Poll until the process disappears or the deadline passes.
            deadline = time.time() + timeout
            while time.time() < deadline:
                try:
                    os.kill(daemon_pid, 0)
                except ProcessLookupError:
                    self.cleanup_pid_file()
                    return True
                time.sleep(0.5)

            # Graceful shutdown failed - escalate to SIGKILL.
            self.logger.warning("Daemon didn't stop gracefully, force killing")
            try:
                os.kill(daemon_pid, signal.SIGKILL)
                time.sleep(1)
            except ProcessLookupError:
                pass

            self.cleanup_pid_file()
            return True

        except Exception as e:
            self.logger.error(f"Error stopping daemon: {e}")
            return False
|
561
|
+
|
562
|
+
def is_running(self) -> bool:
    """Check if daemon is running.

    Returns:
        True if daemon is running
    """
    try:
        pid = self.get_pid()
        if not pid:
            return False

        # Signal 0 performs an existence check without delivering
        # a signal to the target process.
        os.kill(pid, 0)
        return True

    except ProcessLookupError:
        # Process doesn't exist - drop the stale PID file.
        self.cleanup_pid_file()
        return False
    except PermissionError:
        # Fix: os.kill(pid, 0) raises EPERM when the process exists but
        # belongs to another user. Previously this escaped to the caller;
        # a live process means the daemon IS running.
        return True
|
581
|
+
|
582
|
+
def get_pid(self) -> Optional[int]:
    """Get daemon PID from PID file.

    Returns:
        PID if found, None otherwise
    """
    try:
        if not self.pid_file.exists():
            return None

        # Whitespace-tolerant parse of the stored PID.
        contents = self.pid_file.read_text()
        return int(contents.strip())

    except Exception as e:
        self.logger.error(f"Error reading PID file: {e}")
        return None
|
598
|
+
|
599
|
+
def write_pid_file(self):
    """Write current PID to PID file."""
    try:
        # Ensure the directory chain exists before writing.
        self.pid_file.parent.mkdir(parents=True, exist_ok=True)
        self.pid_file.write_text(str(os.getpid()))
        self.logger.debug(f"PID file written: {self.pid_file}")
    except Exception as e:
        # A daemon without a PID file is unmanageable - propagate.
        self.logger.error(f"Error writing PID file: {e}")
        raise
|
609
|
+
|
610
|
+
def cleanup_pid_file(self):
    """Remove PID file."""
    try:
        # missing_ok: a second cleanup (or a never-written file) is fine.
        self.pid_file.unlink(missing_ok=True)
    except Exception as e:
        self.logger.error(f"Error removing PID file: {e}")
    else:
        self.logger.debug("PID file removed")
|
617
|
+
|
618
|
+
def _cleanup_event_loops(self):
    """Clean up asyncio event loops before forking.

    Forking while an event loop is alive would duplicate its internal
    file descriptors into the child, so pending tasks are cancelled and
    the loop is stopped and closed first. All failures here are
    non-fatal and only logged at debug level.
    """
    try:
        import asyncio

        try:
            # NOTE(review): get_event_loop() is deprecated for this use
            # since Python 3.10 and may raise RuntimeError when no loop
            # is set for this thread - handled by the except below.
            loop = asyncio.get_event_loop()
            if loop and not loop.is_closed():
                # Cancel pending tasks
                pending = asyncio.all_tasks(loop)
                for task in pending:
                    task.cancel()

                # Stop and close loop
                if loop.is_running():
                    loop.stop()

                asyncio.set_event_loop(None)
                loop.close()

        except RuntimeError:
            # No event loop
            pass

    except Exception as e:
        self.logger.debug(f"Error cleaning up event loops: {e}")
|
644
|
+
|
645
|
+
def _redirect_streams(self):
    """Redirect standard streams for daemon mode.

    stdin is pointed at /dev/null; stdout and stderr are appended to
    self.log_file so daemon output survives detaching from the TTY.
    dup2() replaces the underlying file descriptors in place, so the
    redirection persists after the `with` blocks close their handles.
    """
    try:
        # Flush any buffered output before swapping descriptors.
        sys.stdout.flush()
        sys.stderr.flush()

        # Redirect stdin to /dev/null
        with open("/dev/null") as null_in:
            os.dup2(null_in.fileno(), sys.stdin.fileno())

        # Redirect stdout and stderr to log file (append mode keeps
        # previous runs' output).
        self.log_file.parent.mkdir(parents=True, exist_ok=True)
        with open(self.log_file, "a") as log_out:
            os.dup2(log_out.fileno(), sys.stdout.fileno())
            os.dup2(log_out.fileno(), sys.stderr.fileno())

    except Exception as e:
        self.logger.error(f"Error redirecting streams: {e}")
|
663
|
+
|
664
|
+
def _setup_signal_handlers(self):
    """Install SIGTERM/SIGINT handlers for graceful shutdown."""

    def _shutdown(signum, frame):
        # Log, drop the PID file, and exit cleanly.
        self.logger.info(f"Received signal {signum}, shutting down")
        self.cleanup_pid_file()
        sys.exit(0)

    for sig in (signal.SIGTERM, signal.SIGINT):
        signal.signal(sig, _shutdown)
|
674
|
+
|
675
|
+
def _parent_wait_for_startup(self, child_pid: int, timeout: float = 10.0) -> bool:
    """Parent process waits for child to confirm startup.

    Args:
        child_pid: PID of child process
        timeout: Maximum time to wait

    Returns:
        True if child started successfully
    """
    try:
        deadline = time.time() + timeout

        while time.time() < deadline:
            status_path = (
                Path(self.startup_status_file) if self.startup_status_file else None
            )
            if status_path is None or not status_path.exists():
                # Handshake file not there yet - keep polling.
                time.sleep(0.1)
                continue

            try:
                status = status_path.read_text().strip()

                if status == "success":
                    # Child is up - discard the handshake file.
                    status_path.unlink(missing_ok=True)
                    return True

                if status.startswith("error:"):
                    error_msg = status[6:]
                    self.logger.error(f"Daemon startup failed: {error_msg}")
                    status_path.unlink(missing_ok=True)
                    return False

            except Exception:
                # Transient read error - retry until the deadline.
                pass

            time.sleep(0.1)

        self.logger.error("Daemon startup timed out")
        return False

    except Exception as e:
        self.logger.error(f"Error waiting for daemon startup: {e}")
        return False
|
722
|
+
|
723
|
+
def _report_startup_success(self):
    """Report successful startup to parent process."""
    # Only report through an existing handshake file.
    if not self.startup_status_file:
        return
    status_path = Path(self.startup_status_file)
    if not status_path.exists():
        return
    try:
        status_path.write_text("success")
    except Exception as e:
        self.logger.error(f"Error reporting startup success: {e}")
|
731
|
+
|
732
|
+
def _report_startup_error(self, error: str):
    """Report startup error to parent process."""
    # Only report through an existing handshake file.
    if not self.startup_status_file:
        return
    status_path = Path(self.startup_status_file)
    if not status_path.exists():
        return
    try:
        status_path.write_text(f"error:{error}")
    except Exception as e:
        self.logger.error(f"Error reporting startup error: {e}")
|
@@ -1,5 +1,5 @@
|
|
1
1
|
claude_mpm/BUILD_NUMBER,sha256=toytnNjkIKPgQaGwDqQdC1rpNTAdSEc6Vja50d7Ovug,4
|
2
|
-
claude_mpm/VERSION,sha256=
|
2
|
+
claude_mpm/VERSION,sha256=D4RiM4BijFcXkczgjBK5VLy8keaTJQARC_2jVMdSt8s,7
|
3
3
|
claude_mpm/__init__.py,sha256=lyTZAYGH4DTaFGLRNWJKk5Q5oTjzN5I6AXmfVX-Jff0,1512
|
4
4
|
claude_mpm/__main__.py,sha256=Ro5UBWBoQaSAIoSqWAr7zkbLyvi4sSy28WShqAhKJG0,723
|
5
5
|
claude_mpm/constants.py,sha256=I946iCQzIIPRZVVJ8aO7lA4euiyDnNw2IX7EelAOkIE,5915
|
@@ -436,7 +436,7 @@ claude_mpm/services/cli/memory_crud_service.py,sha256=ciN9Pl_12iDAqF9zPBWOzu-iXi
|
|
436
436
|
claude_mpm/services/cli/memory_output_formatter.py,sha256=nbf7VsjGvH4e9fLv9c7PzjuO9COZhbK5P2fNZ79055w,24783
|
437
437
|
claude_mpm/services/cli/session_manager.py,sha256=rla_Stbcvt93wa9G9MCMu9UqB3FLGqlPt_eN5lQb3Gg,16599
|
438
438
|
claude_mpm/services/cli/startup_checker.py,sha256=efhuvu8ns5G16jcQ0nQZKVddmD2AktUEdlvjNcXjAuk,12232
|
439
|
-
claude_mpm/services/cli/unified_dashboard_manager.py,sha256=
|
439
|
+
claude_mpm/services/cli/unified_dashboard_manager.py,sha256=ZdiN-YlUjtycaE-TLUCRRP-WZZzwXz5XB896LgDx7G8,15712
|
440
440
|
claude_mpm/services/communication/__init__.py,sha256=b4qc7_Rqy4DE9q7BAUlfUZjoYG4uimAyUnE0irPcXyU,560
|
441
441
|
claude_mpm/services/core/__init__.py,sha256=evEayLlBqJvxMZhrhuK6aagXmNrKGSj8Jm9OOxKzqvU,2195
|
442
442
|
claude_mpm/services/core/base.py,sha256=iA-F7DgGp-FJIMvQTiHQ68RkG_k-AtUWlArJPMw6ZPk,7297
|
@@ -552,7 +552,8 @@ claude_mpm/services/memory/cache/__init__.py,sha256=6M6-P8ParyxX8vOgp_IxHgLMvacr
|
|
552
552
|
claude_mpm/services/memory/cache/shared_prompt_cache.py,sha256=crnYPUT8zcS7TvoE1vW7pyaf4T77N5rJ1wUf_YQ2vvo,28704
|
553
553
|
claude_mpm/services/memory/cache/simple_cache.py,sha256=qsTjbcsPxj-kNfaod9VN_uE5NioIwpfkUin_mMVUJCg,10218
|
554
554
|
claude_mpm/services/monitor/__init__.py,sha256=X7gxSLUm9Fg_zEsX6LtCHP2ipF0qj6Emkun20h2So7g,745
|
555
|
-
claude_mpm/services/monitor/daemon.py,sha256=
|
555
|
+
claude_mpm/services/monitor/daemon.py,sha256=nkB_xslT4yxIiSVf2u6nGm56rYpkit0WDj4YPWr-osM,22961
|
556
|
+
claude_mpm/services/monitor/daemon_manager.py,sha256=6ZYXgRhwurnPDXxFgk9msLoa7x7ccE64m93FwqWVJfs,24519
|
556
557
|
claude_mpm/services/monitor/event_emitter.py,sha256=JzRLNg8PUJ5s3ulNnq_D4yqCPItvidJzu8DmFxriieQ,12224
|
557
558
|
claude_mpm/services/monitor/server.py,sha256=m98Eyv9caxRywJ4JtAdOuv5EB__z7vd2hYRZPwcqFLg,28498
|
558
559
|
claude_mpm/services/monitor/handlers/__init__.py,sha256=jgPIf4IJVERm_tAeD9834tfx9IcxtlHj5r9rhEWpkfM,701
|
@@ -643,9 +644,9 @@ claude_mpm/utils/subprocess_utils.py,sha256=zgiwLqh_17WxHpySvUPH65pb4bzIeUGOAYUJ
|
|
643
644
|
claude_mpm/validation/__init__.py,sha256=YZhwE3mhit-lslvRLuwfX82xJ_k4haZeKmh4IWaVwtk,156
|
644
645
|
claude_mpm/validation/agent_validator.py,sha256=3Lo6LK-Mw9IdnL_bd3zl_R6FkgSVDYKUUM7EeVVD3jc,20865
|
645
646
|
claude_mpm/validation/frontmatter_validator.py,sha256=u8g4Eyd_9O6ugj7Un47oSGh3kqv4wMkuks2i_CtWRvM,7028
|
646
|
-
claude_mpm-4.2.
|
647
|
-
claude_mpm-4.2.
|
648
|
-
claude_mpm-4.2.
|
649
|
-
claude_mpm-4.2.
|
650
|
-
claude_mpm-4.2.
|
651
|
-
claude_mpm-4.2.
|
647
|
+
claude_mpm-4.2.32.dist-info/licenses/LICENSE,sha256=lpaivOlPuBZW1ds05uQLJJswy8Rp_HMNieJEbFlqvLk,1072
|
648
|
+
claude_mpm-4.2.32.dist-info/METADATA,sha256=5rI-z0awk9FawxvQGIv5ehodB23tje9Z3550ObeFurk,14451
|
649
|
+
claude_mpm-4.2.32.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
650
|
+
claude_mpm-4.2.32.dist-info/entry_points.txt,sha256=FDPZgz8JOvD-6iuXY2l9Zbo9zYVRuE4uz4Qr0vLeGOk,471
|
651
|
+
claude_mpm-4.2.32.dist-info/top_level.txt,sha256=1nUg3FEaBySgm8t-s54jK5zoPnu3_eY6EP6IOlekyHA,11
|
652
|
+
claude_mpm-4.2.32.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|