claude-mpm 4.2.29__py3-none-any.whl → 4.2.33__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- claude_mpm/VERSION +1 -1
- claude_mpm/services/cli/unified_dashboard_manager.py +39 -105
- claude_mpm/services/monitor/daemon.py +53 -163
- claude_mpm/services/monitor/daemon_manager.py +739 -0
- {claude_mpm-4.2.29.dist-info → claude_mpm-4.2.33.dist-info}/METADATA +1 -1
- {claude_mpm-4.2.29.dist-info → claude_mpm-4.2.33.dist-info}/RECORD +10 -9
- {claude_mpm-4.2.29.dist-info → claude_mpm-4.2.33.dist-info}/WHEEL +0 -0
- {claude_mpm-4.2.29.dist-info → claude_mpm-4.2.33.dist-info}/entry_points.txt +0 -0
- {claude_mpm-4.2.29.dist-info → claude_mpm-4.2.33.dist-info}/licenses/LICENSE +0 -0
- {claude_mpm-4.2.29.dist-info → claude_mpm-4.2.33.dist-info}/top_level.txt +0 -0
claude_mpm/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
4.2.
|
1
|
+
4.2.33
|
@@ -25,6 +25,7 @@ import requests
|
|
25
25
|
|
26
26
|
from ...core.logging_config import get_logger
|
27
27
|
from ...services.monitor.daemon import UnifiedMonitorDaemon
|
28
|
+
from ...services.monitor.daemon_manager import DaemonManager
|
28
29
|
from ...services.port_manager import PortManager
|
29
30
|
|
30
31
|
|
@@ -99,8 +100,9 @@ class UnifiedDashboardManager(IUnifiedDashboardManager):
|
|
99
100
|
host="localhost", port=port, daemon_mode=background
|
100
101
|
)
|
101
102
|
|
102
|
-
#
|
103
|
-
|
103
|
+
# Use daemon manager to check service ownership
|
104
|
+
daemon_mgr = DaemonManager(port=port, host="localhost")
|
105
|
+
is_ours, pid = daemon_mgr.is_our_service()
|
104
106
|
|
105
107
|
if is_ours and not force_restart:
|
106
108
|
# Our service is already running, just open browser if needed
|
@@ -115,54 +117,60 @@ class UnifiedDashboardManager(IUnifiedDashboardManager):
|
|
115
117
|
self.logger.info(
|
116
118
|
f"Force restarting our dashboard on port {port} (PID: {pid})"
|
117
119
|
)
|
118
|
-
#
|
119
|
-
|
120
|
+
# Don't cleanup here - let daemon.start(force_restart=True) handle it
|
121
|
+
# This prevents race conditions where we kill the service we're trying to start
|
120
122
|
elif self.is_dashboard_running(port) and not force_restart:
|
121
|
-
# Different service is using the port -
|
122
|
-
self.logger.warning(
|
123
|
-
|
124
|
-
|
125
|
-
|
123
|
+
# Different service is using the port - don't clean it up without force_restart
|
124
|
+
self.logger.warning(
|
125
|
+
f"Port {port} is in use by a different service. Use force_restart to override."
|
126
|
+
)
|
127
|
+
return False, False
|
126
128
|
|
127
129
|
self.logger.info(
|
128
130
|
f"Starting unified dashboard on port {port} (background: {background}, force_restart: {force_restart})"
|
129
131
|
)
|
130
132
|
|
131
133
|
if background:
|
132
|
-
#
|
133
|
-
|
134
|
-
|
135
|
-
self.logger.error(f"Failed to clean up port {port}, cannot proceed")
|
136
|
-
return False, False
|
137
|
-
|
134
|
+
# The daemon.start() method will handle cleanup when force_restart=True
|
135
|
+
# We don't need pre-emptive cleanup here as it causes race conditions
|
136
|
+
|
138
137
|
# Try to start daemon with retry on port conflicts
|
139
138
|
max_retries = 3
|
140
139
|
retry_count = 0
|
141
140
|
success = False
|
142
|
-
|
141
|
+
|
143
142
|
while retry_count < max_retries and not success:
|
144
143
|
if retry_count > 0:
|
145
|
-
|
146
|
-
|
144
|
+
# Only cleanup on retries, not on first attempt
|
145
|
+
self.logger.info(
|
146
|
+
f"Retry {retry_count}/{max_retries}: Cleaning up port {port}"
|
147
|
+
)
|
148
|
+
daemon_mgr = DaemonManager(port=port, host="localhost")
|
149
|
+
if not daemon_mgr.cleanup_port_conflicts():
|
147
150
|
self.logger.error(f"Cleanup failed on retry {retry_count}")
|
148
151
|
break
|
149
152
|
time.sleep(3) # Longer wait for cleanup to complete
|
150
|
-
|
151
|
-
# Start daemon in background mode with force restart
|
152
|
-
|
153
|
-
|
153
|
+
|
154
|
+
# Start daemon in background mode with force restart
|
155
|
+
# The daemon.start() method handles its own cleanup when force_restart=True
|
156
|
+
success = daemon.start(force_restart=force_restart)
|
157
|
+
|
154
158
|
if not success and retry_count < max_retries - 1:
|
155
159
|
# Check if it's a port conflict
|
156
160
|
if not self.port_manager.is_port_available(port):
|
157
|
-
self.logger.warning(
|
161
|
+
self.logger.warning(
|
162
|
+
f"Port {port} still in use, will retry cleanup"
|
163
|
+
)
|
158
164
|
retry_count += 1
|
159
165
|
else:
|
160
166
|
# Different kind of failure, don't retry
|
161
|
-
self.logger.error(
|
167
|
+
self.logger.error(
|
168
|
+
"Daemon start failed for reason other than port conflict"
|
169
|
+
)
|
162
170
|
break
|
163
171
|
else:
|
164
172
|
break
|
165
|
-
|
173
|
+
|
166
174
|
if success:
|
167
175
|
with self._lock:
|
168
176
|
self._background_daemons[port] = daemon
|
@@ -350,91 +358,17 @@ class UnifiedDashboardManager(IUnifiedDashboardManager):
|
|
350
358
|
def _cleanup_port_conflicts(self, port: int) -> bool:
|
351
359
|
"""
|
352
360
|
Try to clean up any processes using our port.
|
353
|
-
|
361
|
+
|
362
|
+
Delegates to the consolidated DaemonManager for consistent behavior.
|
363
|
+
|
354
364
|
Args:
|
355
365
|
port: Port to clean up
|
356
|
-
|
366
|
+
|
357
367
|
Returns:
|
358
368
|
True if cleanup was successful or not needed
|
359
369
|
"""
|
360
|
-
|
361
|
-
|
362
|
-
import signal
|
363
|
-
import time
|
364
|
-
|
365
|
-
# Find processes using the port
|
366
|
-
result = subprocess.run(
|
367
|
-
["lsof", "-ti", f":{port}"],
|
368
|
-
capture_output=True,
|
369
|
-
text=True
|
370
|
-
)
|
371
|
-
|
372
|
-
if result.returncode == 0 and result.stdout.strip():
|
373
|
-
pids = result.stdout.strip().split('\n')
|
374
|
-
self.logger.info(f"Found processes using port {port}: {pids}")
|
375
|
-
|
376
|
-
for pid_str in pids:
|
377
|
-
try:
|
378
|
-
pid = int(pid_str.strip())
|
379
|
-
# Try graceful termination first
|
380
|
-
import os
|
381
|
-
os.kill(pid, signal.SIGTERM)
|
382
|
-
self.logger.info(f"Sent SIGTERM to process {pid}")
|
383
|
-
except (ValueError, ProcessLookupError) as e:
|
384
|
-
self.logger.debug(f"Could not terminate process {pid_str}: {e}")
|
385
|
-
continue
|
386
|
-
|
387
|
-
# Give processes time to shut down gracefully
|
388
|
-
time.sleep(3)
|
389
|
-
|
390
|
-
# Check if port is still in use and force kill if needed
|
391
|
-
result = subprocess.run(
|
392
|
-
["lsof", "-ti", f":{port}"],
|
393
|
-
capture_output=True,
|
394
|
-
text=True
|
395
|
-
)
|
396
|
-
|
397
|
-
if result.returncode == 0 and result.stdout.strip():
|
398
|
-
remaining_pids = result.stdout.strip().split('\n')
|
399
|
-
self.logger.warning(f"Processes still using port {port}: {remaining_pids}, force killing")
|
400
|
-
|
401
|
-
for pid_str in remaining_pids:
|
402
|
-
try:
|
403
|
-
pid = int(pid_str.strip())
|
404
|
-
os.kill(pid, signal.SIGKILL)
|
405
|
-
self.logger.info(f"Force killed process {pid}")
|
406
|
-
except (ValueError, ProcessLookupError) as e:
|
407
|
-
self.logger.debug(f"Could not force kill process {pid_str}: {e}")
|
408
|
-
continue
|
409
|
-
|
410
|
-
# Longer pause after force kill to ensure port is fully released
|
411
|
-
time.sleep(5)
|
412
|
-
|
413
|
-
# Final verification that port is actually free
|
414
|
-
final_check = subprocess.run(
|
415
|
-
["lsof", "-ti", f":{port}"],
|
416
|
-
capture_output=True,
|
417
|
-
text=True
|
418
|
-
)
|
419
|
-
|
420
|
-
if final_check.returncode == 0 and final_check.stdout.strip():
|
421
|
-
self.logger.error(f"Failed to clean up port {port} - processes still running: {final_check.stdout.strip()}")
|
422
|
-
return False
|
423
|
-
|
424
|
-
self.logger.info(f"Successfully cleaned up processes on port {port}")
|
425
|
-
return True
|
426
|
-
else:
|
427
|
-
self.logger.debug(f"No processes found using port {port}")
|
428
|
-
return True
|
429
|
-
|
430
|
-
except FileNotFoundError:
|
431
|
-
# lsof not available, try alternative approach
|
432
|
-
self.logger.debug("lsof not available, skipping port cleanup")
|
433
|
-
return True
|
434
|
-
except Exception as e:
|
435
|
-
self.logger.warning(f"Error during port cleanup: {e}")
|
436
|
-
# Continue anyway - the port check will catch actual conflicts
|
437
|
-
return True
|
370
|
+
daemon_mgr = DaemonManager(port=port, host="localhost")
|
371
|
+
return daemon_mgr.cleanup_port_conflicts()
|
438
372
|
|
439
373
|
def start_server(
|
440
374
|
self, port: Optional[int] = None, timeout: int = 30, force_restart: bool = True
|
@@ -24,6 +24,7 @@ from typing import Optional
|
|
24
24
|
|
25
25
|
from ...core.logging_config import get_logger
|
26
26
|
from ..hook_installer_service import HookInstallerService
|
27
|
+
from .daemon_manager import DaemonManager
|
27
28
|
from .management.health import HealthMonitor
|
28
29
|
from .management.lifecycle import DaemonLifecycle
|
29
30
|
from .server import UnifiedMonitorServer
|
@@ -58,7 +59,15 @@ class UnifiedMonitorDaemon:
|
|
58
59
|
self.daemon_mode = daemon_mode
|
59
60
|
self.logger = get_logger(__name__)
|
60
61
|
|
61
|
-
#
|
62
|
+
# Use new consolidated DaemonManager for all daemon operations
|
63
|
+
self.daemon_manager = DaemonManager(
|
64
|
+
port=port,
|
65
|
+
host=host,
|
66
|
+
pid_file=pid_file or self._get_default_pid_file(),
|
67
|
+
log_file=log_file,
|
68
|
+
)
|
69
|
+
|
70
|
+
# Keep lifecycle for backward compatibility (delegates to daemon_manager)
|
62
71
|
self.lifecycle = DaemonLifecycle(
|
63
72
|
pid_file=pid_file or self._get_default_pid_file(),
|
64
73
|
log_file=log_file,
|
@@ -104,84 +113,13 @@ class UnifiedMonitorDaemon:
|
|
104
113
|
|
105
114
|
def _cleanup_port_conflicts(self) -> bool:
|
106
115
|
"""Try to clean up any processes using our port.
|
107
|
-
|
116
|
+
|
117
|
+
Delegates to the consolidated DaemonManager for consistent behavior.
|
118
|
+
|
108
119
|
Returns:
|
109
120
|
True if cleanup was successful, False otherwise
|
110
121
|
"""
|
111
|
-
|
112
|
-
# Find process using the port
|
113
|
-
import subprocess
|
114
|
-
result = subprocess.run(
|
115
|
-
["lsof", "-ti", f":{self.port}"],
|
116
|
-
capture_output=True,
|
117
|
-
text=True
|
118
|
-
)
|
119
|
-
|
120
|
-
if result.returncode == 0 and result.stdout.strip():
|
121
|
-
pids = result.stdout.strip().split('\n')
|
122
|
-
for pid_str in pids:
|
123
|
-
try:
|
124
|
-
pid = int(pid_str.strip())
|
125
|
-
self.logger.info(f"Found process {pid} using port {self.port}")
|
126
|
-
|
127
|
-
# Check if it's a claude-mpm process
|
128
|
-
process_info = subprocess.run(
|
129
|
-
["ps", "-p", str(pid), "-o", "comm="],
|
130
|
-
capture_output=True,
|
131
|
-
text=True
|
132
|
-
)
|
133
|
-
|
134
|
-
if "python" in process_info.stdout.lower() or "claude" in process_info.stdout.lower():
|
135
|
-
self.logger.info(f"Killing process {pid} (appears to be Python/Claude related)")
|
136
|
-
os.kill(pid, signal.SIGTERM)
|
137
|
-
time.sleep(1)
|
138
|
-
|
139
|
-
# Check if still alive
|
140
|
-
try:
|
141
|
-
os.kill(pid, 0)
|
142
|
-
# Still alive, force kill
|
143
|
-
self.logger.warning(f"Process {pid} didn't terminate, force killing")
|
144
|
-
os.kill(pid, signal.SIGKILL)
|
145
|
-
time.sleep(1)
|
146
|
-
except ProcessLookupError:
|
147
|
-
pass
|
148
|
-
else:
|
149
|
-
self.logger.warning(f"Process {pid} is not a Claude MPM process: {process_info.stdout}")
|
150
|
-
return False
|
151
|
-
except (ValueError, ProcessLookupError) as e:
|
152
|
-
self.logger.debug(f"Error handling PID {pid_str}: {e}")
|
153
|
-
continue
|
154
|
-
|
155
|
-
return True
|
156
|
-
|
157
|
-
except FileNotFoundError:
|
158
|
-
# lsof not available, try alternative method
|
159
|
-
self.logger.debug("lsof not available, using alternative cleanup")
|
160
|
-
|
161
|
-
# Check if there's an orphaned service we can identify
|
162
|
-
is_ours, pid = self.lifecycle.is_our_service(self.host)
|
163
|
-
if is_ours and pid:
|
164
|
-
try:
|
165
|
-
self.logger.info(f"Killing orphaned Claude MPM service (PID: {pid})")
|
166
|
-
os.kill(pid, signal.SIGTERM)
|
167
|
-
time.sleep(1)
|
168
|
-
|
169
|
-
# Check if still alive
|
170
|
-
try:
|
171
|
-
os.kill(pid, 0)
|
172
|
-
os.kill(pid, signal.SIGKILL)
|
173
|
-
time.sleep(1)
|
174
|
-
except ProcessLookupError:
|
175
|
-
pass
|
176
|
-
|
177
|
-
return True
|
178
|
-
except Exception as e:
|
179
|
-
self.logger.error(f"Failed to kill process: {e}")
|
180
|
-
|
181
|
-
except Exception as e:
|
182
|
-
self.logger.error(f"Error during port cleanup: {e}")
|
183
|
-
|
184
|
-
return False
|
122
|
+
return self.daemon_manager.cleanup_port_conflicts()
|
185
123
|
|
186
124
|
def _start_daemon(self, force_restart: bool = False) -> bool:
|
187
125
|
"""Start as background daemon process.
|
@@ -191,108 +129,60 @@ class UnifiedMonitorDaemon:
|
|
191
129
|
"""
|
192
130
|
self.logger.info("Starting unified monitor daemon in background mode")
|
193
131
|
|
194
|
-
#
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
132
|
+
# Always use daemon manager for cleanup first
|
133
|
+
# This ensures consistent behavior and prevents race conditions
|
134
|
+
if force_restart:
|
135
|
+
self.logger.info(
|
136
|
+
"Force restart requested, cleaning up any existing processes..."
|
137
|
+
)
|
138
|
+
if not self.daemon_manager.cleanup_port_conflicts(max_retries=3):
|
139
|
+
self.logger.error(f"Failed to clean up port {self.port}")
|
140
|
+
return False
|
141
|
+
# Wait for port to be fully released
|
142
|
+
time.sleep(2)
|
204
143
|
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
# Stop the existing daemon
|
210
|
-
if self.lifecycle.stop_daemon():
|
211
|
-
# Wait a moment for port to be released
|
212
|
-
time.sleep(2)
|
213
|
-
else:
|
214
|
-
self.logger.error("Failed to stop existing daemon for restart")
|
215
|
-
return False
|
216
|
-
else:
|
217
|
-
self.logger.warning(
|
218
|
-
f"Port {self.port} is in use by another service (PID: {existing_pid}). Cannot force restart."
|
219
|
-
)
|
220
|
-
self.logger.info(
|
221
|
-
"To restart the claude-mpm monitor, first stop the other service or use a different port."
|
222
|
-
)
|
223
|
-
return False
|
224
|
-
else:
|
144
|
+
# Check if already running via daemon manager
|
145
|
+
if self.daemon_manager.is_running():
|
146
|
+
existing_pid = self.daemon_manager.get_pid()
|
147
|
+
if not force_restart:
|
225
148
|
self.logger.warning(f"Daemon already running with PID {existing_pid}")
|
226
149
|
return False
|
150
|
+
# Force restart was already handled above
|
227
151
|
|
228
|
-
# Check for
|
229
|
-
|
230
|
-
|
231
|
-
|
152
|
+
# Check for our service on the port
|
153
|
+
is_ours, pid = self.daemon_manager.is_our_service()
|
154
|
+
if is_ours and pid and not force_restart:
|
155
|
+
self.logger.warning(
|
156
|
+
f"Our service already running on port {self.port} (PID: {pid})"
|
232
157
|
)
|
233
|
-
|
234
|
-
if is_ours and pid:
|
235
|
-
self.logger.info(
|
236
|
-
f"Found orphaned claude-mpm monitor service (PID: {pid}), force restarting"
|
237
|
-
)
|
238
|
-
# Try to kill the orphaned process
|
239
|
-
try:
|
240
|
-
os.kill(pid, signal.SIGTERM)
|
241
|
-
# Wait for it to exit
|
242
|
-
for _ in range(10):
|
243
|
-
try:
|
244
|
-
os.kill(pid, 0) # Check if still exists
|
245
|
-
time.sleep(0.5)
|
246
|
-
except ProcessLookupError:
|
247
|
-
break
|
248
|
-
else:
|
249
|
-
# Force kill if still running
|
250
|
-
os.kill(pid, signal.SIGKILL)
|
251
|
-
time.sleep(1)
|
252
|
-
except Exception as e:
|
253
|
-
self.logger.error(f"Failed to kill orphaned process: {e}")
|
254
|
-
return False
|
255
|
-
|
256
|
-
# Check port availability and clean up if needed
|
257
|
-
port_available, error_msg = self.lifecycle.verify_port_available(self.host)
|
258
|
-
if not port_available:
|
259
|
-
self.logger.warning(f"Port {self.port} is not available: {error_msg}")
|
260
|
-
|
261
|
-
# Try to identify and kill any process using the port
|
262
|
-
self.logger.info("Attempting to clean up processes on port...")
|
263
|
-
cleaned = self._cleanup_port_conflicts()
|
264
|
-
|
265
|
-
if cleaned:
|
266
|
-
# Wait longer for port to be released to avoid race conditions
|
267
|
-
time.sleep(3)
|
268
|
-
# Check again
|
269
|
-
port_available, error_msg = self.lifecycle.verify_port_available(self.host)
|
270
|
-
|
271
|
-
if not port_available:
|
272
|
-
self.logger.error(f"Port {self.port} is still not available after cleanup: {error_msg}")
|
273
|
-
print(f"Error: {error_msg}", file=sys.stderr)
|
274
|
-
print(f"Try 'claude-mpm monitor stop' or use --force flag", file=sys.stderr)
|
275
|
-
return False
|
158
|
+
return False
|
276
159
|
|
277
160
|
# Wait for any pre-warming threads to complete before forking
|
278
161
|
self._wait_for_prewarm_completion()
|
279
162
|
|
280
|
-
#
|
281
|
-
|
163
|
+
# Use daemon manager's daemonize which includes cleanup
|
164
|
+
self.daemon_manager.startup_status_file = None # Reset status file
|
165
|
+
success = self.daemon_manager.daemonize()
|
282
166
|
if not success:
|
283
167
|
return False
|
284
168
|
|
169
|
+
# We're now in the daemon process
|
170
|
+
# Update our PID references
|
171
|
+
self.lifecycle.pid_file = self.daemon_manager.pid_file
|
172
|
+
|
285
173
|
# Start the server in daemon mode
|
286
|
-
# This will run in the child process
|
287
174
|
try:
|
288
175
|
result = self._run_server()
|
289
176
|
if not result:
|
290
177
|
# Report failure before exiting
|
291
|
-
self.
|
178
|
+
self.daemon_manager._report_startup_error("Failed to start server")
|
179
|
+
else:
|
180
|
+
# Report success
|
181
|
+
self.daemon_manager._report_startup_success()
|
292
182
|
return result
|
293
183
|
except Exception as e:
|
294
184
|
# Report any exceptions during startup
|
295
|
-
self.
|
185
|
+
self.daemon_manager._report_startup_error(f"Server startup exception: {e}")
|
296
186
|
raise
|
297
187
|
|
298
188
|
def _start_foreground(self, force_restart: bool = False) -> bool:
|
@@ -302,12 +192,12 @@ class UnifiedMonitorDaemon:
|
|
302
192
|
force_restart: If True, restart existing service if it's ours
|
303
193
|
"""
|
304
194
|
self.logger.info(f"Starting unified monitor daemon on {self.host}:{self.port}")
|
305
|
-
|
306
|
-
#
|
195
|
+
|
196
|
+
# Use daemon manager for consistent port cleanup
|
307
197
|
# This helps with race conditions where old processes haven't fully released the port
|
308
198
|
if force_restart:
|
309
199
|
self.logger.info("Force restart requested, cleaning up port conflicts...")
|
310
|
-
self.
|
200
|
+
self.daemon_manager.cleanup_port_conflicts(max_retries=2)
|
311
201
|
time.sleep(1) # Brief pause to ensure port is released
|
312
202
|
|
313
203
|
# Check if already running (check PID file even in foreground mode)
|
@@ -319,7 +209,7 @@ class UnifiedMonitorDaemon:
|
|
319
209
|
self.logger.debug(
|
320
210
|
f"Checking if existing daemon (PID: {existing_pid}) is our service..."
|
321
211
|
)
|
322
|
-
is_ours, detected_pid = self.
|
212
|
+
is_ours, detected_pid = self.daemon_manager.is_our_service()
|
323
213
|
|
324
214
|
if is_ours:
|
325
215
|
self.logger.info(
|
@@ -351,7 +241,7 @@ class UnifiedMonitorDaemon:
|
|
351
241
|
self.logger.debug(
|
352
242
|
"No PID file found, checking for orphaned claude-mpm service..."
|
353
243
|
)
|
354
|
-
is_ours, pid = self.
|
244
|
+
is_ours, pid = self.daemon_manager.is_our_service()
|
355
245
|
if is_ours and pid:
|
356
246
|
self.logger.info(
|
357
247
|
f"Found orphaned claude-mpm monitor service (PID: {pid}), force restarting"
|
@@ -0,0 +1,739 @@
|
|
1
|
+
"""
|
2
|
+
Unified Daemon Manager Service
|
3
|
+
==============================
|
4
|
+
|
5
|
+
WHY: This service consolidates ALL daemon lifecycle operations into a single place,
|
6
|
+
eliminating duplicate code and race conditions from having daemon management logic
|
7
|
+
scattered across multiple files.
|
8
|
+
|
9
|
+
DESIGN DECISIONS:
|
10
|
+
- Single source of truth for all daemon operations
|
11
|
+
- Robust port cleanup with retry logic
|
12
|
+
- Thread-safe operations with proper locking
|
13
|
+
- Comprehensive error handling and recovery
|
14
|
+
- Supports both foreground and background/daemon modes
|
15
|
+
- Manages PID files, port conflicts, and process lifecycle
|
16
|
+
|
17
|
+
This replaces duplicate logic that was in:
|
18
|
+
- UnifiedMonitorDaemon._cleanup_port_conflicts()
|
19
|
+
- UnifiedDashboardManager._cleanup_port_conflicts()
|
20
|
+
- Various daemon startup/stop logic spread across files
|
21
|
+
"""
|
22
|
+
|
23
|
+
import os
|
24
|
+
import signal
|
25
|
+
import socket
|
26
|
+
import subprocess
|
27
|
+
import sys
|
28
|
+
import tempfile
|
29
|
+
import threading
|
30
|
+
import time
|
31
|
+
from pathlib import Path
|
32
|
+
from typing import Optional, Tuple
|
33
|
+
|
34
|
+
from ...core.logging_config import get_logger
|
35
|
+
|
36
|
+
|
37
|
+
class DaemonManager:
|
38
|
+
"""Centralized manager for all daemon lifecycle operations.
|
39
|
+
|
40
|
+
This is the SINGLE source of truth for:
|
41
|
+
- Port conflict resolution
|
42
|
+
- Process cleanup
|
43
|
+
- Daemon startup/stop
|
44
|
+
- PID file management
|
45
|
+
- Service detection
|
46
|
+
"""
|
47
|
+
|
48
|
+
# Class-level lock for thread safety
|
49
|
+
_lock = threading.Lock()
|
50
|
+
|
51
|
+
def __init__(
|
52
|
+
self,
|
53
|
+
port: int = 8765,
|
54
|
+
host: str = "localhost",
|
55
|
+
pid_file: Optional[str] = None,
|
56
|
+
log_file: Optional[str] = None,
|
57
|
+
):
|
58
|
+
"""Initialize the daemon manager.
|
59
|
+
|
60
|
+
Args:
|
61
|
+
port: Port number for the daemon
|
62
|
+
host: Host to bind to
|
63
|
+
pid_file: Path to PID file (uses default if None)
|
64
|
+
log_file: Path to log file for daemon mode
|
65
|
+
"""
|
66
|
+
self.port = port
|
67
|
+
self.host = host
|
68
|
+
self.logger = get_logger(__name__)
|
69
|
+
|
70
|
+
# Set up paths
|
71
|
+
if pid_file:
|
72
|
+
self.pid_file = Path(pid_file)
|
73
|
+
else:
|
74
|
+
self.pid_file = self._get_default_pid_file()
|
75
|
+
|
76
|
+
self.log_file = Path(log_file) if log_file else self._get_default_log_file()
|
77
|
+
|
78
|
+
# Startup status communication
|
79
|
+
self.startup_status_file = None
|
80
|
+
|
81
|
+
def _get_default_pid_file(self) -> Path:
|
82
|
+
"""Get default PID file path."""
|
83
|
+
project_root = Path.cwd()
|
84
|
+
claude_mpm_dir = project_root / ".claude-mpm"
|
85
|
+
claude_mpm_dir.mkdir(exist_ok=True)
|
86
|
+
return claude_mpm_dir / "monitor-daemon.pid"
|
87
|
+
|
88
|
+
def _get_default_log_file(self) -> Path:
|
89
|
+
"""Get default log file path."""
|
90
|
+
project_root = Path.cwd()
|
91
|
+
claude_mpm_dir = project_root / ".claude-mpm"
|
92
|
+
claude_mpm_dir.mkdir(exist_ok=True)
|
93
|
+
return claude_mpm_dir / "monitor-daemon.log"
|
94
|
+
|
95
|
+
def cleanup_port_conflicts(self, max_retries: int = 3) -> bool:
|
96
|
+
"""Clean up any processes using the daemon port.
|
97
|
+
|
98
|
+
This is the SINGLE implementation for port cleanup, replacing
|
99
|
+
duplicate logic in multiple files.
|
100
|
+
|
101
|
+
Args:
|
102
|
+
max_retries: Maximum number of cleanup attempts
|
103
|
+
|
104
|
+
Returns:
|
105
|
+
True if port is available after cleanup, False otherwise
|
106
|
+
"""
|
107
|
+
with self._lock:
|
108
|
+
for attempt in range(max_retries):
|
109
|
+
if attempt > 0:
|
110
|
+
self.logger.info(
|
111
|
+
f"Port cleanup attempt {attempt + 1}/{max_retries}"
|
112
|
+
)
|
113
|
+
|
114
|
+
# First check if port is actually in use
|
115
|
+
if self._is_port_available():
|
116
|
+
self.logger.debug(f"Port {self.port} is available")
|
117
|
+
return True
|
118
|
+
|
119
|
+
self.logger.info(f"Port {self.port} is in use, attempting cleanup")
|
120
|
+
|
121
|
+
# Try to find and kill processes using the port
|
122
|
+
if self._kill_processes_on_port():
|
123
|
+
# Wait for port to be released
|
124
|
+
time.sleep(2 if attempt == 0 else 3)
|
125
|
+
|
126
|
+
# Verify port is now free
|
127
|
+
if self._is_port_available():
|
128
|
+
self.logger.info(f"Port {self.port} successfully cleaned up")
|
129
|
+
return True
|
130
|
+
|
131
|
+
if attempt < max_retries - 1:
|
132
|
+
# Wait longer between attempts
|
133
|
+
time.sleep(3)
|
134
|
+
|
135
|
+
self.logger.error(
|
136
|
+
f"Failed to clean up port {self.port} after {max_retries} attempts"
|
137
|
+
)
|
138
|
+
return False
|
139
|
+
|
140
|
+
def _is_port_available(self) -> bool:
|
141
|
+
"""Check if the port is available for binding.
|
142
|
+
|
143
|
+
Returns:
|
144
|
+
True if port is available, False otherwise
|
145
|
+
"""
|
146
|
+
try:
|
147
|
+
test_sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
|
148
|
+
test_sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
|
149
|
+
test_sock.bind((self.host, self.port))
|
150
|
+
test_sock.close()
|
151
|
+
return True
|
152
|
+
except OSError:
|
153
|
+
return False
|
154
|
+
|
155
|
+
def _kill_processes_on_port(self) -> bool:
|
156
|
+
"""Kill processes using the daemon port.
|
157
|
+
|
158
|
+
Returns:
|
159
|
+
True if processes were killed or none found, False on error
|
160
|
+
"""
|
161
|
+
try:
|
162
|
+
# Try using lsof first (most reliable)
|
163
|
+
if self._kill_using_lsof():
|
164
|
+
return True
|
165
|
+
|
166
|
+
# Fallback to checking our known PID file
|
167
|
+
if self._kill_using_pid_file():
|
168
|
+
return True
|
169
|
+
|
170
|
+
# Try to identify claude-mpm processes
|
171
|
+
if self._kill_claude_mpm_processes():
|
172
|
+
return True
|
173
|
+
|
174
|
+
return False
|
175
|
+
|
176
|
+
except Exception as e:
|
177
|
+
self.logger.error(f"Error killing processes on port: {e}")
|
178
|
+
return False
|
179
|
+
|
180
|
+
def _kill_using_lsof(self) -> bool:
|
181
|
+
"""Kill processes using lsof to find them.
|
182
|
+
|
183
|
+
Returns:
|
184
|
+
True if successful or lsof not available, False on error
|
185
|
+
"""
|
186
|
+
try:
|
187
|
+
# Find processes using the port
|
188
|
+
result = subprocess.run(
|
189
|
+
["lsof", "-ti", f":{self.port}"], capture_output=True, text=True, check=False
|
190
|
+
)
|
191
|
+
|
192
|
+
if result.returncode != 0 or not result.stdout.strip():
|
193
|
+
self.logger.debug(f"No processes found using port {self.port}")
|
194
|
+
return True
|
195
|
+
|
196
|
+
pids = result.stdout.strip().split("\n")
|
197
|
+
self.logger.info(f"Found processes using port {self.port}: {pids}")
|
198
|
+
|
199
|
+
# Kill each process
|
200
|
+
for pid_str in pids:
|
201
|
+
try:
|
202
|
+
pid = int(pid_str.strip())
|
203
|
+
|
204
|
+
# Check if it's a Python/Claude process
|
205
|
+
process_info = subprocess.run(
|
206
|
+
["ps", "-p", str(pid), "-o", "comm="],
|
207
|
+
capture_output=True,
|
208
|
+
text=True, check=False,
|
209
|
+
)
|
210
|
+
|
211
|
+
process_name = process_info.stdout.strip().lower()
|
212
|
+
if "python" in process_name or "claude" in process_name:
|
213
|
+
self.logger.info(f"Killing Python/Claude process {pid}")
|
214
|
+
os.kill(pid, signal.SIGTERM)
|
215
|
+
|
216
|
+
# Wait briefly for graceful shutdown
|
217
|
+
time.sleep(1)
|
218
|
+
|
219
|
+
# Check if still alive and force kill if needed
|
220
|
+
try:
|
221
|
+
os.kill(pid, 0) # Check if process exists
|
222
|
+
self.logger.warning(
|
223
|
+
f"Process {pid} didn't terminate, force killing"
|
224
|
+
)
|
225
|
+
os.kill(pid, signal.SIGKILL)
|
226
|
+
time.sleep(0.5)
|
227
|
+
except ProcessLookupError:
|
228
|
+
pass # Process already dead
|
229
|
+
else:
|
230
|
+
self.logger.warning(
|
231
|
+
f"Process {pid} ({process_name}) is not a Claude MPM process"
|
232
|
+
)
|
233
|
+
return False
|
234
|
+
|
235
|
+
except (ValueError, ProcessLookupError) as e:
|
236
|
+
self.logger.debug(f"Error handling PID {pid_str}: {e}")
|
237
|
+
continue
|
238
|
+
|
239
|
+
return True
|
240
|
+
|
241
|
+
except FileNotFoundError:
|
242
|
+
# lsof not available
|
243
|
+
self.logger.debug("lsof not available, using alternative methods")
|
244
|
+
return True
|
245
|
+
except Exception as e:
|
246
|
+
self.logger.error(f"Error using lsof: {e}")
|
247
|
+
return False
|
248
|
+
|
249
|
+
def _kill_using_pid_file(self) -> bool:
|
250
|
+
"""Kill process using PID file.
|
251
|
+
|
252
|
+
Returns:
|
253
|
+
True if successful or no PID file, False on error
|
254
|
+
"""
|
255
|
+
try:
|
256
|
+
if not self.pid_file.exists():
|
257
|
+
return True
|
258
|
+
|
259
|
+
with open(self.pid_file) as f:
|
260
|
+
pid = int(f.read().strip())
|
261
|
+
|
262
|
+
self.logger.info(f"Found PID {pid} in PID file")
|
263
|
+
|
264
|
+
# Kill the process
|
265
|
+
try:
|
266
|
+
os.kill(pid, signal.SIGTERM)
|
267
|
+
time.sleep(1)
|
268
|
+
|
269
|
+
# Check if still alive
|
270
|
+
try:
|
271
|
+
os.kill(pid, 0)
|
272
|
+
os.kill(pid, signal.SIGKILL)
|
273
|
+
time.sleep(0.5)
|
274
|
+
except ProcessLookupError:
|
275
|
+
pass
|
276
|
+
|
277
|
+
# Remove PID file
|
278
|
+
self.pid_file.unlink(missing_ok=True)
|
279
|
+
return True
|
280
|
+
|
281
|
+
except ProcessLookupError:
|
282
|
+
# Process doesn't exist, just remove PID file
|
283
|
+
self.pid_file.unlink(missing_ok=True)
|
284
|
+
return True
|
285
|
+
|
286
|
+
except Exception as e:
|
287
|
+
self.logger.error(f"Error killing process from PID file: {e}")
|
288
|
+
return False
|
289
|
+
|
290
|
+
def _kill_claude_mpm_processes(self) -> bool:
|
291
|
+
"""Kill any claude-mpm monitor processes.
|
292
|
+
|
293
|
+
Returns:
|
294
|
+
True if successful, False on error
|
295
|
+
"""
|
296
|
+
try:
|
297
|
+
# Look for claude-mpm monitor processes
|
298
|
+
result = subprocess.run(["ps", "aux"], capture_output=True, text=True, check=False)
|
299
|
+
|
300
|
+
if result.returncode != 0:
|
301
|
+
return False
|
302
|
+
|
303
|
+
lines = result.stdout.strip().split("\n")
|
304
|
+
killed_any = False
|
305
|
+
|
306
|
+
for line in lines:
|
307
|
+
if "claude" in line.lower() and "monitor" in line.lower():
|
308
|
+
parts = line.split()
|
309
|
+
if len(parts) > 1:
|
310
|
+
try:
|
311
|
+
pid = int(parts[1])
|
312
|
+
self.logger.info(
|
313
|
+
f"Killing claude-mpm monitor process {pid}"
|
314
|
+
)
|
315
|
+
os.kill(pid, signal.SIGTERM)
|
316
|
+
killed_any = True
|
317
|
+
time.sleep(0.5)
|
318
|
+
except (ValueError, ProcessLookupError):
|
319
|
+
continue
|
320
|
+
|
321
|
+
if killed_any:
|
322
|
+
time.sleep(1) # Give processes time to exit
|
323
|
+
|
324
|
+
return True
|
325
|
+
|
326
|
+
except Exception as e:
|
327
|
+
self.logger.error(f"Error killing claude-mpm processes: {e}")
|
328
|
+
return False
|
329
|
+
|
330
|
+
def is_our_service(self) -> Tuple[bool, Optional[int]]:
|
331
|
+
"""Check if the service on the port is our claude-mpm monitor.
|
332
|
+
|
333
|
+
Returns:
|
334
|
+
Tuple of (is_ours, pid) where is_ours is True if it's our service
|
335
|
+
"""
|
336
|
+
try:
|
337
|
+
# First check PID file
|
338
|
+
if self.pid_file.exists():
|
339
|
+
try:
|
340
|
+
with open(self.pid_file) as f:
|
341
|
+
pid = int(f.read().strip())
|
342
|
+
|
343
|
+
# Verify process exists
|
344
|
+
os.kill(pid, 0)
|
345
|
+
|
346
|
+
# Check if it's a Python process
|
347
|
+
process_info = subprocess.run(
|
348
|
+
["ps", "-p", str(pid), "-o", "comm="],
|
349
|
+
capture_output=True,
|
350
|
+
text=True, check=False,
|
351
|
+
)
|
352
|
+
|
353
|
+
if "python" in process_info.stdout.lower():
|
354
|
+
return True, pid
|
355
|
+
|
356
|
+
except (ValueError, ProcessLookupError, subprocess.CalledProcessError):
|
357
|
+
# PID file exists but process doesn't or isn't Python
|
358
|
+
self.pid_file.unlink(missing_ok=True)
|
359
|
+
|
360
|
+
# Check if service responds to our health endpoint
|
361
|
+
try:
|
362
|
+
import requests
|
363
|
+
|
364
|
+
response = requests.get(
|
365
|
+
f"http://{self.host}:{self.port}/health", timeout=2
|
366
|
+
)
|
367
|
+
|
368
|
+
if response.status_code == 200:
|
369
|
+
# Try to get service info
|
370
|
+
try:
|
371
|
+
data = response.json()
|
372
|
+
if "claude" in str(data).lower() or "mpm" in str(data).lower():
|
373
|
+
# It's likely our service, try to find PID
|
374
|
+
pid = self._find_service_pid()
|
375
|
+
return True, pid
|
376
|
+
except:
|
377
|
+
pass
|
378
|
+
|
379
|
+
except:
|
380
|
+
pass
|
381
|
+
|
382
|
+
return False, None
|
383
|
+
|
384
|
+
except Exception as e:
|
385
|
+
self.logger.error(f"Error checking service ownership: {e}")
|
386
|
+
return False, None
|
387
|
+
|
388
|
+
def _find_service_pid(self) -> Optional[int]:
|
389
|
+
"""Find PID of service on our port using lsof.
|
390
|
+
|
391
|
+
Returns:
|
392
|
+
PID if found, None otherwise
|
393
|
+
"""
|
394
|
+
try:
|
395
|
+
result = subprocess.run(
|
396
|
+
["lsof", "-ti", f":{self.port}"], capture_output=True, text=True, check=False
|
397
|
+
)
|
398
|
+
|
399
|
+
if result.returncode == 0 and result.stdout.strip():
|
400
|
+
pids = result.stdout.strip().split("\n")
|
401
|
+
if pids:
|
402
|
+
return int(pids[0].strip())
|
403
|
+
|
404
|
+
except:
|
405
|
+
pass
|
406
|
+
|
407
|
+
return None
|
408
|
+
|
409
|
+
def start_daemon(self, force_restart: bool = False) -> bool:
|
410
|
+
"""Start the daemon with automatic cleanup and retry.
|
411
|
+
|
412
|
+
Args:
|
413
|
+
force_restart: Force restart even if already running
|
414
|
+
|
415
|
+
Returns:
|
416
|
+
True if daemon started successfully
|
417
|
+
"""
|
418
|
+
with self._lock:
|
419
|
+
# Check if already running
|
420
|
+
if self.is_running():
|
421
|
+
if not force_restart:
|
422
|
+
pid = self.get_pid()
|
423
|
+
self.logger.info(f"Daemon already running with PID {pid}")
|
424
|
+
return True
|
425
|
+
|
426
|
+
# Stop existing daemon
|
427
|
+
self.logger.info("Force restarting daemon")
|
428
|
+
if not self.stop_daemon():
|
429
|
+
self.logger.error("Failed to stop existing daemon")
|
430
|
+
return False
|
431
|
+
|
432
|
+
# Wait for cleanup
|
433
|
+
time.sleep(2)
|
434
|
+
|
435
|
+
# Clean up port conflicts
|
436
|
+
if not self.cleanup_port_conflicts():
|
437
|
+
self.logger.error(f"Cannot start daemon - port {self.port} is in use")
|
438
|
+
return False
|
439
|
+
|
440
|
+
# Daemonize the process
|
441
|
+
return self.daemonize()
|
442
|
+
|
443
|
+
def daemonize(self) -> bool:
    """Daemonize the current process via the classic Unix double fork.

    The first fork lets the parent return immediately; the intermediate
    child calls setsid() to detach from the controlling terminal, and
    the second fork guarantees the final daemon can never reacquire
    one. Parent and grandchild communicate startup success/failure
    through a temporary status file.

    Returns:
        True if successful (in parent), doesn't return in child
    """
    try:
        # A live asyncio loop must not be inherited across fork()
        self._cleanup_event_loops()

        # Create status file for parent<->child startup handshake;
        # delete=False so it survives across the forks
        with tempfile.NamedTemporaryFile(
            mode="w", delete=False, suffix=".status"
        ) as f:
            self.startup_status_file = f.name
            f.write("starting")

        # First fork
        pid = os.fork()
        if pid > 0:
            # Parent process - wait for child to confirm startup
            return self._parent_wait_for_startup(pid)

    except OSError as e:
        self.logger.error(f"First fork failed: {e}")
        return False

    # Child process continues...

    # Decouple from parent: chdir("/") avoids pinning a mount point,
    # setsid() starts a new session, umask(0) resets inherited mask
    os.chdir("/")
    os.setsid()
    os.umask(0)

    try:
        # Second fork - prevents the daemon from ever reacquiring a
        # controlling terminal
        pid = os.fork()
        if pid > 0:
            # First child exits
            sys.exit(0)
    except OSError as e:
        self.logger.error(f"Second fork failed: {e}")
        self._report_startup_error(f"Second fork failed: {e}")
        sys.exit(1)

    # Grandchild process - the actual daemon

    # Write PID file
    self.write_pid_file()

    # Redirect stdin/stdout/stderr away from the (gone) terminal
    self._redirect_streams()

    # Setup signal handlers for graceful SIGTERM/SIGINT shutdown
    self._setup_signal_handlers()

    self.logger.info(f"Daemon process started with PID {os.getpid()}")

    # Report successful startup so the waiting parent can return True
    self._report_startup_success()

    # Note: Daemon process continues running
    # Caller is responsible for running the actual service
    return True
|
507
|
+
|
508
|
+
def stop_daemon(self, timeout: int = 10) -> bool:
|
509
|
+
"""Stop the daemon process.
|
510
|
+
|
511
|
+
Args:
|
512
|
+
timeout: Maximum time to wait for daemon to stop
|
513
|
+
|
514
|
+
Returns:
|
515
|
+
True if stopped successfully
|
516
|
+
"""
|
517
|
+
with self._lock:
|
518
|
+
try:
|
519
|
+
pid = self.get_pid()
|
520
|
+
if not pid:
|
521
|
+
self.logger.info("No daemon PID found")
|
522
|
+
# Still try to clean up port
|
523
|
+
self.cleanup_port_conflicts()
|
524
|
+
return True
|
525
|
+
|
526
|
+
self.logger.info(f"Stopping daemon with PID {pid}")
|
527
|
+
|
528
|
+
# Send SIGTERM for graceful shutdown
|
529
|
+
try:
|
530
|
+
os.kill(pid, signal.SIGTERM)
|
531
|
+
except ProcessLookupError:
|
532
|
+
# Process already dead
|
533
|
+
self.cleanup_pid_file()
|
534
|
+
return True
|
535
|
+
|
536
|
+
# Wait for process to exit
|
537
|
+
start_time = time.time()
|
538
|
+
while time.time() - start_time < timeout:
|
539
|
+
try:
|
540
|
+
os.kill(pid, 0) # Check if still alive
|
541
|
+
time.sleep(0.5)
|
542
|
+
except ProcessLookupError:
|
543
|
+
# Process exited
|
544
|
+
self.cleanup_pid_file()
|
545
|
+
return True
|
546
|
+
|
547
|
+
# Force kill if still running
|
548
|
+
self.logger.warning("Daemon didn't stop gracefully, force killing")
|
549
|
+
try:
|
550
|
+
os.kill(pid, signal.SIGKILL)
|
551
|
+
time.sleep(1)
|
552
|
+
except ProcessLookupError:
|
553
|
+
pass
|
554
|
+
|
555
|
+
self.cleanup_pid_file()
|
556
|
+
return True
|
557
|
+
|
558
|
+
except Exception as e:
|
559
|
+
self.logger.error(f"Error stopping daemon: {e}")
|
560
|
+
return False
|
561
|
+
|
562
|
+
def is_running(self) -> bool:
|
563
|
+
"""Check if daemon is running.
|
564
|
+
|
565
|
+
Returns:
|
566
|
+
True if daemon is running
|
567
|
+
"""
|
568
|
+
try:
|
569
|
+
pid = self.get_pid()
|
570
|
+
if not pid:
|
571
|
+
return False
|
572
|
+
|
573
|
+
# Check if process exists
|
574
|
+
os.kill(pid, 0)
|
575
|
+
return True
|
576
|
+
|
577
|
+
except ProcessLookupError:
|
578
|
+
# Process doesn't exist
|
579
|
+
self.cleanup_pid_file()
|
580
|
+
return False
|
581
|
+
|
582
|
+
def get_pid(self) -> Optional[int]:
|
583
|
+
"""Get daemon PID from PID file.
|
584
|
+
|
585
|
+
Returns:
|
586
|
+
PID if found, None otherwise
|
587
|
+
"""
|
588
|
+
try:
|
589
|
+
if not self.pid_file.exists():
|
590
|
+
return None
|
591
|
+
|
592
|
+
with open(self.pid_file) as f:
|
593
|
+
return int(f.read().strip())
|
594
|
+
|
595
|
+
except Exception as e:
|
596
|
+
self.logger.error(f"Error reading PID file: {e}")
|
597
|
+
return None
|
598
|
+
|
599
|
+
def write_pid_file(self):
|
600
|
+
"""Write current PID to PID file."""
|
601
|
+
try:
|
602
|
+
self.pid_file.parent.mkdir(parents=True, exist_ok=True)
|
603
|
+
with open(self.pid_file, "w") as f:
|
604
|
+
f.write(str(os.getpid()))
|
605
|
+
self.logger.debug(f"PID file written: {self.pid_file}")
|
606
|
+
except Exception as e:
|
607
|
+
self.logger.error(f"Error writing PID file: {e}")
|
608
|
+
raise
|
609
|
+
|
610
|
+
def cleanup_pid_file(self):
|
611
|
+
"""Remove PID file."""
|
612
|
+
try:
|
613
|
+
self.pid_file.unlink(missing_ok=True)
|
614
|
+
self.logger.debug("PID file removed")
|
615
|
+
except Exception as e:
|
616
|
+
self.logger.error(f"Error removing PID file: {e}")
|
617
|
+
|
618
|
+
def _cleanup_event_loops(self):
    """Clean up asyncio event loops before forking.

    An event loop (and its pending tasks) inherited across fork() can
    misbehave in the child, so cancel everything and close the loop in
    the pre-fork parent. All failures are logged at debug level and
    ignored — this is best-effort hygiene, not a hard requirement.
    """
    try:
        import asyncio

        try:
            # NOTE(review): get_event_loop() with no running loop is
            # deprecated on 3.10+ and may raise — the RuntimeError
            # handler below covers that path; confirm target versions.
            loop = asyncio.get_event_loop()
            if loop and not loop.is_closed():
                # Cancel pending tasks so they don't linger half-run
                pending = asyncio.all_tasks(loop)
                for task in pending:
                    task.cancel()

                # Stop and close loop
                if loop.is_running():
                    loop.stop()

                # Detach before closing so nothing grabs it again
                asyncio.set_event_loop(None)
                loop.close()

        except RuntimeError:
            # No event loop in this thread — nothing to clean up
            pass

    except Exception as e:
        self.logger.debug(f"Error cleaning up event loops: {e}")
|
644
|
+
|
645
|
+
def _redirect_streams(self):
    """Redirect standard streams for daemon mode.

    stdin is pointed at /dev/null; stdout and stderr are appended to
    the daemon log file. dup2() duplicates each descriptor onto the
    standard fd numbers, so the standard fds remain valid after the
    temporary file objects are closed when the with-blocks exit.
    """
    try:
        # Flush pending Python-level buffers before touching raw fds
        sys.stdout.flush()
        sys.stderr.flush()

        # Redirect stdin to /dev/null
        with open("/dev/null") as null_in:
            os.dup2(null_in.fileno(), sys.stdin.fileno())

        # Redirect stdout and stderr to log file (append mode so
        # restarts don't clobber earlier output)
        self.log_file.parent.mkdir(parents=True, exist_ok=True)
        with open(self.log_file, "a") as log_out:
            os.dup2(log_out.fileno(), sys.stdout.fileno())
            os.dup2(log_out.fileno(), sys.stderr.fileno())

    except Exception as e:
        self.logger.error(f"Error redirecting streams: {e}")
|
663
|
+
|
664
|
+
def _setup_signal_handlers(self):
|
665
|
+
"""Setup signal handlers for graceful shutdown."""
|
666
|
+
|
667
|
+
def signal_handler(signum, frame):
|
668
|
+
self.logger.info(f"Received signal {signum}, shutting down")
|
669
|
+
self.cleanup_pid_file()
|
670
|
+
sys.exit(0)
|
671
|
+
|
672
|
+
signal.signal(signal.SIGTERM, signal_handler)
|
673
|
+
signal.signal(signal.SIGINT, signal_handler)
|
674
|
+
|
675
|
+
def _parent_wait_for_startup(self, child_pid: int, timeout: float = 10.0) -> bool:
|
676
|
+
"""Parent process waits for child to confirm startup.
|
677
|
+
|
678
|
+
Args:
|
679
|
+
child_pid: PID of child process
|
680
|
+
timeout: Maximum time to wait
|
681
|
+
|
682
|
+
Returns:
|
683
|
+
True if child started successfully
|
684
|
+
"""
|
685
|
+
try:
|
686
|
+
start_time = time.time()
|
687
|
+
|
688
|
+
while time.time() - start_time < timeout:
|
689
|
+
if (
|
690
|
+
not self.startup_status_file
|
691
|
+
or not Path(self.startup_status_file).exists()
|
692
|
+
):
|
693
|
+
time.sleep(0.1)
|
694
|
+
continue
|
695
|
+
|
696
|
+
try:
|
697
|
+
with open(self.startup_status_file) as f:
|
698
|
+
status = f.read().strip()
|
699
|
+
|
700
|
+
if status == "success":
|
701
|
+
# Cleanup status file
|
702
|
+
Path(self.startup_status_file).unlink(missing_ok=True)
|
703
|
+
return True
|
704
|
+
|
705
|
+
if status.startswith("error:"):
|
706
|
+
error_msg = status[6:]
|
707
|
+
self.logger.error(f"Daemon startup failed: {error_msg}")
|
708
|
+
Path(self.startup_status_file).unlink(missing_ok=True)
|
709
|
+
return False
|
710
|
+
|
711
|
+
except Exception:
|
712
|
+
pass
|
713
|
+
|
714
|
+
time.sleep(0.1)
|
715
|
+
|
716
|
+
self.logger.error("Daemon startup timed out")
|
717
|
+
return False
|
718
|
+
|
719
|
+
except Exception as e:
|
720
|
+
self.logger.error(f"Error waiting for daemon startup: {e}")
|
721
|
+
return False
|
722
|
+
|
723
|
+
def _report_startup_success(self):
|
724
|
+
"""Report successful startup to parent process."""
|
725
|
+
if self.startup_status_file and Path(self.startup_status_file).exists():
|
726
|
+
try:
|
727
|
+
with open(self.startup_status_file, "w") as f:
|
728
|
+
f.write("success")
|
729
|
+
except Exception as e:
|
730
|
+
self.logger.error(f"Error reporting startup success: {e}")
|
731
|
+
|
732
|
+
def _report_startup_error(self, error: str):
|
733
|
+
"""Report startup error to parent process."""
|
734
|
+
if self.startup_status_file and Path(self.startup_status_file).exists():
|
735
|
+
try:
|
736
|
+
with open(self.startup_status_file, "w") as f:
|
737
|
+
f.write(f"error:{error}")
|
738
|
+
except Exception as e:
|
739
|
+
self.logger.error(f"Error reporting startup error: {e}")
|
@@ -1,5 +1,5 @@
|
|
1
1
|
claude_mpm/BUILD_NUMBER,sha256=toytnNjkIKPgQaGwDqQdC1rpNTAdSEc6Vja50d7Ovug,4
|
2
|
-
claude_mpm/VERSION,sha256=
|
2
|
+
claude_mpm/VERSION,sha256=qzV1UP-rT2LC7PFssERpzaTJWlcarFF9vZy-peH0sho,7
|
3
3
|
claude_mpm/__init__.py,sha256=lyTZAYGH4DTaFGLRNWJKk5Q5oTjzN5I6AXmfVX-Jff0,1512
|
4
4
|
claude_mpm/__main__.py,sha256=Ro5UBWBoQaSAIoSqWAr7zkbLyvi4sSy28WShqAhKJG0,723
|
5
5
|
claude_mpm/constants.py,sha256=I946iCQzIIPRZVVJ8aO7lA4euiyDnNw2IX7EelAOkIE,5915
|
@@ -436,7 +436,7 @@ claude_mpm/services/cli/memory_crud_service.py,sha256=ciN9Pl_12iDAqF9zPBWOzu-iXi
|
|
436
436
|
claude_mpm/services/cli/memory_output_formatter.py,sha256=nbf7VsjGvH4e9fLv9c7PzjuO9COZhbK5P2fNZ79055w,24783
|
437
437
|
claude_mpm/services/cli/session_manager.py,sha256=rla_Stbcvt93wa9G9MCMu9UqB3FLGqlPt_eN5lQb3Gg,16599
|
438
438
|
claude_mpm/services/cli/startup_checker.py,sha256=efhuvu8ns5G16jcQ0nQZKVddmD2AktUEdlvjNcXjAuk,12232
|
439
|
-
claude_mpm/services/cli/unified_dashboard_manager.py,sha256=
|
439
|
+
claude_mpm/services/cli/unified_dashboard_manager.py,sha256=YXb3hbyukTk-yycqLAIUTYe54lt0GFI5OyH6VrLL_RI,15449
|
440
440
|
claude_mpm/services/communication/__init__.py,sha256=b4qc7_Rqy4DE9q7BAUlfUZjoYG4uimAyUnE0irPcXyU,560
|
441
441
|
claude_mpm/services/core/__init__.py,sha256=evEayLlBqJvxMZhrhuK6aagXmNrKGSj8Jm9OOxKzqvU,2195
|
442
442
|
claude_mpm/services/core/base.py,sha256=iA-F7DgGp-FJIMvQTiHQ68RkG_k-AtUWlArJPMw6ZPk,7297
|
@@ -552,7 +552,8 @@ claude_mpm/services/memory/cache/__init__.py,sha256=6M6-P8ParyxX8vOgp_IxHgLMvacr
|
|
552
552
|
claude_mpm/services/memory/cache/shared_prompt_cache.py,sha256=crnYPUT8zcS7TvoE1vW7pyaf4T77N5rJ1wUf_YQ2vvo,28704
|
553
553
|
claude_mpm/services/memory/cache/simple_cache.py,sha256=qsTjbcsPxj-kNfaod9VN_uE5NioIwpfkUin_mMVUJCg,10218
|
554
554
|
claude_mpm/services/monitor/__init__.py,sha256=X7gxSLUm9Fg_zEsX6LtCHP2ipF0qj6Emkun20h2So7g,745
|
555
|
-
claude_mpm/services/monitor/daemon.py,sha256=
|
555
|
+
claude_mpm/services/monitor/daemon.py,sha256=nkB_xslT4yxIiSVf2u6nGm56rYpkit0WDj4YPWr-osM,22961
|
556
|
+
claude_mpm/services/monitor/daemon_manager.py,sha256=6ZYXgRhwurnPDXxFgk9msLoa7x7ccE64m93FwqWVJfs,24519
|
556
557
|
claude_mpm/services/monitor/event_emitter.py,sha256=JzRLNg8PUJ5s3ulNnq_D4yqCPItvidJzu8DmFxriieQ,12224
|
557
558
|
claude_mpm/services/monitor/server.py,sha256=m98Eyv9caxRywJ4JtAdOuv5EB__z7vd2hYRZPwcqFLg,28498
|
558
559
|
claude_mpm/services/monitor/handlers/__init__.py,sha256=jgPIf4IJVERm_tAeD9834tfx9IcxtlHj5r9rhEWpkfM,701
|
@@ -643,9 +644,9 @@ claude_mpm/utils/subprocess_utils.py,sha256=zgiwLqh_17WxHpySvUPH65pb4bzIeUGOAYUJ
|
|
643
644
|
claude_mpm/validation/__init__.py,sha256=YZhwE3mhit-lslvRLuwfX82xJ_k4haZeKmh4IWaVwtk,156
|
644
645
|
claude_mpm/validation/agent_validator.py,sha256=3Lo6LK-Mw9IdnL_bd3zl_R6FkgSVDYKUUM7EeVVD3jc,20865
|
645
646
|
claude_mpm/validation/frontmatter_validator.py,sha256=u8g4Eyd_9O6ugj7Un47oSGh3kqv4wMkuks2i_CtWRvM,7028
|
646
|
-
claude_mpm-4.2.
|
647
|
-
claude_mpm-4.2.
|
648
|
-
claude_mpm-4.2.
|
649
|
-
claude_mpm-4.2.
|
650
|
-
claude_mpm-4.2.
|
651
|
-
claude_mpm-4.2.
|
647
|
+
claude_mpm-4.2.33.dist-info/licenses/LICENSE,sha256=lpaivOlPuBZW1ds05uQLJJswy8Rp_HMNieJEbFlqvLk,1072
|
648
|
+
claude_mpm-4.2.33.dist-info/METADATA,sha256=kb4EFalJKRp2DXRUfxF2WxmaTes1t6z7NFrJ8ZE6434,14451
|
649
|
+
claude_mpm-4.2.33.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
650
|
+
claude_mpm-4.2.33.dist-info/entry_points.txt,sha256=FDPZgz8JOvD-6iuXY2l9Zbo9zYVRuE4uz4Qr0vLeGOk,471
|
651
|
+
claude_mpm-4.2.33.dist-info/top_level.txt,sha256=1nUg3FEaBySgm8t-s54jK5zoPnu3_eY6EP6IOlekyHA,11
|
652
|
+
claude_mpm-4.2.33.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|