claude-mpm 4.2.21__py3-none-any.whl → 4.2.23__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
claude_mpm/VERSION CHANGED
@@ -1 +1 @@
1
- 4.2.21
1
+ 4.2.23
@@ -91,8 +91,11 @@ class MonitorCommand(BaseCommand):
91
91
  host=host, port=port, daemon_mode=daemon_mode
92
92
  )
93
93
 
94
+ # Get force restart flag
95
+ force_restart = getattr(args, "force", False)
96
+
94
97
  # Check if already running
95
- if self.daemon.lifecycle.is_running():
98
+ if self.daemon.lifecycle.is_running() and not force_restart:
96
99
  existing_pid = self.daemon.lifecycle.get_pid()
97
100
  return CommandResult.success_result(
98
101
  f"Unified monitor daemon already running with PID {existing_pid}",
@@ -103,14 +106,46 @@ class MonitorCommand(BaseCommand):
103
106
  },
104
107
  )
105
108
 
106
- # Start the daemon
107
- if self.daemon.start():
108
- mode_info = " in background" if daemon_mode else " in foreground"
109
+ # Start the daemon (with force restart if specified)
110
+ if self.daemon.start(force_restart=force_restart):
111
+ # For daemon mode, verify it actually started
112
+ if daemon_mode:
113
+ # Give it a moment to fully initialize
114
+ import time
115
+ time.sleep(0.5)
116
+
117
+ # Check if it's actually running
118
+ if not self.daemon.lifecycle.is_running():
119
+ return CommandResult.error_result(
120
+ "Monitor daemon failed to start. Check ~/.claude-mpm/monitor-daemon.log for details."
121
+ )
122
+
123
+ # Get the actual PID
124
+ actual_pid = self.daemon.lifecycle.get_pid()
125
+ mode_info = f" in background (PID: {actual_pid})"
126
+ else:
127
+ mode_info = " in foreground"
128
+
109
129
  return CommandResult.success_result(
110
130
  f"Unified monitor daemon started on {host}:{port}{mode_info}",
111
131
  data={"url": f"http://{host}:{port}", "port": port, "mode": mode_str},
112
132
  )
113
- return CommandResult.error_result("Failed to start unified monitor daemon")
133
+
134
+ # Check if error was due to port already in use
135
+ import socket
136
+ try:
137
+ test_sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
138
+ test_sock.connect((host, port))
139
+ test_sock.close()
140
+ return CommandResult.error_result(
141
+ f"Port {port} is already in use. Try 'claude-mpm monitor stop' first or use a different port."
142
+ )
143
+ except:
144
+ pass
145
+
146
+ return CommandResult.error_result(
147
+ "Failed to start unified monitor daemon. Check ~/.claude-mpm/monitor-daemon.log for details."
148
+ )
114
149
 
115
150
  def _stop_monitor(self, args) -> CommandResult:
116
151
  """Stop the unified monitor daemon."""
@@ -75,7 +75,8 @@ class UnifiedDashboardManager(IUnifiedDashboardManager):
75
75
  self._lock = threading.Lock()
76
76
 
77
77
  def start_dashboard(
78
- self, port: int = 8765, background: bool = False, open_browser: bool = True
78
+ self, port: int = 8765, background: bool = False, open_browser: bool = True,
79
+ force_restart: bool = False
79
80
  ) -> Tuple[bool, bool]:
80
81
  """
81
82
  Start the dashboard using unified daemon.
@@ -84,30 +85,41 @@ class UnifiedDashboardManager(IUnifiedDashboardManager):
84
85
  port: Port to run dashboard on
85
86
  background: Whether to run in background mode
86
87
  open_browser: Whether to open browser automatically
88
+ force_restart: If True, restart existing service if it's ours
87
89
 
88
90
  Returns:
89
91
  Tuple of (success, browser_opened)
90
92
  """
91
93
  try:
92
- # Check if already running
93
- if self.is_dashboard_running(port):
94
- self.logger.info(f"Dashboard already running on port {port}")
94
+ # Create daemon instance to check service status
95
+ daemon = UnifiedMonitorDaemon(
96
+ host="localhost", port=port, daemon_mode=background
97
+ )
98
+
99
+ # Check if it's our service running
100
+ is_ours, pid = daemon.lifecycle.is_our_service("localhost")
101
+
102
+ if is_ours and not force_restart:
103
+ # Our service is already running, just open browser if needed
104
+ self.logger.info(f"Our dashboard already running on port {port} (PID: {pid})")
95
105
  browser_opened = False
96
106
  if open_browser:
97
107
  browser_opened = self.open_browser(self.get_dashboard_url(port))
98
108
  return True, browser_opened
109
+ elif is_ours and force_restart:
110
+ self.logger.info(f"Force restarting our dashboard on port {port} (PID: {pid})")
111
+ elif self.is_dashboard_running(port) and not force_restart:
112
+ # Different service is using the port
113
+ self.logger.warning(f"Port {port} is in use by a different service")
114
+ return False, False
99
115
 
100
116
  self.logger.info(
101
- f"Starting unified dashboard on port {port} (background: {background})"
117
+ f"Starting unified dashboard on port {port} (background: {background}, force_restart: {force_restart})"
102
118
  )
103
119
 
104
120
  if background:
105
- # Start daemon in background mode
106
- daemon = UnifiedMonitorDaemon(
107
- host="localhost", port=port, daemon_mode=True
108
- )
109
-
110
- success = daemon.start()
121
+ # Start daemon in background mode with force restart if needed
122
+ success = daemon.start(force_restart=force_restart)
111
123
  if success:
112
124
  with self._lock:
113
125
  self._background_daemons[port] = daemon
@@ -293,7 +305,7 @@ class UnifiedDashboardManager(IUnifiedDashboardManager):
293
305
  return self.port_manager.find_available_port(preferred_port)
294
306
 
295
307
  def start_server(
296
- self, port: Optional[int] = None, timeout: int = 30
308
+ self, port: Optional[int] = None, timeout: int = 30, force_restart: bool = True
297
309
  ) -> Tuple[bool, DashboardInfo]:
298
310
  """
299
311
  Start the server (compatibility method for SocketIOManager interface).
@@ -301,6 +313,7 @@ class UnifiedDashboardManager(IUnifiedDashboardManager):
301
313
  Args:
302
314
  port: Port to use (finds available if None)
303
315
  timeout: Timeout for startup
316
+ force_restart: If True, restart existing service if it's ours
304
317
 
305
318
  Returns:
306
319
  Tuple of (success, DashboardInfo)
@@ -308,8 +321,9 @@ class UnifiedDashboardManager(IUnifiedDashboardManager):
308
321
  if port is None:
309
322
  port = self.find_available_port()
310
323
 
324
+ # Use force_restart to ensure we're using the latest code
311
325
  success, browser_opened = self.start_dashboard(
312
- port=port, background=True, open_browser=False
326
+ port=port, background=True, open_browser=False, force_restart=force_restart
313
327
  )
314
328
 
315
329
  if success:
@@ -57,9 +57,11 @@ class UnifiedMonitorDaemon:
57
57
  self.daemon_mode = daemon_mode
58
58
  self.logger = get_logger(__name__)
59
59
 
60
- # Daemon management
60
+ # Daemon management with port for verification
61
61
  self.lifecycle = DaemonLifecycle(
62
- pid_file=pid_file or self._get_default_pid_file(), log_file=log_file
62
+ pid_file=pid_file or self._get_default_pid_file(),
63
+ log_file=log_file,
64
+ port=port
63
65
  )
64
66
 
65
67
  # Core server
@@ -79,28 +81,82 @@ class UnifiedMonitorDaemon:
79
81
  claude_mpm_dir.mkdir(exist_ok=True)
80
82
  return str(claude_mpm_dir / "monitor-daemon.pid")
81
83
 
82
- def start(self) -> bool:
84
+ def start(self, force_restart: bool = False) -> bool:
83
85
  """Start the unified monitor daemon.
84
86
 
87
+ Args:
88
+ force_restart: If True, restart existing service if it's ours
89
+
85
90
  Returns:
86
91
  True if started successfully, False otherwise
87
92
  """
88
93
  try:
89
94
  if self.daemon_mode:
90
- return self._start_daemon()
91
- return self._start_foreground()
95
+ return self._start_daemon(force_restart=force_restart)
96
+ return self._start_foreground(force_restart=force_restart)
92
97
  except Exception as e:
93
98
  self.logger.error(f"Failed to start unified monitor daemon: {e}")
94
99
  return False
95
100
 
96
- def _start_daemon(self) -> bool:
97
- """Start as background daemon process."""
101
+ def _start_daemon(self, force_restart: bool = False) -> bool:
102
+ """Start as background daemon process.
103
+
104
+ Args:
105
+ force_restart: If True, restart existing service if it's ours
106
+ """
98
107
  self.logger.info("Starting unified monitor daemon in background mode")
99
108
 
100
109
  # Check if already running
101
110
  if self.lifecycle.is_running():
102
111
  existing_pid = self.lifecycle.get_pid()
103
- self.logger.warning(f"Daemon already running with PID {existing_pid}")
112
+
113
+ if force_restart:
114
+ # Check if it's our service
115
+ is_ours, pid = self.lifecycle.is_our_service(self.host)
116
+ if is_ours:
117
+ self.logger.info(f"Force restarting our existing daemon (PID: {pid or existing_pid})")
118
+ # Stop the existing daemon
119
+ if self.lifecycle.stop_daemon():
120
+ # Wait a moment for port to be released
121
+ time.sleep(2)
122
+ else:
123
+ self.logger.error("Failed to stop existing daemon for restart")
124
+ return False
125
+ else:
126
+ self.logger.warning(f"Daemon already running with PID {existing_pid}, but it's not our service")
127
+ return False
128
+ else:
129
+ self.logger.warning(f"Daemon already running with PID {existing_pid}")
130
+ return False
131
+
132
+ # Check for orphaned processes (service running but no PID file)
133
+ elif force_restart:
134
+ is_ours, pid = self.lifecycle.is_our_service(self.host)
135
+ if is_ours and pid:
136
+ self.logger.info(f"Found orphaned claude-mpm service (PID: {pid}), force restarting")
137
+ # Try to kill the orphaned process
138
+ try:
139
+ os.kill(pid, signal.SIGTERM)
140
+ # Wait for it to exit
141
+ for _ in range(10):
142
+ try:
143
+ os.kill(pid, 0) # Check if still exists
144
+ time.sleep(0.5)
145
+ except ProcessLookupError:
146
+ break
147
+ else:
148
+ # Force kill if still running
149
+ os.kill(pid, signal.SIGKILL)
150
+ time.sleep(1)
151
+ except Exception as e:
152
+ self.logger.error(f"Failed to kill orphaned process: {e}")
153
+ return False
154
+
155
+ # Verify port is available before forking
156
+ port_available, error_msg = self.lifecycle.verify_port_available(self.host)
157
+ if not port_available:
158
+ self.logger.error(error_msg)
159
+ print(f"Error: {error_msg}", file=sys.stderr)
104
160
  return False
105
161
 
106
162
  # Wait for any pre-warming threads to complete before forking
@@ -112,19 +168,73 @@ class UnifiedMonitorDaemon:
112
168
  return False
113
169
 
114
170
  # Start the server in daemon mode
115
- return self._run_server()
171
+ # This will run in the child process
172
+ try:
173
+ result = self._run_server()
174
+ if not result:
175
+ # Report failure before exiting
176
+ self.lifecycle._report_startup_error("Failed to start server")
177
+ return result
178
+ except Exception as e:
179
+ # Report any exceptions during startup
180
+ self.lifecycle._report_startup_error(f"Server startup exception: {e}")
181
+ raise
116
182
 
117
- def _start_foreground(self) -> bool:
118
- """Start in foreground mode."""
183
+ def _start_foreground(self, force_restart: bool = False) -> bool:
184
+ """Start in foreground mode.
185
+
186
+ Args:
187
+ force_restart: If True, restart existing service if it's ours
188
+ """
119
189
  self.logger.info(f"Starting unified monitor daemon on {self.host}:{self.port}")
120
190
 
121
191
  # Check if already running (check PID file even in foreground mode)
122
192
  if self.lifecycle.is_running():
123
193
  existing_pid = self.lifecycle.get_pid()
124
- self.logger.warning(
125
- f"Monitor daemon already running with PID {existing_pid}"
126
- )
127
- return False
194
+
195
+ if force_restart:
196
+ # Check if it's our service
197
+ is_ours, pid = self.lifecycle.is_our_service(self.host)
198
+ if is_ours:
199
+ self.logger.info(f"Force restarting our existing daemon (PID: {pid or existing_pid})")
200
+ # Stop the existing daemon
201
+ if self.lifecycle.stop_daemon():
202
+ # Wait a moment for port to be released
203
+ time.sleep(2)
204
+ else:
205
+ self.logger.error("Failed to stop existing daemon for restart")
206
+ return False
207
+ else:
208
+ self.logger.warning(f"Monitor daemon already running with PID {existing_pid}, but it's not our service")
209
+ return False
210
+ else:
211
+ self.logger.warning(
212
+ f"Monitor daemon already running with PID {existing_pid}"
213
+ )
214
+ return False
215
+
216
+ # Check for orphaned processes (service running but no PID file)
217
+ elif force_restart:
218
+ is_ours, pid = self.lifecycle.is_our_service(self.host)
219
+ if is_ours and pid:
220
+ self.logger.info(f"Found orphaned claude-mpm service (PID: {pid}), force restarting")
221
+ # Try to kill the orphaned process
222
+ try:
223
+ os.kill(pid, signal.SIGTERM)
224
+ # Wait for it to exit
225
+ for _ in range(10):
226
+ try:
227
+ os.kill(pid, 0) # Check if still exists
228
+ time.sleep(0.5)
229
+ except ProcessLookupError:
230
+ break
231
+ else:
232
+ # Force kill if still running
233
+ os.kill(pid, signal.SIGKILL)
234
+ time.sleep(1)
235
+ except Exception as e:
236
+ self.logger.error(f"Failed to kill orphaned process: {e}")
237
+ return False
128
238
 
129
239
  # Setup signal handlers for graceful shutdown
130
240
  self._setup_signal_handlers()
@@ -140,11 +250,17 @@ class UnifiedMonitorDaemon:
140
250
  try:
141
251
  # Ensure components exist before starting
142
252
  if not self.health_monitor:
143
- self.logger.error("Health monitor not initialized")
253
+ error_msg = "Health monitor not initialized"
254
+ self.logger.error(error_msg)
255
+ if self.daemon_mode:
256
+ self.lifecycle._report_startup_error(error_msg)
144
257
  return False
145
258
 
146
259
  if not self.server:
147
- self.logger.error("Server not initialized")
260
+ error_msg = "Server not initialized"
261
+ self.logger.error(error_msg)
262
+ if self.daemon_mode:
263
+ self.lifecycle._report_startup_error(error_msg)
148
264
  return False
149
265
 
150
266
  # Start health monitoring
@@ -153,11 +269,18 @@ class UnifiedMonitorDaemon:
153
269
  # Start the unified server
154
270
  success = self.server.start()
155
271
  if not success:
156
- self.logger.error("Failed to start unified monitor server")
272
+ error_msg = "Failed to start unified monitor server"
273
+ self.logger.error(error_msg)
274
+ if self.daemon_mode:
275
+ self.lifecycle._report_startup_error(error_msg)
157
276
  return False
158
277
 
159
278
  self.running = True
160
279
  self.logger.info("Unified monitor daemon started successfully")
280
+
281
+ # Report successful startup to parent (for daemon mode)
282
+ if self.daemon_mode:
283
+ self.lifecycle._report_startup_success()
161
284
 
162
285
  # Keep running until shutdown
163
286
  if self.daemon_mode:
@@ -15,10 +15,13 @@ DESIGN DECISIONS:
15
15
 
16
16
  import os
17
17
  import signal
18
+ import socket
18
19
  import sys
20
+ import tempfile
19
21
  import time
20
22
  from pathlib import Path
21
- from typing import Optional
23
+ from typing import Optional, Tuple
24
+ import json
22
25
 
23
26
  from ....core.logging_config import get_logger
24
27
 
@@ -30,16 +33,20 @@ class DaemonLifecycle:
30
33
  handling, and graceful shutdown capabilities.
31
34
  """
32
35
 
33
- def __init__(self, pid_file: str, log_file: Optional[str] = None):
36
+ def __init__(self, pid_file: str, log_file: Optional[str] = None, port: int = 8765):
34
37
  """Initialize daemon lifecycle manager.
35
38
 
36
39
  Args:
37
40
  pid_file: Path to PID file
38
41
  log_file: Path to log file for daemon mode
42
+ port: Port number for startup verification
39
43
  """
40
44
  self.pid_file = Path(pid_file)
41
45
  self.log_file = Path(log_file) if log_file else None
46
+ self.port = port
42
47
  self.logger = get_logger(__name__)
48
+ # Create a temporary file for startup status communication
49
+ self.startup_status_file = None
43
50
 
44
51
  def daemonize(self) -> bool:
45
52
  """Daemonize the current process.
@@ -50,14 +57,20 @@ class DaemonLifecycle:
50
57
  try:
51
58
  # Clean up any existing asyncio event loops before forking
52
59
  self._cleanup_event_loops()
60
+
61
+ # Create a temporary file for startup status communication
62
+ with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.status') as f:
63
+ self.startup_status_file = f.name
64
+ f.write("starting")
53
65
 
54
66
  # First fork
55
67
  pid = os.fork()
56
68
  if pid > 0:
57
- # Parent process exits
58
- sys.exit(0)
69
+ # Parent process - wait for child to confirm startup
70
+ return self._parent_wait_for_startup(pid)
59
71
  except OSError as e:
60
72
  self.logger.error(f"First fork failed: {e}")
73
+ self._report_startup_error(f"First fork failed: {e}")
61
74
  return False
62
75
 
63
76
  # Decouple from parent environment
@@ -69,22 +82,33 @@ class DaemonLifecycle:
69
82
  # Second fork
70
83
  pid = os.fork()
71
84
  if pid > 0:
72
- # Parent process exits
85
+ # First child process exits
73
86
  sys.exit(0)
74
87
  except OSError as e:
75
88
  self.logger.error(f"Second fork failed: {e}")
89
+ self._report_startup_error(f"Second fork failed: {e}")
90
+ return False
91
+
92
+ # Set up error logging before redirecting streams
93
+ self._setup_early_error_logging()
94
+
95
+ # Write PID file first (before stream redirection)
96
+ try:
97
+ self.write_pid_file()
98
+ except Exception as e:
99
+ self._report_startup_error(f"Failed to write PID file: {e}")
76
100
  return False
77
101
 
78
102
  # Redirect standard file descriptors
79
103
  self._redirect_streams()
80
104
 
81
- # Write PID file
82
- self.write_pid_file()
83
-
84
105
  # Setup signal handlers
85
106
  self._setup_signal_handlers()
86
107
 
87
108
  self.logger.info(f"Daemon process started with PID {os.getpid()}")
109
+
110
+ # Report successful startup (after basic setup but before server start)
111
+ self._report_startup_success()
88
112
  return True
89
113
 
90
114
  def _redirect_streams(self):
@@ -105,13 +129,16 @@ class DaemonLifecycle:
105
129
  os.dup2(log_out.fileno(), sys.stdout.fileno())
106
130
  os.dup2(log_out.fileno(), sys.stderr.fileno())
107
131
  else:
108
- # Redirect to /dev/null
109
- with open("/dev/null", "w") as null_out:
110
- os.dup2(null_out.fileno(), sys.stdout.fileno())
111
- os.dup2(null_out.fileno(), sys.stderr.fileno())
132
+ # Default to a daemon log file instead of /dev/null for errors
133
+ default_log = Path.home() / ".claude-mpm" / "monitor-daemon.log"
134
+ default_log.parent.mkdir(parents=True, exist_ok=True)
135
+ with open(default_log, "a") as log_out:
136
+ os.dup2(log_out.fileno(), sys.stdout.fileno())
137
+ os.dup2(log_out.fileno(), sys.stderr.fileno())
112
138
 
113
139
  except Exception as e:
114
140
  self.logger.error(f"Error redirecting streams: {e}")
141
+ self._report_startup_error(f"Failed to redirect streams: {e}")
115
142
 
116
143
  def write_pid_file(self):
117
144
  """Write PID to PID file."""
@@ -336,3 +363,266 @@ class DaemonLifecycle:
336
363
  self.logger.debug(f"Error getting process info: {e}")
337
364
 
338
365
  return status
366
+
367
+ def _parent_wait_for_startup(self, child_pid: int, timeout: float = 10.0) -> bool:
368
+ """Parent process waits for child daemon to report startup status.
369
+
370
+ Args:
371
+ child_pid: PID of the child process
372
+ timeout: Maximum time to wait for startup
373
+
374
+ Returns:
375
+ True if child started successfully, False otherwise
376
+ """
377
+ import time
378
+ start_time = time.time()
379
+
380
+ # Wait for child to update status file
381
+ while time.time() - start_time < timeout:
382
+ try:
383
+ # Check if status file exists and read it
384
+ if self.startup_status_file and Path(self.startup_status_file).exists():
385
+ with open(self.startup_status_file, 'r') as f:
386
+ status = f.read().strip()
387
+
388
+ if status == "success":
389
+ # Child started successfully
390
+ self._cleanup_status_file()
391
+ return True
392
+ elif status.startswith("error:"):
393
+ # Child reported an error
394
+ error_msg = status[6:] # Remove "error:" prefix
395
+ self.logger.error(f"Daemon startup failed: {error_msg}")
396
+ print(f"Error: Failed to start monitor daemon: {error_msg}", file=sys.stderr)
397
+ self._cleanup_status_file()
398
+ return False
399
+ elif status == "starting":
400
+ # Still starting, continue waiting
401
+ pass
402
+
403
+ # Also check if child process is still alive
404
+ try:
405
+ os.kill(child_pid, 0) # Check if process exists
406
+ except ProcessLookupError:
407
+ # Child process died
408
+ self.logger.error("Child daemon process died during startup")
409
+ print("Error: Monitor daemon process died during startup", file=sys.stderr)
410
+ self._cleanup_status_file()
411
+ return False
412
+
413
+ except Exception as e:
414
+ self.logger.debug(f"Error checking startup status: {e}")
415
+
416
+ time.sleep(0.1) # Check every 100ms
417
+
418
+ # Timeout reached
419
+ self.logger.error(f"Daemon startup timed out after {timeout} seconds")
420
+ print(f"Error: Monitor daemon startup timed out after {timeout} seconds", file=sys.stderr)
421
+ self._cleanup_status_file()
422
+ return False
423
+
424
+ def _report_startup_success(self):
425
+ """Report successful startup to parent process."""
426
+ if self.startup_status_file:
427
+ try:
428
+ with open(self.startup_status_file, 'w') as f:
429
+ f.write("success")
430
+ except Exception as e:
431
+ self.logger.error(f"Failed to report startup success: {e}")
432
+
433
+ def _report_startup_error(self, error_msg: str):
434
+ """Report startup error to parent process.
435
+
436
+ Args:
437
+ error_msg: Error message to report
438
+ """
439
+ if self.startup_status_file:
440
+ try:
441
+ with open(self.startup_status_file, 'w') as f:
442
+ f.write(f"error:{error_msg}")
443
+ except Exception:
444
+ pass # Can't report if file write fails
445
+
446
+ def _cleanup_status_file(self):
447
+ """Clean up the temporary status file."""
448
+ if self.startup_status_file:
449
+ try:
450
+ Path(self.startup_status_file).unlink(missing_ok=True)
451
+ except Exception:
452
+ pass # Ignore cleanup errors
453
+ finally:
454
+ self.startup_status_file = None
455
+
456
+ def _setup_early_error_logging(self):
457
+ """Set up error logging before stream redirection.
458
+
459
+ This ensures we can capture and report errors that occur during
460
+ daemon initialization, especially port binding errors.
461
+ """
462
+ try:
463
+ # If no log file specified, create a default one
464
+ if not self.log_file:
465
+ default_log = Path.home() / ".claude-mpm" / "monitor-daemon.log"
466
+ default_log.parent.mkdir(parents=True, exist_ok=True)
467
+ self.log_file = default_log
468
+
469
+ # Configure logger to write to file immediately
470
+ import logging
471
+ file_handler = logging.FileHandler(self.log_file)
472
+ file_handler.setLevel(logging.DEBUG)
473
+ formatter = logging.Formatter(
474
+ '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
475
+ )
476
+ file_handler.setFormatter(formatter)
477
+ self.logger.addHandler(file_handler)
478
+
479
+ except Exception as e:
480
+ # If we can't set up logging, at least try to report the error
481
+ self._report_startup_error(f"Failed to setup error logging: {e}")
482
+
483
def verify_port_available(self, host: str = "localhost") -> Tuple[bool, Optional[str]]:
    """Verify that ``self.port`` can be bound on the given host.

    A throwaway TCP socket is bound to ``(host, self.port)`` and closed
    again. The socket is managed by a ``with`` block so it is released even
    when ``bind()`` fails.

    Args:
        host: Host to check port on

    Returns:
        Tuple of (is_available, error_message); error_message is None when
        the port could be bound.
    """
    try:
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as probe:
            probe.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
            probe.bind((host, self.port))
    except OSError as e:
        error_msg = f"Port {self.port} is already in use or cannot be bound: {e}"
        return False, error_msg
    return True, None
501
+
502
def is_our_service(self, host: str = "localhost") -> Tuple[bool, Optional[int]]:
    """Check if the service on ``self.port`` is our Socket.IO monitor service.

    Detection methods, tried in order:
    1. GET ``/health`` and look for the ``claude-mpm-monitor`` signature.
    2. Check that the PID from the PID file exists and, when psutil can be
       imported, that the process has a connection on our port.
    3. Connect with a Socket.IO client and, if that succeeds, look for a
       Python process listening on the port (needs psutil).

    Args:
        host: Host to check

    Returns:
        Tuple of (is_ours, pid_if_found)
    """
    try:
        # Method 1: Check health endpoint
        import http.client
        import urllib.error
        import urllib.request

        health_url = f"http://{host}:{self.port}/health"
        try:
            with urllib.request.urlopen(health_url, timeout=2) as response:
                if response.status == 200:
                    data = json.loads(response.read().decode())
                    # Check for our service signature
                    if isinstance(data, dict) and data.get("service") == "claude-mpm-monitor":
                        reported_pid = data.get("pid")
                        # The PID comes from whatever answers on the port and
                        # callers may pass it to os.kill(); zero/negative
                        # values address process groups, so only a positive
                        # integer is trusted.
                        if (
                            isinstance(reported_pid, int)
                            and not isinstance(reported_pid, bool)
                            and reported_pid > 0
                        ):
                            self.logger.debug(f"Found our service via health endpoint, PID: {reported_pid}")
                            return True, reported_pid

                        # Service is ours but no usable PID in response;
                        # fall back to the PID file.
                        file_pid = self.get_pid()
                        self.logger.debug(f"Found our service via health endpoint, PID from file: {file_pid}")
                        return True, file_pid
        except (OSError, http.client.HTTPException, ValueError):
            # Health endpoint not accessible or invalid response. OSError
            # covers URLError/HTTPError, timeouts and connection resets;
            # ValueError covers JSON and text decoding errors. Falling
            # through keeps the remaining detection methods running.
            pass

        # Method 2: Check if PID file exists and process matches
        pid = self.get_pid()
        if pid:
            try:
                # Check if process exists (signal 0 only probes)
                os.kill(pid, 0)

                # Process exists, check if it's using our port
                # This requires psutil for accurate port checking
                try:
                    import psutil

                    process = psutil.Process(pid)
                    for conn in process.connections():
                        if conn.laddr.port == self.port:
                            self.logger.debug(f"Found our service via PID file, PID: {pid}")
                            return True, pid
                except ImportError:
                    # psutil not available, assume it's ours if PID matches
                    self.logger.debug(f"Found process with our PID file: {pid}, assuming it's ours")
                    return True, pid
                except Exception:
                    pass

            except (OSError, ProcessLookupError):
                # Process doesn't exist
                pass

        # Method 3: Try Socket.IO connection to check namespace
        try:
            import socketio

            sio_client = socketio.Client()

            # Try to connect with a short timeout
            connected = False

            def on_connect():
                nonlocal connected
                connected = True

            sio_client.on('connect', on_connect)

            try:
                sio_client.connect(f'http://{host}:{self.port}', wait_timeout=2)
                if connected:
                    # Successfully connected to Socket.IO
                    sio_client.disconnect()

                    # Check for orphaned process (no PID file but service running)
                    try:
                        # Try to find process using the port
                        import psutil

                        for proc in psutil.process_iter(['pid', 'name']):
                            try:
                                for conn in proc.connections():
                                    if conn.laddr.port == self.port and conn.status == 'LISTEN':
                                        # Found process listening on our port
                                        if 'python' in proc.name().lower():
                                            self.logger.debug(f"Found likely orphaned claude-mpm service on port {self.port}, PID: {proc.pid}")
                                            return True, proc.pid
                            except (psutil.NoSuchProcess, psutil.AccessDenied):
                                continue
                    except ImportError:
                        pass

                    # Socket.IO service exists but can't determine if it's ours
                    self.logger.debug(f"Found Socket.IO service on port {self.port}, but cannot confirm ownership")
                    return False, None

            except Exception:
                pass
            finally:
                if sio_client.connected:
                    sio_client.disconnect()

        except ImportError:
            # socketio not available
            pass
        except Exception as e:
            self.logger.debug(f"Error checking Socket.IO connection: {e}")

        # No service detected or not ours
        return False, None

    except Exception as e:
        self.logger.error(f"Error checking if service is ours: {e}")
        return False, None
@@ -15,6 +15,7 @@ DESIGN DECISIONS:
15
15
  """
16
16
 
17
17
  import asyncio
18
+ import os
18
19
  import threading
19
20
  import time
20
21
  from datetime import datetime
@@ -79,6 +80,7 @@ class UnifiedMonitorServer:
79
80
  self.running = False
80
81
  self.loop = None
81
82
  self.server_thread = None
83
+ self.startup_error = None # Track startup errors
82
84
 
83
85
  # Heartbeat tracking
84
86
  self.heartbeat_task: Optional[asyncio.Task] = None
@@ -106,10 +108,15 @@ class UnifiedMonitorServer:
106
108
  for _ in range(50): # Wait up to 5 seconds
107
109
  if self.running:
108
110
  break
111
+ if self.startup_error:
112
+ # Server thread reported an error
113
+ self.logger.error(f"Server startup failed: {self.startup_error}")
114
+ return False
109
115
  time.sleep(0.1)
110
116
 
111
117
  if not self.running:
112
- self.logger.error("Server failed to start within timeout")
118
+ error_msg = self.startup_error or "Server failed to start within timeout"
119
+ self.logger.error(error_msg)
113
120
  return False
114
121
 
115
122
  self.logger.info("Unified monitor server started successfully")
@@ -131,8 +138,17 @@ class UnifiedMonitorServer:
131
138
  # Run the async server
132
139
  loop.run_until_complete(self._start_async_server())
133
140
 
141
+ except OSError as e:
142
+ # Specific handling for port binding errors
143
+ if "Address already in use" in str(e) or "[Errno 48]" in str(e):
144
+ self.logger.error(f"Port {self.port} is already in use: {e}")
145
+ self.startup_error = f"Port {self.port} is already in use"
146
+ else:
147
+ self.logger.error(f"OS error in server thread: {e}")
148
+ self.startup_error = str(e)
134
149
  except Exception as e:
135
150
  self.logger.error(f"Error in server thread: {e}")
151
+ self.startup_error = str(e)
136
152
  finally:
137
153
  # Always ensure loop cleanup happens
138
154
  if loop is not None:
@@ -212,11 +228,23 @@ class UnifiedMonitorServer:
212
228
  self.runner = web.AppRunner(self.app)
213
229
  await self.runner.setup()
214
230
 
215
- self.site = web.TCPSite(self.runner, self.host, self.port)
216
- await self.site.start()
217
-
218
- self.running = True
219
- self.logger.info(f"Server running on http://{self.host}:{self.port}")
231
+ try:
232
+ self.site = web.TCPSite(self.runner, self.host, self.port)
233
+ await self.site.start()
234
+
235
+ self.running = True
236
+ self.logger.info(f"Server running on http://{self.host}:{self.port}")
237
+ except OSError as e:
238
+ # Port binding error - make sure it's reported clearly
239
+ if "Address already in use" in str(e) or "[Errno 48]" in str(e):
240
+ error_msg = f"Port {self.port} is already in use. Another process may be using this port."
241
+ self.logger.error(error_msg)
242
+ self.startup_error = error_msg
243
+ raise OSError(error_msg) from e
244
+ else:
245
+ self.logger.error(f"Failed to bind to {self.host}:{self.port}: {e}")
246
+ self.startup_error = str(e)
247
+ raise
220
248
 
221
249
  # Keep the server running
222
250
  while self.running:
@@ -281,12 +309,23 @@ class UnifiedMonitorServer:
281
309
 
282
310
  # Health check
283
311
  async def health_check(request):
312
+ # Get version from VERSION file
313
+ version = "1.0.0"
314
+ try:
315
+ version_file = Path(__file__).parent.parent.parent.parent.parent / "VERSION"
316
+ if version_file.exists():
317
+ version = version_file.read_text().strip()
318
+ except Exception:
319
+ pass
320
+
284
321
  return web.json_response(
285
322
  {
286
323
  "status": "healthy",
287
- "service": "unified-monitor",
288
- "version": "1.0.0",
324
+ "service": "claude-mpm-monitor", # Important: must match what is_our_service() checks
325
+ "version": version,
289
326
  "port": self.port,
327
+ "pid": os.getpid(),
328
+ "uptime": int(time.time() - self.server_start_time),
290
329
  }
291
330
  )
292
331
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: claude-mpm
3
- Version: 4.2.21
3
+ Version: 4.2.23
4
4
  Summary: Claude Multi-Agent Project Manager - Orchestrate Claude with agent delegation and ticket tracking
5
5
  Author-email: Bob Matsuoka <bob@matsuoka.com>
6
6
  Maintainer: Claude MPM Team
@@ -149,11 +149,18 @@ pip install claude-mpm
149
149
 
150
150
  Or with pipx (recommended for isolated installation):
151
151
  ```bash
152
+ # Install with monitor support (recommended)
153
+ pipx install "claude-mpm[monitor]"
154
+
155
+ # Basic installation without monitor
152
156
  pipx install claude-mpm
157
+
153
158
  # Configure MCP for pipx users:
154
159
  claude-mpm mcp-pipx-config
155
160
  ```
156
161
 
162
+ **💡 Pipx Tip**: Use `"claude-mpm[monitor]"` to get full monitoring dashboard functionality! The `[monitor]` optional dependency includes Socket.IO and async web server components needed for real-time agent monitoring.
163
+
157
164
  **🎉 Pipx Support Now Fully Functional!** Recent improvements ensure complete compatibility:
158
165
  - ✅ Socket.IO daemon script path resolution (fixed)
159
166
  - ✅ Commands directory access (fixed)
@@ -1,5 +1,5 @@
1
1
  claude_mpm/BUILD_NUMBER,sha256=toytnNjkIKPgQaGwDqQdC1rpNTAdSEc6Vja50d7Ovug,4
2
- claude_mpm/VERSION,sha256=wsCsu3SGhnVCrJ3G8gVo7Vf1VJhypvvtmS7mEhwhYs4,7
2
+ claude_mpm/VERSION,sha256=bLmVrJAUXnac417S8JQgDFhX08Y4MIxjz0i-TIhW9zA,7
3
3
  claude_mpm/__init__.py,sha256=lyTZAYGH4DTaFGLRNWJKk5Q5oTjzN5I6AXmfVX-Jff0,1512
4
4
  claude_mpm/__main__.py,sha256=Ro5UBWBoQaSAIoSqWAr7zkbLyvi4sSy28WShqAhKJG0,723
5
5
  claude_mpm/constants.py,sha256=I946iCQzIIPRZVVJ8aO7lA4euiyDnNw2IX7EelAOkIE,5915
@@ -83,7 +83,7 @@ claude_mpm/cli/commands/mcp_pipx_config.py,sha256=sE62VD6Q1CcO2k1nlbIhHMfAJFQTZf
83
83
  claude_mpm/cli/commands/mcp_server_commands.py,sha256=-1G_2Y5ScTvzDd-kY8fTAao2H6FH7DnsLimleF1rVqQ,6197
84
84
  claude_mpm/cli/commands/mcp_tool_commands.py,sha256=q17GzlFT3JiLTrDqwPO2tz1-fKmPO5QU449syTnKTz4,1283
85
85
  claude_mpm/cli/commands/memory.py,sha256=Yzfs3_oiKciv3sfOoDm2lJL4M9idG7ARV3-sNw1ge_g,26186
86
- claude_mpm/cli/commands/monitor.py,sha256=Z7JPvx1PO4t8vUsWVZvWqdEkPtDG7E-01Bn1lUjY2sY,8213
86
+ claude_mpm/cli/commands/monitor.py,sha256=K8TNtOsdsFgzr9VePELxFnNqZOGhL5a7XFbgOpNYq0g,9621
87
87
  claude_mpm/cli/commands/mpm_init.py,sha256=lO7N91ZHn_n18XbchUUcYoyme7L5NLcXVnhWm5F_Gq8,22367
88
88
  claude_mpm/cli/commands/mpm_init_handler.py,sha256=-pCB0XL3KipqGtnta8CC7Lg5TPMwstEhMFBcgF4aaa4,2919
89
89
  claude_mpm/cli/commands/run.py,sha256=qS3eolLiDrE8EXLQJioB6kL1ONr_l0c3OE3qMUJCqbA,43489
@@ -432,7 +432,7 @@ claude_mpm/services/cli/memory_crud_service.py,sha256=ciN9Pl_12iDAqF9zPBWOzu-iXi
432
432
  claude_mpm/services/cli/memory_output_formatter.py,sha256=nbf7VsjGvH4e9fLv9c7PzjuO9COZhbK5P2fNZ79055w,24783
433
433
  claude_mpm/services/cli/session_manager.py,sha256=rla_Stbcvt93wa9G9MCMu9UqB3FLGqlPt_eN5lQb3Gg,16599
434
434
  claude_mpm/services/cli/startup_checker.py,sha256=efhuvu8ns5G16jcQ0nQZKVddmD2AktUEdlvjNcXjAuk,12232
435
- claude_mpm/services/cli/unified_dashboard_manager.py,sha256=GHLDe57UI1SMW04G6E97oc-bZPnNDv4eMXrUfll2ShE,11570
435
+ claude_mpm/services/cli/unified_dashboard_manager.py,sha256=3IRum9HH6IA4UQtTzD5l6rCngfxfqeqfTGzpKIpmZd8,12607
436
436
  claude_mpm/services/communication/__init__.py,sha256=b4qc7_Rqy4DE9q7BAUlfUZjoYG4uimAyUnE0irPcXyU,560
437
437
  claude_mpm/services/core/__init__.py,sha256=evEayLlBqJvxMZhrhuK6aagXmNrKGSj8Jm9OOxKzqvU,2195
438
438
  claude_mpm/services/core/base.py,sha256=iA-F7DgGp-FJIMvQTiHQ68RkG_k-AtUWlArJPMw6ZPk,7297
@@ -548,9 +548,9 @@ claude_mpm/services/memory/cache/__init__.py,sha256=6M6-P8ParyxX8vOgp_IxHgLMvacr
548
548
  claude_mpm/services/memory/cache/shared_prompt_cache.py,sha256=crnYPUT8zcS7TvoE1vW7pyaf4T77N5rJ1wUf_YQ2vvo,28704
549
549
  claude_mpm/services/memory/cache/simple_cache.py,sha256=qsTjbcsPxj-kNfaod9VN_uE5NioIwpfkUin_mMVUJCg,10218
550
550
  claude_mpm/services/monitor/__init__.py,sha256=X7gxSLUm9Fg_zEsX6LtCHP2ipF0qj6Emkun20h2So7g,745
551
- claude_mpm/services/monitor/daemon.py,sha256=f51ulONe_IyHpfFTFaEl93CQc5gnNT3nrQGMdb6hfo0,15351
551
+ claude_mpm/services/monitor/daemon.py,sha256=g7g7g7kZTzCowM5iNJHhkE2UYDRms_7C34Net2CpdjQ,21015
552
552
  claude_mpm/services/monitor/event_emitter.py,sha256=JzRLNg8PUJ5s3ulNnq_D4yqCPItvidJzu8DmFxriieQ,12224
553
- claude_mpm/services/monitor/server.py,sha256=Z7K4pKEbrDtEVVJzrB6QVPVhW_3eEcTaQAnpP3boJco,26401
553
+ claude_mpm/services/monitor/server.py,sha256=2-xLo14qFBZf5MDYwBxBhTrFCNnbu3tOS3eEu8vyddc,28476
554
554
  claude_mpm/services/monitor/handlers/__init__.py,sha256=jgPIf4IJVERm_tAeD9834tfx9IcxtlHj5r9rhEWpkfM,701
555
555
  claude_mpm/services/monitor/handlers/code_analysis.py,sha256=mHyI27Wp6WVmUBc0m0i991ogyFZBTvkrfR7Kf3EAk5U,11474
556
556
  claude_mpm/services/monitor/handlers/dashboard.py,sha256=uGBhb-6RG6u4WLipUXgdx7RCW-vb_qek5dIfHIwAC7o,9805
@@ -558,7 +558,7 @@ claude_mpm/services/monitor/handlers/file.py,sha256=p3C4wffl0GIcN00b-KkrmZ8F-Amd
558
558
  claude_mpm/services/monitor/handlers/hooks.py,sha256=dlrmyFu8WChlvn6-sND9DLjSbm5nrMfNZrAgoWN-2No,17582
559
559
  claude_mpm/services/monitor/management/__init__.py,sha256=mxaEFRgvvgV85gUpXu_DsnHtywihdP14EisvISAVZuQ,525
560
560
  claude_mpm/services/monitor/management/health.py,sha256=Wm92Cli_4cWD6B89KX_CdpAvvevuEaGB8Ah59ILhFww,3772
561
- claude_mpm/services/monitor/management/lifecycle.py,sha256=Cahpc1-R09ihDYVWiMI9wnv-Qw20cNhHHcJyxZ9JcBo,10575
561
+ claude_mpm/services/monitor/management/lifecycle.py,sha256=DO3UYLWN2tW1P72--kTbWCpWZBNxK1Ok2GeI_PdOgHc,23713
562
562
  claude_mpm/services/project/__init__.py,sha256=IUclN1L7ChHCNya7PJiVxu4nttxsrj3WRIpwyA1A_hw,512
563
563
  claude_mpm/services/project/analyzer.py,sha256=VHlLrP8-S5gr12w4Yzs7-6d7LWdJKISHPCFSG7SDiQU,38434
564
564
  claude_mpm/services/project/analyzer_refactored.py,sha256=USYEdPAhSoGPqZCpaT89Dw6ElFW_L1yXSURheQjAhLA,18243
@@ -639,9 +639,9 @@ claude_mpm/utils/subprocess_utils.py,sha256=zgiwLqh_17WxHpySvUPH65pb4bzIeUGOAYUJ
639
639
  claude_mpm/validation/__init__.py,sha256=YZhwE3mhit-lslvRLuwfX82xJ_k4haZeKmh4IWaVwtk,156
640
640
  claude_mpm/validation/agent_validator.py,sha256=3Lo6LK-Mw9IdnL_bd3zl_R6FkgSVDYKUUM7EeVVD3jc,20865
641
641
  claude_mpm/validation/frontmatter_validator.py,sha256=u8g4Eyd_9O6ugj7Un47oSGh3kqv4wMkuks2i_CtWRvM,7028
642
- claude_mpm-4.2.21.dist-info/licenses/LICENSE,sha256=lpaivOlPuBZW1ds05uQLJJswy8Rp_HMNieJEbFlqvLk,1072
643
- claude_mpm-4.2.21.dist-info/METADATA,sha256=79KK9Dg30eyjYMaOvrTCuyKmVtkZNKPR8gdrjJ5bp90,14110
644
- claude_mpm-4.2.21.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
645
- claude_mpm-4.2.21.dist-info/entry_points.txt,sha256=FDPZgz8JOvD-6iuXY2l9Zbo9zYVRuE4uz4Qr0vLeGOk,471
646
- claude_mpm-4.2.21.dist-info/top_level.txt,sha256=1nUg3FEaBySgm8t-s54jK5zoPnu3_eY6EP6IOlekyHA,11
647
- claude_mpm-4.2.21.dist-info/RECORD,,
642
+ claude_mpm-4.2.23.dist-info/licenses/LICENSE,sha256=lpaivOlPuBZW1ds05uQLJJswy8Rp_HMNieJEbFlqvLk,1072
643
+ claude_mpm-4.2.23.dist-info/METADATA,sha256=5lkYIMEIiPKrYRkOrljAj3wfsRqrBzIip_Vh9sb4eZE,14451
644
+ claude_mpm-4.2.23.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
645
+ claude_mpm-4.2.23.dist-info/entry_points.txt,sha256=FDPZgz8JOvD-6iuXY2l9Zbo9zYVRuE4uz4Qr0vLeGOk,471
646
+ claude_mpm-4.2.23.dist-info/top_level.txt,sha256=1nUg3FEaBySgm8t-s54jK5zoPnu3_eY6EP6IOlekyHA,11
647
+ claude_mpm-4.2.23.dist-info/RECORD,,