claude-mpm 4.2.6__py3-none-any.whl → 4.2.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -12,11 +12,18 @@ DESIGN DECISIONS:
12
12
  - Graceful fallbacks for missing dependencies
13
13
  """
14
14
 
15
+ import asyncio
15
16
  import glob
17
+ import json
18
+ import logging
16
19
  import os
17
20
  import sys
21
+ import time
22
+ import traceback
23
+ from collections import deque
24
+ from datetime import datetime
18
25
  from pathlib import Path
19
- from typing import Any, Dict, Optional
26
+ from typing import Any, Deque, Dict, Optional
20
27
 
21
28
  try:
22
29
  import aiohttp
@@ -30,6 +37,12 @@ except ImportError:
30
37
  aiohttp = None
31
38
  web = None
32
39
 
40
+ # Set up logging
41
+ logging.basicConfig(
42
+ level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
43
+ )
44
+ logger = logging.getLogger(__name__)
45
+
33
46
 
34
47
  def find_dashboard_files() -> Optional[Path]:
35
48
  """Find dashboard files across different installation methods."""
@@ -171,6 +184,27 @@ class StableDashboardServer:
171
184
  self.server_runner = None
172
185
  self.server_site = None
173
186
 
187
+ # Event storage with circular buffer (keep last 500 events)
188
+ self.event_history: Deque[Dict[str, Any]] = deque(maxlen=500)
189
+ self.event_count = 0
190
+ self.server_start_time = time.time()
191
+ self.last_event_time = None
192
+ self.connected_clients = set()
193
+
194
+ # Resilience features
195
+ self.retry_count = 0
196
+ self.max_retries = 3
197
+ self.health_check_failures = 0
198
+ self.is_healthy = True
199
+
200
+ # Persistent event storage (optional)
201
+ self.persist_events = (
202
+ os.environ.get("CLAUDE_MPM_PERSIST_EVENTS", "false").lower() == "true"
203
+ )
204
+ self.event_log_path = Path.home() / ".claude" / "dashboard_events.jsonl"
205
+ if self.persist_events:
206
+ self.event_log_path.parent.mkdir(parents=True, exist_ok=True)
207
+
174
208
  def setup(self) -> bool:
175
209
  """Set up the server components."""
176
210
  if not DEPENDENCIES_AVAILABLE:
@@ -179,17 +213,34 @@ class StableDashboardServer:
179
213
  )
180
214
  return False
181
215
 
182
- # Find dashboard files
183
- self.dashboard_path = find_dashboard_files()
216
+ # Find dashboard files only if not already set (for testing)
184
217
  if not self.dashboard_path:
185
- print("❌ Error: Could not find dashboard files")
186
- print("Please ensure Claude MPM is properly installed")
218
+ self.dashboard_path = find_dashboard_files()
219
+ if not self.dashboard_path:
220
+ print("❌ Error: Could not find dashboard files")
221
+ print("Please ensure Claude MPM is properly installed")
222
+ return False
223
+
224
+ # Validate that the dashboard path has the required files
225
+ template_path = self.dashboard_path / "templates" / "index.html"
226
+ static_path = self.dashboard_path / "static"
227
+
228
+ if not template_path.exists():
229
+ print(f"❌ Error: Dashboard template not found at {template_path}")
230
+ print("Please ensure Claude MPM dashboard files are properly installed")
231
+ return False
232
+
233
+ if not static_path.exists():
234
+ print(f"❌ Error: Dashboard static files not found at {static_path}")
235
+ print("Please ensure Claude MPM dashboard files are properly installed")
187
236
  return False
188
237
 
189
238
  if self.debug:
190
239
  print(f"🔍 Debug: Dashboard path resolved to: {self.dashboard_path}")
191
- print(f"🔍 Debug: Checking for required files...")
192
- template_exists = (self.dashboard_path / "templates" / "index.html").exists()
240
+ print("🔍 Debug: Checking for required files...")
241
+ template_exists = (
242
+ self.dashboard_path / "templates" / "index.html"
243
+ ).exists()
193
244
  static_exists = (self.dashboard_path / "static").exists()
194
245
  print(f" - templates/index.html: {template_exists}")
195
246
  print(f" - static directory: {static_exists}")
@@ -206,6 +257,8 @@ class StableDashboardServer:
206
257
  ping_timeout=60, # Match client's 60 second timeout
207
258
  max_http_buffer_size=1e8, # Allow larger messages
208
259
  )
260
+ # Create app WITHOUT any static file handlers to prevent directory listing
261
+ # This is critical - we only want explicit routes we define
209
262
  self.app = web.Application()
210
263
  self.sio.attach(self.app)
211
264
  print("✅ SocketIO server created and attached")
@@ -220,31 +273,54 @@ class StableDashboardServer:
220
273
 
221
274
  def _setup_routes(self):
222
275
  """Set up HTTP routes."""
276
+ # IMPORTANT: Only add explicit routes, never add static file serving for root
277
+ # This prevents aiohttp from serving directory listings
223
278
  self.app.router.add_get("/", self._serve_dashboard)
279
+ self.app.router.add_get(
280
+ "/index.html", self._serve_dashboard
281
+ ) # Also handle /index.html
224
282
  self.app.router.add_get("/static/{path:.*}", self._serve_static)
225
283
  self.app.router.add_get("/api/directory/list", self._list_directory)
226
284
  self.app.router.add_get("/api/file/read", self._read_file)
227
285
  self.app.router.add_get("/version.json", self._serve_version)
228
286
 
287
+ # New resilience endpoints
288
+ self.app.router.add_get("/health", self._health_check)
289
+ self.app.router.add_get("/api/status", self._serve_status)
290
+ self.app.router.add_get("/api/events/history", self._serve_event_history)
291
+
292
+ # CRITICAL: Add the missing /api/events endpoint for receiving events
293
+ self.app.router.add_post("/api/events", self._receive_event)
294
+
229
295
  def _setup_socketio_events(self):
230
296
  """Set up SocketIO event handlers."""
231
297
 
232
298
  @self.sio.event
233
299
  async def connect(sid, environ):
300
+ self.connected_clients.add(sid)
234
301
  if self.debug:
235
302
  print(f"✅ SocketIO client connected: {sid}")
236
- user_agent = environ.get('HTTP_USER_AGENT', 'Unknown')
303
+ user_agent = environ.get("HTTP_USER_AGENT", "Unknown")
237
304
  # Truncate long user agents for readability
238
305
  if len(user_agent) > 80:
239
306
  user_agent = user_agent[:77] + "..."
240
307
  print(f" Client info: {user_agent}")
241
- # Send a test message to confirm connection
308
+
309
+ # Send connection confirmation
242
310
  await self.sio.emit(
243
311
  "connection_test", {"status": "connected", "server": "stable"}, room=sid
244
312
  )
245
313
 
314
+ # Send recent event history to new client
315
+ if self.event_history:
316
+ # Send last 20 events to catch up new client
317
+ recent_events = list(self.event_history)[-20:]
318
+ for event in recent_events:
319
+ await self.sio.emit("claude_event", event, room=sid)
320
+
246
321
  @self.sio.event
247
322
  async def disconnect(sid):
323
+ self.connected_clients.discard(sid)
248
324
  if self.debug:
249
325
  print(f"📤 SocketIO client disconnected: {sid}")
250
326
 
@@ -262,7 +338,9 @@ class StableDashboardServer:
262
338
  response = create_mock_ast_data(file_path, file_name)
263
339
 
264
340
  if self.debug:
265
- print(f"📤 Sending analysis response: {len(response['elements'])} elements")
341
+ print(
342
+ f"📤 Sending analysis response: {len(response['elements'])} elements"
343
+ )
266
344
  await self.sio.emit("code:file:analyzed", response, room=sid)
267
345
 
268
346
  # CRITICAL: Handle the actual event name with colons that the client sends
@@ -280,7 +358,9 @@ class StableDashboardServer:
280
358
  response = create_mock_ast_data(file_path, file_name)
281
359
 
282
360
  if self.debug:
283
- print(f"📤 Sending analysis response: {len(response['elements'])} elements")
361
+ print(
362
+ f"📤 Sending analysis response: {len(response['elements'])} elements"
363
+ )
284
364
  await self.sio.emit("code:file:analyzed", response, room=sid)
285
365
 
286
366
  # Handle other events the dashboard sends
@@ -315,14 +395,190 @@ class StableDashboardServer:
315
395
  print(f"📡 Received top-level discovery request from {sid}")
316
396
  await self.sio.emit("code:top_level:discovered", {"status": "ok"}, room=sid)
317
397
 
398
+ # Mock event generator when no real events
399
+ @self.sio.event
400
+ async def request_mock_event(sid, data):
401
+ """Generate a mock event for testing."""
402
+ if self.debug:
403
+ print(f"📡 Mock event requested by {sid}")
404
+
405
+ mock_event = self._create_mock_event()
406
+ # Store and broadcast like a real event
407
+ self.event_count += 1
408
+ self.last_event_time = datetime.now()
409
+ self.event_history.append(mock_event)
410
+ await self.sio.emit("claude_event", mock_event)
411
+
412
+ def _create_mock_event(self) -> Dict[str, Any]:
413
+ """Create a mock event for testing/demo purposes."""
414
+ import random
415
+
416
+ event_types = ["file", "command", "test", "build", "deploy"]
417
+ event_subtypes = ["start", "progress", "complete", "error", "warning"]
418
+
419
+ return {
420
+ "type": random.choice(event_types),
421
+ "subtype": random.choice(event_subtypes),
422
+ "timestamp": datetime.now().isoformat(),
423
+ "source": "mock",
424
+ "data": {
425
+ "message": f"Mock {random.choice(['operation', 'task', 'process'])} {random.choice(['started', 'completed', 'in progress'])}",
426
+ "file": f"/path/to/file_{random.randint(1, 100)}.py",
427
+ "line": random.randint(1, 500),
428
+ "progress": random.randint(0, 100),
429
+ },
430
+ "session_id": "mock-session",
431
+ "server_event_id": self.event_count + 1,
432
+ }
433
+
434
async def _start_mock_event_generator(self):
    """Periodically broadcast a mock event while the event stream is idle.

    Wakes every 30 seconds. When at least one client is connected and no
    event has been recorded in the last 60 seconds (or ever), a mock event
    is counted, stored in the history buffer and emitted as
    ``claude_event``. Cancellation ends the loop silently; any other
    exception is logged and also ends the loop.
    """
    try:
        while True:
            await asyncio.sleep(30)  # Check every 30 seconds

            # Nobody is listening, so there is nothing to generate.
            if not self.connected_clients:
                continue

            # A recent event exists (within the last 60 seconds): stay quiet.
            if self.last_event_time:
                idle_for = (datetime.now() - self.last_event_time).total_seconds()
                if not idle_for > 60:
                    continue

            if self.debug:
                print("⏰ No recent events, generating mock event")

            generated = self._create_mock_event()
            self.event_count += 1
            self.last_event_time = datetime.now()
            self.event_history.append(generated)

            await self.sio.emit("claude_event", generated)
    except asyncio.CancelledError:
        # Cancellation is the expected way to stop this loop.
        pass
    except Exception as e:
        logger.error(f"Mock event generator error: {e}")
458
+
318
459
  async def _serve_dashboard(self, request):
319
- """Serve the main dashboard HTML."""
320
- dashboard_file = self.dashboard_path / "templates" / "index.html"
321
- if dashboard_file.exists():
322
- with open(dashboard_file) as f:
323
- content = f.read()
324
- return web.Response(text=content, content_type="text/html")
325
- return web.Response(text="Dashboard not found", status=404)
460
+ """Serve the main dashboard HTML with fallback."""
461
+ dashboard_file = (
462
+ self.dashboard_path / "templates" / "index.html"
463
+ if self.dashboard_path
464
+ else None
465
+ )
466
+
467
+ # Try to serve actual dashboard
468
+ if dashboard_file and dashboard_file.exists():
469
+ try:
470
+ with open(dashboard_file, encoding="utf-8") as f:
471
+ content = f.read()
472
+ return web.Response(text=content, content_type="text/html")
473
+ except Exception as e:
474
+ logger.error(f"Error reading dashboard template: {e}")
475
+ # Fall through to fallback HTML
476
+
477
+ # Fallback HTML if template missing or error
478
+ fallback_html = """
479
+ <!DOCTYPE html>
480
+ <html lang="en">
481
+ <head>
482
+ <meta charset="UTF-8">
483
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
484
+ <title>Claude MPM Dashboard - Fallback Mode</title>
485
+ <style>
486
+ body { font-family: system-ui, -apple-system, sans-serif; margin: 0; padding: 20px; background: #1e1e1e; color: #e0e0e0; }
487
+ .container { max-width: 1200px; margin: 0 auto; }
488
+ .header { background: #2d2d2d; padding: 20px; border-radius: 8px; margin-bottom: 20px; }
489
+ .status { background: #2d2d2d; padding: 15px; border-radius: 8px; margin-bottom: 20px; }
490
+ .status.healthy { border-left: 4px solid #4caf50; }
491
+ .status.degraded { border-left: 4px solid #ff9800; }
492
+ .events { background: #2d2d2d; padding: 20px; border-radius: 8px; }
493
+ .event { background: #1e1e1e; padding: 10px; margin: 10px 0; border-radius: 4px; }
494
+ h1 { color: #fff; margin: 0; }
495
+ .subtitle { color: #999; margin-top: 5px; }
496
+ .metric { display: inline-block; margin-right: 20px; }
497
+ .metric-label { color: #999; font-size: 12px; }
498
+ .metric-value { color: #fff; font-size: 20px; font-weight: bold; }
499
+ </style>
500
+ </head>
501
+ <body>
502
+ <div class="container">
503
+ <div class="header">
504
+ <h1>Claude MPM Dashboard</h1>
505
+ <div class="subtitle">Fallback Mode - Template not found</div>
506
+ </div>
507
+
508
+ <div id="status" class="status healthy">
509
+ <h3>Server Status</h3>
510
+ <div class="metric">
511
+ <div class="metric-label">Health</div>
512
+ <div class="metric-value" id="health">Loading...</div>
513
+ </div>
514
+ <div class="metric">
515
+ <div class="metric-label">Uptime</div>
516
+ <div class="metric-value" id="uptime">Loading...</div>
517
+ </div>
518
+ <div class="metric">
519
+ <div class="metric-label">Events</div>
520
+ <div class="metric-value" id="events">Loading...</div>
521
+ </div>
522
+ </div>
523
+
524
+ <div class="events">
525
+ <h3>Recent Events</h3>
526
+ <div id="event-list">
527
+ <div class="event">Waiting for events...</div>
528
+ </div>
529
+ </div>
530
+ </div>
531
+
532
+ <script src="https://cdn.socket.io/4.5.4/socket.io.min.js"></script>
533
+ <script>
534
+ // Fallback dashboard JavaScript
535
+ const socket = io();
536
+
537
+ // Update status periodically
538
+ async function updateStatus() {
539
+ try {
540
+ const response = await fetch('/api/status');
541
+ const data = await response.json();
542
+
543
+ document.getElementById('health').textContent = data.status;
544
+ document.getElementById('uptime').textContent = data.uptime.human;
545
+ document.getElementById('events').textContent = data.events.total;
546
+
547
+ const statusDiv = document.getElementById('status');
548
+ statusDiv.className = data.status === 'running' ? 'status healthy' : 'status degraded';
549
+ } catch (e) {
550
+ console.error('Failed to fetch status:', e);
551
+ }
552
+ }
553
+
554
+ // Listen for events
555
+ socket.on('claude_event', (event) => {
556
+ const eventList = document.getElementById('event-list');
557
+ const eventDiv = document.createElement('div');
558
+ eventDiv.className = 'event';
559
+ eventDiv.textContent = JSON.stringify(event, null, 2);
560
+ eventList.insertBefore(eventDiv, eventList.firstChild);
561
+
562
+ // Keep only last 10 events
563
+ while (eventList.children.length > 10) {
564
+ eventList.removeChild(eventList.lastChild);
565
+ }
566
+ });
567
+
568
+ socket.on('connect', () => {
569
+ console.log('Connected to dashboard server');
570
+ });
571
+
572
+ // Initial load and periodic updates
573
+ updateStatus();
574
+ setInterval(updateStatus, 5000);
575
+ </script>
576
+ </body>
577
+ </html>
578
+ """
579
+
580
+ logger.warning("Serving fallback dashboard HTML")
581
+ return web.Response(text=fallback_html, content_type="text/html")
326
582
 
327
583
  async def _serve_static(self, request):
328
584
  """Serve static files."""
@@ -393,6 +649,10 @@ class StableDashboardServer:
393
649
  return web.json_response({"error": "Not a file"}, status=400)
394
650
 
395
651
  try:
652
+ # Determine file type
653
+ file_ext = os.path.splitext(abs_path)[1].lower()
654
+ is_json = file_ext in [".json", ".jsonl", ".geojson"]
655
+
396
656
  # Read file with appropriate encoding
397
657
  encodings = ["utf-8", "latin-1", "cp1252"]
398
658
  content = None
@@ -408,13 +668,29 @@ class StableDashboardServer:
408
668
  if content is None:
409
669
  return web.json_response({"error": "Could not decode file"}, status=400)
410
670
 
671
+ # Format JSON files for better readability
672
+ formatted_content = content
673
+ is_valid_json = False
674
+ if is_json:
675
+ try:
676
+ import json
677
+
678
+ parsed = json.loads(content)
679
+ formatted_content = json.dumps(parsed, indent=2, sort_keys=False)
680
+ is_valid_json = True
681
+ except json.JSONDecodeError:
682
+ # Not valid JSON, return as-is
683
+ is_valid_json = False
684
+
411
685
  return web.json_response(
412
686
  {
413
687
  "path": abs_path,
414
688
  "name": os.path.basename(abs_path),
415
- "content": content,
416
- "lines": len(content.splitlines()),
689
+ "content": formatted_content,
690
+ "lines": len(formatted_content.splitlines()),
417
691
  "size": os.path.getsize(abs_path),
692
+ "type": "json" if is_json else "text",
693
+ "is_valid_json": is_valid_json,
418
694
  }
419
695
  )
420
696
 
@@ -423,36 +699,247 @@ class StableDashboardServer:
423
699
  except Exception as e:
424
700
  return web.json_response({"error": str(e)}, status=500)
425
701
 
702
async def _health_check(self, request):
    """Health check endpoint for monitoring.

    Responds with a JSON summary of uptime, client count, event counters
    and retry/failure counters. The HTTP status is 200 while
    ``self.is_healthy`` is true and 503 otherwise.
    """
    elapsed = time.time() - self.server_start_time

    if self.is_healthy:
        label = "healthy"
    else:
        label = "degraded"

    last_seen = None
    if self.last_event_time:
        last_seen = self.last_event_time.isoformat()

    payload = {
        "status": label,
        "uptime_seconds": round(elapsed, 2),
        "connected_clients": len(self.connected_clients),
        "event_count": self.event_count,
        "last_event": last_seen,
        "retry_count": self.retry_count,
        "health_check_failures": self.health_check_failures,
        "event_history_size": len(self.event_history),
    }

    if self.is_healthy:
        return web.json_response(payload, status=200)
    return web.json_response(payload, status=503)
722
+
723
async def _serve_status(self, request):
    """Detailed server status endpoint.

    Returns a JSON document with uptime, active connections, event
    counters, the advertised feature list and the resilience counters.
    """
    elapsed = time.time() - self.server_start_time

    # Each section is assembled separately; the key order of the final
    # document is kept as: server, version, status, uptime, connections,
    # events, features, resilience.
    state = "running" if self.is_healthy else "degraded"

    uptime_section = {
        "seconds": round(elapsed, 2),
        "human": self._format_uptime(elapsed),
    }

    connection_section = {
        "active": len(self.connected_clients),
        "clients": list(self.connected_clients),
    }

    last_received = None
    if self.last_event_time:
        last_received = self.last_event_time.isoformat()
    event_section = {
        "total": self.event_count,
        "buffered": len(self.event_history),
        "last_received": last_received,
    }

    feature_names = [
        "http",
        "socketio",
        "event_bridge",
        "health_monitoring",
        "auto_retry",
        "event_history",
        "graceful_degradation",
    ]

    resilience_section = {
        "retry_count": self.retry_count,
        "max_retries": self.max_retries,
        "health_failures": self.health_check_failures,
        "persist_events": self.persist_events,
    }

    return web.json_response(
        {
            "server": "stable",
            "version": "4.2.3",
            "status": state,
            "uptime": uptime_section,
            "connections": connection_section,
            "events": event_section,
            "features": feature_names,
            "resilience": resilience_section,
        }
    )
763
+
764
async def _serve_event_history(self, request):
    """Serve the most recent events from the in-memory history buffer.

    Query parameters:
        limit: maximum number of events to return (default ``100``).
            Must be a non-negative integer.

    Returns:
        200 with ``{"events", "count", "total_events"}`` on success, or
        400 with an ``error`` payload when ``limit`` is not a
        non-negative integer.
    """
    raw_limit = request.query.get("limit", "100")
    try:
        limit = int(raw_limit)
    except (TypeError, ValueError):
        # A malformed query value is a client error, not a server fault.
        return web.json_response(
            {"error": "Invalid limit", "details": "limit must be an integer"},
            status=400,
        )

    if limit < 0:
        # A negative value would turn the tail slice into a head-skipping slice.
        return web.json_response(
            {"error": "Invalid limit", "details": "limit must be non-negative"},
            status=400,
        )

    # ``seq[-0:]`` is the whole sequence, so zero must be handled explicitly.
    events = list(self.event_history)[-limit:] if limit else []
    return web.json_response(
        {"events": events, "count": len(events), "total_events": self.event_count}
    )
771
+
772
async def _receive_event(self, request):
    """Receive an event from the hook system via HTTP POST.

    The JSON object in the request body is stamped with ``received_at``
    and ``server_event_id``, appended to the history buffer, optionally
    appended to the JSONL event log, and emitted to Socket.IO clients as
    ``claude_event`` when any are connected.

    Returns:
        200 with ``{"status", "event_id", "clients_notified"}`` on success;
        400 when the body is not valid JSON or is not a JSON object;
        500 for any other processing failure.
    """
    try:
        # Parse event data
        data = await request.json()

        # Merging with ``**data`` requires a mapping. A JSON array or scalar
        # is a malformed request, so report it as 400 rather than letting
        # the TypeError surface as a 500.
        if not isinstance(data, dict):
            logger.error(
                f"Invalid event payload type: {type(data).__name__}"
            )
            return web.json_response(
                {
                    "error": "Invalid event",
                    "details": "Event payload must be a JSON object",
                },
                status=400,
            )

        # One timestamp is used for both the event and the tracking state.
        received_at = datetime.now()
        event_id = self.event_count + 1

        # Add server metadata (server-side values win over client-sent keys)
        event = {
            **data,
            "received_at": received_at.isoformat(),
            "server_event_id": event_id,
        }

        # Update tracking
        self.event_count = event_id
        self.last_event_time = received_at

        # Store in circular buffer
        self.event_history.append(event)

        # Persist to disk if enabled; a failure here must not reject the event
        if self.persist_events:
            try:
                with open(self.event_log_path, "a", encoding="utf-8") as f:
                    f.write(json.dumps(event) + "\n")
            except Exception as e:
                logger.error(f"Failed to persist event: {e}")

        # Emit to all connected SocketIO clients
        if self.sio and self.connected_clients:
            await self.sio.emit("claude_event", event)
            if self.debug:
                print(
                    f"📡 Forwarded event to {len(self.connected_clients)} clients"
                )

        # Return success response
        return web.json_response(
            {
                "status": "received",
                "event_id": event["server_event_id"],
                "clients_notified": len(self.connected_clients),
            }
        )

    except json.JSONDecodeError as e:
        logger.error(f"Invalid JSON in event request: {e}")
        return web.json_response(
            {"error": "Invalid JSON", "details": str(e)}, status=400
        )
    except Exception as e:
        logger.error(f"Error processing event: {e}")
        if self.debug:
            traceback.print_exc()
        return web.json_response(
            {"error": "Failed to process event", "details": str(e)}, status=500
        )
829
+
426
830
  async def _serve_version(self, request):
427
831
  """Serve version information."""
428
832
  version_info = {
429
- "version": "4.2.2",
833
+ "version": "4.2.3",
430
834
  "server": "stable",
431
- "features": ["http", "socketio", "mock_ast"],
432
- "status": "running",
835
+ "features": ["http", "socketio", "event_bridge", "resilience"],
836
+ "status": "running" if self.is_healthy else "degraded",
433
837
  }
434
838
  return web.json_response(version_info)
435
839
 
840
+ def _format_uptime(self, seconds: float) -> str:
841
+ """Format uptime in human-readable format."""
842
+ days = int(seconds // 86400)
843
+ hours = int((seconds % 86400) // 3600)
844
+ minutes = int((seconds % 3600) // 60)
845
+ secs = int(seconds % 60)
846
+
847
+ parts = []
848
+ if days > 0:
849
+ parts.append(f"{days}d")
850
+ if hours > 0:
851
+ parts.append(f"{hours}h")
852
+ if minutes > 0:
853
+ parts.append(f"{minutes}m")
854
+ parts.append(f"{secs}s")
855
+
856
+ return " ".join(parts)
857
+
436
858
  def run(self):
437
- """Run the server with automatic port conflict resolution."""
438
- print("🔧 Setting up server...")
439
- if not self.setup():
440
- print("❌ Server setup failed")
441
- return False
859
+ """Run the server with automatic restart on crash."""
860
+ restart_attempts = 0
861
+ max_restart_attempts = 5
862
+
863
+ while restart_attempts < max_restart_attempts:
864
+ try:
865
+ print(
866
+ f"🔧 Setting up server... (attempt {restart_attempts + 1}/{max_restart_attempts})"
867
+ )
868
+
869
+ # Reset health status on restart
870
+ self.is_healthy = True
871
+ self.health_check_failures = 0
872
+
873
+ if not self.setup():
874
+ if not DEPENDENCIES_AVAILABLE:
875
+ print("❌ Missing required dependencies")
876
+ return False
877
+
878
+ # Continue with fallback mode even if dashboard files not found
879
+ print("⚠️ Dashboard files not found - running in fallback mode")
880
+ print(
881
+ " Server will provide basic functionality and receive events"
882
+ )
883
+
884
+ # Set up minimal server without dashboard files
885
+ self.sio = socketio.AsyncServer(
886
+ cors_allowed_origins="*",
887
+ logger=self.debug,
888
+ engineio_logger=self.debug,
889
+ ping_interval=30,
890
+ ping_timeout=60,
891
+ max_http_buffer_size=1e8,
892
+ )
893
+ self.app = web.Application()
894
+ self.sio.attach(self.app)
895
+ self._setup_routes()
896
+ self._setup_socketio_events()
897
+
898
+ return self._run_with_resilience()
899
+
900
+ except Exception as e:
901
+ restart_attempts += 1
902
+ logger.error(f"Server crashed: {e}")
903
+ if self.debug:
904
+ traceback.print_exc()
905
+
906
+ if restart_attempts < max_restart_attempts:
907
+ wait_time = min(
908
+ 2**restart_attempts, 30
909
+ ) # Exponential backoff, max 30s
910
+ print(f"🔄 Restarting server in {wait_time} seconds...")
911
+ time.sleep(wait_time)
912
+ else:
913
+ print(
914
+ f"❌ Server failed after {max_restart_attempts} restart attempts"
915
+ )
916
+ return False
917
+
918
+ return False
919
+
920
+ def _run_with_resilience(self):
921
+ """Run server with port conflict resolution and error handling."""
442
922
 
443
923
  print(f"🚀 Starting stable dashboard server at http://{self.host}:{self.port}")
444
- print("✅ Server ready: HTTP + SocketIO on same port")
445
- print("🎯 This is a standalone server - no monitor service required")
446
- print("📡 SocketIO events registered:")
447
- print(" - connect/disconnect")
924
+ print("✅ Server ready: HTTP + SocketIO with resilience features")
925
+ print("🛡️ Resilience features enabled:")
926
+ print(" - Automatic restart on crash")
927
+ print(" - Health monitoring endpoint (/health)")
928
+ print(" - Event history buffer (500 events)")
929
+ print(" - Graceful degradation")
930
+ print(" - Connection retry logic")
931
+ print("📡 SocketIO events:")
932
+ print(" - claude_event (real-time events from hooks)")
448
933
  print(" - code:analyze:file (code analysis)")
449
- print(" - Various dashboard events")
450
- print("🌐 HTTP endpoints available:")
451
- print(" - GET / (dashboard)")
452
- print(" - GET /static/* (static files)")
453
- print(" - GET /api/directory/list (directory listing)")
454
- print(" - GET /api/file/read (file content)")
455
- print(" - GET /version.json (version info)")
934
+ print(" - connection management")
935
+ print("🌐 HTTP endpoints:")
936
+ print(" - GET / (dashboard)")
937
+ print(" - GET /health (health check)")
938
+ print(" - POST /api/events (receive hook events)")
939
+ print(" - GET /api/status (detailed status)")
940
+ print(" - GET /api/events/history (event history)")
941
+ print(" - GET /api/directory/list")
942
+ print(" - GET /api/file/read")
456
943
  print(f"\n🔗 Open in browser: http://{self.host}:{self.port}")
457
944
  print("\n Press Ctrl+C to stop the server\n")
458
945
 
@@ -467,19 +954,23 @@ class StableDashboardServer:
467
954
  web.run_app(self.app, host=self.host, port=self.port)
468
955
  else:
469
956
  web.run_app(
470
- self.app,
471
- host=self.host,
472
- port=self.port,
957
+ self.app,
958
+ host=self.host,
959
+ port=self.port,
473
960
  access_log=None,
474
- print=lambda *args: None # Suppress startup messages in non-debug mode
961
+ print=lambda *args: None, # Suppress startup messages in non-debug mode
475
962
  )
476
- break # Server started successfully
963
+ return True # Server started successfully
477
964
  except KeyboardInterrupt:
478
965
  print("\n🛑 Server stopped by user")
479
- break
966
+ return True
480
967
  except OSError as e:
481
968
  error_str = str(e)
482
- if "[Errno 48]" in error_str or "Address already in use" in error_str or "address already in use" in error_str.lower():
969
+ if (
970
+ "[Errno 48]" in error_str
971
+ or "Address already in use" in error_str
972
+ or "address already in use" in error_str.lower()
973
+ ):
483
974
  # Port is already in use
484
975
  if attempt < max_port_attempts - 1:
485
976
  self.port += 1
@@ -493,7 +984,9 @@ class StableDashboardServer:
493
984
  f"❌ Could not find available port after {max_port_attempts} attempts"
494
985
  )
495
986
  print(f" Ports {original_port} to {self.port} are all in use")
496
- print("\n💡 Tip: Check if another dashboard instance is running")
987
+ print(
988
+ "\n💡 Tip: Check if another dashboard instance is running"
989
+ )
497
990
  print(" You can stop it with: claude-mpm dashboard stop")
498
991
  return False
499
992
  else:
@@ -501,12 +994,14 @@ class StableDashboardServer:
501
994
  print(f"❌ Server error: {e}")
502
995
  if self.debug:
503
996
  import traceback
997
+
504
998
  traceback.print_exc()
505
999
  return False
506
1000
  except Exception as e:
507
1001
  print(f"❌ Unexpected server error: {e}")
508
1002
  if self.debug:
509
1003
  import traceback
1004
+
510
1005
  traceback.print_exc()
511
1006
  else:
512
1007
  print("\n💡 Run with --debug flag for more details")