claude-mpm 4.1.4__py3-none-any.whl → 4.1.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- claude_mpm/VERSION +1 -1
- claude_mpm/agents/templates/research.json +39 -13
- claude_mpm/cli/__init__.py +2 -0
- claude_mpm/cli/commands/__init__.py +2 -0
- claude_mpm/cli/commands/configure.py +1221 -0
- claude_mpm/cli/commands/configure_tui.py +1921 -0
- claude_mpm/cli/commands/tickets.py +365 -784
- claude_mpm/cli/parsers/base_parser.py +7 -0
- claude_mpm/cli/parsers/configure_parser.py +119 -0
- claude_mpm/cli/startup_logging.py +39 -12
- claude_mpm/constants.py +1 -0
- claude_mpm/core/output_style_manager.py +24 -0
- claude_mpm/core/socketio_pool.py +35 -3
- claude_mpm/core/unified_agent_registry.py +46 -15
- claude_mpm/dashboard/static/css/connection-status.css +370 -0
- claude_mpm/dashboard/static/js/components/connection-debug.js +654 -0
- claude_mpm/dashboard/static/js/connection-manager.js +536 -0
- claude_mpm/dashboard/templates/index.html +11 -0
- claude_mpm/hooks/claude_hooks/services/__init__.py +3 -1
- claude_mpm/hooks/claude_hooks/services/connection_manager_http.py +190 -0
- claude_mpm/services/agents/deployment/agent_discovery_service.py +12 -3
- claude_mpm/services/agents/deployment/agent_lifecycle_manager.py +172 -233
- claude_mpm/services/agents/deployment/agent_lifecycle_manager_refactored.py +575 -0
- claude_mpm/services/agents/deployment/agent_operation_service.py +573 -0
- claude_mpm/services/agents/deployment/agent_record_service.py +419 -0
- claude_mpm/services/agents/deployment/agent_state_service.py +381 -0
- claude_mpm/services/agents/deployment/multi_source_deployment_service.py +4 -2
- claude_mpm/services/diagnostics/checks/__init__.py +2 -0
- claude_mpm/services/diagnostics/checks/instructions_check.py +418 -0
- claude_mpm/services/diagnostics/diagnostic_runner.py +15 -2
- claude_mpm/services/event_bus/direct_relay.py +173 -0
- claude_mpm/services/infrastructure/__init__.py +31 -5
- claude_mpm/services/infrastructure/monitoring/__init__.py +43 -0
- claude_mpm/services/infrastructure/monitoring/aggregator.py +437 -0
- claude_mpm/services/infrastructure/monitoring/base.py +130 -0
- claude_mpm/services/infrastructure/monitoring/legacy.py +203 -0
- claude_mpm/services/infrastructure/monitoring/network.py +218 -0
- claude_mpm/services/infrastructure/monitoring/process.py +342 -0
- claude_mpm/services/infrastructure/monitoring/resources.py +243 -0
- claude_mpm/services/infrastructure/monitoring/service.py +367 -0
- claude_mpm/services/infrastructure/monitoring.py +67 -1030
- claude_mpm/services/project/analyzer.py +13 -4
- claude_mpm/services/project/analyzer_refactored.py +450 -0
- claude_mpm/services/project/analyzer_v2.py +566 -0
- claude_mpm/services/project/architecture_analyzer.py +461 -0
- claude_mpm/services/project/dependency_analyzer.py +462 -0
- claude_mpm/services/project/language_analyzer.py +265 -0
- claude_mpm/services/project/metrics_collector.py +410 -0
- claude_mpm/services/socketio/handlers/connection_handler.py +345 -0
- claude_mpm/services/socketio/server/broadcaster.py +32 -1
- claude_mpm/services/socketio/server/connection_manager.py +516 -0
- claude_mpm/services/socketio/server/core.py +63 -0
- claude_mpm/services/socketio/server/eventbus_integration.py +20 -9
- claude_mpm/services/socketio/server/main.py +27 -1
- claude_mpm/services/ticket_manager.py +5 -1
- claude_mpm/services/ticket_services/__init__.py +26 -0
- claude_mpm/services/ticket_services/crud_service.py +328 -0
- claude_mpm/services/ticket_services/formatter_service.py +290 -0
- claude_mpm/services/ticket_services/search_service.py +324 -0
- claude_mpm/services/ticket_services/validation_service.py +303 -0
- claude_mpm/services/ticket_services/workflow_service.py +244 -0
- {claude_mpm-4.1.4.dist-info → claude_mpm-4.1.6.dist-info}/METADATA +3 -1
- {claude_mpm-4.1.4.dist-info → claude_mpm-4.1.6.dist-info}/RECORD +67 -46
- claude_mpm/agents/OUTPUT_STYLE.md +0 -73
- claude_mpm/agents/backups/INSTRUCTIONS.md +0 -352
- claude_mpm/agents/templates/OPTIMIZATION_REPORT.md +0 -156
- claude_mpm/agents/templates/backup/data_engineer_agent_20250726_234551.json +0 -79
- claude_mpm/agents/templates/backup/documentation_agent_20250726_234551.json +0 -68
- claude_mpm/agents/templates/backup/engineer_agent_20250726_234551.json +0 -77
- claude_mpm/agents/templates/backup/ops_agent_20250726_234551.json +0 -78
- claude_mpm/agents/templates/backup/qa_agent_20250726_234551.json +0 -67
- claude_mpm/agents/templates/backup/research_agent_2025011_234551.json +0 -88
- claude_mpm/agents/templates/backup/research_agent_20250726_234551.json +0 -72
- claude_mpm/agents/templates/backup/research_memory_efficient.json +0 -88
- claude_mpm/agents/templates/backup/security_agent_20250726_234551.json +0 -78
- claude_mpm/agents/templates/backup/version_control_agent_20250726_234551.json +0 -62
- claude_mpm/agents/templates/vercel_ops_instructions.md +0 -582
- {claude_mpm-4.1.4.dist-info → claude_mpm-4.1.6.dist-info}/WHEEL +0 -0
- {claude_mpm-4.1.4.dist-info → claude_mpm-4.1.6.dist-info}/entry_points.txt +0 -0
- {claude_mpm-4.1.4.dist-info → claude_mpm-4.1.6.dist-info}/licenses/LICENSE +0 -0
- {claude_mpm-4.1.4.dist-info → claude_mpm-4.1.6.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,516 @@
"""
Enhanced Connection Manager for SocketIO Server.

WHY: This module provides robust connection management with state tracking,
health monitoring, event buffering for disconnected clients, and automatic
recovery from connection failures.

DESIGN DECISION: Centralized connection management ensures consistent handling
of client states, proper event delivery, and automatic recovery mechanisms.
"""

import asyncio
import time
from collections import deque
from dataclasses import dataclass, field
from datetime import datetime
from enum import Enum
from typing import Any, Deque, Dict, List, Optional
from uuid import uuid4

from ....core.logging_config import get_logger


class ConnectionState(Enum):
    """Connection states for tracking client lifecycle."""

    CONNECTING = "connecting"
    CONNECTED = "connected"
    DISCONNECTING = "disconnecting"
    DISCONNECTED = "disconnected"
    RECONNECTING = "reconnecting"
    STALE = "stale"  # Connected but not responding


@dataclass
class ConnectionMetrics:
    """Metrics for a single connection."""

    connect_count: int = 0
    disconnect_count: int = 0
    reconnect_count: int = 0
    events_sent: int = 0
    events_acked: int = 0
    events_buffered: int = 0
    events_dropped: int = 0
    last_activity: float = field(default_factory=time.time)
    total_uptime: float = 0.0
    total_downtime: float = 0.0
    connection_quality: float = 1.0  # 0-1 quality score


@dataclass
class ClientConnection:
    """Represents a client connection with full state tracking."""

    sid: str  # Socket ID
    client_id: str  # Persistent client ID across reconnections
    state: ConnectionState
    connected_at: float
    disconnected_at: Optional[float] = None
    last_ping: Optional[float] = None
    last_pong: Optional[float] = None
    last_event: Optional[float] = None
    event_buffer: Deque[Dict[str, Any]] = field(
        default_factory=lambda: deque(maxlen=1000)
    )
    event_sequence: int = 0
    last_acked_sequence: int = 0
    pending_acks: Dict[int, Dict[str, Any]] = field(default_factory=dict)
    metrics: ConnectionMetrics = field(default_factory=ConnectionMetrics)
    metadata: Dict[str, Any] = field(default_factory=dict)

    def is_healthy(self, timeout: float = 90.0) -> bool:
        """Check if connection is healthy based on activity."""
        if self.state != ConnectionState.CONNECTED:
            return False

        now = time.time()

        # Check last activity (ping, pong, or event)
        last_activity = max(
            self.last_ping or 0,
            self.last_pong or 0,
            self.last_event or 0,
            self.connected_at,
        )

        return (now - last_activity) < timeout

    def calculate_quality(self) -> float:
        """Calculate connection quality score (0-1)."""
        if self.state != ConnectionState.CONNECTED:
            return 0.0

        # Factors for quality calculation
        factors = []

        # Reconnection rate (lower is better)
        if self.metrics.connect_count > 0:
            reconnect_rate = self.metrics.reconnect_count / self.metrics.connect_count
            factors.append(1.0 - min(reconnect_rate, 1.0))

        # Event acknowledgment rate
        if self.metrics.events_sent > 0:
            ack_rate = self.metrics.events_acked / self.metrics.events_sent
            factors.append(ack_rate)

        # Uptime ratio
        total_time = self.metrics.total_uptime + self.metrics.total_downtime
        if total_time > 0:
            uptime_ratio = self.metrics.total_uptime / total_time
            factors.append(uptime_ratio)

        # Recent activity (exponential decay over 5 minutes)
        now = time.time()
        time_since_activity = now - self.metrics.last_activity
        activity_score = max(0, 1.0 - (time_since_activity / 300))
        factors.append(activity_score)

        # Calculate average quality
        if factors:
            quality = sum(factors) / len(factors)
        else:
            quality = 1.0 if self.state == ConnectionState.CONNECTED else 0.0

        self.metrics.connection_quality = quality
        return quality


class ConnectionManager:
    """
    Enhanced connection manager with robust state tracking and recovery.

    Features:
    - Persistent client IDs across reconnections
    - Event buffering for disconnected clients
    - Sequence numbers for event ordering
    - Health monitoring with automatic stale detection
    - Connection quality metrics
    - Automatic event replay on reconnection
    """

    def __init__(self, max_buffer_size: int = 1000, event_ttl: int = 300):
        """
        Initialize connection manager.

        Args:
            max_buffer_size: Maximum events to buffer per client
            event_ttl: Time-to-live for buffered events in seconds
        """
        self.logger = get_logger(__name__)
        self.connections: Dict[str, ClientConnection] = {}
        self.client_mapping: Dict[str, str] = {}  # client_id -> current sid
        self.max_buffer_size = max_buffer_size
        self.event_ttl = event_ttl
        self.global_sequence = 0
        self.health_check_interval = 30  # seconds
        self.stale_timeout = 90  # seconds
        self.health_task = None
        self._lock = asyncio.Lock()

    async def register_connection(
        self, sid: str, client_id: Optional[str] = None
    ) -> ClientConnection:
        """
        Register a new connection or reconnection.

        Args:
            sid: Socket ID
            client_id: Optional persistent client ID for reconnection

        Returns:
            ClientConnection object
        """
        async with self._lock:
            now = time.time()

            # Check if this is a reconnection
            if client_id and client_id in self.client_mapping:
                old_sid = self.client_mapping[client_id]
                if old_sid in self.connections:
                    old_conn = self.connections[old_sid]

                    # Create new connection with history
                    conn = ClientConnection(
                        sid=sid,
                        client_id=client_id,
                        state=ConnectionState.CONNECTED,
                        connected_at=now,
                        event_buffer=old_conn.event_buffer,
                        event_sequence=old_conn.event_sequence,
                        last_acked_sequence=old_conn.last_acked_sequence,
                        metrics=old_conn.metrics,
                    )

                    # Update metrics
                    conn.metrics.reconnect_count += 1
                    conn.metrics.connect_count += 1
                    if old_conn.disconnected_at:
                        conn.metrics.total_downtime += now - old_conn.disconnected_at

                    # Clean up old connection
                    del self.connections[old_sid]

                    self.logger.info(
                        f"Client {client_id} reconnected (new sid: {sid}, "
                        f"buffered events: {len(conn.event_buffer)})"
                    )
                else:
                    # No old connection found, create new
                    client_id = client_id or str(uuid4())
                    conn = self._create_new_connection(sid, client_id, now)
            else:
                # New client
                client_id = client_id or str(uuid4())
                conn = self._create_new_connection(sid, client_id, now)

            # Register connection
            self.connections[sid] = conn
            self.client_mapping[client_id] = sid

            return conn

    def _create_new_connection(
        self, sid: str, client_id: str, now: float
    ) -> ClientConnection:
        """Create a new connection object."""
        conn = ClientConnection(
            sid=sid,
            client_id=client_id,
            state=ConnectionState.CONNECTED,
            connected_at=now,
        )
        conn.metrics.connect_count = 1
        self.logger.info(f"New client connected: {client_id} (sid: {sid})")
        return conn

    async def unregister_connection(self, sid: str, reason: str = "unknown") -> None:
        """
        Unregister a connection but keep state for reconnection.

        Args:
            sid: Socket ID
            reason: Disconnection reason
        """
        async with self._lock:
            if sid not in self.connections:
                return

            conn = self.connections[sid]
            now = time.time()

            # Update connection state
            conn.state = ConnectionState.DISCONNECTED
            conn.disconnected_at = now
            conn.metrics.disconnect_count += 1

            # Update uptime
            if conn.connected_at:
                conn.metrics.total_uptime += now - conn.connected_at

            self.logger.info(
                f"Client {conn.client_id} disconnected (sid: {sid}, reason: {reason}, "
                f"buffered events: {len(conn.event_buffer)})"
            )

            # Keep connection for potential reconnection
            # It will be cleaned up by health check if not reconnected

    async def buffer_event(self, sid: str, event: Dict[str, Any]) -> bool:
        """
        Buffer an event for a client.

        Args:
            sid: Socket ID
            event: Event to buffer

        Returns:
            True if buffered successfully
        """
        async with self._lock:
            if sid not in self.connections:
                return False

            conn = self.connections[sid]

            # Add sequence number
            self.global_sequence += 1
            event["sequence"] = self.global_sequence
            event["timestamp"] = time.time()

            # Buffer the event
            conn.event_buffer.append(event)
            conn.event_sequence = self.global_sequence
            conn.metrics.events_buffered += 1

            # Drop old events if buffer is full
            if len(conn.event_buffer) >= self.max_buffer_size:
                conn.metrics.events_dropped += 1

            return True

    async def get_replay_events(
        self, sid: str, last_sequence: int = 0
    ) -> List[Dict[str, Any]]:
        """
        Get events to replay for a client after reconnection.

        Args:
            sid: Socket ID
            last_sequence: Last sequence number received by client

        Returns:
            List of events to replay
        """
        async with self._lock:
            if sid not in self.connections:
                return []

            conn = self.connections[sid]
            now = time.time()

            # Filter events by sequence and TTL
            replay_events = []
            for event in conn.event_buffer:
                if event.get("sequence", 0) > last_sequence:
                    # Check TTL
                    event_age = now - event.get("timestamp", 0)
                    if event_age < self.event_ttl:
                        replay_events.append(event)

            self.logger.info(
                f"Replaying {len(replay_events)} events for {conn.client_id} "
                f"(from sequence {last_sequence})"
            )

            return replay_events

    async def acknowledge_event(self, sid: str, sequence: int) -> None:
        """
        Acknowledge receipt of an event by a client.

        Args:
            sid: Socket ID
            sequence: Sequence number of acknowledged event
        """
        async with self._lock:
            if sid not in self.connections:
                return

            conn = self.connections[sid]
            conn.last_acked_sequence = max(conn.last_acked_sequence, sequence)
            conn.metrics.events_acked += 1

            # Remove from pending acks
            if sequence in conn.pending_acks:
                del conn.pending_acks[sequence]

    async def update_activity(self, sid: str, activity_type: str = "event") -> None:
        """
        Update last activity time for a connection.

        Args:
            sid: Socket ID
            activity_type: Type of activity (event, ping, pong)
        """
        if sid not in self.connections:
            return

        conn = self.connections[sid]
        now = time.time()

        if activity_type == "ping":
            conn.last_ping = now
        elif activity_type == "pong":
            conn.last_pong = now
        else:
            conn.last_event = now

        conn.metrics.last_activity = now

    async def start_health_monitoring(self) -> None:
        """Start the health monitoring task."""
        if self.health_task:
            return

        self.health_task = asyncio.create_task(self._health_check_loop())
        self.logger.info("Started connection health monitoring")

    async def stop_health_monitoring(self) -> None:
        """Stop the health monitoring task."""
        if self.health_task:
            self.health_task.cancel()
            try:
                await self.health_task
            except asyncio.CancelledError:
                pass
            self.health_task = None
            self.logger.info("Stopped connection health monitoring")

    async def _health_check_loop(self) -> None:
        """Periodic health check for all connections."""
        while True:
            try:
                await asyncio.sleep(self.health_check_interval)
                await self.check_connection_health()
            except asyncio.CancelledError:
                break
            except Exception as e:
                self.logger.error(f"Error in health check loop: {e}")

    async def check_connection_health(self) -> Dict[str, Any]:
        """
        Check health of all connections and clean up stale ones.

        Returns:
            Health status report
        """
        async with self._lock:
            now = time.time()
            report = {
                "timestamp": datetime.now().isoformat(),
                "total_connections": len(self.connections),
                "healthy": 0,
                "stale": 0,
                "disconnected": 0,
                "cleaned": 0,
                "quality_scores": {},
            }

            to_clean = []

            for sid, conn in self.connections.items():
                # Calculate quality
                quality = conn.calculate_quality()
                report["quality_scores"][conn.client_id] = quality

                if conn.state == ConnectionState.CONNECTED:
                    if conn.is_healthy(self.stale_timeout):
                        report["healthy"] += 1
                    else:
                        # Mark as stale
                        conn.state = ConnectionState.STALE
                        report["stale"] += 1
                        self.logger.warning(
                            f"Connection {conn.client_id} marked as stale "
                            f"(last activity: {now - conn.metrics.last_activity:.1f}s ago)"
                        )
                elif conn.state == ConnectionState.DISCONNECTED:
                    report["disconnected"] += 1

                    # Clean up old disconnected connections
                    if (
                        conn.disconnected_at
                        and (now - conn.disconnected_at) > self.event_ttl
                    ):
                        to_clean.append(sid)

            # Clean up old connections
            for sid in to_clean:
                conn = self.connections[sid]
                del self.connections[sid]
                if conn.client_id in self.client_mapping:
                    del self.client_mapping[conn.client_id]
                report["cleaned"] += 1
                self.logger.info(f"Cleaned up old connection: {conn.client_id}")

            if report["stale"] > 0 or report["cleaned"] > 0:
                self.logger.info(
                    f"Health check: {report['healthy']} healthy, "
                    f"{report['stale']} stale, {report['disconnected']} disconnected, "
                    f"{report['cleaned']} cleaned"
                )

            return report

    def get_connection(self, sid: str) -> Optional[ClientConnection]:
        """Get connection by socket ID."""
        return self.connections.get(sid)

    def get_all_connections(self) -> Dict[str, ClientConnection]:
        """Get all connections."""
        return self.connections.copy()

    def get_metrics(self) -> Dict[str, Any]:
        """Get overall connection metrics."""
        total_events_sent = sum(
            c.metrics.events_sent for c in self.connections.values()
        )
        total_events_acked = sum(
            c.metrics.events_acked for c in self.connections.values()
        )
        total_events_buffered = sum(
            c.metrics.events_buffered for c in self.connections.values()
        )
        total_events_dropped = sum(
            c.metrics.events_dropped for c in self.connections.values()
        )
        avg_quality = sum(
            c.metrics.connection_quality for c in self.connections.values()
        ) / max(len(self.connections), 1)

        return {
            "total_connections": len(self.connections),
            "active_connections": sum(
                1
                for c in self.connections.values()
                if c.state == ConnectionState.CONNECTED
            ),
            "total_events_sent": total_events_sent,
            "total_events_acked": total_events_acked,
            "total_events_buffered": total_events_buffered,
            "total_events_dropped": total_events_dropped,
            "average_quality": avg_quality,
            "global_sequence": self.global_sequence,
        }
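The new connection manager above (claude_mpm/services/socketio/server/connection_manager.py in the file list) is what keeps a client's identity and event buffer alive across Socket.IO reconnects: every buffered event is stamped with a global sequence number, held per client, replayed after a reconnect, and each connection is scored as the average of its reconnect rate, acknowledgment rate, uptime ratio, and a five-minute activity decay (for example, 1 reconnect in 2 connects, an 80% ack rate, 90% uptime, and fresh activity average to (0.5 + 0.8 + 0.9 + 1.0) / 4 = 0.8). The following is a minimal sketch of that lifecycle, written against only the methods shown above; the socket IDs and event payload are illustrative.

import asyncio

from claude_mpm.services.socketio.server.connection_manager import ConnectionManager


async def demo() -> None:
    manager = ConnectionManager(max_buffer_size=1000, event_ttl=300)
    await manager.start_health_monitoring()

    # First connect: no client_id is supplied, so one is generated and mapped to the sid.
    conn = await manager.register_connection(sid="sock-1")

    # Events are buffered per client and stamped with a global sequence number.
    await manager.buffer_event("sock-1", {"type": "claude_event", "subtype": "demo"})

    # The client drops; its state and buffer are kept until event_ttl expires.
    await manager.unregister_connection("sock-1", reason="transport closed")

    # Reconnect with the same client_id on a new socket, then replay missed events.
    await manager.register_connection(sid="sock-2", client_id=conn.client_id)
    for event in await manager.get_replay_events("sock-2", last_sequence=0):
        await manager.acknowledge_event("sock-2", event["sequence"])

    print(manager.get_metrics())
    await manager.stop_health_monitoring()


asyncio.run(demo())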
@@ -172,6 +172,9 @@ class SocketIOServerCore:
         self.app = web.Application()
         self.sio.attach(self.app)
 
+        # Setup HTTP API endpoints for receiving events from hook handlers
+        self._setup_http_api()
+
         # Find and serve static files
         self._setup_static_files()
 
@@ -229,6 +232,48 @@ class SocketIOServerCore:
         except Exception as e:
             self.logger.error(f"Error stopping Socket.IO server: {e}")
 
+    def _setup_http_api(self):
+        """Setup HTTP API endpoints for receiving events from hook handlers.
+
+        WHY: Hook handlers are ephemeral processes that spawn and die quickly.
+        Using HTTP POST allows them to send events without managing persistent
+        connections, eliminating disconnection issues.
+        """
+
+        async def api_events_handler(request):
+            """Handle POST /api/events from hook handlers."""
+            try:
+                # Parse JSON payload
+                event_data = await request.json()
+
+                # Log receipt if debugging
+                event_type = event_data.get("subtype", "unknown")
+                self.logger.debug(f"Received HTTP event: {event_type}")
+
+                # Broadcast to all connected dashboard clients via SocketIO
+                if self.sio:
+                    # The event is already in claude_event format from the hook handler
+                    await self.sio.emit("claude_event", event_data)
+
+                    # Update stats
+                    self.stats["events_sent"] = self.stats.get("events_sent", 0) + 1
+
+                # Add to event buffer for late-joining clients
+                with self.buffer_lock:
+                    self.event_buffer.append(event_data)
+                    self.stats["events_buffered"] = len(self.event_buffer)
+
+                # Return 204 No Content for success
+                return web.Response(status=204)
+
+            except Exception as e:
+                self.logger.error(f"Error handling HTTP event: {e}")
+                return web.Response(status=500, text=str(e))
+
+        # Register the HTTP POST endpoint
+        self.app.router.add_post("/api/events", api_events_handler)
+        self.logger.info("✅ HTTP API endpoint registered at /api/events")
+
     def _setup_static_files(self):
         """Setup static file serving for the dashboard."""
         try:
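The _setup_http_api docstring above spells out the transport change: hook handlers are ephemeral processes, so rather than holding Socket.IO connections they POST a JSON payload to /api/events and the server rebroadcasts it to dashboard clients as a claude_event. A minimal sketch of the sending side, using only the standard library; the port 8765 and the payload fields are assumptions for illustration, not values taken from this diff.

import json
import urllib.request

# Hypothetical payload; real hook handlers send events already in claude_event format.
event = {"type": "claude_event", "subtype": "pre_tool_use", "data": {"tool": "example"}}

request = urllib.request.Request(
    "http://localhost:8765/api/events",  # assumed host/port of the running dashboard server
    data=json.dumps(event).encode("utf-8"),
    headers={"Content-Type": "application/json"},
    method="POST",
)

# The endpoint answers 204 No Content on success and 500 with the error text on failure.
with urllib.request.urlopen(request) as response:
    assert response.status == 204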
@@ -261,6 +306,24 @@ class SocketIOServerCore:
 
         self.app.router.add_get("/", index_handler)
 
+        # Serve the actual dashboard template at /dashboard
+        async def dashboard_handler(request):
+            dashboard_template = (
+                self.dashboard_path.parent / "templates" / "index.html"
+            )
+            if dashboard_template.exists():
+                self.logger.debug(
+                    f"Serving dashboard template from: {dashboard_template}"
+                )
+                return web.FileResponse(dashboard_template)
+            # Fallback to the main index if template doesn't exist
+            self.logger.warning(
+                f"Dashboard template not found at: {dashboard_template}, falling back to index"
+            )
+            return await index_handler(request)
+
+        self.app.router.add_get("/dashboard", dashboard_handler)
+
         # Serve version.json from dashboard directory
         async def version_handler(request):
             version_file = self.dashboard_path / "version.json"
@@ -10,8 +10,9 @@ WHY this integration module:
 import logging
 from typing import Optional
 
-from claude_mpm.services.event_bus import EventBus
+from claude_mpm.services.event_bus import EventBus
 from claude_mpm.services.event_bus.config import get_config
+from claude_mpm.services.event_bus.direct_relay import DirectSocketIORelay
 
 logger = logging.getLogger(__name__)
 
@@ -33,7 +34,7 @@ class EventBusIntegration:
             server_instance: Optional Socket.IO server instance
         """
         self.server = server_instance
-        self.relay: Optional[
+        self.relay: Optional[DirectSocketIORelay] = None
         self.event_bus: Optional[EventBus] = None
         self.config = get_config()
         self.enabled = self.config.enabled and self.config.relay_enabled
@@ -76,23 +77,33 @@ class EventBusIntegration:
             # Apply configuration
             self.config.apply_to_eventbus(self.event_bus)
 
-            # Create
-            relay_port = port or self.config.relay_port
+            # Create direct relay that uses server's broadcaster
             print(
-                f"[{datetime.now().isoformat()}] Creating
+                f"[{datetime.now().isoformat()}] Creating DirectSocketIORelay...",
                 flush=True,
             )
-            self.
-
+            if self.server:
+                self.relay = DirectSocketIORelay(self.server)
+                print(
+                    f"[{datetime.now().isoformat()}] DirectSocketIORelay created with server instance",
+                    flush=True,
+                )
+            else:
+                logger.warning("No server instance provided, relay won't work")
+                print(
+                    f"[{datetime.now().isoformat()}] WARNING: No server instance for relay",
+                    flush=True,
+                )
+                return False
 
             # Start the relay
             print(f"[{datetime.now().isoformat()}] Starting relay...", flush=True)
             self.relay.start()
             print(f"[{datetime.now().isoformat()}] Relay started", flush=True)
 
-            logger.info(
+            logger.info("EventBus integration setup complete with DirectSocketIORelay")
             print(
-                f"[{datetime.now().isoformat()}] EventBus integration setup complete",
+                f"[{datetime.now().isoformat()}] EventBus integration setup complete with DirectSocketIORelay",
                 flush=True,
             )
             return True
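Taken together, the eventbus_integration.py changes drop the port-based relay in favor of a DirectSocketIORelay that is handed the running server instance and started in-process, returning False early when no server is available. A minimal sketch of that wiring, assuming only the constructor and start() call visible in this hunk; direct_relay.py itself (+173 lines in the file list) is not shown in this section, and wire_relay is a hypothetical helper name.

from claude_mpm.services.event_bus.direct_relay import DirectSocketIORelay


def wire_relay(server):
    """Attach the in-process EventBus relay to a running Socket.IO server."""
    if server is None:
        # Mirrors the guard added in setup(): with no server there is nothing to relay to.
        return None

    relay = DirectSocketIORelay(server)  # constructor usage as shown in the diff
    relay.start()
    return relay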