claude_mpm-4.1.4-py3-none-any.whl → claude_mpm-4.1.6-py3-none-any.whl
This diff shows the changes between two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
- claude_mpm/VERSION +1 -1
- claude_mpm/agents/templates/research.json +39 -13
- claude_mpm/cli/__init__.py +2 -0
- claude_mpm/cli/commands/__init__.py +2 -0
- claude_mpm/cli/commands/configure.py +1221 -0
- claude_mpm/cli/commands/configure_tui.py +1921 -0
- claude_mpm/cli/commands/tickets.py +365 -784
- claude_mpm/cli/parsers/base_parser.py +7 -0
- claude_mpm/cli/parsers/configure_parser.py +119 -0
- claude_mpm/cli/startup_logging.py +39 -12
- claude_mpm/constants.py +1 -0
- claude_mpm/core/output_style_manager.py +24 -0
- claude_mpm/core/socketio_pool.py +35 -3
- claude_mpm/core/unified_agent_registry.py +46 -15
- claude_mpm/dashboard/static/css/connection-status.css +370 -0
- claude_mpm/dashboard/static/js/components/connection-debug.js +654 -0
- claude_mpm/dashboard/static/js/connection-manager.js +536 -0
- claude_mpm/dashboard/templates/index.html +11 -0
- claude_mpm/hooks/claude_hooks/services/__init__.py +3 -1
- claude_mpm/hooks/claude_hooks/services/connection_manager_http.py +190 -0
- claude_mpm/services/agents/deployment/agent_discovery_service.py +12 -3
- claude_mpm/services/agents/deployment/agent_lifecycle_manager.py +172 -233
- claude_mpm/services/agents/deployment/agent_lifecycle_manager_refactored.py +575 -0
- claude_mpm/services/agents/deployment/agent_operation_service.py +573 -0
- claude_mpm/services/agents/deployment/agent_record_service.py +419 -0
- claude_mpm/services/agents/deployment/agent_state_service.py +381 -0
- claude_mpm/services/agents/deployment/multi_source_deployment_service.py +4 -2
- claude_mpm/services/diagnostics/checks/__init__.py +2 -0
- claude_mpm/services/diagnostics/checks/instructions_check.py +418 -0
- claude_mpm/services/diagnostics/diagnostic_runner.py +15 -2
- claude_mpm/services/event_bus/direct_relay.py +173 -0
- claude_mpm/services/infrastructure/__init__.py +31 -5
- claude_mpm/services/infrastructure/monitoring/__init__.py +43 -0
- claude_mpm/services/infrastructure/monitoring/aggregator.py +437 -0
- claude_mpm/services/infrastructure/monitoring/base.py +130 -0
- claude_mpm/services/infrastructure/monitoring/legacy.py +203 -0
- claude_mpm/services/infrastructure/monitoring/network.py +218 -0
- claude_mpm/services/infrastructure/monitoring/process.py +342 -0
- claude_mpm/services/infrastructure/monitoring/resources.py +243 -0
- claude_mpm/services/infrastructure/monitoring/service.py +367 -0
- claude_mpm/services/infrastructure/monitoring.py +67 -1030
- claude_mpm/services/project/analyzer.py +13 -4
- claude_mpm/services/project/analyzer_refactored.py +450 -0
- claude_mpm/services/project/analyzer_v2.py +566 -0
- claude_mpm/services/project/architecture_analyzer.py +461 -0
- claude_mpm/services/project/dependency_analyzer.py +462 -0
- claude_mpm/services/project/language_analyzer.py +265 -0
- claude_mpm/services/project/metrics_collector.py +410 -0
- claude_mpm/services/socketio/handlers/connection_handler.py +345 -0
- claude_mpm/services/socketio/server/broadcaster.py +32 -1
- claude_mpm/services/socketio/server/connection_manager.py +516 -0
- claude_mpm/services/socketio/server/core.py +63 -0
- claude_mpm/services/socketio/server/eventbus_integration.py +20 -9
- claude_mpm/services/socketio/server/main.py +27 -1
- claude_mpm/services/ticket_manager.py +5 -1
- claude_mpm/services/ticket_services/__init__.py +26 -0
- claude_mpm/services/ticket_services/crud_service.py +328 -0
- claude_mpm/services/ticket_services/formatter_service.py +290 -0
- claude_mpm/services/ticket_services/search_service.py +324 -0
- claude_mpm/services/ticket_services/validation_service.py +303 -0
- claude_mpm/services/ticket_services/workflow_service.py +244 -0
- {claude_mpm-4.1.4.dist-info → claude_mpm-4.1.6.dist-info}/METADATA +3 -1
- {claude_mpm-4.1.4.dist-info → claude_mpm-4.1.6.dist-info}/RECORD +67 -46
- claude_mpm/agents/OUTPUT_STYLE.md +0 -73
- claude_mpm/agents/backups/INSTRUCTIONS.md +0 -352
- claude_mpm/agents/templates/OPTIMIZATION_REPORT.md +0 -156
- claude_mpm/agents/templates/backup/data_engineer_agent_20250726_234551.json +0 -79
- claude_mpm/agents/templates/backup/documentation_agent_20250726_234551.json +0 -68
- claude_mpm/agents/templates/backup/engineer_agent_20250726_234551.json +0 -77
- claude_mpm/agents/templates/backup/ops_agent_20250726_234551.json +0 -78
- claude_mpm/agents/templates/backup/qa_agent_20250726_234551.json +0 -67
- claude_mpm/agents/templates/backup/research_agent_2025011_234551.json +0 -88
- claude_mpm/agents/templates/backup/research_agent_20250726_234551.json +0 -72
- claude_mpm/agents/templates/backup/research_memory_efficient.json +0 -88
- claude_mpm/agents/templates/backup/security_agent_20250726_234551.json +0 -78
- claude_mpm/agents/templates/backup/version_control_agent_20250726_234551.json +0 -62
- claude_mpm/agents/templates/vercel_ops_instructions.md +0 -582
- {claude_mpm-4.1.4.dist-info → claude_mpm-4.1.6.dist-info}/WHEEL +0 -0
- {claude_mpm-4.1.4.dist-info → claude_mpm-4.1.6.dist-info}/entry_points.txt +0 -0
- {claude_mpm-4.1.4.dist-info → claude_mpm-4.1.6.dist-info}/licenses/LICENSE +0 -0
- {claude_mpm-4.1.4.dist-info → claude_mpm-4.1.6.dist-info}/top_level.txt +0 -0
claude_mpm/services/infrastructure/monitoring/service.py (new file)

@@ -0,0 +1,367 @@

```python
"""Service health monitoring for application-level metrics.

Monitors service-specific metrics like client connections, event processing, and error rates.
"""

import time
from typing import Any, Dict, List

from .base import BaseMonitoringService, HealthMetric, HealthStatus


class ServiceHealthService(BaseMonitoringService):
    """Service for monitoring application-level health metrics.

    Monitors:
    - Connected clients count
    - Event processing rate
    - Error rates
    - Response times
    - Service activity
    """

    def __init__(
        self,
        service_stats: Dict[str, Any],
        max_clients: int = 1000,
        max_error_rate: float = 0.1,
        stale_activity_seconds: int = 300,
    ):
        """Initialize service health monitoring.

        Args:
            service_stats: Reference to service statistics dictionary
            max_clients: Maximum allowed connected clients
            max_error_rate: Maximum allowed error rate (0.0-1.0)
            stale_activity_seconds: Seconds before activity is considered stale
        """
        super().__init__("ServiceHealth")
        self.service_stats = service_stats
        self.max_clients = max_clients
        self.max_error_rate = max_error_rate
        self.stale_activity_seconds = stale_activity_seconds

        # Rate calculation state
        self.last_check_time = time.time()
        self.last_events_processed = 0
        self.last_errors = 0

    async def check_health(self) -> List[HealthMetric]:
        """Check service-specific health metrics."""
        metrics = []
        current_time = time.time()

        # Connected clients
        metrics.extend(self._check_client_connections())

        # Event processing
        metrics.extend(self._check_event_processing(current_time))

        # Error rates
        metrics.extend(self._check_error_rates())

        # Service activity
        metrics.extend(self._check_service_activity(current_time))

        # Response times (if available)
        metrics.extend(self._check_response_times())

        # Update state for next check
        self.last_check_time = current_time

        return metrics

    def _check_client_connections(self) -> List[HealthMetric]:
        """Check client connection metrics."""
        metrics = []
        try:
            client_count = self.service_stats.get("clients_connected", 0)

            # Determine status based on thresholds
            if client_count > self.max_clients:
                client_status = HealthStatus.CRITICAL
            elif client_count > self.max_clients * 0.8:
                client_status = HealthStatus.WARNING
            else:
                client_status = HealthStatus.HEALTHY

            metrics.append(
                HealthMetric(
                    name="connected_clients",
                    value=client_count,
                    status=client_status,
                    threshold=self.max_clients,
                )
            )

            # Client connection rate (if available)
            if "connection_rate" in self.service_stats:
                metrics.append(
                    HealthMetric(
                        name="connection_rate",
                        value=self.service_stats["connection_rate"],
                        status=HealthStatus.HEALTHY,
                        unit="connections/sec",
                    )
                )
        except Exception as e:
            metrics.append(
                HealthMetric(
                    name="connected_clients",
                    value=-1,
                    status=HealthStatus.UNKNOWN,
                    message=f"Failed to get client count: {e}",
                )
            )
        return metrics

    def _check_event_processing(self, current_time: float) -> List[HealthMetric]:
        """Check event processing metrics."""
        metrics = []
        try:
            events_processed = self.service_stats.get("events_processed", 0)

            # Calculate processing rate
            time_diff = current_time - self.last_check_time
            if time_diff > 0 and self.last_events_processed > 0:
                event_rate = (events_processed - self.last_events_processed) / time_diff

                # Determine status based on rate
                rate_status = HealthStatus.HEALTHY
                if event_rate == 0 and events_processed > 0:
                    rate_status = HealthStatus.WARNING  # Processing stopped

                metrics.append(
                    HealthMetric(
                        name="event_processing_rate",
                        value=round(event_rate, 2),
                        status=rate_status,
                        unit="events/sec",
                    )
                )

            # Update for next calculation
            self.last_events_processed = events_processed

            # Total events processed
            metrics.append(
                HealthMetric(
                    name="total_events_processed",
                    value=events_processed,
                    status=HealthStatus.HEALTHY,
                )
            )

            # Event queue size (if available)
            if "event_queue_size" in self.service_stats:
                queue_size = self.service_stats["event_queue_size"]
                queue_status = HealthStatus.HEALTHY
                if queue_size > 1000:
                    queue_status = HealthStatus.WARNING
                if queue_size > 5000:
                    queue_status = HealthStatus.CRITICAL

                metrics.append(
                    HealthMetric(
                        name="event_queue_size",
                        value=queue_size,
                        status=queue_status,
                    )
                )
        except Exception as e:
            metrics.append(
                HealthMetric(
                    name="event_processing_rate",
                    value=-1,
                    status=HealthStatus.UNKNOWN,
                    message=f"Failed to calculate event rate: {e}",
                )
            )
        return metrics

    def _check_error_rates(self) -> List[HealthMetric]:
        """Check error rate metrics."""
        metrics = []
        try:
            errors = self.service_stats.get("errors", 0)
            total_events = self.service_stats.get("events_processed", 1)

            # Calculate error rate
            error_rate = errors / max(total_events, 1)

            # Determine status based on rate
            if error_rate > self.max_error_rate:
                error_status = HealthStatus.CRITICAL
            elif error_rate > self.max_error_rate * 0.5:
                error_status = HealthStatus.WARNING
            else:
                error_status = HealthStatus.HEALTHY

            metrics.append(
                HealthMetric(
                    name="error_rate",
                    value=round(error_rate, 4),
                    status=error_status,
                    threshold=self.max_error_rate,
                    unit="ratio",
                )
            )

            # Total errors
            metrics.append(
                HealthMetric(
                    name="total_errors",
                    value=errors,
                    status=(
                        HealthStatus.HEALTHY if errors == 0 else HealthStatus.WARNING
                    ),
                )
            )

            # Recent error rate (errors in last check period)
            if self.last_errors is not None:
                recent_errors = errors - self.last_errors
                metrics.append(
                    HealthMetric(
                        name="recent_errors",
                        value=recent_errors,
                        status=(
                            HealthStatus.HEALTHY
                            if recent_errors == 0
                            else HealthStatus.WARNING
                        ),
                    )
                )

            self.last_errors = errors

        except Exception as e:
            metrics.append(
                HealthMetric(
                    name="error_rate",
                    value=-1,
                    status=HealthStatus.UNKNOWN,
                    message=f"Failed to calculate error rate: {e}",
                )
            )
        return metrics

    def _check_service_activity(self, current_time: float) -> List[HealthMetric]:
        """Check service activity freshness."""
        metrics = []
        try:
            last_activity = self.service_stats.get("last_activity")

            if last_activity:
                # Parse timestamp if needed
                if isinstance(last_activity, str):
                    last_activity_timestamp = self._parse_timestamp(last_activity)
                else:
                    last_activity_timestamp = float(last_activity)

                time_since_activity = current_time - last_activity_timestamp

                # Determine status based on staleness
                if time_since_activity > self.stale_activity_seconds * 2:
                    activity_status = HealthStatus.CRITICAL
                elif time_since_activity > self.stale_activity_seconds:
                    activity_status = HealthStatus.WARNING
                else:
                    activity_status = HealthStatus.HEALTHY

                metrics.append(
                    HealthMetric(
                        name="time_since_last_activity",
                        value=round(time_since_activity, 2),
                        status=activity_status,
                        unit="seconds",
                    )
                )
            else:
                metrics.append(
                    HealthMetric(
                        name="time_since_last_activity",
                        value=-1,
                        status=HealthStatus.WARNING,
                        message="No last activity recorded",
                    )
                )
        except Exception as e:
            metrics.append(
                HealthMetric(
                    name="time_since_last_activity",
                    value=-1,
                    status=HealthStatus.UNKNOWN,
                    message=f"Failed to parse last activity: {e}",
                )
            )
        return metrics

    def _check_response_times(self) -> List[HealthMetric]:
        """Check response time metrics if available."""
        metrics = []

        # Average response time
        if "avg_response_time_ms" in self.service_stats:
            avg_time = self.service_stats["avg_response_time_ms"]

            # Determine status based on response time
            if avg_time > 1000:  # > 1 second
                time_status = HealthStatus.CRITICAL
            elif avg_time > 500:  # > 500ms
                time_status = HealthStatus.WARNING
            else:
                time_status = HealthStatus.HEALTHY

            metrics.append(
                HealthMetric(
                    name="avg_response_time",
                    value=round(avg_time, 2),
                    status=time_status,
                    unit="ms",
                )
            )

        # P95 response time
        if "p95_response_time_ms" in self.service_stats:
            metrics.append(
                HealthMetric(
                    name="p95_response_time",
                    value=round(self.service_stats["p95_response_time_ms"], 2),
                    status=HealthStatus.HEALTHY,
                    unit="ms",
                )
            )

        return metrics

    def _parse_timestamp(self, timestamp_str: str) -> float:
        """Parse ISO timestamp string to float.

        Args:
            timestamp_str: ISO format timestamp string

        Returns:
            Timestamp as float (seconds since epoch)
        """
        try:
            from dateutil.parser import parse

            dt = parse(timestamp_str)
            return dt.timestamp()
        except ImportError:
            # Fallback: try manual parsing
            from datetime import datetime

            clean_timestamp = timestamp_str.rstrip("Z")
            dt = datetime.fromisoformat(clean_timestamp.replace("T", " "))
            return dt.timestamp()

    def update_stats(self, **kwargs) -> None:
        """Update service statistics.

        Args:
            **kwargs: Statistics to update
        """
        self.service_stats.update(kwargs)
        self.service_stats["last_activity"] = time.time()
```
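The new ServiceHealthService reads everything from a caller-maintained stats dictionary, so exercising it only requires populating that dict and awaiting check_health(). The sketch below is a minimal usage example, not taken from the project's documentation: the import path is assumed to mirror the new file's location, and HealthMetric is assumed to expose the name, value, and status attributes used in the diff above.

```python
# Minimal usage sketch (assumptions: import path mirrors the new file's location;
# HealthMetric exposes the name/value/status attributes seen in the diff above).
import asyncio
import time

from claude_mpm.services.infrastructure.monitoring.service import ServiceHealthService

# The service only reads from this dict; the caller is responsible for keeping it fresh.
stats = {
    "clients_connected": 12,
    "events_processed": 4502,
    "errors": 3,
    "last_activity": time.time(),
}

service = ServiceHealthService(stats, max_clients=500, max_error_rate=0.05)

async def main() -> None:
    # check_health() returns a flat list of HealthMetric objects across all checks.
    for metric in await service.check_health():
        print(f"{metric.name}: {metric.value} ({metric.status})")

asyncio.run(main())

# update_stats() merges new values and stamps last_activity with the current time,
# which keeps the staleness check (time_since_last_activity) healthy between calls.
service.update_stats(clients_connected=14, events_processed=4700)
```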