claude-mpm 4.15.2__py3-none-any.whl → 4.15.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of claude-mpm might be problematic. Click here for more details.
- claude_mpm/VERSION +1 -1
- claude_mpm/agents/templates/agentic-coder-optimizer.json +9 -2
- claude_mpm/agents/templates/api_qa.json +7 -1
- claude_mpm/agents/templates/clerk-ops.json +8 -1
- claude_mpm/agents/templates/code_analyzer.json +4 -1
- claude_mpm/agents/templates/dart_engineer.json +11 -1
- claude_mpm/agents/templates/data_engineer.json +11 -1
- claude_mpm/agents/templates/documentation.json +6 -1
- claude_mpm/agents/templates/engineer.json +13 -0
- claude_mpm/agents/templates/gcp_ops_agent.json +8 -1
- claude_mpm/agents/templates/golang_engineer.json +11 -1
- claude_mpm/agents/templates/java_engineer.json +12 -2
- claude_mpm/agents/templates/local_ops_agent.json +216 -37
- claude_mpm/agents/templates/nextjs_engineer.json +11 -1
- claude_mpm/agents/templates/ops.json +8 -1
- claude_mpm/agents/templates/php-engineer.json +11 -1
- claude_mpm/agents/templates/project_organizer.json +9 -2
- claude_mpm/agents/templates/prompt-engineer.json +5 -1
- claude_mpm/agents/templates/python_engineer.json +11 -1
- claude_mpm/agents/templates/qa.json +7 -1
- claude_mpm/agents/templates/react_engineer.json +11 -1
- claude_mpm/agents/templates/refactoring_engineer.json +8 -1
- claude_mpm/agents/templates/research.json +4 -1
- claude_mpm/agents/templates/ruby-engineer.json +11 -1
- claude_mpm/agents/templates/rust_engineer.json +11 -1
- claude_mpm/agents/templates/security.json +6 -1
- claude_mpm/agents/templates/ticketing.json +6 -1
- claude_mpm/agents/templates/typescript_engineer.json +11 -1
- claude_mpm/agents/templates/vercel_ops_agent.json +8 -1
- claude_mpm/agents/templates/version_control.json +8 -1
- claude_mpm/agents/templates/web_qa.json +7 -1
- claude_mpm/agents/templates/web_ui.json +11 -1
- claude_mpm/cli/commands/configure.py +164 -16
- claude_mpm/cli/commands/configure_agent_display.py +6 -6
- claude_mpm/cli/commands/configure_behavior_manager.py +8 -8
- claude_mpm/cli/commands/configure_navigation.py +20 -18
- claude_mpm/cli/commands/configure_startup_manager.py +14 -14
- claude_mpm/cli/commands/configure_template_editor.py +8 -8
- claude_mpm/cli/interactive/__init__.py +3 -0
- claude_mpm/cli/interactive/skills_wizard.py +491 -0
- claude_mpm/cli/startup.py +26 -0
- claude_mpm/core/enums.py +18 -0
- claude_mpm/core/types.py +2 -9
- claude_mpm/dashboard/static/js/dashboard.js +0 -14
- claude_mpm/dashboard/templates/index.html +3 -41
- claude_mpm/services/agents/deployment/validation/__init__.py +3 -1
- claude_mpm/services/agents/deployment/validation/validation_result.py +1 -9
- claude_mpm/services/core/models/health.py +1 -28
- claude_mpm/services/infrastructure/monitoring/__init__.py +1 -1
- claude_mpm/services/infrastructure/monitoring/aggregator.py +12 -12
- claude_mpm/services/infrastructure/monitoring/base.py +5 -13
- claude_mpm/services/infrastructure/monitoring/network.py +7 -6
- claude_mpm/services/infrastructure/monitoring/process.py +13 -12
- claude_mpm/services/infrastructure/monitoring/resources.py +7 -6
- claude_mpm/services/infrastructure/monitoring/service.py +16 -15
- claude_mpm/services/local_ops/__init__.py +1 -1
- claude_mpm/services/local_ops/crash_detector.py +1 -1
- claude_mpm/services/local_ops/health_checks/http_check.py +2 -1
- claude_mpm/services/local_ops/health_checks/process_check.py +2 -1
- claude_mpm/services/local_ops/health_checks/resource_check.py +2 -1
- claude_mpm/services/local_ops/health_manager.py +1 -1
- claude_mpm/services/local_ops/restart_manager.py +1 -1
- claude_mpm/services/shared/async_service_base.py +16 -27
- claude_mpm/services/shared/lifecycle_service_base.py +1 -14
- claude_mpm/services/socketio/handlers/__init__.py +5 -2
- claude_mpm/services/socketio/handlers/hook.py +10 -0
- claude_mpm/services/socketio/handlers/registry.py +4 -2
- claude_mpm/services/socketio/server/main.py +7 -7
- claude_mpm/skills/__init__.py +21 -0
- claude_mpm/skills/bundled/__init__.py +6 -0
- claude_mpm/skills/registry.py +198 -0
- claude_mpm/skills/skill_manager.py +310 -0
- {claude_mpm-4.15.2.dist-info → claude_mpm-4.15.6.dist-info}/METADATA +1 -1
- {claude_mpm-4.15.2.dist-info → claude_mpm-4.15.6.dist-info}/RECORD +78 -80
- claude_mpm/dashboard/static/css/code-tree.css +0 -1639
- claude_mpm/dashboard/static/js/components/code-tree/tree-breadcrumb.js +0 -353
- claude_mpm/dashboard/static/js/components/code-tree/tree-constants.js +0 -235
- claude_mpm/dashboard/static/js/components/code-tree/tree-search.js +0 -409
- claude_mpm/dashboard/static/js/components/code-tree/tree-utils.js +0 -435
- claude_mpm/dashboard/static/js/components/code-tree.js +0 -5869
- claude_mpm/dashboard/static/js/components/code-viewer.js +0 -1386
- {claude_mpm-4.15.2.dist-info → claude_mpm-4.15.6.dist-info}/WHEEL +0 -0
- {claude_mpm-4.15.2.dist-info → claude_mpm-4.15.6.dist-info}/entry_points.txt +0 -0
- {claude_mpm-4.15.2.dist-info → claude_mpm-4.15.6.dist-info}/licenses/LICENSE +0 -0
- {claude_mpm-4.15.2.dist-info → claude_mpm-4.15.6.dist-info}/top_level.txt +0 -0
|
@@ -33,7 +33,6 @@
|
|
|
33
33
|
<link rel="stylesheet" href="/static/css/dashboard.css">
|
|
34
34
|
<link rel="stylesheet" href="/static/css/connection-status.css">
|
|
35
35
|
<link rel="stylesheet" href="/static/css/activity.css">
|
|
36
|
-
<link rel="stylesheet" href="/static/css/code-tree.css">
|
|
37
36
|
|
|
38
37
|
<!-- Additional styles for file operations -->
|
|
39
38
|
<style>
|
|
@@ -258,7 +257,6 @@
|
|
|
258
257
|
<a href="#tools" class="tab-button" data-tab="tools">🔧 Tools</a>
|
|
259
258
|
<a href="#files" class="tab-button" data-tab="files">📁 Files</a>
|
|
260
259
|
<a href="#activity" class="tab-button" data-tab="activity">🌳 Activity</a>
|
|
261
|
-
<a href="#file_tree" class="tab-button" data-tab="claude-tree">📝 File Tree</a>
|
|
262
260
|
</div>
|
|
263
261
|
|
|
264
262
|
<!-- Events Tab -->
|
|
@@ -397,15 +395,6 @@
|
|
|
397
395
|
</div>
|
|
398
396
|
</div>
|
|
399
397
|
|
|
400
|
-
<!-- File Tree Tab -->
|
|
401
|
-
<div class="tab-content" id="claude-tree-tab">
|
|
402
|
-
<div id="claude-tree-container" style="width: 100%; height: 100%; position: relative;">
|
|
403
|
-
<!-- File activity tree will be rendered here by code-viewer.js -->
|
|
404
|
-
<!-- This container is ISOLATED from other tabs -->
|
|
405
|
-
</div>
|
|
406
|
-
</div>
|
|
407
|
-
|
|
408
|
-
|
|
409
398
|
</div>
|
|
410
399
|
</div>
|
|
411
400
|
</div>
|
|
@@ -489,16 +478,12 @@
|
|
|
489
478
|
});
|
|
490
479
|
};
|
|
491
480
|
|
|
492
|
-
// Load shared services first, then
|
|
481
|
+
// Load shared services first, then components
|
|
493
482
|
// Load services sequentially to ensure dependencies are available
|
|
494
483
|
loadModule('/static/js/shared/tooltip-service.js')
|
|
495
484
|
.then(() => loadModule('/static/js/shared/dom-helpers.js'))
|
|
496
485
|
.then(() => loadModule('/static/js/shared/event-bus.js'))
|
|
497
486
|
.then(() => loadModule('/static/js/shared/logger.js'))
|
|
498
|
-
.then(() => loadModule('/static/js/components/code-tree/tree-utils.js'))
|
|
499
|
-
.then(() => loadModule('/static/js/components/code-tree/tree-constants.js'))
|
|
500
|
-
.then(() => loadModule('/static/js/components/code-tree/tree-search.js'))
|
|
501
|
-
.then(() => loadModule('/static/js/components/code-tree/tree-breadcrumb.js'))
|
|
502
487
|
.then(() => {
|
|
503
488
|
// CRITICAL: Load socket-client.js FIRST (dependency of socket-manager)
|
|
504
489
|
return loadModule('/static/js/socket-client.js');
|
|
@@ -530,29 +515,15 @@
|
|
|
530
515
|
return Promise.all([
|
|
531
516
|
loadModule('/static/dist/dashboard.js'), // Use dist version that requires above components
|
|
532
517
|
loadModule('/static/dist/components/activity-tree.js'),
|
|
533
|
-
loadModule('/static/js/components/code-tree.js'), // TEMPORARY: Direct source for debugging
|
|
534
|
-
loadModule('/static/js/components/code-viewer.js').catch(err => {
|
|
535
|
-
console.error('[CRITICAL] Failed to load code-viewer.js:', err);
|
|
536
|
-
throw err;
|
|
537
|
-
}), // Code viewer now includes file change tracking
|
|
538
518
|
loadModule('/static/dist/components/file-viewer.js') // File viewer for viewing file contents
|
|
539
519
|
]);
|
|
540
520
|
})
|
|
541
521
|
.then(() => {
|
|
542
522
|
console.log('All dashboard modules loaded successfully');
|
|
543
|
-
|
|
544
|
-
// Debug: Check if CodeViewer loaded
|
|
545
|
-
if (window.CodeViewer) {
|
|
546
|
-
console.log('[DEBUG] CodeViewer is available on window object');
|
|
547
|
-
} else {
|
|
548
|
-
console.error('[ERROR] CodeViewer NOT FOUND on window object!');
|
|
549
|
-
}
|
|
550
|
-
|
|
551
|
-
// CodeViewer will auto-initialize and handle tab switching internally
|
|
552
|
-
|
|
523
|
+
|
|
553
524
|
// Browser Log Viewer initialization is now handled by UIStateManager
|
|
554
525
|
// This prevents duplicate event handlers and tab selection conflicts
|
|
555
|
-
|
|
526
|
+
|
|
556
527
|
// Load bulletproof tab isolation fix
|
|
557
528
|
loadModule('/static/js/tab-isolation-fix.js')
|
|
558
529
|
.then(() => {
|
|
@@ -564,15 +535,6 @@
|
|
|
564
535
|
|
|
565
536
|
// Hash navigation will handle default tab based on URL
|
|
566
537
|
// If no hash, default will be 'events' as per hashToTab mapping
|
|
567
|
-
// To start with File Tree, we can set hash if not present
|
|
568
|
-
setTimeout(() => {
|
|
569
|
-
if (!window.location.hash) {
|
|
570
|
-
console.log('No hash present, setting default to File Tree tab...');
|
|
571
|
-
window.location.hash = '#file_tree';
|
|
572
|
-
} else {
|
|
573
|
-
console.log('Hash present:', window.location.hash);
|
|
574
|
-
}
|
|
575
|
-
}, 500);
|
|
576
538
|
})
|
|
577
539
|
.catch(error => {
|
|
578
540
|
console.error('[CRITICAL] Error loading dashboard modules:', error);
|
|
@@ -5,10 +5,12 @@ including template validation, agent file validation, and
|
|
|
5
5
|
deployment environment validation.
|
|
6
6
|
"""
|
|
7
7
|
|
|
8
|
+
from claude_mpm.core.enums import ValidationSeverity
|
|
9
|
+
|
|
8
10
|
from .agent_validator import AgentValidator
|
|
9
11
|
from .deployment_validator import DeploymentValidator
|
|
10
12
|
from .template_validator import TemplateValidator
|
|
11
|
-
from .validation_result import ValidationResult
|
|
13
|
+
from .validation_result import ValidationResult
|
|
12
14
|
|
|
13
15
|
__all__ = [
|
|
14
16
|
"AgentValidator",
|
|
@@ -1,17 +1,9 @@
|
|
|
1
1
|
"""Validation result classes for deployment validation."""
|
|
2
2
|
|
|
3
3
|
from dataclasses import dataclass, field
|
|
4
|
-
from enum import Enum
|
|
5
4
|
from typing import Any, Dict, List, Optional
|
|
6
5
|
|
|
7
|
-
|
|
8
|
-
class ValidationSeverity(Enum):
|
|
9
|
-
"""Severity levels for validation issues."""
|
|
10
|
-
|
|
11
|
-
INFO = "info"
|
|
12
|
-
WARNING = "warning"
|
|
13
|
-
ERROR = "error"
|
|
14
|
-
CRITICAL = "critical"
|
|
6
|
+
from claude_mpm.core.enums import ValidationSeverity
|
|
15
7
|
|
|
16
8
|
|
|
17
9
|
@dataclass
|
|
@@ -16,36 +16,9 @@ ARCHITECTURE:
|
|
|
16
16
|
|
|
17
17
|
from dataclasses import asdict, dataclass, field
|
|
18
18
|
from datetime import datetime
|
|
19
|
-
from enum import Enum
|
|
20
19
|
from typing import Any, Dict, List
|
|
21
20
|
|
|
22
|
-
|
|
23
|
-
class HealthStatus(Enum):
|
|
24
|
-
"""
|
|
25
|
-
Health status levels.
|
|
26
|
-
|
|
27
|
-
WHY: Provides granular health states to distinguish between different
|
|
28
|
-
levels of service degradation.
|
|
29
|
-
|
|
30
|
-
States:
|
|
31
|
-
HEALTHY: All checks passing, process operating normally
|
|
32
|
-
DEGRADED: Process running but with issues (high resource usage, slow responses)
|
|
33
|
-
UNHEALTHY: Critical failure (process dead, crashed, or unresponsive)
|
|
34
|
-
UNKNOWN: Cannot determine health status
|
|
35
|
-
"""
|
|
36
|
-
|
|
37
|
-
HEALTHY = "healthy"
|
|
38
|
-
DEGRADED = "degraded"
|
|
39
|
-
UNHEALTHY = "unhealthy"
|
|
40
|
-
UNKNOWN = "unknown"
|
|
41
|
-
|
|
42
|
-
def is_operational(self) -> bool:
|
|
43
|
-
"""Check if status indicates operational service."""
|
|
44
|
-
return self in (HealthStatus.HEALTHY, HealthStatus.DEGRADED)
|
|
45
|
-
|
|
46
|
-
def is_critical(self) -> bool:
|
|
47
|
-
"""Check if status indicates critical failure."""
|
|
48
|
-
return self == HealthStatus.UNHEALTHY
|
|
21
|
+
from ....core.enums import HealthStatus
|
|
49
22
|
|
|
50
23
|
|
|
51
24
|
@dataclass
|
|
@@ -3,12 +3,12 @@
|
|
|
3
3
|
Exports main monitoring components for backward compatibility.
|
|
4
4
|
"""
|
|
5
5
|
|
|
6
|
+
from ....core.enums import HealthStatus
|
|
6
7
|
from .aggregator import MonitoringAggregatorService
|
|
7
8
|
from .base import (
|
|
8
9
|
HealthChecker,
|
|
9
10
|
HealthCheckResult,
|
|
10
11
|
HealthMetric,
|
|
11
|
-
HealthStatus,
|
|
12
12
|
)
|
|
13
13
|
|
|
14
14
|
# Legacy exports for backward compatibility
|
|
@@ -9,12 +9,12 @@ import time
|
|
|
9
9
|
from collections import deque
|
|
10
10
|
from typing import Any, Callable, Dict, List, Optional
|
|
11
11
|
|
|
12
|
+
from ....core.enums import HealthStatus
|
|
12
13
|
from .base import (
|
|
13
14
|
BaseMonitoringService,
|
|
14
15
|
HealthChecker,
|
|
15
16
|
HealthCheckResult,
|
|
16
17
|
HealthMetric,
|
|
17
|
-
HealthStatus,
|
|
18
18
|
)
|
|
19
19
|
|
|
20
20
|
|
|
@@ -212,14 +212,14 @@ class MonitoringAggregatorService(BaseMonitoringService):
|
|
|
212
212
|
|
|
213
213
|
total_metrics = len(metrics)
|
|
214
214
|
|
|
215
|
-
#
|
|
216
|
-
if status_counts[HealthStatus.CRITICAL] > 0:
|
|
217
|
-
return HealthStatus.CRITICAL
|
|
215
|
+
# Unhealthy if any unhealthy metrics
|
|
216
|
+
if status_counts[HealthStatus.UNHEALTHY] > 0:
|
|
217
|
+
return HealthStatus.UNHEALTHY
|
|
218
218
|
|
|
219
|
-
#
|
|
220
|
-
|
|
221
|
-
if
|
|
222
|
-
return HealthStatus.WARNING
|
|
219
|
+
# Degraded if >30% degraded metrics
|
|
220
|
+
degraded_ratio = status_counts[HealthStatus.DEGRADED] / total_metrics
|
|
221
|
+
if degraded_ratio > 0.3:
|
|
222
|
+
return HealthStatus.DEGRADED
|
|
223
223
|
|
|
224
224
|
# Unknown if >50% unknown metrics
|
|
225
225
|
unknown_ratio = status_counts[HealthStatus.UNKNOWN] / total_metrics
|
|
@@ -375,10 +375,10 @@ class MonitoringAggregatorService(BaseMonitoringService):
|
|
|
375
375
|
checks_count = len(recent_results)
|
|
376
376
|
|
|
377
377
|
# Determine aggregated status
|
|
378
|
-
if status_counts[HealthStatus.CRITICAL] > 0:
|
|
379
|
-
aggregated_status = HealthStatus.CRITICAL
|
|
380
|
-
elif status_counts[HealthStatus.WARNING] > checks_count * 0.3:
|
|
381
|
-
aggregated_status = HealthStatus.WARNING
|
|
378
|
+
if status_counts[HealthStatus.UNHEALTHY] > 0:
|
|
379
|
+
aggregated_status = HealthStatus.UNHEALTHY
|
|
380
|
+
elif status_counts[HealthStatus.DEGRADED] > checks_count * 0.3:
|
|
381
|
+
aggregated_status = HealthStatus.DEGRADED
|
|
382
382
|
elif status_counts[HealthStatus.UNKNOWN] > checks_count * 0.5:
|
|
383
383
|
aggregated_status = HealthStatus.UNKNOWN
|
|
384
384
|
else:
|
|
@@ -7,17 +7,9 @@ import time
|
|
|
7
7
|
from abc import ABC, abstractmethod
|
|
8
8
|
from dataclasses import asdict, dataclass
|
|
9
9
|
from datetime import datetime, timezone
|
|
10
|
-
from enum import Enum
|
|
11
10
|
from typing import Any, Dict, List, Optional, Union
|
|
12
11
|
|
|
13
|
-
|
|
14
|
-
class HealthStatus(Enum):
|
|
15
|
-
"""Health status levels for monitoring."""
|
|
16
|
-
|
|
17
|
-
HEALTHY = "healthy"
|
|
18
|
-
WARNING = "warning"
|
|
19
|
-
CRITICAL = "critical"
|
|
20
|
-
UNKNOWN = "unknown"
|
|
12
|
+
from ....core.enums import HealthStatus
|
|
21
13
|
|
|
22
14
|
|
|
23
15
|
@dataclass
|
|
@@ -75,11 +67,11 @@ class HealthCheckResult:
|
|
|
75
67
|
"healthy_metrics": len(
|
|
76
68
|
[m for m in self.metrics if m.status == HealthStatus.HEALTHY]
|
|
77
69
|
),
|
|
78
|
-
"
|
|
79
|
-
[m for m in self.metrics if m.status == HealthStatus.WARNING]
|
|
70
|
+
"degraded_metrics": len(
|
|
71
|
+
[m for m in self.metrics if m.status == HealthStatus.DEGRADED]
|
|
80
72
|
),
|
|
81
|
-
"
|
|
82
|
-
[m for m in self.metrics if m.status == HealthStatus.CRITICAL]
|
|
73
|
+
"unhealthy_metrics": len(
|
|
74
|
+
[m for m in self.metrics if m.status == HealthStatus.UNHEALTHY]
|
|
83
75
|
),
|
|
84
76
|
}
|
|
85
77
|
|
|
@@ -6,7 +6,8 @@ Monitors network connectivity, port availability, and socket health.
|
|
|
6
6
|
import socket
|
|
7
7
|
from typing import Dict, List, Optional
|
|
8
8
|
|
|
9
|
-
from .base import BaseMonitoringService, HealthMetric, HealthStatus
|
|
9
|
+
from ....core.enums import HealthStatus
|
|
10
|
+
from .base import BaseMonitoringService, HealthMetric
|
|
10
11
|
|
|
11
12
|
|
|
12
13
|
class NetworkHealthService(BaseMonitoringService):
|
|
@@ -98,7 +99,7 @@ class NetworkHealthService(BaseMonitoringService):
|
|
|
98
99
|
HealthMetric(
|
|
99
100
|
name="socket_creation",
|
|
100
101
|
value=False,
|
|
101
|
-
status=HealthStatus.
|
|
102
|
+
status=HealthStatus.UNHEALTHY,
|
|
102
103
|
message=f"Failed to create socket: {e}",
|
|
103
104
|
)
|
|
104
105
|
)
|
|
@@ -142,11 +143,11 @@ class NetworkHealthService(BaseMonitoringService):
|
|
|
142
143
|
)
|
|
143
144
|
)
|
|
144
145
|
else:
|
|
145
|
-
# Determine if this is critical or warning based on endpoint type
|
|
146
|
+
# Determine if this is unhealthy or degraded based on endpoint type
|
|
146
147
|
status = (
|
|
147
|
-
HealthStatus.
|
|
148
|
+
HealthStatus.DEGRADED
|
|
148
149
|
if "optional" in name.lower()
|
|
149
|
-
else HealthStatus.
|
|
150
|
+
else HealthStatus.UNHEALTHY
|
|
150
151
|
)
|
|
151
152
|
metrics.append(
|
|
152
153
|
HealthMetric(
|
|
@@ -161,7 +162,7 @@ class NetworkHealthService(BaseMonitoringService):
|
|
|
161
162
|
HealthMetric(
|
|
162
163
|
name=metric_name,
|
|
163
164
|
value=False,
|
|
164
|
-
status=HealthStatus.
|
|
165
|
+
status=HealthStatus.DEGRADED,
|
|
165
166
|
message=f"Connection to {host}:{port} timed out after {timeout}s",
|
|
166
167
|
)
|
|
167
168
|
)
|
|
@@ -6,8 +6,9 @@ Monitors individual process health including CPU, memory, file descriptors, and
|
|
|
6
6
|
from typing import List
|
|
7
7
|
|
|
8
8
|
from claude_mpm.core.constants import ResourceLimits, TimeoutConfig
|
|
9
|
+
from claude_mpm.core.enums import HealthStatus
|
|
9
10
|
|
|
10
|
-
from .base import BaseMonitoringService, HealthMetric
|
|
11
|
+
from .base import BaseMonitoringService, HealthMetric
|
|
11
12
|
|
|
12
13
|
try:
|
|
13
14
|
import psutil
|
|
@@ -66,7 +67,7 @@ class ProcessHealthService(BaseMonitoringService):
|
|
|
66
67
|
HealthMetric(
|
|
67
68
|
name="psutil_availability",
|
|
68
69
|
value=False,
|
|
69
|
-
status=HealthStatus.
|
|
70
|
+
status=HealthStatus.DEGRADED,
|
|
70
71
|
message="psutil not available for process monitoring",
|
|
71
72
|
)
|
|
72
73
|
)
|
|
@@ -77,7 +78,7 @@ class ProcessHealthService(BaseMonitoringService):
|
|
|
77
78
|
HealthMetric(
|
|
78
79
|
name="process_exists",
|
|
79
80
|
value=False,
|
|
80
|
-
status=HealthStatus.
|
|
81
|
+
status=HealthStatus.UNHEALTHY,
|
|
81
82
|
message=f"Process {self.pid} not found",
|
|
82
83
|
)
|
|
83
84
|
)
|
|
@@ -90,7 +91,7 @@ class ProcessHealthService(BaseMonitoringService):
|
|
|
90
91
|
HealthMetric(
|
|
91
92
|
name="process_exists",
|
|
92
93
|
value=False,
|
|
93
|
-
status=HealthStatus.
|
|
94
|
+
status=HealthStatus.UNHEALTHY,
|
|
94
95
|
message=f"Process {self.pid} is no longer running",
|
|
95
96
|
)
|
|
96
97
|
)
|
|
@@ -119,7 +120,7 @@ class ProcessHealthService(BaseMonitoringService):
|
|
|
119
120
|
HealthMetric(
|
|
120
121
|
name="process_exists",
|
|
121
122
|
value=False,
|
|
122
|
-
status=HealthStatus.
|
|
123
|
+
status=HealthStatus.UNHEALTHY,
|
|
123
124
|
message=f"Process {self.pid} no longer exists",
|
|
124
125
|
)
|
|
125
126
|
)
|
|
@@ -153,7 +154,7 @@ class ProcessHealthService(BaseMonitoringService):
|
|
|
153
154
|
status=(
|
|
154
155
|
HealthStatus.HEALTHY
|
|
155
156
|
if process_healthy
|
|
156
|
-
else HealthStatus.
|
|
157
|
+
else HealthStatus.UNHEALTHY
|
|
157
158
|
),
|
|
158
159
|
message=f"Process status: {status}",
|
|
159
160
|
)
|
|
@@ -179,9 +180,9 @@ class ProcessHealthService(BaseMonitoringService):
|
|
|
179
180
|
cpu_status = HealthStatus.HEALTHY
|
|
180
181
|
if cpu_percent > self.cpu_threshold:
|
|
181
182
|
cpu_status = (
|
|
182
|
-
HealthStatus.
|
|
183
|
+
HealthStatus.DEGRADED
|
|
183
184
|
if cpu_percent < self.cpu_threshold * 1.2
|
|
184
|
-
else HealthStatus.
|
|
185
|
+
else HealthStatus.UNHEALTHY
|
|
185
186
|
)
|
|
186
187
|
|
|
187
188
|
metrics.append(
|
|
@@ -213,9 +214,9 @@ class ProcessHealthService(BaseMonitoringService):
|
|
|
213
214
|
memory_status = HealthStatus.HEALTHY
|
|
214
215
|
if memory_mb > self.memory_threshold_mb:
|
|
215
216
|
memory_status = (
|
|
216
|
-
HealthStatus.
|
|
217
|
+
HealthStatus.DEGRADED
|
|
217
218
|
if memory_mb < self.memory_threshold_mb * 1.2
|
|
218
|
-
else HealthStatus.
|
|
219
|
+
else HealthStatus.UNHEALTHY
|
|
219
220
|
)
|
|
220
221
|
|
|
221
222
|
metrics.append(
|
|
@@ -256,9 +257,9 @@ class ProcessHealthService(BaseMonitoringService):
|
|
|
256
257
|
fd_status = HealthStatus.HEALTHY
|
|
257
258
|
if fd_count > self.fd_threshold:
|
|
258
259
|
fd_status = (
|
|
259
|
-
HealthStatus.
|
|
260
|
+
HealthStatus.DEGRADED
|
|
260
261
|
if fd_count < self.fd_threshold * 1.2
|
|
261
|
-
else HealthStatus.
|
|
262
|
+
else HealthStatus.UNHEALTHY
|
|
262
263
|
)
|
|
263
264
|
|
|
264
265
|
metrics.append(
|
|
@@ -5,7 +5,8 @@ Monitors system-wide resource usage including CPU, memory, and disk utilization.
|
|
|
5
5
|
|
|
6
6
|
from typing import Dict, List, Optional
|
|
7
7
|
|
|
8
|
-
from .base import BaseMonitoringService, HealthMetric, HealthStatus
|
|
8
|
+
from ....core.enums import HealthStatus
|
|
9
|
+
from .base import BaseMonitoringService, HealthMetric
|
|
9
10
|
|
|
10
11
|
try:
|
|
11
12
|
import psutil
|
|
@@ -53,7 +54,7 @@ class ResourceMonitorService(BaseMonitoringService):
|
|
|
53
54
|
HealthMetric(
|
|
54
55
|
name="psutil_availability",
|
|
55
56
|
value=False,
|
|
56
|
-
status=HealthStatus.
|
|
57
|
+
status=HealthStatus.DEGRADED,
|
|
57
58
|
message="psutil not available for resource monitoring",
|
|
58
59
|
)
|
|
59
60
|
)
|
|
@@ -182,9 +183,9 @@ class ResourceMonitorService(BaseMonitoringService):
|
|
|
182
183
|
# Load is concerning if > cpu_count
|
|
183
184
|
load_status = HealthStatus.HEALTHY
|
|
184
185
|
if load1 > cpu_count:
|
|
185
|
-
load_status = HealthStatus.
|
|
186
|
+
load_status = HealthStatus.DEGRADED
|
|
186
187
|
if load1 > cpu_count * 1.5:
|
|
187
|
-
load_status = HealthStatus.
|
|
188
|
+
load_status = HealthStatus.UNHEALTHY
|
|
188
189
|
|
|
189
190
|
metrics.append(
|
|
190
191
|
HealthMetric(
|
|
@@ -220,8 +221,8 @@ class ResourceMonitorService(BaseMonitoringService):
|
|
|
220
221
|
if value < threshold:
|
|
221
222
|
return HealthStatus.HEALTHY
|
|
222
223
|
if value < threshold * 1.1: # 10% above threshold
|
|
223
|
-
return HealthStatus.
|
|
224
|
-
return HealthStatus.
|
|
224
|
+
return HealthStatus.DEGRADED
|
|
225
|
+
return HealthStatus.UNHEALTHY
|
|
225
226
|
|
|
226
227
|
def get_resource_summary(self) -> Optional[Dict[str, float]]:
|
|
227
228
|
"""Get quick resource summary without full health check.
|
|
@@ -6,7 +6,8 @@ Monitors service-specific metrics like client connections, event processing, and
|
|
|
6
6
|
import time
|
|
7
7
|
from typing import Any, Dict, List
|
|
8
8
|
|
|
9
|
-
from .base import BaseMonitoringService, HealthMetric, HealthStatus
|
|
9
|
+
from ....core.enums import HealthStatus
|
|
10
|
+
from .base import BaseMonitoringService, HealthMetric
|
|
10
11
|
|
|
11
12
|
|
|
12
13
|
class ServiceHealthService(BaseMonitoringService):
|
|
@@ -79,9 +80,9 @@ class ServiceHealthService(BaseMonitoringService):
|
|
|
79
80
|
|
|
80
81
|
# Determine status based on thresholds
|
|
81
82
|
if client_count > self.max_clients:
|
|
82
|
-
client_status = HealthStatus.
|
|
83
|
+
client_status = HealthStatus.UNHEALTHY
|
|
83
84
|
elif client_count > self.max_clients * 0.8:
|
|
84
|
-
client_status = HealthStatus.
|
|
85
|
+
client_status = HealthStatus.DEGRADED
|
|
85
86
|
else:
|
|
86
87
|
client_status = HealthStatus.HEALTHY
|
|
87
88
|
|
|
@@ -129,7 +130,7 @@ class ServiceHealthService(BaseMonitoringService):
|
|
|
129
130
|
# Determine status based on rate
|
|
130
131
|
rate_status = HealthStatus.HEALTHY
|
|
131
132
|
if event_rate == 0 and events_processed > 0:
|
|
132
|
-
rate_status = HealthStatus.
|
|
133
|
+
rate_status = HealthStatus.DEGRADED # Processing stopped
|
|
133
134
|
|
|
134
135
|
metrics.append(
|
|
135
136
|
HealthMetric(
|
|
@@ -157,9 +158,9 @@ class ServiceHealthService(BaseMonitoringService):
|
|
|
157
158
|
queue_size = self.service_stats["event_queue_size"]
|
|
158
159
|
queue_status = HealthStatus.HEALTHY
|
|
159
160
|
if queue_size > 1000:
|
|
160
|
-
queue_status = HealthStatus.
|
|
161
|
+
queue_status = HealthStatus.DEGRADED
|
|
161
162
|
if queue_size > 5000:
|
|
162
|
-
queue_status = HealthStatus.
|
|
163
|
+
queue_status = HealthStatus.UNHEALTHY
|
|
163
164
|
|
|
164
165
|
metrics.append(
|
|
165
166
|
HealthMetric(
|
|
@@ -191,9 +192,9 @@ class ServiceHealthService(BaseMonitoringService):
|
|
|
191
192
|
|
|
192
193
|
# Determine status based on rate
|
|
193
194
|
if error_rate > self.max_error_rate:
|
|
194
|
-
error_status = HealthStatus.
|
|
195
|
+
error_status = HealthStatus.UNHEALTHY
|
|
195
196
|
elif error_rate > self.max_error_rate * 0.5:
|
|
196
|
-
error_status = HealthStatus.
|
|
197
|
+
error_status = HealthStatus.DEGRADED
|
|
197
198
|
else:
|
|
198
199
|
error_status = HealthStatus.HEALTHY
|
|
199
200
|
|
|
@@ -213,7 +214,7 @@ class ServiceHealthService(BaseMonitoringService):
|
|
|
213
214
|
name="total_errors",
|
|
214
215
|
value=errors,
|
|
215
216
|
status=(
|
|
216
|
-
HealthStatus.HEALTHY if errors == 0 else HealthStatus.
|
|
217
|
+
HealthStatus.HEALTHY if errors == 0 else HealthStatus.DEGRADED
|
|
217
218
|
),
|
|
218
219
|
)
|
|
219
220
|
)
|
|
@@ -228,7 +229,7 @@ class ServiceHealthService(BaseMonitoringService):
|
|
|
228
229
|
status=(
|
|
229
230
|
HealthStatus.HEALTHY
|
|
230
231
|
if recent_errors == 0
|
|
231
|
-
else HealthStatus.
|
|
232
|
+
else HealthStatus.DEGRADED
|
|
232
233
|
),
|
|
233
234
|
)
|
|
234
235
|
)
|
|
@@ -263,9 +264,9 @@ class ServiceHealthService(BaseMonitoringService):
|
|
|
263
264
|
|
|
264
265
|
# Determine status based on staleness
|
|
265
266
|
if time_since_activity > self.stale_activity_seconds * 2:
|
|
266
|
-
activity_status = HealthStatus.
|
|
267
|
+
activity_status = HealthStatus.UNHEALTHY
|
|
267
268
|
elif time_since_activity > self.stale_activity_seconds:
|
|
268
|
-
activity_status = HealthStatus.
|
|
269
|
+
activity_status = HealthStatus.DEGRADED
|
|
269
270
|
else:
|
|
270
271
|
activity_status = HealthStatus.HEALTHY
|
|
271
272
|
|
|
@@ -282,7 +283,7 @@ class ServiceHealthService(BaseMonitoringService):
|
|
|
282
283
|
HealthMetric(
|
|
283
284
|
name="time_since_last_activity",
|
|
284
285
|
value=-1,
|
|
285
|
-
status=HealthStatus.
|
|
286
|
+
status=HealthStatus.DEGRADED,
|
|
286
287
|
message="No last activity recorded",
|
|
287
288
|
)
|
|
288
289
|
)
|
|
@@ -307,9 +308,9 @@ class ServiceHealthService(BaseMonitoringService):
|
|
|
307
308
|
|
|
308
309
|
# Determine status based on response time
|
|
309
310
|
if avg_time > 1000: # > 1 second
|
|
310
|
-
time_status = HealthStatus.
|
|
311
|
+
time_status = HealthStatus.UNHEALTHY
|
|
311
312
|
elif avg_time > 500: # > 500ms
|
|
312
|
-
time_status = HealthStatus.
|
|
313
|
+
time_status = HealthStatus.DEGRADED
|
|
313
314
|
else:
|
|
314
315
|
time_status = HealthStatus.HEALTHY
|
|
315
316
|
|
|
@@ -59,6 +59,7 @@ Note: ProcessStatus has been consolidated into ServiceState (core.enums) as of P
|
|
|
59
59
|
"""
|
|
60
60
|
|
|
61
61
|
# Re-export data models and interfaces for convenience
|
|
62
|
+
from claude_mpm.core.enums import HealthStatus
|
|
62
63
|
from claude_mpm.services.core.interfaces.health import (
|
|
63
64
|
IHealthCheck,
|
|
64
65
|
IHealthCheckManager,
|
|
@@ -80,7 +81,6 @@ from claude_mpm.services.core.interfaces.stability import (
|
|
|
80
81
|
from claude_mpm.services.core.models.health import (
|
|
81
82
|
DeploymentHealth,
|
|
82
83
|
HealthCheckResult,
|
|
83
|
-
HealthStatus,
|
|
84
84
|
)
|
|
85
85
|
from claude_mpm.services.core.models.process import (
|
|
86
86
|
PROTECTED_PORT_RANGES,
|
|
@@ -26,10 +26,10 @@ import threading
|
|
|
26
26
|
from collections import defaultdict
|
|
27
27
|
from typing import Callable, Dict, List, Set
|
|
28
28
|
|
|
29
|
+
from claude_mpm.core.enums import HealthStatus
|
|
29
30
|
from claude_mpm.services.core.base import SyncBaseService
|
|
30
31
|
from claude_mpm.services.core.interfaces.health import IHealthCheckManager
|
|
31
32
|
from claude_mpm.services.core.interfaces.restart import ICrashDetector
|
|
32
|
-
from claude_mpm.services.core.models.health import HealthStatus
|
|
33
33
|
|
|
34
34
|
|
|
35
35
|
class CrashDetector(SyncBaseService, ICrashDetector):
|
|
@@ -29,10 +29,11 @@ import time
|
|
|
29
29
|
import requests
|
|
30
30
|
from requests.exceptions import ConnectionError, RequestException, Timeout
|
|
31
31
|
|
|
32
|
+
from claude_mpm.core.enums import HealthStatus
|
|
32
33
|
from claude_mpm.services.core.base import SyncBaseService
|
|
33
34
|
from claude_mpm.services.core.interfaces.health import IHealthCheck
|
|
34
35
|
from claude_mpm.services.core.interfaces.process import ILocalProcessManager
|
|
35
|
-
from claude_mpm.services.core.models.health import HealthCheckResult
|
|
36
|
+
from claude_mpm.services.core.models.health import HealthCheckResult
|
|
36
37
|
|
|
37
38
|
|
|
38
39
|
class HttpHealthCheck(SyncBaseService, IHealthCheck):
|
|
@@ -22,10 +22,11 @@ USAGE:
|
|
|
22
22
|
|
|
23
23
|
import psutil
|
|
24
24
|
|
|
25
|
+
from claude_mpm.core.enums import HealthStatus
|
|
25
26
|
from claude_mpm.services.core.base import SyncBaseService
|
|
26
27
|
from claude_mpm.services.core.interfaces.health import IHealthCheck
|
|
27
28
|
from claude_mpm.services.core.interfaces.process import ILocalProcessManager
|
|
28
|
-
from claude_mpm.services.core.models.health import HealthCheckResult
|
|
29
|
+
from claude_mpm.services.core.models.health import HealthCheckResult
|
|
29
30
|
|
|
30
31
|
|
|
31
32
|
class ProcessHealthCheck(SyncBaseService, IHealthCheck):
|
|
@@ -28,10 +28,11 @@ import platform
|
|
|
28
28
|
|
|
29
29
|
import psutil
|
|
30
30
|
|
|
31
|
+
from claude_mpm.core.enums import HealthStatus
|
|
31
32
|
from claude_mpm.services.core.base import SyncBaseService
|
|
32
33
|
from claude_mpm.services.core.interfaces.health import IHealthCheck
|
|
33
34
|
from claude_mpm.services.core.interfaces.process import ILocalProcessManager
|
|
34
|
-
from claude_mpm.services.core.models.health import HealthCheckResult
|
|
35
|
+
from claude_mpm.services.core.models.health import HealthCheckResult
|
|
35
36
|
|
|
36
37
|
|
|
37
38
|
class ResourceHealthCheck(SyncBaseService, IHealthCheck):
|
|
@@ -39,13 +39,13 @@ import threading
|
|
|
39
39
|
from collections import defaultdict
|
|
40
40
|
from typing import Callable, Dict, List, Optional
|
|
41
41
|
|
|
42
|
+
from claude_mpm.core.enums import HealthStatus
|
|
42
43
|
from claude_mpm.services.core.base import SyncBaseService
|
|
43
44
|
from claude_mpm.services.core.interfaces.health import IHealthCheckManager
|
|
44
45
|
from claude_mpm.services.core.interfaces.process import ILocalProcessManager
|
|
45
46
|
from claude_mpm.services.core.models.health import (
|
|
46
47
|
DeploymentHealth,
|
|
47
48
|
HealthCheckResult,
|
|
48
|
-
HealthStatus,
|
|
49
49
|
)
|
|
50
50
|
from claude_mpm.services.local_ops.health_checks import (
|
|
51
51
|
HttpHealthCheck,
|
|
@@ -43,6 +43,7 @@ import time
|
|
|
43
43
|
from pathlib import Path
|
|
44
44
|
from typing import Optional, Set
|
|
45
45
|
|
|
46
|
+
from claude_mpm.core.enums import HealthStatus
|
|
46
47
|
from claude_mpm.services.core.base import SyncBaseService
|
|
47
48
|
from claude_mpm.services.core.interfaces.health import IHealthCheckManager
|
|
48
49
|
from claude_mpm.services.core.interfaces.process import ILocalProcessManager
|
|
@@ -51,7 +52,6 @@ from claude_mpm.services.core.interfaces.restart import (
|
|
|
51
52
|
IRestartManager,
|
|
52
53
|
IRestartPolicy,
|
|
53
54
|
)
|
|
54
|
-
from claude_mpm.services.core.models.health import HealthStatus
|
|
55
55
|
from claude_mpm.services.core.models.restart import RestartHistory
|
|
56
56
|
|
|
57
57
|
|