claude-mpm 4.13.2__py3-none-any.whl → 4.18.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- claude_mpm/VERSION +1 -1
- claude_mpm/agents/BASE_ENGINEER.md +286 -0
- claude_mpm/agents/BASE_PM.md +48 -17
- claude_mpm/agents/OUTPUT_STYLE.md +329 -11
- claude_mpm/agents/PM_INSTRUCTIONS.md +227 -8
- claude_mpm/agents/agent_loader.py +17 -5
- claude_mpm/agents/frontmatter_validator.py +284 -253
- claude_mpm/agents/templates/agentic-coder-optimizer.json +9 -2
- claude_mpm/agents/templates/api_qa.json +7 -1
- claude_mpm/agents/templates/clerk-ops.json +8 -1
- claude_mpm/agents/templates/code_analyzer.json +4 -1
- claude_mpm/agents/templates/dart_engineer.json +11 -1
- claude_mpm/agents/templates/data_engineer.json +11 -1
- claude_mpm/agents/templates/documentation.json +6 -1
- claude_mpm/agents/templates/engineer.json +18 -1
- claude_mpm/agents/templates/gcp_ops_agent.json +8 -1
- claude_mpm/agents/templates/golang_engineer.json +11 -1
- claude_mpm/agents/templates/java_engineer.json +12 -2
- claude_mpm/agents/templates/local_ops_agent.json +1217 -6
- claude_mpm/agents/templates/nextjs_engineer.json +11 -1
- claude_mpm/agents/templates/ops.json +8 -1
- claude_mpm/agents/templates/php-engineer.json +11 -1
- claude_mpm/agents/templates/project_organizer.json +10 -3
- claude_mpm/agents/templates/prompt-engineer.json +5 -1
- claude_mpm/agents/templates/python_engineer.json +11 -1
- claude_mpm/agents/templates/qa.json +7 -1
- claude_mpm/agents/templates/react_engineer.json +11 -1
- claude_mpm/agents/templates/refactoring_engineer.json +8 -1
- claude_mpm/agents/templates/research.json +4 -1
- claude_mpm/agents/templates/ruby-engineer.json +11 -1
- claude_mpm/agents/templates/rust_engineer.json +11 -1
- claude_mpm/agents/templates/security.json +6 -1
- claude_mpm/agents/templates/svelte-engineer.json +225 -0
- claude_mpm/agents/templates/ticketing.json +6 -1
- claude_mpm/agents/templates/typescript_engineer.json +11 -1
- claude_mpm/agents/templates/vercel_ops_agent.json +8 -1
- claude_mpm/agents/templates/version_control.json +8 -1
- claude_mpm/agents/templates/web_qa.json +7 -1
- claude_mpm/agents/templates/web_ui.json +11 -1
- claude_mpm/cli/__init__.py +34 -706
- claude_mpm/cli/commands/agent_manager.py +25 -12
- claude_mpm/cli/commands/agent_state_manager.py +186 -0
- claude_mpm/cli/commands/agents.py +204 -148
- claude_mpm/cli/commands/aggregate.py +7 -3
- claude_mpm/cli/commands/analyze.py +9 -4
- claude_mpm/cli/commands/analyze_code.py +7 -2
- claude_mpm/cli/commands/auto_configure.py +7 -9
- claude_mpm/cli/commands/config.py +47 -13
- claude_mpm/cli/commands/configure.py +294 -1788
- claude_mpm/cli/commands/configure_agent_display.py +261 -0
- claude_mpm/cli/commands/configure_behavior_manager.py +204 -0
- claude_mpm/cli/commands/configure_hook_manager.py +225 -0
- claude_mpm/cli/commands/configure_models.py +18 -0
- claude_mpm/cli/commands/configure_navigation.py +167 -0
- claude_mpm/cli/commands/configure_paths.py +104 -0
- claude_mpm/cli/commands/configure_persistence.py +254 -0
- claude_mpm/cli/commands/configure_startup_manager.py +646 -0
- claude_mpm/cli/commands/configure_template_editor.py +497 -0
- claude_mpm/cli/commands/configure_validators.py +73 -0
- claude_mpm/cli/commands/local_deploy.py +537 -0
- claude_mpm/cli/commands/memory.py +54 -20
- claude_mpm/cli/commands/mpm_init.py +39 -25
- claude_mpm/cli/commands/mpm_init_handler.py +8 -3
- claude_mpm/cli/executor.py +202 -0
- claude_mpm/cli/helpers.py +105 -0
- claude_mpm/cli/interactive/__init__.py +3 -0
- claude_mpm/cli/interactive/skills_wizard.py +491 -0
- claude_mpm/cli/parsers/__init__.py +7 -1
- claude_mpm/cli/parsers/base_parser.py +98 -3
- claude_mpm/cli/parsers/local_deploy_parser.py +227 -0
- claude_mpm/cli/shared/output_formatters.py +28 -19
- claude_mpm/cli/startup.py +481 -0
- claude_mpm/cli/utils.py +52 -1
- claude_mpm/commands/mpm-help.md +3 -0
- claude_mpm/commands/mpm-version.md +113 -0
- claude_mpm/commands/mpm.md +1 -0
- claude_mpm/config/agent_config.py +2 -2
- claude_mpm/config/model_config.py +428 -0
- claude_mpm/core/base_service.py +13 -12
- claude_mpm/core/enums.py +452 -0
- claude_mpm/core/factories.py +1 -1
- claude_mpm/core/instruction_reinforcement_hook.py +2 -1
- claude_mpm/core/interactive_session.py +9 -3
- claude_mpm/core/logging_config.py +6 -2
- claude_mpm/core/oneshot_session.py +8 -4
- claude_mpm/core/optimized_agent_loader.py +3 -3
- claude_mpm/core/output_style_manager.py +12 -192
- claude_mpm/core/service_registry.py +5 -1
- claude_mpm/core/types.py +2 -9
- claude_mpm/core/typing_utils.py +7 -6
- claude_mpm/dashboard/static/js/dashboard.js +0 -14
- claude_mpm/dashboard/templates/index.html +3 -41
- claude_mpm/hooks/claude_hooks/response_tracking.py +35 -1
- claude_mpm/hooks/instruction_reinforcement.py +7 -2
- claude_mpm/models/resume_log.py +340 -0
- claude_mpm/services/agents/auto_config_manager.py +10 -11
- claude_mpm/services/agents/deployment/agent_configuration_manager.py +1 -1
- claude_mpm/services/agents/deployment/agent_record_service.py +1 -1
- claude_mpm/services/agents/deployment/agent_validator.py +17 -1
- claude_mpm/services/agents/deployment/async_agent_deployment.py +1 -1
- claude_mpm/services/agents/deployment/interface_adapter.py +3 -2
- claude_mpm/services/agents/deployment/local_template_deployment.py +1 -1
- claude_mpm/services/agents/deployment/pipeline/steps/agent_processing_step.py +7 -6
- claude_mpm/services/agents/deployment/pipeline/steps/base_step.py +7 -16
- claude_mpm/services/agents/deployment/pipeline/steps/configuration_step.py +4 -3
- claude_mpm/services/agents/deployment/pipeline/steps/target_directory_step.py +5 -3
- claude_mpm/services/agents/deployment/pipeline/steps/validation_step.py +6 -5
- claude_mpm/services/agents/deployment/refactored_agent_deployment_service.py +9 -6
- claude_mpm/services/agents/deployment/validation/__init__.py +3 -1
- claude_mpm/services/agents/deployment/validation/validation_result.py +1 -9
- claude_mpm/services/agents/local_template_manager.py +1 -1
- claude_mpm/services/agents/memory/agent_memory_manager.py +5 -2
- claude_mpm/services/agents/registry/modification_tracker.py +5 -2
- claude_mpm/services/command_handler_service.py +11 -5
- claude_mpm/services/core/interfaces/__init__.py +74 -2
- claude_mpm/services/core/interfaces/health.py +172 -0
- claude_mpm/services/core/interfaces/model.py +281 -0
- claude_mpm/services/core/interfaces/process.py +372 -0
- claude_mpm/services/core/interfaces/restart.py +307 -0
- claude_mpm/services/core/interfaces/stability.py +260 -0
- claude_mpm/services/core/models/__init__.py +33 -0
- claude_mpm/services/core/models/agent_config.py +12 -28
- claude_mpm/services/core/models/health.py +162 -0
- claude_mpm/services/core/models/process.py +235 -0
- claude_mpm/services/core/models/restart.py +302 -0
- claude_mpm/services/core/models/stability.py +264 -0
- claude_mpm/services/core/path_resolver.py +23 -7
- claude_mpm/services/diagnostics/__init__.py +2 -2
- claude_mpm/services/diagnostics/checks/agent_check.py +25 -24
- claude_mpm/services/diagnostics/checks/claude_code_check.py +24 -23
- claude_mpm/services/diagnostics/checks/common_issues_check.py +25 -24
- claude_mpm/services/diagnostics/checks/configuration_check.py +24 -23
- claude_mpm/services/diagnostics/checks/filesystem_check.py +18 -17
- claude_mpm/services/diagnostics/checks/installation_check.py +30 -29
- claude_mpm/services/diagnostics/checks/instructions_check.py +20 -19
- claude_mpm/services/diagnostics/checks/mcp_check.py +50 -36
- claude_mpm/services/diagnostics/checks/mcp_services_check.py +36 -31
- claude_mpm/services/diagnostics/checks/monitor_check.py +23 -22
- claude_mpm/services/diagnostics/checks/startup_log_check.py +9 -8
- claude_mpm/services/diagnostics/diagnostic_runner.py +6 -5
- claude_mpm/services/diagnostics/doctor_reporter.py +28 -25
- claude_mpm/services/diagnostics/models.py +19 -24
- claude_mpm/services/infrastructure/monitoring/__init__.py +1 -1
- claude_mpm/services/infrastructure/monitoring/aggregator.py +12 -12
- claude_mpm/services/infrastructure/monitoring/base.py +5 -13
- claude_mpm/services/infrastructure/monitoring/network.py +7 -6
- claude_mpm/services/infrastructure/monitoring/process.py +13 -12
- claude_mpm/services/infrastructure/monitoring/resources.py +7 -6
- claude_mpm/services/infrastructure/monitoring/service.py +16 -15
- claude_mpm/services/infrastructure/resume_log_generator.py +439 -0
- claude_mpm/services/local_ops/__init__.py +163 -0
- claude_mpm/services/local_ops/crash_detector.py +257 -0
- claude_mpm/services/local_ops/health_checks/__init__.py +28 -0
- claude_mpm/services/local_ops/health_checks/http_check.py +224 -0
- claude_mpm/services/local_ops/health_checks/process_check.py +236 -0
- claude_mpm/services/local_ops/health_checks/resource_check.py +255 -0
- claude_mpm/services/local_ops/health_manager.py +430 -0
- claude_mpm/services/local_ops/log_monitor.py +396 -0
- claude_mpm/services/local_ops/memory_leak_detector.py +294 -0
- claude_mpm/services/local_ops/process_manager.py +595 -0
- claude_mpm/services/local_ops/resource_monitor.py +331 -0
- claude_mpm/services/local_ops/restart_manager.py +401 -0
- claude_mpm/services/local_ops/restart_policy.py +387 -0
- claude_mpm/services/local_ops/state_manager.py +372 -0
- claude_mpm/services/local_ops/unified_manager.py +600 -0
- claude_mpm/services/mcp_config_manager.py +9 -4
- claude_mpm/services/mcp_gateway/core/__init__.py +1 -2
- claude_mpm/services/mcp_gateway/core/base.py +18 -31
- claude_mpm/services/mcp_gateway/tools/external_mcp_services.py +71 -24
- claude_mpm/services/mcp_gateway/tools/health_check_tool.py +30 -28
- claude_mpm/services/memory_hook_service.py +4 -1
- claude_mpm/services/model/__init__.py +147 -0
- claude_mpm/services/model/base_provider.py +365 -0
- claude_mpm/services/model/claude_provider.py +412 -0
- claude_mpm/services/model/model_router.py +453 -0
- claude_mpm/services/model/ollama_provider.py +415 -0
- claude_mpm/services/monitor/daemon_manager.py +3 -2
- claude_mpm/services/monitor/handlers/dashboard.py +2 -1
- claude_mpm/services/monitor/handlers/hooks.py +2 -1
- claude_mpm/services/monitor/management/lifecycle.py +3 -2
- claude_mpm/services/monitor/server.py +2 -1
- claude_mpm/services/session_management_service.py +3 -2
- claude_mpm/services/session_manager.py +205 -1
- claude_mpm/services/shared/async_service_base.py +16 -27
- claude_mpm/services/shared/lifecycle_service_base.py +1 -14
- claude_mpm/services/socketio/handlers/__init__.py +5 -2
- claude_mpm/services/socketio/handlers/hook.py +13 -2
- claude_mpm/services/socketio/handlers/registry.py +4 -2
- claude_mpm/services/socketio/server/main.py +10 -8
- claude_mpm/services/subprocess_launcher_service.py +14 -5
- claude_mpm/services/unified/analyzer_strategies/code_analyzer.py +8 -7
- claude_mpm/services/unified/analyzer_strategies/dependency_analyzer.py +6 -5
- claude_mpm/services/unified/analyzer_strategies/performance_analyzer.py +8 -7
- claude_mpm/services/unified/analyzer_strategies/security_analyzer.py +7 -6
- claude_mpm/services/unified/analyzer_strategies/structure_analyzer.py +5 -4
- claude_mpm/services/unified/config_strategies/validation_strategy.py +13 -9
- claude_mpm/services/unified/deployment_strategies/cloud_strategies.py +10 -3
- claude_mpm/services/unified/deployment_strategies/local.py +6 -5
- claude_mpm/services/unified/deployment_strategies/utils.py +6 -5
- claude_mpm/services/unified/deployment_strategies/vercel.py +7 -6
- claude_mpm/services/unified/interfaces.py +3 -1
- claude_mpm/services/unified/unified_analyzer.py +14 -10
- claude_mpm/services/unified/unified_config.py +2 -1
- claude_mpm/services/unified/unified_deployment.py +9 -4
- claude_mpm/services/version_service.py +104 -1
- claude_mpm/skills/__init__.py +21 -0
- claude_mpm/skills/bundled/__init__.py +6 -0
- claude_mpm/skills/bundled/api-documentation.md +393 -0
- claude_mpm/skills/bundled/async-testing.md +571 -0
- claude_mpm/skills/bundled/code-review.md +143 -0
- claude_mpm/skills/bundled/database-migration.md +199 -0
- claude_mpm/skills/bundled/docker-containerization.md +194 -0
- claude_mpm/skills/bundled/express-local-dev.md +1429 -0
- claude_mpm/skills/bundled/fastapi-local-dev.md +1199 -0
- claude_mpm/skills/bundled/git-workflow.md +414 -0
- claude_mpm/skills/bundled/imagemagick.md +204 -0
- claude_mpm/skills/bundled/json-data-handling.md +223 -0
- claude_mpm/skills/bundled/nextjs-local-dev.md +807 -0
- claude_mpm/skills/bundled/pdf.md +141 -0
- claude_mpm/skills/bundled/performance-profiling.md +567 -0
- claude_mpm/skills/bundled/refactoring-patterns.md +180 -0
- claude_mpm/skills/bundled/security-scanning.md +327 -0
- claude_mpm/skills/bundled/systematic-debugging.md +473 -0
- claude_mpm/skills/bundled/test-driven-development.md +378 -0
- claude_mpm/skills/bundled/vite-local-dev.md +1061 -0
- claude_mpm/skills/bundled/web-performance-optimization.md +2305 -0
- claude_mpm/skills/bundled/xlsx.md +157 -0
- claude_mpm/skills/registry.py +286 -0
- claude_mpm/skills/skill_manager.py +310 -0
- claude_mpm/tools/code_tree_analyzer.py +177 -141
- claude_mpm/tools/code_tree_events.py +4 -2
- claude_mpm/utils/agent_dependency_loader.py +2 -2
- {claude_mpm-4.13.2.dist-info → claude_mpm-4.18.2.dist-info}/METADATA +117 -8
- {claude_mpm-4.13.2.dist-info → claude_mpm-4.18.2.dist-info}/RECORD +238 -174
- claude_mpm/dashboard/static/css/code-tree.css +0 -1639
- claude_mpm/dashboard/static/js/components/code-tree/tree-breadcrumb.js +0 -353
- claude_mpm/dashboard/static/js/components/code-tree/tree-constants.js +0 -235
- claude_mpm/dashboard/static/js/components/code-tree/tree-search.js +0 -409
- claude_mpm/dashboard/static/js/components/code-tree/tree-utils.js +0 -435
- claude_mpm/dashboard/static/js/components/code-tree.js +0 -5869
- claude_mpm/dashboard/static/js/components/code-viewer.js +0 -1386
- claude_mpm/hooks/claude_hooks/hook_handler_eventbus.py +0 -425
- claude_mpm/hooks/claude_hooks/hook_handler_original.py +0 -1041
- claude_mpm/hooks/claude_hooks/hook_handler_refactored.py +0 -347
- claude_mpm/services/agents/deployment/agent_lifecycle_manager_refactored.py +0 -575
- claude_mpm/services/project/analyzer_refactored.py +0 -450
- {claude_mpm-4.13.2.dist-info → claude_mpm-4.18.2.dist-info}/WHEEL +0 -0
- {claude_mpm-4.13.2.dist-info → claude_mpm-4.18.2.dist-info}/entry_points.txt +0 -0
- {claude_mpm-4.13.2.dist-info → claude_mpm-4.18.2.dist-info}/licenses/LICENSE +0 -0
- {claude_mpm-4.13.2.dist-info → claude_mpm-4.18.2.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,396 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Log Monitor for Claude MPM Framework
|
|
3
|
+
======================================
|
|
4
|
+
|
|
5
|
+
WHY: Provides real-time log file monitoring to detect error patterns that
|
|
6
|
+
indicate imminent crashes (OOM, exceptions, segfaults) BEFORE they occur.
|
|
7
|
+
|
|
8
|
+
DESIGN DECISION: Uses watchdog library for efficient OS-level file system
|
|
9
|
+
monitoring. Avoids polling by receiving file modification events from the OS.
|
|
10
|
+
|
|
11
|
+
ARCHITECTURE:
|
|
12
|
+
- Watchdog-based file system monitoring (OS-level events)
|
|
13
|
+
- Regex-based pattern matching for error detection
|
|
14
|
+
- Configurable error patterns with severity levels
|
|
15
|
+
- Rolling window of recent matches per deployment
|
|
16
|
+
- Callback system for pattern match alerts
|
|
17
|
+
- Thread-safe with proper locking
|
|
18
|
+
|
|
19
|
+
USAGE:
|
|
20
|
+
monitor = LogMonitor()
|
|
21
|
+
monitor.initialize()
|
|
22
|
+
|
|
23
|
+
# Add error patterns
|
|
24
|
+
monitor.add_pattern(r"OutOfMemoryError", severity="CRITICAL")
|
|
25
|
+
monitor.add_pattern(r"Exception:", severity="ERROR")
|
|
26
|
+
|
|
27
|
+
# Start monitoring a log file
|
|
28
|
+
monitor.start_monitoring(
|
|
29
|
+
log_file="/var/log/app.log",
|
|
30
|
+
deployment_id="my-app"
|
|
31
|
+
)
|
|
32
|
+
|
|
33
|
+
# Get recent matches
|
|
34
|
+
matches = monitor.get_recent_matches(deployment_id, limit=10)
|
|
35
|
+
"""
|
|
36
|
+
|
|
37
|
+
import re
|
|
38
|
+
import threading
|
|
39
|
+
from collections import defaultdict
|
|
40
|
+
from pathlib import Path
|
|
41
|
+
from typing import Callable, Dict, List, Optional, Tuple
|
|
42
|
+
|
|
43
|
+
from watchdog.events import FileSystemEvent, FileSystemEventHandler
|
|
44
|
+
from watchdog.observers import Observer
|
|
45
|
+
|
|
46
|
+
from claude_mpm.core.logger import get_logger
|
|
47
|
+
from claude_mpm.services.core.base import SyncBaseService
|
|
48
|
+
from claude_mpm.services.core.interfaces.stability import ILogMonitor
|
|
49
|
+
from claude_mpm.services.core.models.stability import LogPatternMatch
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class LogFileHandler(FileSystemEventHandler):
    """
    File system event handler that tails a single log file.

    WHY: Receives OS-level file events for the watched directory, filters
    them down to the monitored log file, and forwards any newly appended
    lines to a callback for pattern matching.

    The handler remembers the byte offset of the first unread byte
    (``last_position``) and the identity of the file it last read, so that
    truncation or replacement of the log file (e.g. log rotation) restarts
    reading from the beginning instead of silently skipping data.
    """

    def __init__(
        self,
        log_file: str,
        deployment_id: str,
        on_new_lines: Callable[[str, List[str]], None],
    ):
        """
        Initialize log file handler.

        Args:
            log_file: Path to log file being monitored
            deployment_id: Deployment identifier
            on_new_lines: Callback for new lines: (deployment_id, lines)
        """
        super().__init__()
        self.log_file = Path(log_file).resolve()
        self.deployment_id = deployment_id
        self.on_new_lines = on_new_lines

        # Byte offset of the first unread byte in the log file.
        self.last_position = 0
        # (st_dev, st_ino) of the file last read; None until the file is seen.
        self._file_id: Optional[Tuple[int, int]] = None

        # Start at the current end of file so that only lines appended after
        # monitoring begins are reported.
        if self.log_file.exists():
            info = self.log_file.stat()
            self.last_position = info.st_size
            self._file_id = (info.st_dev, info.st_ino)

    def on_created(self, event: FileSystemEvent) -> None:
        """
        Handle file creation events.

        WHY: The log file may not exist yet when monitoring starts, or may be
        re-created by log rotation. Its initial content must still be read.

        Args:
            event: File system event
        """
        if self._is_monitored_file(event):
            self._forward_new_lines()

    def on_modified(self, event: FileSystemEvent) -> None:
        """
        Handle file modification events.

        Args:
            event: File system event
        """
        if self._is_monitored_file(event):
            self._forward_new_lines()

    def _is_monitored_file(self, event: FileSystemEvent) -> bool:
        """Return True if the event refers to the monitored log file."""
        if event.is_directory:
            return False
        # The observer watches the whole parent directory, so events for
        # sibling files arrive here too and must be filtered out.
        return Path(event.src_path).resolve() == self.log_file

    def _forward_new_lines(self) -> None:
        """Read unread lines and pass them to the callback, never raising."""
        try:
            new_lines = self._read_new_lines()
            if new_lines:
                self.on_new_lines(self.deployment_id, new_lines)
        except Exception as e:
            # Log error but don't crash the monitoring thread.
            # NOTE(review): logger name passed explicitly; confirm
            # get_logger() accepts a name argument.
            get_logger(__name__).error(f"Error reading new log lines: {e}")

    def _read_new_lines(self) -> List[str]:
        """
        Read new lines from log file since last read.

        The file is read in binary mode so that ``last_position`` is a true
        byte offset that can be compared with the file size.

        Returns:
            List of new lines (without line terminators); empty if the file
            does not exist or has no unread data
        """
        try:
            info = self.log_file.stat()
            file_id = (info.st_dev, info.st_ino)

            # A different file identity (rotation/re-creation) or a size
            # smaller than our offset (truncation) means the old offset is
            # meaningless: start again from the beginning.
            if file_id != self._file_id or info.st_size < self.last_position:
                self.last_position = 0
            self._file_id = file_id

            with self.log_file.open("rb") as f:
                f.seek(self.last_position)
                data = f.read()
                self.last_position = f.tell()
        except FileNotFoundError:
            # File vanished between the event and the read; nothing to report.
            return []

        if not data:
            return []

        # Decode leniently and normalize line endings the same way text-mode
        # universal newlines would ("\r\n" and "\r" both become "\n").
        text = data.decode("utf-8", errors="ignore")
        text = text.replace("\r\n", "\n").replace("\r", "\n")

        new_lines = text.split("\n")
        # A trailing terminator yields one empty trailing element; drop it.
        if new_lines[-1] == "":
            new_lines.pop()
        return new_lines
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
class LogMonitor(SyncBaseService, ILogMonitor):
    """
    Real-time log file monitoring service.

    WHY: Provides early warning of critical errors by monitoring log files
    in real-time and detecting patterns that indicate imminent failures.

    Thread Safety: Public methods are guarded by locks. Two locks are used:

    - ``_lock`` protects the in-memory state (patterns, handlers, match
      history, callbacks). It is also taken on the observer thread while
      processing new lines, so it is never held while calling into the
      watchdog observer or into user callbacks.
    - ``_schedule_lock`` serializes observer lifecycle and scheduling
      operations (initialize/start/stop/shutdown).

    Match callbacks are invoked from ``_process_new_lines``, i.e. on whatever
    thread delivers the file system events.
    """

    # Default error patterns: (regex, severity). The first pattern that
    # matches a line wins, so order matters.
    DEFAULT_PATTERNS = [
        (r"OutOfMemoryError", "CRITICAL"),
        (r"Segmentation fault", "CRITICAL"),
        (r"Exception:", "ERROR"),
        (r"Traceback", "ERROR"),
        (r"Error:", "ERROR"),
        (r"FATAL", "CRITICAL"),
        (r"Database connection failed", "ERROR"),
        (r"Connection refused", "WARNING"),
        (r"Connection timeout", "WARNING"),
    ]

    def __init__(self, match_history_limit: int = 100):
        """
        Initialize log monitor.

        Args:
            match_history_limit: Number of matches to keep per deployment (default: 100)
        """
        super().__init__("LogMonitor")
        self.match_history_limit = match_history_limit

        # Error patterns: List[(compiled pattern, severity)], seeded with defaults
        self._patterns: List[Tuple[re.Pattern, str]] = [
            (re.compile(pattern), severity)
            for pattern, severity in self.DEFAULT_PATTERNS
        ]

        # Watchdog observer and handlers
        self._observer: Optional[Observer] = None
        self._handlers: Dict[str, LogFileHandler] = {}  # deployment_id -> handler
        # deployment_id -> watch object returned by Observer.schedule()
        # (None when the handler was registered without an observer)
        self._watches: Dict[str, object] = {}

        # Match history: deployment_id -> List[LogPatternMatch]
        self._match_history: Dict[str, List[LogPatternMatch]] = defaultdict(list)

        # Match callbacks
        self._match_callbacks: List[Callable[[str, LogPatternMatch], None]] = []

        # Thread safety (see class docstring for the role of each lock)
        self._lock = threading.Lock()
        self._schedule_lock = threading.Lock()

    def initialize(self) -> bool:
        """
        Initialize the log monitor.

        Creates and starts the watchdog observer. Calling this again while an
        observer already exists is a no-op, so no observer thread is leaked.

        Returns:
            True if initialization successful
        """
        try:
            with self._schedule_lock:
                if self._observer is None:
                    # Assign only after start() succeeds so a failed start
                    # never leaves a dead observer behind.
                    observer = Observer()
                    observer.start()
                    self._observer = observer

            self._initialized = True
            self.log_info(
                f"Log monitor initialized with {len(self._patterns)} patterns"
            )
            return True

        except Exception as e:
            self.log_error(f"Failed to initialize: {e}")
            return False

    def shutdown(self) -> None:
        """Shutdown log monitor and stop all monitoring."""
        # Stop all monitoring
        with self._lock:
            deployment_ids = list(self._handlers)

        for deployment_id in deployment_ids:
            self.stop_monitoring(deployment_id)

        # Detach the observer first so no new watches can be scheduled on it.
        with self._schedule_lock:
            observer, self._observer = self._observer, None

        # Stop observer. No lock is held here: the observer thread may be
        # waiting on self._lock inside _process_new_lines.
        if observer is not None:
            observer.stop()
            observer.join(timeout=5.0)

        self._shutdown = True
        self.log_info("Log monitor shutdown complete")

    def start_monitoring(self, log_file: str, deployment_id: str) -> None:
        """
        Start monitoring a log file for error patterns.

        WHY: Begins watching the log file for new entries. Uses OS-level
        file system events for efficiency.

        Args:
            log_file: Path to log file to monitor
            deployment_id: Deployment identifier for callbacks
        """
        log_path = Path(log_file).resolve()

        if not log_path.exists():
            # Still create the handler - the parent directory is watched, so
            # the file is picked up once it is created.
            self.log_warning(f"Log file does not exist: {log_file}")

        with self._schedule_lock:
            with self._lock:
                already_monitoring = deployment_id in self._handlers

            if already_monitoring:
                self.log_warning(f"Already monitoring logs for {deployment_id}")
                return

            handler = LogFileHandler(
                log_file=str(log_path),
                deployment_id=deployment_id,
                on_new_lines=self._process_new_lines,
            )

            # Watch the directory containing the log file. self._lock is NOT
            # held here so the observer thread can never block us while it
            # is inside a handler.
            watch = None
            if self._observer is not None:
                watch = self._observer.schedule(
                    handler, str(log_path.parent), recursive=False
                )

            with self._lock:
                self._handlers[deployment_id] = handler
                self._watches[deployment_id] = watch

        if watch is None:
            self.log_warning(
                f"Log monitor is not initialized; handler for {deployment_id} "
                f"registered but no events will be received"
            )
            return

        self.log_info(f"Started monitoring log file for {deployment_id}: {log_file}")

    def stop_monitoring(self, deployment_id: str) -> None:
        """
        Stop monitoring a deployment's log file.

        Only this deployment's handler is removed; other deployments keep
        receiving events without interruption, including those whose log
        files live in the same directory.

        Args:
            deployment_id: Deployment identifier
        """
        with self._schedule_lock:
            with self._lock:
                handler = self._handlers.pop(deployment_id, None)
                watch = self._watches.pop(deployment_id, None)
                # Several deployments may share one directory watch.
                shared = watch is not None and any(
                    other is not None and other == watch
                    for other in self._watches.values()
                )

            observer = self._observer
            if handler is not None and watch is not None and observer is not None:
                try:
                    if shared:
                        # Keep the directory watch alive for the others.
                        observer.remove_handler_for_watch(handler, watch)
                    else:
                        observer.unschedule(watch)
                except KeyError:
                    # The observer no longer knows this watch (e.g. it was
                    # already stopped); nothing left to remove.
                    self.log_debug(
                        f"Watch for {deployment_id} was already removed"
                    )

        if handler is not None:
            self.log_info(f"Stopped monitoring logs for {deployment_id}")

    def add_pattern(self, pattern: str, severity: str = "ERROR") -> None:
        """
        Add an error pattern to monitor.

        Args:
            pattern: Regex pattern to match
            severity: Error severity (ERROR, CRITICAL, WARNING)

        Raises:
            re.error: If pattern is not a valid regular expression
        """
        # Compile before taking the lock; an invalid pattern raises here
        # without touching shared state.
        compiled_pattern = re.compile(pattern)

        with self._lock:
            self._patterns.append((compiled_pattern, severity))

        self.log_debug(f"Added pattern: {pattern} (severity: {severity})")

    def get_recent_matches(
        self, deployment_id: str, limit: int = 10
    ) -> List[LogPatternMatch]:
        """
        Get recent pattern matches for a deployment.

        Args:
            deployment_id: Deployment identifier
            limit: Maximum number of matches to return; values <= 0 return
                an empty list

        Returns:
            List of LogPatternMatch objects, newest first
        """
        # Guard explicitly: matches[-0:] would return the WHOLE history.
        if limit <= 0:
            return []

        with self._lock:
            matches = self._match_history.get(deployment_id, [])
            return list(reversed(matches[-limit:]))

    def register_match_callback(
        self, callback: Callable[[str, LogPatternMatch], None]
    ) -> None:
        """
        Register callback for pattern matches.

        Args:
            callback: Function called with (deployment_id, match) when pattern detected
        """
        with self._lock:
            self._match_callbacks.append(callback)

        # Not every callable has __name__ (functools.partial, callable objects).
        callback_name = getattr(callback, "__name__", repr(callback))
        self.log_debug(f"Registered match callback: {callback_name}")

    def _process_new_lines(self, deployment_id: str, lines: List[str]) -> None:
        """
        Process new log lines for pattern matching.

        Each line is recorded at most once: the first matching pattern wins.

        Args:
            deployment_id: Deployment identifier
            lines: New log lines to process
        """
        # Snapshot so add_pattern() on another thread cannot mutate the list
        # while it is being iterated.
        with self._lock:
            patterns = tuple(self._patterns)

        for line in lines:
            # Check against all patterns
            for pattern, severity in patterns:
                if not pattern.search(line):
                    continue

                # Create match
                match = LogPatternMatch(
                    deployment_id=deployment_id,
                    pattern=pattern.pattern,
                    line=line,
                    severity=severity,
                )

                # Add to history and trim to the configured size. Deleting
                # the computed excess (rather than slicing with a negative
                # index) also works when the limit is 0.
                with self._lock:
                    history = self._match_history[deployment_id]
                    history.append(match)
                    excess = len(history) - self.match_history_limit
                    if excess > 0:
                        del history[:excess]

                # Log match
                self.log_warning(
                    f"Pattern matched in {deployment_id}: "
                    f"[{severity}] {pattern.pattern[:50]}"
                )

                # Trigger callbacks (outside the lock so callbacks may call
                # back into this monitor)
                self._trigger_match_callbacks(deployment_id, match)

                # Only match first pattern per line
                break

    def _trigger_match_callbacks(
        self, deployment_id: str, match: LogPatternMatch
    ) -> None:
        """
        Trigger registered callbacks for pattern matches.

        A failing callback is logged and does not prevent the remaining
        callbacks from running.

        Args:
            deployment_id: Deployment that has a match
            match: LogPatternMatch with pattern details
        """
        # Snapshot under the lock, invoke without it.
        with self._lock:
            callbacks = list(self._match_callbacks)

        for callback in callbacks:
            try:
                callback(deployment_id, match)
            except Exception as e:
                callback_name = getattr(callback, "__name__", repr(callback))
                self.log_error(f"Error in match callback {callback_name}: {e}")
|
|
394
|
+
|
|
395
|
+
|
|
396
|
+
# Public API of this module: only LogMonitor is exported via
# `from ... import *`; LogFileHandler is left out of the export list.
__all__ = ["LogMonitor"]
|
|
@@ -0,0 +1,294 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Memory Leak Detector for Claude MPM Framework
|
|
3
|
+
==============================================
|
|
4
|
+
|
|
5
|
+
WHY: Detects memory leaks BEFORE they cause OOM crashes by analyzing memory
|
|
6
|
+
usage trends over time using linear regression slope analysis.
|
|
7
|
+
|
|
8
|
+
DESIGN DECISION: Uses rolling window of memory measurements with configurable
|
|
9
|
+
size and threshold. Calculates slope to detect sustained memory growth patterns.
|
|
10
|
+
|
|
11
|
+
ARCHITECTURE:
|
|
12
|
+
- Rolling window of (timestamp, memory_mb) measurements per deployment
|
|
13
|
+
- Slope-based leak detection: MB/minute growth rate
|
|
14
|
+
- Configurable thresholds and window sizes
|
|
15
|
+
- Callback system for leak detection alerts
|
|
16
|
+
- Thread-safe with proper locking
|
|
17
|
+
|
|
18
|
+
USAGE:
|
|
19
|
+
detector = MemoryLeakDetector(
|
|
20
|
+
leak_threshold_mb_per_minute=10.0,
|
|
21
|
+
window_size=100,
|
|
22
|
+
)
|
|
23
|
+
detector.initialize()
|
|
24
|
+
|
|
25
|
+
# Record memory usage periodically
|
|
26
|
+
detector.record_memory_usage(deployment_id, memory_mb)
|
|
27
|
+
|
|
28
|
+
# Check for leaks
|
|
29
|
+
trend = detector.analyze_trend(deployment_id)
|
|
30
|
+
if trend.is_leaking:
|
|
31
|
+
print(f"Leak detected! Slope: {trend.slope_mb_per_minute} MB/min")
|
|
32
|
+
"""
|
|
33
|
+
|
|
34
|
+
import threading
|
|
35
|
+
from collections import defaultdict
|
|
36
|
+
from datetime import datetime, timezone
|
|
37
|
+
from typing import Callable, Dict, List, Tuple
|
|
38
|
+
|
|
39
|
+
from claude_mpm.services.core.base import SyncBaseService
|
|
40
|
+
from claude_mpm.services.core.interfaces.stability import IMemoryLeakDetector
|
|
41
|
+
from claude_mpm.services.core.models.stability import MemoryTrend
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class MemoryLeakDetector(SyncBaseService, IMemoryLeakDetector):
    """
    Memory leak detection service using trend analysis.

    WHY: Provides early warning of memory leaks by analyzing memory growth
    patterns over time, enabling preemptive restarts before OOM crashes.

    Algorithm:
        1. Maintain a rolling window of memory measurements per deployment
        2. Calculate the two-point slope (MB per minute) between the oldest
           and the newest measurement in the window
        3. Detect leak if slope exceeds threshold (default: 10 MB/min)

    Thread Safety: The measurement store and the callback list are only
    touched while holding ``self._lock``. Logging and leak callbacks run
    after the lock has been released, so a callback may call back into this
    detector (e.g. ``clear_measurements``) without deadlocking on the
    non-reentrant lock.
    """

    def __init__(
        self,
        leak_threshold_mb_per_minute: float = 10.0,
        window_size: int = 100,
    ) -> None:
        """
        Initialize memory leak detector.

        Args:
            leak_threshold_mb_per_minute: Threshold for leak detection (default: 10.0)
            window_size: Number of measurements to keep in rolling window (default: 100)
        """
        super().__init__("MemoryLeakDetector")
        self.leak_threshold = leak_threshold_mb_per_minute
        self.window_size = window_size

        # Memory measurements: deployment_id -> List[(timestamp, memory_mb)]
        self._measurements: Dict[str, List[Tuple[datetime, float]]] = defaultdict(list)

        # Guards _measurements and _leak_callbacks. This is a plain
        # (non-reentrant) Lock, so it must never be held while calling out
        # to user callbacks.
        self._lock = threading.Lock()

        # Leak detection callbacks
        self._leak_callbacks: List[Callable[[str, MemoryTrend], None]] = []

    def initialize(self) -> bool:
        """
        Initialize the memory leak detector.

        Marks the service as initialized and logs the active configuration.

        Returns:
            True if initialization successful
        """
        self._initialized = True
        self.log_info(
            f"Memory leak detector initialized "
            f"(threshold={self.leak_threshold} MB/min, window={self.window_size})"
        )
        return True

    def shutdown(self) -> None:
        """Shutdown memory leak detector and clear data."""
        with self._lock:
            self._measurements.clear()
            self._leak_callbacks.clear()

        self._shutdown = True
        self.log_info("Memory leak detector shutdown complete")

    def record_memory_usage(self, deployment_id: str, memory_mb: float) -> None:
        """
        Record a memory usage measurement.

        WHY: Builds historical data for trend analysis. Should be called
        periodically (e.g., every 30s) to collect sufficient data points.

        Args:
            deployment_id: Deployment identifier
            memory_mb: Current memory usage in megabytes
        """
        with self._lock:
            # Timestamp is taken under the lock so that entries are appended
            # in the same order as their timestamps.
            timestamp = datetime.now(tz=timezone.utc)
            samples = self._measurements[deployment_id]
            samples.append((timestamp, memory_mb))

            # Trim to window size. Dropping the surplus from the front (rather
            # than slicing with ``[-window_size:]``) also behaves for
            # window_size == 0, where ``[-0:]`` would keep the whole list and
            # let the history grow without bound.
            surplus = len(samples) - self.window_size
            if surplus > 0:
                del samples[:surplus]
            sample_count = len(samples)

        self.log_debug(
            f"Recorded memory usage for {deployment_id}: {memory_mb:.2f}MB "
            f"({sample_count} measurements)"
        )

    def analyze_trend(self, deployment_id: str) -> MemoryTrend:
        """
        Analyze memory usage trend for leak detection.

        WHY: Computes slope of memory usage over time to detect sustained
        growth patterns characteristic of memory leaks.

        Side effect: when a leak is detected, a warning is logged and every
        registered leak callback is invoked (on each call, not only the
        first time the threshold is crossed).

        Args:
            deployment_id: Deployment identifier

        Returns:
            MemoryTrend with slope analysis and leak detection result.
            With fewer than 2 measurements an empty, non-leaking trend is
            returned.

        Algorithm:
            slope_mb_per_minute = (recent_memory - old_memory) / time_delta_minutes
            is_leaking = slope_mb_per_minute > threshold
        """
        # Only the snapshot needs the lock; everything below works on the
        # private copy so logging and callbacks never run with the lock held.
        with self._lock:
            measurements = list(self._measurements.get(deployment_id, ()))

        # Need at least 2 measurements for trend analysis
        if len(measurements) < 2:
            return MemoryTrend(
                deployment_id=deployment_id,
                timestamps=[],
                memory_mb=[],
                slope_mb_per_minute=0.0,
                is_leaking=False,
                window_size=0,
                threshold_mb_per_minute=self.leak_threshold,
            )

        slope = self._calculate_slope(measurements)
        leaking = slope > self.leak_threshold

        trend = MemoryTrend(
            deployment_id=deployment_id,
            timestamps=[ts for ts, _ in measurements],
            memory_mb=[mem for _, mem in measurements],
            slope_mb_per_minute=slope,
            is_leaking=leaking,
            window_size=len(measurements),
            threshold_mb_per_minute=self.leak_threshold,
        )

        if leaking:
            self.log_warning(
                f"Memory leak detected for {deployment_id}: "
                f"{slope:.2f} MB/min (threshold: {self.leak_threshold} MB/min)"
            )
            self._trigger_leak_callbacks(deployment_id, trend)

        return trend

    def is_leaking(self, deployment_id: str) -> bool:
        """
        Check if deployment has a detected memory leak.

        Delegates to ``analyze_trend``, so leak callbacks fire as a side
        effect when a leak is found.

        Args:
            deployment_id: Deployment identifier

        Returns:
            True if leak detected (sustained memory growth)
        """
        return self.analyze_trend(deployment_id).is_leaking

    def register_leak_callback(
        self, callback: Callable[[str, MemoryTrend], None]
    ) -> None:
        """
        Register callback for leak detection events.

        Args:
            callback: Function called with (deployment_id, trend) when leak detected
        """
        with self._lock:
            self._leak_callbacks.append(callback)
        self.log_debug(f"Registered leak callback: {self._callback_name(callback)}")

    @staticmethod
    def _callback_name(callback: Callable[..., object]) -> str:
        """
        Return a printable name for a callback.

        WHY: Not every callable has ``__name__`` (``functools.partial``
        objects and callable instances do not), so fall back to ``repr``
        instead of raising AttributeError from a log statement.
        """
        return getattr(callback, "__name__", None) or repr(callback)

    def _calculate_slope(self, measurements: List[Tuple[datetime, float]]) -> float:
        """
        Calculate memory growth slope between the oldest and newest measurement.

        WHY: A slope over the whole window reflects sustained growth rather
        than a single jump between two consecutive samples.

        Args:
            measurements: List of (timestamp, memory_mb) tuples, oldest first

        Returns:
            Slope in MB per minute; 0.0 when there are fewer than two
            measurements or when no positive amount of time separates the
            first and last measurement.

        Algorithm:
            Simple two-point slope: (y2 - y1) / (x2 - x1)
            Where x is time in minutes, y is memory in MB
        """
        if len(measurements) < 2:
            return 0.0

        first_timestamp, first_memory = measurements[0]
        last_timestamp, last_memory = measurements[-1]

        time_delta_minutes = (last_timestamp - first_timestamp).total_seconds() / 60.0

        # Timestamps come from the wall clock, which can step backwards. A
        # zero delta would divide by zero and a negative delta would flip the
        # sign of the slope (shrinking memory reported as a leak), so treat
        # both as "no trend".
        if time_delta_minutes <= 0:
            return 0.0

        return (last_memory - first_memory) / time_delta_minutes

    def _trigger_leak_callbacks(self, deployment_id: str, trend: MemoryTrend) -> None:
        """
        Trigger registered callbacks for leak detection.

        Callbacks are invoked on a snapshot of the registration list and
        without holding the lock. An exception raised by one callback is
        logged and does not prevent the remaining callbacks from running.

        Args:
            deployment_id: Deployment that has a leak
            trend: MemoryTrend with leak analysis
        """
        with self._lock:
            callbacks = list(self._leak_callbacks)

        for callback in callbacks:
            try:
                callback(deployment_id, trend)
            except Exception as e:
                # Boundary around user code: log and continue so one faulty
                # callback cannot silence the others.
                self.log_error(
                    f"Error in leak callback {self._callback_name(callback)}: {e}"
                )

    def get_measurements(self, deployment_id: str) -> List[Tuple[datetime, float]]:
        """
        Get all measurements for a deployment (for testing/debugging).

        Args:
            deployment_id: Deployment identifier

        Returns:
            A copy of the list of (timestamp, memory_mb) tuples; empty if the
            deployment has no recorded measurements
        """
        with self._lock:
            # .get() avoids creating an empty defaultdict entry for unknown ids.
            return list(self._measurements.get(deployment_id, []))

    def clear_measurements(self, deployment_id: str) -> None:
        """
        Clear measurements for a deployment (e.g., after restart).

        Unknown deployment ids are ignored.

        Args:
            deployment_id: Deployment identifier
        """
        cleared = False
        with self._lock:
            if deployment_id in self._measurements:
                del self._measurements[deployment_id]
                cleared = True

        if cleared:
            self.log_debug(f"Cleared measurements for {deployment_id}")
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
# Names exported by `from <this module> import *`: only the detector class.
__all__ = ["MemoryLeakDetector"]
|