claude-mpm 4.7.4__py3-none-any.whl → 4.18.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- claude_mpm/VERSION +1 -1
- claude_mpm/agents/BASE_AGENT_TEMPLATE.md +118 -0
- claude_mpm/agents/BASE_ENGINEER.md +286 -0
- claude_mpm/agents/BASE_PM.md +106 -1
- claude_mpm/agents/OUTPUT_STYLE.md +329 -11
- claude_mpm/agents/PM_INSTRUCTIONS.md +397 -459
- claude_mpm/agents/agent_loader.py +17 -5
- claude_mpm/agents/frontmatter_validator.py +284 -253
- claude_mpm/agents/templates/README.md +465 -0
- claude_mpm/agents/templates/agent-manager.json +4 -1
- claude_mpm/agents/templates/agentic-coder-optimizer.json +13 -3
- claude_mpm/agents/templates/api_qa.json +11 -2
- claude_mpm/agents/templates/circuit_breakers.md +638 -0
- claude_mpm/agents/templates/clerk-ops.json +12 -2
- claude_mpm/agents/templates/code_analyzer.json +8 -2
- claude_mpm/agents/templates/content-agent.json +358 -0
- claude_mpm/agents/templates/dart_engineer.json +15 -2
- claude_mpm/agents/templates/data_engineer.json +15 -2
- claude_mpm/agents/templates/documentation.json +10 -2
- claude_mpm/agents/templates/engineer.json +21 -1
- claude_mpm/agents/templates/gcp_ops_agent.json +12 -2
- claude_mpm/agents/templates/git_file_tracking.md +584 -0
- claude_mpm/agents/templates/golang_engineer.json +270 -0
- claude_mpm/agents/templates/imagemagick.json +4 -1
- claude_mpm/agents/templates/java_engineer.json +346 -0
- claude_mpm/agents/templates/local_ops_agent.json +1227 -6
- claude_mpm/agents/templates/memory_manager.json +4 -1
- claude_mpm/agents/templates/nextjs_engineer.json +141 -133
- claude_mpm/agents/templates/ops.json +12 -2
- claude_mpm/agents/templates/php-engineer.json +270 -174
- claude_mpm/agents/templates/pm_examples.md +474 -0
- claude_mpm/agents/templates/pm_red_flags.md +240 -0
- claude_mpm/agents/templates/product_owner.json +338 -0
- claude_mpm/agents/templates/project_organizer.json +14 -4
- claude_mpm/agents/templates/prompt-engineer.json +13 -2
- claude_mpm/agents/templates/python_engineer.json +174 -81
- claude_mpm/agents/templates/qa.json +11 -2
- claude_mpm/agents/templates/react_engineer.json +16 -3
- claude_mpm/agents/templates/refactoring_engineer.json +12 -2
- claude_mpm/agents/templates/research.json +34 -21
- claude_mpm/agents/templates/response_format.md +583 -0
- claude_mpm/agents/templates/ruby-engineer.json +129 -192
- claude_mpm/agents/templates/rust_engineer.json +270 -0
- claude_mpm/agents/templates/security.json +10 -2
- claude_mpm/agents/templates/svelte-engineer.json +225 -0
- claude_mpm/agents/templates/ticketing.json +10 -2
- claude_mpm/agents/templates/typescript_engineer.json +116 -125
- claude_mpm/agents/templates/validation_templates.md +312 -0
- claude_mpm/agents/templates/vercel_ops_agent.json +12 -2
- claude_mpm/agents/templates/version_control.json +12 -2
- claude_mpm/agents/templates/web_qa.json +11 -2
- claude_mpm/agents/templates/web_ui.json +15 -2
- claude_mpm/cli/__init__.py +34 -614
- claude_mpm/cli/commands/agent_manager.py +25 -12
- claude_mpm/cli/commands/agent_state_manager.py +186 -0
- claude_mpm/cli/commands/agents.py +235 -148
- claude_mpm/cli/commands/agents_detect.py +380 -0
- claude_mpm/cli/commands/agents_recommend.py +309 -0
- claude_mpm/cli/commands/aggregate.py +7 -3
- claude_mpm/cli/commands/analyze.py +9 -4
- claude_mpm/cli/commands/analyze_code.py +7 -2
- claude_mpm/cli/commands/auto_configure.py +570 -0
- claude_mpm/cli/commands/config.py +47 -13
- claude_mpm/cli/commands/configure.py +419 -1571
- claude_mpm/cli/commands/configure_agent_display.py +261 -0
- claude_mpm/cli/commands/configure_behavior_manager.py +204 -0
- claude_mpm/cli/commands/configure_hook_manager.py +225 -0
- claude_mpm/cli/commands/configure_models.py +18 -0
- claude_mpm/cli/commands/configure_navigation.py +167 -0
- claude_mpm/cli/commands/configure_paths.py +104 -0
- claude_mpm/cli/commands/configure_persistence.py +254 -0
- claude_mpm/cli/commands/configure_startup_manager.py +646 -0
- claude_mpm/cli/commands/configure_template_editor.py +497 -0
- claude_mpm/cli/commands/configure_validators.py +73 -0
- claude_mpm/cli/commands/local_deploy.py +537 -0
- claude_mpm/cli/commands/memory.py +54 -20
- claude_mpm/cli/commands/mpm_init.py +585 -196
- claude_mpm/cli/commands/mpm_init_handler.py +37 -3
- claude_mpm/cli/commands/search.py +170 -4
- claude_mpm/cli/commands/upgrade.py +152 -0
- claude_mpm/cli/executor.py +202 -0
- claude_mpm/cli/helpers.py +105 -0
- claude_mpm/cli/interactive/__init__.py +3 -0
- claude_mpm/cli/interactive/skills_wizard.py +491 -0
- claude_mpm/cli/parsers/__init__.py +7 -1
- claude_mpm/cli/parsers/agents_parser.py +9 -0
- claude_mpm/cli/parsers/auto_configure_parser.py +245 -0
- claude_mpm/cli/parsers/base_parser.py +110 -3
- claude_mpm/cli/parsers/local_deploy_parser.py +227 -0
- claude_mpm/cli/parsers/mpm_init_parser.py +65 -5
- claude_mpm/cli/shared/output_formatters.py +28 -19
- claude_mpm/cli/startup.py +481 -0
- claude_mpm/cli/utils.py +52 -1
- claude_mpm/commands/mpm-agents-detect.md +168 -0
- claude_mpm/commands/mpm-agents-recommend.md +214 -0
- claude_mpm/commands/mpm-agents.md +75 -1
- claude_mpm/commands/mpm-auto-configure.md +217 -0
- claude_mpm/commands/mpm-help.md +163 -0
- claude_mpm/commands/mpm-init.md +148 -3
- claude_mpm/commands/mpm-version.md +113 -0
- claude_mpm/commands/mpm.md +1 -0
- claude_mpm/config/agent_config.py +2 -2
- claude_mpm/config/model_config.py +428 -0
- claude_mpm/constants.py +1 -0
- claude_mpm/core/base_service.py +13 -12
- claude_mpm/core/enums.py +452 -0
- claude_mpm/core/factories.py +1 -1
- claude_mpm/core/instruction_reinforcement_hook.py +2 -1
- claude_mpm/core/interactive_session.py +9 -3
- claude_mpm/core/log_manager.py +2 -0
- claude_mpm/core/logging_config.py +6 -2
- claude_mpm/core/oneshot_session.py +8 -4
- claude_mpm/core/optimized_agent_loader.py +3 -3
- claude_mpm/core/output_style_manager.py +12 -192
- claude_mpm/core/service_registry.py +5 -1
- claude_mpm/core/types.py +2 -9
- claude_mpm/core/typing_utils.py +7 -6
- claude_mpm/dashboard/static/js/dashboard.js +0 -14
- claude_mpm/dashboard/templates/index.html +3 -41
- claude_mpm/hooks/__init__.py +20 -0
- claude_mpm/hooks/claude_hooks/event_handlers.py +4 -2
- claude_mpm/hooks/claude_hooks/response_tracking.py +35 -1
- claude_mpm/hooks/claude_hooks/services/connection_manager_http.py +23 -2
- claude_mpm/hooks/failure_learning/__init__.py +60 -0
- claude_mpm/hooks/failure_learning/failure_detection_hook.py +235 -0
- claude_mpm/hooks/failure_learning/fix_detection_hook.py +217 -0
- claude_mpm/hooks/failure_learning/learning_extraction_hook.py +286 -0
- claude_mpm/hooks/instruction_reinforcement.py +7 -2
- claude_mpm/hooks/kuzu_enrichment_hook.py +263 -0
- claude_mpm/hooks/kuzu_memory_hook.py +37 -12
- claude_mpm/hooks/kuzu_response_hook.py +183 -0
- claude_mpm/models/resume_log.py +340 -0
- claude_mpm/services/agents/__init__.py +18 -5
- claude_mpm/services/agents/auto_config_manager.py +796 -0
- claude_mpm/services/agents/deployment/agent_configuration_manager.py +1 -1
- claude_mpm/services/agents/deployment/agent_record_service.py +1 -1
- claude_mpm/services/agents/deployment/agent_validator.py +17 -1
- claude_mpm/services/agents/deployment/async_agent_deployment.py +1 -1
- claude_mpm/services/agents/deployment/interface_adapter.py +3 -2
- claude_mpm/services/agents/deployment/local_template_deployment.py +1 -1
- claude_mpm/services/agents/deployment/pipeline/steps/agent_processing_step.py +7 -6
- claude_mpm/services/agents/deployment/pipeline/steps/base_step.py +7 -16
- claude_mpm/services/agents/deployment/pipeline/steps/configuration_step.py +4 -3
- claude_mpm/services/agents/deployment/pipeline/steps/target_directory_step.py +5 -3
- claude_mpm/services/agents/deployment/pipeline/steps/validation_step.py +6 -5
- claude_mpm/services/agents/deployment/refactored_agent_deployment_service.py +9 -6
- claude_mpm/services/agents/deployment/validation/__init__.py +3 -1
- claude_mpm/services/agents/deployment/validation/validation_result.py +1 -9
- claude_mpm/services/agents/local_template_manager.py +1 -1
- claude_mpm/services/agents/memory/agent_memory_manager.py +5 -2
- claude_mpm/services/agents/observers.py +547 -0
- claude_mpm/services/agents/recommender.py +568 -0
- claude_mpm/services/agents/registry/modification_tracker.py +5 -2
- claude_mpm/services/command_handler_service.py +11 -5
- claude_mpm/services/core/__init__.py +33 -1
- claude_mpm/services/core/interfaces/__init__.py +90 -3
- claude_mpm/services/core/interfaces/agent.py +184 -0
- claude_mpm/services/core/interfaces/health.py +172 -0
- claude_mpm/services/core/interfaces/model.py +281 -0
- claude_mpm/services/core/interfaces/process.py +372 -0
- claude_mpm/services/core/interfaces/project.py +121 -0
- claude_mpm/services/core/interfaces/restart.py +307 -0
- claude_mpm/services/core/interfaces/stability.py +260 -0
- claude_mpm/services/core/memory_manager.py +11 -24
- claude_mpm/services/core/models/__init__.py +79 -0
- claude_mpm/services/core/models/agent_config.py +381 -0
- claude_mpm/services/core/models/health.py +162 -0
- claude_mpm/services/core/models/process.py +235 -0
- claude_mpm/services/core/models/restart.py +302 -0
- claude_mpm/services/core/models/stability.py +264 -0
- claude_mpm/services/core/models/toolchain.py +306 -0
- claude_mpm/services/core/path_resolver.py +23 -7
- claude_mpm/services/diagnostics/__init__.py +2 -2
- claude_mpm/services/diagnostics/checks/agent_check.py +25 -24
- claude_mpm/services/diagnostics/checks/claude_code_check.py +24 -23
- claude_mpm/services/diagnostics/checks/common_issues_check.py +25 -24
- claude_mpm/services/diagnostics/checks/configuration_check.py +24 -23
- claude_mpm/services/diagnostics/checks/filesystem_check.py +18 -17
- claude_mpm/services/diagnostics/checks/installation_check.py +30 -29
- claude_mpm/services/diagnostics/checks/instructions_check.py +20 -19
- claude_mpm/services/diagnostics/checks/mcp_check.py +50 -36
- claude_mpm/services/diagnostics/checks/mcp_services_check.py +38 -33
- claude_mpm/services/diagnostics/checks/monitor_check.py +23 -22
- claude_mpm/services/diagnostics/checks/startup_log_check.py +9 -8
- claude_mpm/services/diagnostics/diagnostic_runner.py +6 -5
- claude_mpm/services/diagnostics/doctor_reporter.py +28 -25
- claude_mpm/services/diagnostics/models.py +19 -24
- claude_mpm/services/infrastructure/monitoring/__init__.py +1 -1
- claude_mpm/services/infrastructure/monitoring/aggregator.py +12 -12
- claude_mpm/services/infrastructure/monitoring/base.py +5 -13
- claude_mpm/services/infrastructure/monitoring/network.py +7 -6
- claude_mpm/services/infrastructure/monitoring/process.py +13 -12
- claude_mpm/services/infrastructure/monitoring/resources.py +7 -6
- claude_mpm/services/infrastructure/monitoring/service.py +16 -15
- claude_mpm/services/infrastructure/resume_log_generator.py +439 -0
- claude_mpm/services/local_ops/__init__.py +163 -0
- claude_mpm/services/local_ops/crash_detector.py +257 -0
- claude_mpm/services/local_ops/health_checks/__init__.py +28 -0
- claude_mpm/services/local_ops/health_checks/http_check.py +224 -0
- claude_mpm/services/local_ops/health_checks/process_check.py +236 -0
- claude_mpm/services/local_ops/health_checks/resource_check.py +255 -0
- claude_mpm/services/local_ops/health_manager.py +430 -0
- claude_mpm/services/local_ops/log_monitor.py +396 -0
- claude_mpm/services/local_ops/memory_leak_detector.py +294 -0
- claude_mpm/services/local_ops/process_manager.py +595 -0
- claude_mpm/services/local_ops/resource_monitor.py +331 -0
- claude_mpm/services/local_ops/restart_manager.py +401 -0
- claude_mpm/services/local_ops/restart_policy.py +387 -0
- claude_mpm/services/local_ops/state_manager.py +372 -0
- claude_mpm/services/local_ops/unified_manager.py +600 -0
- claude_mpm/services/mcp_config_manager.py +9 -4
- claude_mpm/services/mcp_gateway/core/__init__.py +1 -2
- claude_mpm/services/mcp_gateway/core/base.py +18 -31
- claude_mpm/services/mcp_gateway/main.py +30 -0
- claude_mpm/services/mcp_gateway/tools/external_mcp_services.py +206 -32
- claude_mpm/services/mcp_gateway/tools/health_check_tool.py +30 -28
- claude_mpm/services/mcp_gateway/tools/kuzu_memory_service.py +25 -5
- claude_mpm/services/mcp_service_verifier.py +1 -1
- claude_mpm/services/memory/failure_tracker.py +563 -0
- claude_mpm/services/memory_hook_service.py +165 -4
- claude_mpm/services/model/__init__.py +147 -0
- claude_mpm/services/model/base_provider.py +365 -0
- claude_mpm/services/model/claude_provider.py +412 -0
- claude_mpm/services/model/model_router.py +453 -0
- claude_mpm/services/model/ollama_provider.py +415 -0
- claude_mpm/services/monitor/daemon_manager.py +3 -2
- claude_mpm/services/monitor/handlers/dashboard.py +2 -1
- claude_mpm/services/monitor/handlers/hooks.py +2 -1
- claude_mpm/services/monitor/management/lifecycle.py +3 -2
- claude_mpm/services/monitor/server.py +2 -1
- claude_mpm/services/project/__init__.py +23 -0
- claude_mpm/services/project/detection_strategies.py +719 -0
- claude_mpm/services/project/toolchain_analyzer.py +581 -0
- claude_mpm/services/self_upgrade_service.py +342 -0
- claude_mpm/services/session_management_service.py +3 -2
- claude_mpm/services/session_manager.py +205 -1
- claude_mpm/services/shared/async_service_base.py +16 -27
- claude_mpm/services/shared/lifecycle_service_base.py +1 -14
- claude_mpm/services/socketio/handlers/__init__.py +5 -2
- claude_mpm/services/socketio/handlers/hook.py +13 -2
- claude_mpm/services/socketio/handlers/registry.py +4 -2
- claude_mpm/services/socketio/server/main.py +10 -8
- claude_mpm/services/subprocess_launcher_service.py +14 -5
- claude_mpm/services/unified/analyzer_strategies/code_analyzer.py +8 -7
- claude_mpm/services/unified/analyzer_strategies/dependency_analyzer.py +6 -5
- claude_mpm/services/unified/analyzer_strategies/performance_analyzer.py +8 -7
- claude_mpm/services/unified/analyzer_strategies/security_analyzer.py +7 -6
- claude_mpm/services/unified/analyzer_strategies/structure_analyzer.py +5 -4
- claude_mpm/services/unified/config_strategies/validation_strategy.py +13 -9
- claude_mpm/services/unified/deployment_strategies/cloud_strategies.py +10 -3
- claude_mpm/services/unified/deployment_strategies/local.py +6 -5
- claude_mpm/services/unified/deployment_strategies/utils.py +6 -5
- claude_mpm/services/unified/deployment_strategies/vercel.py +7 -6
- claude_mpm/services/unified/interfaces.py +3 -1
- claude_mpm/services/unified/unified_analyzer.py +14 -10
- claude_mpm/services/unified/unified_config.py +2 -1
- claude_mpm/services/unified/unified_deployment.py +9 -4
- claude_mpm/services/version_service.py +104 -1
- claude_mpm/skills/__init__.py +21 -0
- claude_mpm/skills/bundled/__init__.py +6 -0
- claude_mpm/skills/bundled/api-documentation.md +393 -0
- claude_mpm/skills/bundled/async-testing.md +571 -0
- claude_mpm/skills/bundled/code-review.md +143 -0
- claude_mpm/skills/bundled/database-migration.md +199 -0
- claude_mpm/skills/bundled/docker-containerization.md +194 -0
- claude_mpm/skills/bundled/express-local-dev.md +1429 -0
- claude_mpm/skills/bundled/fastapi-local-dev.md +1199 -0
- claude_mpm/skills/bundled/git-workflow.md +414 -0
- claude_mpm/skills/bundled/imagemagick.md +204 -0
- claude_mpm/skills/bundled/json-data-handling.md +223 -0
- claude_mpm/skills/bundled/nextjs-local-dev.md +807 -0
- claude_mpm/skills/bundled/pdf.md +141 -0
- claude_mpm/skills/bundled/performance-profiling.md +567 -0
- claude_mpm/skills/bundled/refactoring-patterns.md +180 -0
- claude_mpm/skills/bundled/security-scanning.md +327 -0
- claude_mpm/skills/bundled/systematic-debugging.md +473 -0
- claude_mpm/skills/bundled/test-driven-development.md +378 -0
- claude_mpm/skills/bundled/vite-local-dev.md +1061 -0
- claude_mpm/skills/bundled/web-performance-optimization.md +2305 -0
- claude_mpm/skills/bundled/xlsx.md +157 -0
- claude_mpm/skills/registry.py +286 -0
- claude_mpm/skills/skill_manager.py +310 -0
- claude_mpm/storage/state_storage.py +15 -15
- claude_mpm/tools/code_tree_analyzer.py +177 -141
- claude_mpm/tools/code_tree_events.py +4 -2
- claude_mpm/utils/agent_dependency_loader.py +40 -20
- claude_mpm/utils/display_helper.py +260 -0
- claude_mpm/utils/git_analyzer.py +407 -0
- claude_mpm/utils/robust_installer.py +73 -19
- {claude_mpm-4.7.4.dist-info → claude_mpm-4.18.2.dist-info}/METADATA +129 -12
- {claude_mpm-4.7.4.dist-info → claude_mpm-4.18.2.dist-info}/RECORD +295 -193
- claude_mpm/dashboard/static/css/code-tree.css +0 -1639
- claude_mpm/dashboard/static/index-hub-backup.html +0 -713
- claude_mpm/dashboard/static/js/components/code-tree/tree-breadcrumb.js +0 -353
- claude_mpm/dashboard/static/js/components/code-tree/tree-constants.js +0 -235
- claude_mpm/dashboard/static/js/components/code-tree/tree-search.js +0 -409
- claude_mpm/dashboard/static/js/components/code-tree/tree-utils.js +0 -435
- claude_mpm/dashboard/static/js/components/code-tree.js +0 -5869
- claude_mpm/dashboard/static/js/components/code-viewer.js +0 -1386
- claude_mpm/hooks/claude_hooks/hook_handler_eventbus.py +0 -425
- claude_mpm/hooks/claude_hooks/hook_handler_original.py +0 -1041
- claude_mpm/hooks/claude_hooks/hook_handler_refactored.py +0 -347
- claude_mpm/services/agents/deployment/agent_lifecycle_manager_refactored.py +0 -575
- claude_mpm/services/project/analyzer_refactored.py +0 -450
- {claude_mpm-4.7.4.dist-info → claude_mpm-4.18.2.dist-info}/WHEEL +0 -0
- {claude_mpm-4.7.4.dist-info → claude_mpm-4.18.2.dist-info}/entry_points.txt +0 -0
- {claude_mpm-4.7.4.dist-info → claude_mpm-4.18.2.dist-info}/licenses/LICENSE +0 -0
- {claude_mpm-4.7.4.dist-info → claude_mpm-4.18.2.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Project Analysis Interfaces for Claude MPM Framework
|
|
3
|
+
====================================================
|
|
4
|
+
|
|
5
|
+
WHY: This module contains interfaces for project analysis, toolchain detection,
|
|
6
|
+
and technology stack identification. These interfaces enable the auto-configuration
|
|
7
|
+
system to intelligently recommend agents based on detected project characteristics.
|
|
8
|
+
|
|
9
|
+
DESIGN DECISION: Project analysis interfaces are separated because they deal
|
|
10
|
+
with understanding the codebase structure, dependencies, and toolchain setup,
|
|
11
|
+
which requires different patterns than other service types.
|
|
12
|
+
|
|
13
|
+
Part of TSK-0054: Auto-Configuration Feature - Phase 1
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from abc import ABC, abstractmethod
|
|
17
|
+
from pathlib import Path
|
|
18
|
+
from typing import List, Optional
|
|
19
|
+
|
|
20
|
+
from ..models.toolchain import (
|
|
21
|
+
DeploymentTarget,
|
|
22
|
+
Framework,
|
|
23
|
+
LanguageDetection,
|
|
24
|
+
ToolchainAnalysis,
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class IToolchainAnalyzer(ABC):
    """Contract for services that inspect a project's toolchain.

    WHY: Appropriate agents can only be recommended once the project's
    toolchain is understood. Putting the analysis behind this interface lets
    it support different project types, languages, and frameworks.

    DESIGN DECISION: Language, framework, and deployment detection are kept
    as distinct methods so that analysis can be granular and its results
    cached. Each kind of analysis can be run on its own, depending on needs.
    """

    @abstractmethod
    def analyze_toolchain(self, project_path: Path) -> ToolchainAnalysis:
        """Run the comprehensive toolchain and dependency analysis.

        The analysis of the project's technical stack includes:
        - Primary and secondary programming languages
        - Framework and library dependencies
        - Build and deployment configuration
        - Development environment requirements

        Args:
            project_path: Path to the project root directory

        Returns:
            ToolchainAnalysis: Complete analysis result with confidence scores

        Raises:
            FileNotFoundError: If project_path does not exist
            PermissionError: If project_path is not readable
        """

    @abstractmethod
    def detect_language(self, project_path: Path) -> LanguageDetection:
        """Identify the primary and secondary languages of the project.

        Source files are analyzed to determine:
        - Primary programming language (highest code volume)
        - Secondary languages (supporting code, scripts)
        - Language versions if detectable
        - Confidence level for each detection

        Args:
            project_path: Path to the project root directory

        Returns:
            LanguageDetection: Detected languages with confidence scores

        Raises:
            FileNotFoundError: If project_path does not exist
        """

    @abstractmethod
    def detect_frameworks(self, project_path: Path) -> List[Framework]:
        """Identify the frameworks in use, together with their versions.

        Frameworks are identified by analyzing:
        - Dependency files (package.json, requirements.txt, etc.)
        - Import statements in source code
        - Configuration files specific to frameworks
        - Project structure patterns

        Args:
            project_path: Path to the project root directory

        Returns:
            List[Framework]: List of detected frameworks with versions and types

        Raises:
            FileNotFoundError: If project_path does not exist
        """

    @abstractmethod
    def detect_deployment_target(
        self, project_path: Path
    ) -> Optional[DeploymentTarget]:
        """Identify the environment the project is meant to be deployed to.

        Configuration is analyzed to identify deployment targets:
        - Cloud platforms (AWS, GCP, Azure)
        - Container orchestration (Kubernetes, Docker)
        - Serverless platforms (Lambda, Cloud Functions)
        - Traditional hosting (VPS, dedicated servers)

        Args:
            project_path: Path to the project root directory

        Returns:
            Optional[DeploymentTarget]: Detected deployment target or None if unclear

        Raises:
            FileNotFoundError: If project_path does not exist
        """
|
|
@@ -0,0 +1,307 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Restart Management Interfaces for Claude MPM Framework
|
|
3
|
+
========================================================
|
|
4
|
+
|
|
5
|
+
WHY: This module defines interfaces for auto-restart functionality with crash
|
|
6
|
+
detection, intelligent restart policies, and circuit breaker patterns.
|
|
7
|
+
|
|
8
|
+
DESIGN DECISION: Restart interfaces are separated to enable modular restart
|
|
9
|
+
management with different crash detection strategies and restart policies.
|
|
10
|
+
|
|
11
|
+
ARCHITECTURE:
|
|
12
|
+
- ICrashDetector: Interface for detecting process crashes and failures
|
|
13
|
+
- IRestartPolicy: Interface for restart decision logic with backoff
|
|
14
|
+
- IRestartManager: Interface for orchestrating the full restart workflow
|
|
15
|
+
|
|
16
|
+
USAGE:
|
|
17
|
+
crash_detector = CrashDetector(health_manager)
|
|
18
|
+
restart_policy = RestartPolicy(config)
|
|
19
|
+
restart_manager = RestartManager(
|
|
20
|
+
process_manager=process_manager,
|
|
21
|
+
health_manager=health_manager,
|
|
22
|
+
config=config
|
|
23
|
+
)
|
|
24
|
+
restart_manager.enable_auto_restart(deployment_id)
|
|
25
|
+
"""
|
|
26
|
+
|
|
27
|
+
from abc import ABC, abstractmethod
|
|
28
|
+
from typing import TYPE_CHECKING, Callable, Optional
|
|
29
|
+
|
|
30
|
+
if TYPE_CHECKING:
|
|
31
|
+
from claude_mpm.services.core.models.restart import RestartHistory
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class ICrashDetector(ABC):
    """Contract for components that detect process crashes and failures.

    WHY: Detecting a crash means watching health status changes, process
    exits, and zombie states. Abstracting the detection strategy behind this
    interface keeps crash monitoring flexible.

    DESIGN DECISION: Real-time status updates are received through callbacks
    from IHealthCheckManager, and crash history is tracked per deployment so
    that patterns can be detected.

    Thread Safety: Implementations must be thread-safe for concurrent monitoring.
    """

    @abstractmethod
    def register_crash_callback(self, callback: Callable[[str, str], None]) -> None:
        """Register a function to invoke whenever a crash is detected.

        Args:
            callback: Function called with (deployment_id, reason)
        """

    @abstractmethod
    def start_monitoring(self, deployment_id: str) -> None:
        """Begin watching one deployment for crashes.

        WHY: Enables targeted monitoring for specific deployments.

        Args:
            deployment_id: Unique deployment identifier

        Raises:
            ValueError: If deployment_id not found
        """

    @abstractmethod
    def stop_monitoring(self, deployment_id: str) -> None:
        """Stop watching a deployment.

        Args:
            deployment_id: Unique deployment identifier
        """

    @abstractmethod
    def is_monitoring(self, deployment_id: str) -> bool:
        """Report whether a deployment is currently being monitored.

        Args:
            deployment_id: Unique deployment identifier

        Returns:
            True if deployment is being monitored
        """

    @abstractmethod
    def get_crash_count(self, deployment_id: str) -> int:
        """Return how many crashes have been detected for a deployment.

        Args:
            deployment_id: Unique deployment identifier

        Returns:
            Number of crashes detected
        """
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
class IRestartPolicy(ABC):
    """Contract for restart decision logic with exponential backoff.

    WHY: Exponential backoff, a maximum number of attempts, and circuit
    breaker patterns are what prevent restart loops. Keeping the
    decision-making logic behind this interface enables different strategies.

    DESIGN DECISION: Exponential backoff with configurable parameters, plus
    circuit breaker state transitions (CLOSED → OPEN → HALF_OPEN).

    Circuit Breaker States:
    - CLOSED: Normal operation, restarts allowed
    - OPEN: Circuit breaker tripped, restarts blocked
    - HALF_OPEN: Testing if service recovered
    """

    @abstractmethod
    def should_restart(self, deployment_id: str) -> bool:
        """Decide whether a deployment should be restarted.

        WHY: This is the central decision point; it takes attempt count,
        circuit breaker state, and backoff timing into account.

        Args:
            deployment_id: Unique deployment identifier

        Returns:
            True if restart should proceed
        """

    @abstractmethod
    def calculate_backoff(self, deployment_id: str) -> float:
        """Return the backoff time, in seconds, before the next restart.

        WHY: Exponential backoff is what prevents restart storms.
        Formula: min(initial * (multiplier ** (attempt - 1)), max_backoff)

        NOTE(review): for attempt == 1 the formula above evaluates to
        min(initial, max_backoff), while the Returns section promises 0 on
        the first attempt -- confirm which one implementations must honor.

        Args:
            deployment_id: Unique deployment identifier

        Returns:
            Backoff time in seconds (0 if first attempt)
        """

    @abstractmethod
    def record_restart_attempt(
        self, deployment_id: str, success: bool, failure_reason: Optional[str] = None
    ) -> None:
        """Log one restart attempt and update the circuit breaker state.

        Args:
            deployment_id: Unique deployment identifier
            success: Whether restart succeeded
            failure_reason: Optional reason for failure
        """

    @abstractmethod
    def reset_restart_history(self, deployment_id: str) -> None:
        """Reset the restart history kept for a deployment.

        WHY: Restart attempts are cleared after a successful recovery or
        after manual intervention.

        Args:
            deployment_id: Unique deployment identifier
        """

    @abstractmethod
    def get_circuit_breaker_state(self, deployment_id: str) -> str:
        """Return the current circuit breaker state for a deployment.

        Args:
            deployment_id: Unique deployment identifier

        Returns:
            Circuit breaker state (CLOSED, OPEN, HALF_OPEN)
        """

    @abstractmethod
    def get_restart_attempt_count(self, deployment_id: str) -> int:
        """Return the number of restart attempts made for a deployment.

        Args:
            deployment_id: Unique deployment identifier

        Returns:
            Number of restart attempts
        """
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
class IRestartManager(ABC):
    """Contract for orchestrating the complete restart workflow.

    WHY: Managing restarts means coordinating crash detection, policy
    evaluation, process restart, and health verification. This interface is
    the high-level API for both automatic and manual restarts.

    DESIGN DECISION: Both automatic (background) and manual (on-demand)
    restart operations are offered, and all components are integrated:
    CrashDetector, RestartPolicy, ProcessManager, and HealthCheckManager.

    Restart Workflow:
    1. Detect crash (via CrashDetector callback)
    2. Check restart policy (max attempts, circuit breaker)
    3. Wait for backoff period
    4. Execute restart (preserve original StartConfig)
    5. Verify health after restart
    6. Record attempt and update circuit breaker
    """

    @abstractmethod
    def enable_auto_restart(self, deployment_id: str) -> None:
        """Turn on automatic restarts for a deployment.

        WHY: Gives hands-free recovery from crashes. Monitoring is started
        via CrashDetector and restart callbacks are registered.

        Args:
            deployment_id: Unique deployment identifier

        Raises:
            ValueError: If deployment_id not found
        """

    @abstractmethod
    def disable_auto_restart(self, deployment_id: str) -> None:
        """Turn off automatic restarts for a deployment.

        Args:
            deployment_id: Unique deployment identifier
        """

    @abstractmethod
    def is_auto_restart_enabled(self, deployment_id: str) -> bool:
        """Report whether auto-restart is enabled for a deployment.

        Args:
            deployment_id: Unique deployment identifier

        Returns:
            True if auto-restart is enabled
        """

    @abstractmethod
    def restart_deployment(self, deployment_id: str, manual: bool = False) -> bool:
        """Restart a deployment, whether triggered manually or automatically.

        WHY: A single, unified restart operation that respects policy
        constraints and performs health verification.

        Args:
            deployment_id: Unique deployment identifier
            manual: If True, bypass some policy checks (e.g., circuit breaker)

        Returns:
            True if restart succeeded

        Raises:
            ValueError: If deployment_id not found
        """

    @abstractmethod
    def get_restart_history(self, deployment_id: str) -> Optional["RestartHistory"]:
        """Return the restart history recorded for a deployment.

        Args:
            deployment_id: Unique deployment identifier

        Returns:
            RestartHistory if found, None otherwise
        """

    @abstractmethod
    def clear_restart_history(self, deployment_id: str) -> None:
        """Clear the restart history and reset the circuit breaker.

        WHY: Lets a manual intervention clear failed restart state.

        Args:
            deployment_id: Unique deployment identifier
        """
|
|
301
|
+
|
|
302
|
+
|
|
303
|
+
# Public API of this module: only the restart-related interfaces are exported.
__all__ = ["ICrashDetector", "IRestartManager", "IRestartPolicy"]
|
|
@@ -0,0 +1,260 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Stability Monitoring Interfaces for Claude MPM Framework
|
|
3
|
+
==========================================================
|
|
4
|
+
|
|
5
|
+
WHY: This module defines interfaces for proactive stability monitoring including
|
|
6
|
+
memory leak detection, log monitoring, and resource exhaustion prevention.
|
|
7
|
+
|
|
8
|
+
DESIGN DECISION: Separated from health checks to enable preventive monitoring
|
|
9
|
+
that triggers actions BEFORE crashes occur. Provides early warning systems.
|
|
10
|
+
|
|
11
|
+
ARCHITECTURE:
|
|
12
|
+
- IMemoryLeakDetector: Interface for memory leak detection using trend analysis
|
|
13
|
+
- ILogMonitor: Interface for real-time log file monitoring and pattern matching
|
|
14
|
+
- IResourceMonitor: Interface for comprehensive resource usage tracking
|
|
15
|
+
|
|
16
|
+
USAGE:
|
|
17
|
+
memory_detector = MemoryLeakDetector(leak_threshold_mb_per_minute=10.0)
|
|
18
|
+
log_monitor = LogMonitor(log_file="/var/log/app.log")
|
|
19
|
+
resource_monitor = ResourceMonitor(fd_threshold_percent=0.8)
|
|
20
|
+
|
|
21
|
+
# Integrate with health monitoring
|
|
22
|
+
health_manager.add_stability_monitors(
|
|
23
|
+
memory_detector=memory_detector,
|
|
24
|
+
log_monitor=log_monitor,
|
|
25
|
+
resource_monitor=resource_monitor,
|
|
26
|
+
)
|
|
27
|
+
"""
|
|
28
|
+
|
|
29
|
+
from abc import ABC, abstractmethod
|
|
30
|
+
from typing import Callable, List
|
|
31
|
+
|
|
32
|
+
from claude_mpm.services.core.models.stability import (
|
|
33
|
+
LogPatternMatch,
|
|
34
|
+
MemoryTrend,
|
|
35
|
+
ResourceUsage,
|
|
36
|
+
)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class IMemoryLeakDetector(ABC):
    """
    Contract for detecting memory leaks through trend analysis.

    WHY: Memory leaks are a common cause of process crashes. Catching them
    early makes a preemptive restart possible BEFORE the OOM killer
    terminates the process.

    DESIGN DECISION: Slope-based trend analysis over a rolling window is used
    to pick out sustained memory growth while filtering out normal variations.

    Algorithm:
        1. Keep a rolling window of measurements (timestamp, memory_mb)
        2. Compute the linear regression slope (MB per minute)
        3. Flag a leak if the slope exceeds the threshold
           (default: 10 MB/minute)
        4. Trigger an alert when a leak is detected and memory > 80% limit

    Thread Safety: Implementations must be thread-safe for concurrent access.
    """

    @abstractmethod
    def record_memory_usage(self, deployment_id: str, memory_mb: float) -> None:
        """
        Store one memory usage measurement.

        WHY: Accumulates the historical data that trend analysis relies on.
        Should be called periodically (e.g., every 30s) so that enough data
        points are collected.

        Args:
            deployment_id: Deployment identifier
            memory_mb: Current memory usage in megabytes
        """

    @abstractmethod
    def analyze_trend(self, deployment_id: str) -> MemoryTrend:
        """
        Evaluate the memory usage trend of a deployment for leak detection.

        WHY: The slope of memory usage over time reveals the sustained
        growth patterns that are characteristic of memory leaks.

        Args:
            deployment_id: Deployment identifier

        Returns:
            MemoryTrend with slope analysis and leak detection result

        Algorithm:
            slope_mb_per_minute = (recent_memory - old_memory) / time_delta_minutes
            is_leaking = slope_mb_per_minute > threshold
        """

    @abstractmethod
    def is_leaking(self, deployment_id: str) -> bool:
        """
        Report whether a memory leak has been detected for a deployment.

        Args:
            deployment_id: Deployment identifier

        Returns:
            True if leak detected (sustained memory growth)
        """

    @abstractmethod
    def register_leak_callback(
        self, callback: Callable[[str, MemoryTrend], None]
    ) -> None:
        """
        Subscribe a callback to leak detection events.

        Args:
            callback: Function called with (deployment_id, trend) when leak detected
        """
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
class ILogMonitor(ABC):
    """
    Contract for real-time log file monitoring and pattern matching.

    WHY: Application logs carry early warning signals (exceptions, OOM
    errors, segfaults) that predict imminent crashes. Watching them in real
    time makes proactive intervention possible.

    DESIGN DECISION: Uses watchdog library for efficient file system
    monitoring. Polling is avoided because file modification events are
    received from the OS.

    Pattern Matching:
        - Regex-based patterns for flexibility
        - Configurable patterns per deployment
        - Built-in patterns for common errors:
            * OutOfMemoryError
            * Segmentation fault
            * Exception: / Traceback
            * Database connection errors
            * Network timeouts

    Thread Safety: Uses watchdog's thread-safe event handling.
    """

    @abstractmethod
    def start_monitoring(self, log_file: str, deployment_id: str) -> None:
        """
        Begin watching a log file for error patterns.

        WHY: Starts observing the log file for new entries, relying on
        OS-level file system events for efficiency.

        Args:
            log_file: Path to log file to monitor
            deployment_id: Deployment identifier for callbacks
        """

    @abstractmethod
    def stop_monitoring(self, deployment_id: str) -> None:
        """
        Stop watching the log file of a deployment.

        Args:
            deployment_id: Deployment identifier
        """

    @abstractmethod
    def add_pattern(self, pattern: str, severity: str = "ERROR") -> None:
        """
        Register an additional error pattern to watch for.

        Args:
            pattern: Regex pattern to match
            severity: Error severity (ERROR, CRITICAL, WARNING)
        """

    @abstractmethod
    def get_recent_matches(
        self, deployment_id: str, limit: int = 10
    ) -> List[LogPatternMatch]:
        """
        Fetch the most recent pattern matches of a deployment.

        Args:
            deployment_id: Deployment identifier
            limit: Maximum number of matches to return

        Returns:
            List of LogPatternMatch objects, newest first
        """

    @abstractmethod
    def register_match_callback(
        self, callback: Callable[[str, LogPatternMatch], None]
    ) -> None:
        """
        Subscribe a callback to pattern match events.

        Args:
            callback: Function called with (deployment_id, match) when pattern detected
        """
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
class IResourceMonitor(ABC):
    """
    Contract for comprehensive resource usage monitoring.

    WHY: Running out of a resource (file descriptors, threads, connections,
    disk space) causes crashes and degradation. Monitoring allows preemptive
    action at 80% thresholds, before hard limits are hit.

    DESIGN DECISION: Extends basic resource health checks with:
        - Higher granularity (more frequent checks)
        - Percentage-based thresholds (80% of ulimit)
        - Trend analysis for growth rate
        - Integration with restart manager for preemptive restarts

    Resource Types:
        1. File Descriptors: Critical for I/O operations (Unix: ulimit -n)
        2. Threads: Memory and scheduling overhead
        3. Network Connections: Socket exhaustion
        4. Disk Space: Working directory availability

    Thread Safety: Implementations must be thread-safe.
    """

    @abstractmethod
    def check_resources(self, deployment_id: str) -> ResourceUsage:
        """
        Measure the resource usage of a deployment.

        WHY: Produces a comprehensive snapshot of resource consumption
        across all monitored resource types.

        Args:
            deployment_id: Deployment identifier

        Returns:
            ResourceUsage with current metrics and critical status

        Raises:
            ValueError: If deployment not found
        """

    @abstractmethod
    def is_critical(self, deployment_id: str) -> bool:
        """
        Report whether any resource has reached the critical threshold (>80%).

        Args:
            deployment_id: Deployment identifier

        Returns:
            True if any resource exceeds 80% of limit
        """

    @abstractmethod
    def register_critical_callback(
        self, callback: Callable[[str, ResourceUsage], None]
    ) -> None:
        """
        Subscribe a callback to critical resource usage events.

        Args:
            callback: Function called with (deployment_id, usage) when critical
        """
|
|
254
|
+
|
|
255
|
+
|
|
256
|
+
# Public API of this module: only the stability-monitoring interfaces are exported.
__all__ = ["ILogMonitor", "IMemoryLeakDetector", "IResourceMonitor"]
|