claude-mpm: claude_mpm-3.3.2-py3-none-any.whl → claude_mpm-3.4.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. claude_mpm/cli/commands/memory.py +192 -14
  2. claude_mpm/cli/parser.py +13 -1
  3. claude_mpm/constants.py +1 -0
  4. claude_mpm/core/claude_runner.py +61 -0
  5. claude_mpm/core/config.py +161 -1
  6. claude_mpm/core/simple_runner.py +61 -0
  7. claude_mpm/hooks/builtin/mpm_command_hook.py +5 -5
  8. claude_mpm/hooks/claude_hooks/hook_handler.py +211 -4
  9. claude_mpm/hooks/claude_hooks/hook_wrapper.sh +10 -3
  10. claude_mpm/hooks/memory_integration_hook.py +51 -5
  11. claude_mpm/scripts/socketio_daemon.py +49 -9
  12. claude_mpm/scripts/socketio_server_manager.py +370 -45
  13. claude_mpm/services/__init__.py +41 -5
  14. claude_mpm/services/agent_memory_manager.py +541 -51
  15. claude_mpm/services/exceptions.py +677 -0
  16. claude_mpm/services/health_monitor.py +892 -0
  17. claude_mpm/services/memory_builder.py +341 -7
  18. claude_mpm/services/memory_optimizer.py +6 -2
  19. claude_mpm/services/project_analyzer.py +771 -0
  20. claude_mpm/services/recovery_manager.py +670 -0
  21. claude_mpm/services/socketio_server.py +653 -36
  22. claude_mpm/services/standalone_socketio_server.py +703 -34
  23. claude_mpm/services/version_control/git_operations.py +26 -0
  24. {claude_mpm-3.3.2.dist-info → claude_mpm-3.4.2.dist-info}/METADATA +34 -10
  25. {claude_mpm-3.3.2.dist-info → claude_mpm-3.4.2.dist-info}/RECORD +30 -44
  26. claude_mpm/agents/agent-template.yaml +0 -83
  27. claude_mpm/agents/test_fix_deployment/.claude-pm/config/project.json +0 -6
  28. claude_mpm/cli/README.md +0 -109
  29. claude_mpm/cli_module/refactoring_guide.md +0 -253
  30. claude_mpm/core/agent_registry.py.bak +0 -312
  31. claude_mpm/core/base_service.py.bak +0 -406
  32. claude_mpm/hooks/README.md +0 -97
  33. claude_mpm/orchestration/SUBPROCESS_DESIGN.md +0 -66
  34. claude_mpm/schemas/README_SECURITY.md +0 -92
  35. claude_mpm/schemas/agent_schema.json +0 -395
  36. claude_mpm/schemas/agent_schema_documentation.md +0 -181
  37. claude_mpm/schemas/agent_schema_security_notes.md +0 -165
  38. claude_mpm/schemas/examples/standard_workflow.json +0 -505
  39. claude_mpm/schemas/ticket_workflow_documentation.md +0 -482
  40. claude_mpm/schemas/ticket_workflow_schema.json +0 -590
  41. claude_mpm/services/framework_claude_md_generator/README.md +0 -92
  42. claude_mpm/services/parent_directory_manager/README.md +0 -83
  43. claude_mpm/services/version_control/VERSION +0 -1
  44. claude_mpm/{web → dashboard}/open_dashboard.py +0 -0
  45. {claude_mpm-3.3.2.dist-info → claude_mpm-3.4.2.dist-info}/WHEEL +0 -0
  46. {claude_mpm-3.3.2.dist-info → claude_mpm-3.4.2.dist-info}/entry_points.txt +0 -0
  47. {claude_mpm-3.3.2.dist-info → claude_mpm-3.4.2.dist-info}/licenses/LICENSE +0 -0
  48. {claude_mpm-3.3.2.dist-info → claude_mpm-3.4.2.dist-info}/top_level.txt +0 -0
claude_mpm/cli/commands/memory.py CHANGED
@@ -10,6 +10,7 @@ with other command modules like agents.py.
10
10
  """
11
11
 
12
12
  import json
13
+ import os
13
14
  from datetime import datetime
14
15
  from pathlib import Path
15
16
 
@@ -38,7 +39,11 @@ def manage_memory(args):
38
39
  try:
39
40
  # Load configuration for memory manager
40
41
  config = Config()
41
- memory_manager = AgentMemoryManager(config)
42
+ # Use CLAUDE_MPM_USER_PWD if available (when called via shell script),
43
+ # otherwise use current working directory
44
+ user_pwd = os.environ.get('CLAUDE_MPM_USER_PWD', os.getcwd())
45
+ current_dir = Path(user_pwd)
46
+ memory_manager = AgentMemoryManager(config, current_dir)
42
47
 
43
48
  if not args.memory_command:
44
49
  # No subcommand - show status
@@ -49,7 +54,7 @@ def manage_memory(args):
49
54
  _show_status(memory_manager)
50
55
 
51
56
  elif args.memory_command == "view":
52
- _view_memory(args, memory_manager)
57
+ _show_memories(args, memory_manager)
53
58
 
54
59
  elif args.memory_command == "add":
55
60
  _add_learning(args, memory_manager)
@@ -72,6 +77,15 @@ def manage_memory(args):
72
77
  elif args.memory_command == "show":
73
78
  _show_memories(args, memory_manager)
74
79
 
80
+ elif args.memory_command == "init":
81
+ _init_memory(args, memory_manager)
82
+
83
+ else:
84
+ logger.error(f"Unknown memory command: {args.memory_command}")
85
+ print(f"Unknown memory command: {args.memory_command}")
86
+ print("Available commands: init, status, view, add, clean, optimize, build, cross-ref, route, show")
87
+ return 1
88
+
75
89
  except Exception as e:
76
90
  logger.error(f"Error managing memory: {e}")
77
91
  print(f"❌ Error: {e}")
@@ -80,6 +94,71 @@ def manage_memory(args):
80
94
  return 0
81
95
 
82
96
 
97
+ def _init_memory(args, memory_manager):
98
+ """
99
+ Initialize project-specific memories via agent delegation.
100
+
101
+ WHY: When starting with a new project, agents need project-specific knowledge
102
+ beyond what automatic analysis provides. This command triggers an agent task
103
+ to comprehensively scan the project and create custom memories.
104
+
105
+ Args:
106
+ args: Command line arguments (unused but kept for consistency)
107
+ memory_manager: AgentMemoryManager instance
108
+ """
109
+ logger = get_logger("cli")
110
+
111
+ print("🚀 Initializing project-specific memories...")
112
+ print("=" * 80)
113
+ print()
114
+ print("This will analyze the project to:")
115
+ print(" 1. Scan project structure and documentation")
116
+ print(" 2. Analyze source code for patterns and conventions")
117
+ print(" 3. Create targeted memories for each agent type")
118
+ print(" 4. Add insights using 'claude-mpm memory add' commands")
119
+ print()
120
+ print("The analysis will cover:")
121
+ print(" • Project architecture and design patterns")
122
+ print(" • Coding conventions and standards")
123
+ print(" • Key modules and integration points")
124
+ print(" • Testing patterns and quality standards")
125
+ print(" • Performance considerations")
126
+ print(" • Domain-specific terminology")
127
+ print()
128
+ print("=" * 80)
129
+ print()
130
+ print("[Agent Task: Initialize Project-Specific Memories]")
131
+ print()
132
+ print("Please analyze this project and create custom memories for all agents.")
133
+ print()
134
+ print("Instructions:")
135
+ print("1. Scan the project structure, documentation, and source code")
136
+ print("2. Identify key patterns, conventions, and project-specific knowledge")
137
+ print("3. Create targeted memories for each agent type")
138
+ print("4. Use 'claude-mpm memory add <agent> <type> \"<content>\"' commands")
139
+ print()
140
+ print("Focus areas:")
141
+ print(" • Architectural patterns and design decisions")
142
+ print(" • Coding conventions from actual source code")
143
+ print(" • Key modules, APIs, and integration points")
144
+ print(" • Testing patterns and quality standards")
145
+ print(" • Performance considerations specific to this project")
146
+ print(" • Common pitfalls based on the codebase")
147
+ print(" • Domain-specific terminology and concepts")
148
+ print()
149
+ print("Example commands to use:")
150
+ print(' claude-mpm memory add engineer pattern "Use dependency injection with @inject"')
151
+ print(' claude-mpm memory add qa pattern "Test files follow test_<module>_<feature>.py"')
152
+ print(' claude-mpm memory add research context "Project uses microservices architecture"')
153
+ print()
154
+ print("Begin by examining the project structure and key files.")
155
+ print()
156
+ print("=" * 80)
157
+ print()
158
+ print("📝 Note: Copy the task above to execute the memory initialization process.")
159
+ print(" Use 'claude-mpm memory add' commands to add discovered insights.")
160
+
161
+
83
162
  def _show_status(memory_manager):
84
163
  """
85
164
  Show comprehensive memory system status.
@@ -113,7 +192,7 @@ def _show_status(memory_manager):
113
192
  print(f"🧠 Memory System Health: {health_emoji} {system_health}")
114
193
  print(f"📁 Memory Directory: {status.get('memory_directory', 'Unknown')}")
115
194
  print(f"🔧 System Enabled: {'Yes' if status.get('system_enabled', True) else 'No'}")
116
- print(f"📚 Auto Learning: {'Yes' if status.get('auto_learning', False) else 'No'}")
195
+ print(f"📚 Auto Learning: {'Yes' if status.get('auto_learning', True) else 'No'}")
117
196
  print(f"📊 Total Agents: {status.get('total_agents', 0)}")
118
197
  print(f"💾 Total Size: {status.get('total_size_kb', 0):.1f} KB")
119
198
  print()
@@ -143,7 +222,7 @@ def _show_status(memory_manager):
143
222
  sections = agent_info.get("sections", 0)
144
223
  items = agent_info.get("items", 0)
145
224
  last_modified = agent_info.get("last_modified", "Unknown")
146
- auto_learning = agent_info.get("auto_learning", False)
225
+ auto_learning = agent_info.get("auto_learning", True)
147
226
 
148
227
  # Format last modified time
149
228
  try:
@@ -474,24 +553,48 @@ def _show_memories(args, memory_manager):
474
553
  WHY: Users need to see agent memories in a readable format to understand
475
554
  what agents have learned and identify common patterns across agents.
476
555
 
556
+ DESIGN DECISION: Added --raw flag to output structured JSON data for
557
+ programmatic processing, enabling external tools and scripts to access
558
+ all agent memories in a structured format.
559
+
477
560
  Args:
478
- args: Command arguments with optional agent_id and format
561
+ args: Command arguments with optional agent_id, format, and raw flag
479
562
  memory_manager: AgentMemoryManager instance
480
563
  """
481
- print("🧠 Agent Memories Display")
482
- print("-" * 80)
483
-
484
564
  agent_id = getattr(args, 'agent_id', None)
485
- format_type = getattr(args, 'format', 'summary')
565
+ format_type = getattr(args, 'format', 'detailed')
566
+ raw_output = getattr(args, 'raw', False)
486
567
 
487
568
  try:
488
- if agent_id:
489
- _show_single_agent_memory(agent_id, format_type, memory_manager)
569
+ if raw_output:
570
+ # Output structured JSON data
571
+ if agent_id:
572
+ # Get single agent memory in raw format
573
+ _output_single_agent_raw(agent_id, memory_manager)
574
+ else:
575
+ # Get all agent memories in raw format
576
+ _output_all_memories_raw(memory_manager)
490
577
  else:
491
- _show_all_agent_memories(format_type, memory_manager)
578
+ # Normal user-friendly display
579
+ print("🧠 Agent Memories Display")
580
+ print("-" * 80)
492
581
 
582
+ if agent_id:
583
+ _show_single_agent_memory(agent_id, format_type, memory_manager)
584
+ else:
585
+ _show_all_agent_memories(format_type, memory_manager)
586
+
493
587
  except Exception as e:
494
- print(f"❌ Error showing memories: {e}")
588
+ if raw_output:
589
+ # Output error in JSON format for consistency
590
+ error_output = {
591
+ "success": False,
592
+ "error": str(e),
593
+ "timestamp": datetime.now().isoformat()
594
+ }
595
+ print(json.dumps(error_output, indent=2))
596
+ else:
597
+ print(f"❌ Error showing memories: {e}")
495
598
 
496
599
 
497
600
  def _show_single_agent_memory(agent_id, format_type, memory_manager):
@@ -779,4 +882,79 @@ def _display_bulk_optimization_results(result):
779
882
  print(f" {agent_id}: {reduction}% reduction{status}")
780
883
  else:
781
884
  error = agent_result.get("error", "Unknown error")
782
- print(f" {agent_id}: ❌ {error}")
885
+ print(f" {agent_id}: ❌ {error}")
886
+
887
+
888
+ def _output_all_memories_raw(memory_manager):
889
+ """
890
+ Output all agent memories in raw JSON format.
891
+
892
+ WHY: Provides programmatic access to all agent memories for external tools,
893
+ scripts, or APIs that need to process or analyze the complete memory state.
894
+
895
+ Args:
896
+ memory_manager: AgentMemoryManager instance
897
+ """
898
+ try:
899
+ raw_data = memory_manager.get_all_memories_raw()
900
+ print(json.dumps(raw_data, indent=2, ensure_ascii=False))
901
+ except Exception as e:
902
+ error_output = {
903
+ "success": False,
904
+ "error": f"Failed to retrieve all memories: {str(e)}",
905
+ "timestamp": datetime.now().isoformat()
906
+ }
907
+ print(json.dumps(error_output, indent=2))
908
+
909
+
910
+ def _output_single_agent_raw(agent_id, memory_manager):
911
+ """
912
+ Output single agent memory in raw JSON format.
913
+
914
+ WHY: Provides programmatic access to a specific agent's memory for
915
+ targeted analysis or processing by external tools.
916
+
917
+ Args:
918
+ agent_id: ID of the agent to retrieve memory for
919
+ memory_manager: AgentMemoryManager instance
920
+ """
921
+ try:
922
+ # Get all memories and extract the specific agent
923
+ all_memories = memory_manager.get_all_memories_raw()
924
+
925
+ if not all_memories.get("success", False):
926
+ error_output = {
927
+ "success": False,
928
+ "error": all_memories.get("error", "Failed to retrieve memories"),
929
+ "timestamp": datetime.now().isoformat()
930
+ }
931
+ print(json.dumps(error_output, indent=2))
932
+ return
933
+
934
+ agents = all_memories.get("agents", {})
935
+ if agent_id not in agents:
936
+ error_output = {
937
+ "success": False,
938
+ "error": f"No memory found for agent: {agent_id}",
939
+ "available_agents": list(agents.keys()),
940
+ "timestamp": datetime.now().isoformat()
941
+ }
942
+ print(json.dumps(error_output, indent=2))
943
+ return
944
+
945
+ # Return single agent data with metadata
946
+ single_agent_output = {
947
+ "success": True,
948
+ "timestamp": all_memories["timestamp"],
949
+ "agent": agents[agent_id]
950
+ }
951
+
952
+ print(json.dumps(single_agent_output, indent=2, ensure_ascii=False))
953
+
954
+ except Exception as e:
955
+ error_output = {
956
+ "success": False,
957
+ "error": f"Failed to retrieve memory for agent {agent_id}: {str(e)}",
958
+ "timestamp": datetime.now().isoformat()
959
+ }
960
+ print(json.dumps(error_output, indent=2))
claude_mpm/cli/parser.py CHANGED
@@ -366,6 +366,12 @@ def create_parser(prog_name: str = "claude-mpm", version: str = "0.0.0") -> argp
366
366
  metavar="SUBCOMMAND"
367
367
  )
368
368
 
369
+ # Init command
370
+ init_parser = memory_subparsers.add_parser(
371
+ MemoryCommands.INIT.value,
372
+ help="Initialize project-specific memories via PM agent"
373
+ )
374
+
369
375
  # Status command
370
376
  status_parser = memory_subparsers.add_parser(
371
377
  MemoryCommands.STATUS.value,
@@ -379,7 +385,8 @@ def create_parser(prog_name: str = "claude-mpm", version: str = "0.0.0") -> argp
379
385
  )
380
386
  view_parser.add_argument(
381
387
  "agent_id",
382
- help="Agent ID to view memory for"
388
+ nargs="?",
389
+ help="Agent ID to view memory for (optional - shows all agents if not provided)"
383
390
  )
384
391
 
385
392
  # Add command
@@ -468,6 +475,11 @@ def create_parser(prog_name: str = "claude-mpm", version: str = "0.0.0") -> argp
468
475
  default="summary",
469
476
  help="Display format: summary (default), detailed, or full"
470
477
  )
478
+ show_parser.add_argument(
479
+ "--raw",
480
+ action="store_true",
481
+ help="Output raw memory content in JSON format for programmatic processing"
482
+ )
471
483
 
472
484
  return parser
473
485
 
claude_mpm/constants.py CHANGED
@@ -58,6 +58,7 @@ class AgentCommands(str, Enum):
58
58
 
59
59
  class MemoryCommands(str, Enum):
60
60
  """Memory subcommand constants."""
61
+ INIT = "init"
61
62
  STATUS = "status"
62
63
  VIEW = "view"
63
64
  ADD = "add"
claude_mpm/core/claude_runner.py CHANGED
@@ -13,10 +13,14 @@ import uuid
13
13
  try:
14
14
  from claude_mpm.services.agent_deployment import AgentDeploymentService
15
15
  from claude_mpm.services.ticket_manager import TicketManager
16
+ from claude_mpm.services.hook_service import HookService
17
+ from claude_mpm.core.config import Config
16
18
  from claude_mpm.core.logger import get_logger, get_project_logger, ProjectLogger
17
19
  except ImportError:
18
20
  from claude_mpm.services.agent_deployment import AgentDeploymentService
19
21
  from claude_mpm.services.ticket_manager import TicketManager
22
+ from claude_mpm.services.hook_service import HookService
23
+ from claude_mpm.core.config import Config
20
24
  from claude_mpm.core.logger import get_logger, get_project_logger, ProjectLogger
21
25
 
22
26
 
@@ -76,6 +80,11 @@ class ClaudeRunner:
76
80
  self.ticket_manager = None
77
81
  self.enable_tickets = False
78
82
 
83
+ # Initialize hook service and register memory hooks
84
+ self.config = Config()
85
+ self.hook_service = HookService(self.config)
86
+ self._register_memory_hooks()
87
+
79
88
  # Load system instructions
80
89
  self.system_instructions = self._load_system_instructions()
81
90
 
@@ -741,6 +750,58 @@ class ClaudeRunner:
741
750
  except Exception as e:
742
751
  self.logger.debug(f"Failed to log session event: {e}")
743
752
 
753
+ def _register_memory_hooks(self):
754
+ """Register memory integration hooks with the hook service.
755
+
756
+ WHY: This activates the memory system by registering hooks that automatically
757
+ inject agent memory before delegation and extract learnings after delegation.
758
+ This is the critical connection point between the memory system and the CLI.
759
+
760
+ DESIGN DECISION: We register hooks here instead of in __init__ to ensure
761
+ all services are initialized first. Hooks are only registered if the memory
762
+ system is enabled in configuration.
763
+ """
764
+ try:
765
+ # Only register if memory system is enabled
766
+ if not self.config.get('memory.enabled', True):
767
+ self.logger.debug("Memory system disabled - skipping hook registration")
768
+ return
769
+
770
+ # Import hook classes (lazy import to avoid circular dependencies)
771
+ from claude_mpm.hooks.memory_integration_hook import (
772
+ MemoryPreDelegationHook,
773
+ MemoryPostDelegationHook
774
+ )
775
+
776
+ # Register pre-delegation hook for memory injection
777
+ pre_hook = MemoryPreDelegationHook(self.config)
778
+ success = self.hook_service.register_hook(pre_hook)
779
+ if success:
780
+ self.logger.info(f"✅ Registered memory pre-delegation hook (priority: {pre_hook.priority})")
781
+ else:
782
+ self.logger.warning("❌ Failed to register memory pre-delegation hook")
783
+
784
+ # Register post-delegation hook if auto-learning is enabled
785
+ if self.config.get('memory.auto_learning', True): # Default to True now
786
+ post_hook = MemoryPostDelegationHook(self.config)
787
+ success = self.hook_service.register_hook(post_hook)
788
+ if success:
789
+ self.logger.info(f"✅ Registered memory post-delegation hook (priority: {post_hook.priority})")
790
+ else:
791
+ self.logger.warning("❌ Failed to register memory post-delegation hook")
792
+ else:
793
+ self.logger.info("ℹ️ Auto-learning disabled - skipping post-delegation hook")
794
+
795
+ # Log summary of registered hooks
796
+ hooks = self.hook_service.list_hooks()
797
+ pre_count = len(hooks.get('pre_delegation', []))
798
+ post_count = len(hooks.get('post_delegation', []))
799
+ self.logger.info(f"📋 Hook Service initialized: {pre_count} pre-delegation, {post_count} post-delegation hooks")
800
+
801
+ except Exception as e:
802
+ self.logger.error(f"❌ Failed to register memory hooks: {e}")
803
+ # Don't fail the entire initialization - memory system is optional
804
+
744
805
  def _launch_subprocess_interactive(self, cmd: list, env: dict):
745
806
  """Launch Claude as a subprocess with PTY for interactive mode."""
746
807
  import pty
claude_mpm/core/config.py CHANGED
@@ -166,9 +166,38 @@ class Config:
166
166
  # Health monitoring
167
167
  "enable_health_monitoring": True,
168
168
  "health_check_interval": 30,
169
+ "health_history_size": 100,
170
+ "health_aggregation_window": 300,
169
171
  # Metrics
170
172
  "enable_metrics": True,
171
173
  "metrics_interval": 60,
174
+ # Advanced health monitoring thresholds
175
+ "health_thresholds": {
176
+ "cpu_percent": 80.0,
177
+ "memory_mb": 500,
178
+ "file_descriptors": 1000,
179
+ "max_clients": 1000,
180
+ "max_error_rate": 0.1,
181
+ "network_timeout": 2.0
182
+ },
183
+ # Automatic recovery configuration
184
+ "recovery": {
185
+ "enabled": True,
186
+ "check_interval": 60,
187
+ "max_recovery_attempts": 5,
188
+ "recovery_timeout": 30,
189
+ "circuit_breaker": {
190
+ "failure_threshold": 5,
191
+ "timeout_seconds": 300,
192
+ "success_threshold": 3
193
+ },
194
+ "strategy": {
195
+ "warning_threshold": 2,
196
+ "critical_threshold": 1,
197
+ "failure_window_seconds": 300,
198
+ "min_recovery_interval": 60
199
+ }
200
+ },
172
201
  # Service management
173
202
  "graceful_shutdown_timeout": 30,
174
203
  "startup_timeout": 60,
@@ -231,7 +260,7 @@ class Config:
231
260
  # Agent Memory System configuration
232
261
  "memory": {
233
262
  "enabled": True, # Master switch for memory system
234
- "auto_learning": False, # Automatic learning extraction
263
+ "auto_learning": True, # Automatic learning extraction (changed default to True)
235
264
  "limits": {
236
265
  "default_size_kb": 8, # Default file size limit
237
266
  "max_sections": 10, # Maximum sections per file
@@ -247,6 +276,47 @@ class Config:
247
276
  "auto_learning": True # Enable auto learning
248
277
  }
249
278
  }
279
+ },
280
+ # Socket.IO server health and recovery configuration
281
+ "socketio_server": {
282
+ "host": "localhost",
283
+ "port": 8765,
284
+ "enable_health_monitoring": True,
285
+ "enable_recovery": True,
286
+ "health_monitoring": {
287
+ "check_interval": 30,
288
+ "history_size": 100,
289
+ "aggregation_window": 300,
290
+ "thresholds": {
291
+ "cpu_percent": 80.0,
292
+ "memory_mb": 500,
293
+ "file_descriptors": 1000,
294
+ "max_clients": 1000,
295
+ "max_error_rate": 0.1
296
+ }
297
+ },
298
+ "recovery": {
299
+ "enabled": True,
300
+ "max_attempts": 5,
301
+ "timeout": 30,
302
+ "circuit_breaker": {
303
+ "failure_threshold": 5,
304
+ "timeout_seconds": 300,
305
+ "success_threshold": 3
306
+ },
307
+ "strategy": {
308
+ "warning_threshold": 2,
309
+ "critical_threshold": 1,
310
+ "failure_window_seconds": 300,
311
+ "min_recovery_interval": 60
312
+ },
313
+ "actions": {
314
+ "log_warning": True,
315
+ "clear_connections": True,
316
+ "restart_service": True,
317
+ "emergency_stop": True
318
+ }
319
+ }
250
320
  }
251
321
  }
252
322
 
@@ -254,6 +324,9 @@ class Config:
254
324
  for key, default_value in defaults.items():
255
325
  if key not in self._config:
256
326
  self._config[key] = default_value
327
+
328
+ # Validate health and recovery configuration
329
+ self._validate_health_recovery_config()
257
330
 
258
331
  def get(self, key: str, default: Any = None) -> Any:
259
332
  """Get configuration value."""
@@ -349,6 +422,93 @@ class Config:
349
422
  """Check if configuration contains a key."""
350
423
  return self.get(key) is not None
351
424
 
425
+ def _validate_health_recovery_config(self) -> None:
426
+ """Validate health monitoring and recovery configuration."""
427
+ try:
428
+ # Validate health thresholds
429
+ thresholds = self.get('health_thresholds', {})
430
+ if thresholds.get('cpu_percent', 0) < 0 or thresholds.get('cpu_percent', 0) > 100:
431
+ logger.warning("CPU threshold should be between 0-100, using default 80")
432
+ self.set('health_thresholds.cpu_percent', 80.0)
433
+
434
+ if thresholds.get('memory_mb', 0) <= 0:
435
+ logger.warning("Memory threshold should be positive, using default 500MB")
436
+ self.set('health_thresholds.memory_mb', 500)
437
+
438
+ if thresholds.get('max_error_rate', 0) < 0 or thresholds.get('max_error_rate', 0) > 1:
439
+ logger.warning("Error rate threshold should be between 0-1, using default 0.1")
440
+ self.set('health_thresholds.max_error_rate', 0.1)
441
+
442
+ # Validate recovery configuration
443
+ recovery_config = self.get('recovery', {})
444
+ if recovery_config.get('max_recovery_attempts', 0) <= 0:
445
+ logger.warning("Max recovery attempts should be positive, using default 5")
446
+ self.set('recovery.max_recovery_attempts', 5)
447
+
448
+ # Validate circuit breaker configuration
449
+ cb_config = recovery_config.get('circuit_breaker', {})
450
+ if cb_config.get('failure_threshold', 0) <= 0:
451
+ logger.warning("Circuit breaker failure threshold should be positive, using default 5")
452
+ self.set('recovery.circuit_breaker.failure_threshold', 5)
453
+
454
+ if cb_config.get('timeout_seconds', 0) <= 0:
455
+ logger.warning("Circuit breaker timeout should be positive, using default 300")
456
+ self.set('recovery.circuit_breaker.timeout_seconds', 300)
457
+
458
+ except Exception as e:
459
+ logger.error(f"Error validating health/recovery configuration: {e}")
460
+
461
+ def get_health_monitoring_config(self) -> Dict[str, Any]:
462
+ """Get health monitoring configuration with defaults."""
463
+ base_config = {
464
+ 'enabled': self.get('enable_health_monitoring', True),
465
+ 'check_interval': self.get('health_check_interval', 30),
466
+ 'history_size': self.get('health_history_size', 100),
467
+ 'aggregation_window': self.get('health_aggregation_window', 300),
468
+ 'thresholds': self.get('health_thresholds', {
469
+ 'cpu_percent': 80.0,
470
+ 'memory_mb': 500,
471
+ 'file_descriptors': 1000,
472
+ 'max_clients': 1000,
473
+ 'max_error_rate': 0.1,
474
+ 'network_timeout': 2.0
475
+ })
476
+ }
477
+
478
+ # Merge with socketio-specific config if available
479
+ socketio_config = self.get('socketio_server.health_monitoring', {})
480
+ if socketio_config:
481
+ base_config.update(socketio_config)
482
+
483
+ return base_config
484
+
485
+ def get_recovery_config(self) -> Dict[str, Any]:
486
+ """Get recovery configuration with defaults."""
487
+ base_config = self.get('recovery', {
488
+ 'enabled': True,
489
+ 'check_interval': 60,
490
+ 'max_recovery_attempts': 5,
491
+ 'recovery_timeout': 30,
492
+ 'circuit_breaker': {
493
+ 'failure_threshold': 5,
494
+ 'timeout_seconds': 300,
495
+ 'success_threshold': 3
496
+ },
497
+ 'strategy': {
498
+ 'warning_threshold': 2,
499
+ 'critical_threshold': 1,
500
+ 'failure_window_seconds': 300,
501
+ 'min_recovery_interval': 60
502
+ }
503
+ })
504
+
505
+ # Merge with socketio-specific config if available
506
+ socketio_config = self.get('socketio_server.recovery', {})
507
+ if socketio_config:
508
+ base_config = self._config_mgr.merge_configs(base_config, socketio_config)
509
+
510
+ return base_config
511
+
352
512
  def __repr__(self) -> str:
353
513
  """String representation of configuration."""
354
514
  return f"<Config({len(self._config)} keys)>"