claude-mpm 3.9.9__py3-none-any.whl → 3.9.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. claude_mpm/VERSION +1 -1
  2. claude_mpm/agents/templates/memory_manager.json +155 -0
  3. claude_mpm/cli/__init__.py +15 -2
  4. claude_mpm/cli/commands/__init__.py +3 -0
  5. claude_mpm/cli/commands/mcp.py +280 -134
  6. claude_mpm/cli/commands/run_guarded.py +511 -0
  7. claude_mpm/cli/parser.py +8 -2
  8. claude_mpm/config/experimental_features.py +219 -0
  9. claude_mpm/config/memory_guardian_yaml.py +335 -0
  10. claude_mpm/constants.py +1 -0
  11. claude_mpm/core/memory_aware_runner.py +353 -0
  12. claude_mpm/services/infrastructure/context_preservation.py +537 -0
  13. claude_mpm/services/infrastructure/graceful_degradation.py +616 -0
  14. claude_mpm/services/infrastructure/health_monitor.py +775 -0
  15. claude_mpm/services/infrastructure/memory_dashboard.py +479 -0
  16. claude_mpm/services/infrastructure/memory_guardian.py +189 -15
  17. claude_mpm/services/infrastructure/restart_protection.py +642 -0
  18. claude_mpm/services/infrastructure/state_manager.py +774 -0
  19. claude_mpm/services/mcp_gateway/__init__.py +11 -11
  20. claude_mpm/services/mcp_gateway/core/__init__.py +2 -2
  21. claude_mpm/services/mcp_gateway/core/interfaces.py +10 -9
  22. claude_mpm/services/mcp_gateway/main.py +35 -5
  23. claude_mpm/services/mcp_gateway/manager.py +334 -0
  24. claude_mpm/services/mcp_gateway/registry/service_registry.py +4 -8
  25. claude_mpm/services/mcp_gateway/server/__init__.py +2 -2
  26. claude_mpm/services/mcp_gateway/server/{mcp_server.py → mcp_gateway.py} +60 -59
  27. claude_mpm/services/mcp_gateway/tools/base_adapter.py +1 -2
  28. claude_mpm/services/ticket_manager.py +8 -8
  29. claude_mpm/services/ticket_manager_di.py +5 -5
  30. claude_mpm/storage/__init__.py +9 -0
  31. claude_mpm/storage/state_storage.py +556 -0
  32. {claude_mpm-3.9.9.dist-info → claude_mpm-3.9.11.dist-info}/METADATA +25 -2
  33. {claude_mpm-3.9.9.dist-info → claude_mpm-3.9.11.dist-info}/RECORD +37 -24
  34. claude_mpm/services/mcp_gateway/server/mcp_server_simple.py +0 -444
  35. {claude_mpm-3.9.9.dist-info → claude_mpm-3.9.11.dist-info}/WHEEL +0 -0
  36. {claude_mpm-3.9.9.dist-info → claude_mpm-3.9.11.dist-info}/entry_points.txt +0 -0
  37. {claude_mpm-3.9.9.dist-info → claude_mpm-3.9.11.dist-info}/licenses/LICENSE +0 -0
  38. {claude_mpm-3.9.9.dist-info → claude_mpm-3.9.11.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,353 @@
1
+ """Memory-aware Claude runner with automatic restart capabilities.
2
+
3
+ This runner extends ClaudeRunner to add memory monitoring and automatic
4
+ restart capabilities when memory thresholds are exceeded.
5
+
6
+ Design Principles:
7
+ - Seamless integration with existing ClaudeRunner
8
+ - State preservation across restarts
9
+ - Configurable memory thresholds and policies
10
+ - Minimal overhead when monitoring is disabled
11
+ """
12
+
13
+ import asyncio
14
+ import json
15
+ import os
16
+ import sys
17
+ import time
18
+ from pathlib import Path
19
+ from typing import Optional, Dict, Any, List
20
+ from datetime import datetime
21
+
22
+ from claude_mpm.core.claude_runner import ClaudeRunner
23
+ from claude_mpm.core.logging_config import get_logger
24
+ from claude_mpm.services.infrastructure.memory_guardian import MemoryGuardian
25
+ from claude_mpm.services.infrastructure.state_manager import StateManager
26
+ from claude_mpm.config.memory_guardian_config import (
27
+ MemoryGuardianConfig,
28
+ MemoryThresholds,
29
+ RestartPolicy,
30
+ MonitoringConfig,
31
+ get_default_config
32
+ )
33
+
34
+
35
class MemoryAwareClaudeRunner(ClaudeRunner):
    """Claude runner with memory monitoring and automatic restart capabilities.

    This class extends ClaudeRunner to add memory monitoring through the
    MemoryGuardian service. When memory thresholds are exceeded, it can
    automatically restart Claude Code while preserving conversation state.

    WHY: Large conversation histories in .claude.json can consume 2GB+ of memory,
    causing system instability. This runner monitors memory usage and performs
    controlled restarts with state preservation.

    DESIGN DECISION: We extend ClaudeRunner rather than wrap it to maintain
    full compatibility with existing code while adding new capabilities.
    """

    def __init__(
        self,
        enable_tickets: bool = True,
        log_level: str = "OFF",
        claude_args: Optional[list] = None,
        launch_method: str = "subprocess",  # Default to subprocess for monitoring
        enable_websocket: bool = False,
        websocket_port: int = 8765,
        memory_config: Optional[MemoryGuardianConfig] = None,
        enable_monitoring: bool = True,
        state_dir: Optional[Path] = None
    ):
        """Initialize memory-aware Claude runner.

        Args:
            enable_tickets: Enable ticket extraction
            log_level: Logging level
            claude_args: Additional arguments for Claude
            launch_method: Launch method. "exec" is replaced by "subprocess"
                when monitoring is enabled.
            enable_websocket: Enable WebSocket server
            websocket_port: WebSocket server port
            memory_config: Memory guardian configuration. When omitted,
                get_default_config() is used. Note that its ``enabled``
                attribute is overwritten with ``enable_monitoring``.
            enable_monitoring: Enable memory monitoring
            state_dir: Directory for state preservation. Defaults to
                ``~/.claude-mpm/state``.
        """
        # Force subprocess mode if monitoring is enabled
        if enable_monitoring and launch_method == "exec":
            launch_method = "subprocess"
            get_logger(__name__).info(
                "Switching to subprocess launch method for memory monitoring"
            )

        # Initialize parent
        super().__init__(
            enable_tickets=enable_tickets,
            log_level=log_level,
            claude_args=claude_args,
            launch_method=launch_method,
            enable_websocket=enable_websocket,
            websocket_port=websocket_port
        )

        # Memory monitoring configuration
        self.enable_monitoring = enable_monitoring
        self.memory_config = memory_config or get_default_config()
        self.memory_config.enabled = enable_monitoring

        # State management
        self.state_dir = state_dir or Path.home() / ".claude-mpm" / "state"
        self.state_manager: Optional[StateManager] = None
        self.memory_guardian: Optional[MemoryGuardian] = None

        # Monitoring state
        self.monitoring_task: Optional[asyncio.Task] = None
        self.restart_count = 0
        self.max_restarts = 3  # Default max restarts

        # Temporary context files written by _build_claude_command; they are
        # deleted when the monitoring services are cleaned up.
        self._context_files: List[str] = []

        self.logger.info(f"Memory-aware runner initialized with monitoring: {enable_monitoring}")

    def run_interactive_with_monitoring(
        self,
        initial_context: Optional[str] = None,
        memory_threshold: Optional[float] = None,
        check_interval: Optional[int] = None,
        max_restarts: Optional[int] = None,
        enable_state_preservation: bool = True
    ):
        """Run Claude in interactive mode with memory monitoring.

        This method wraps the standard interactive session with memory monitoring
        and automatic restart capabilities.

        Args:
            initial_context: Optional initial context
            memory_threshold: Override memory threshold in MB. The emergency
                threshold is set to 1.2x this value. Falsy values (None, 0)
                leave the configured thresholds untouched.
            check_interval: Override check interval in seconds. Falsy values
                leave the configured interval untouched.
            max_restarts: Maximum number of automatic restarts
            enable_state_preservation: Enable state preservation across restarts

        Returns:
            The result of run_interactive() when monitoring is disabled;
            otherwise None.

        Raises:
            Exception: Any error escaping the monitored session is logged and
                re-raised. KeyboardInterrupt is logged and swallowed.
        """
        if not self.enable_monitoring:
            # Fall back to standard interactive mode
            self.logger.info("Memory monitoring disabled, using standard interactive mode")
            return self.run_interactive(initial_context)

        # Update configuration with overrides
        if memory_threshold:
            self.memory_config.thresholds.critical = memory_threshold
            self.memory_config.thresholds.emergency = memory_threshold * 1.2

        if check_interval:
            self.memory_config.monitoring.normal_interval = check_interval

        if max_restarts is not None:
            self.max_restarts = max_restarts
            self.memory_config.restart_policy.max_attempts = max_restarts

        # Run async monitoring loop
        try:
            asyncio.run(self._run_with_monitoring_async(
                initial_context,
                enable_state_preservation
            ))
        except KeyboardInterrupt:
            self.logger.info("Interactive session interrupted by user")
        except Exception as e:
            self.logger.error(f"Error in monitored session: {e}")
            raise

    async def _run_with_monitoring_async(
        self,
        initial_context: Optional[str],
        enable_state_preservation: bool
    ):
        """Async implementation of monitored interactive session.

        This method sets up the memory monitoring infrastructure and manages
        the Claude subprocess lifecycle with automatic restarts. Monitoring
        services are always cleaned up on exit, whether the session ends
        normally or with an error.

        Args:
            initial_context: Context passed to the first launch only; it is
                cleared after the first restart.
            enable_state_preservation: Whether to create a StateManager.
        """
        try:
            # Initialize services
            await self._initialize_monitoring_services(enable_state_preservation)

            # Display monitoring information
            self._display_monitoring_info()

            # Start monitoring loop
            while self.restart_count <= self.max_restarts:
                try:
                    # Run Claude subprocess
                    await self._run_monitored_subprocess(initial_context)

                    # If we exit normally, break the loop
                    break

                except MemoryThresholdExceeded as e:
                    self.logger.warning(f"Memory threshold exceeded: {e}")

                    if self.restart_count >= self.max_restarts:
                        self.logger.error("Maximum restart attempts reached")
                        break

                    # Perform controlled restart. An exception raised here is
                    # NOT handled by the sibling ``except Exception`` clause;
                    # it propagates to the caller after cleanup runs.
                    # NOTE(review): the restart goes through
                    # memory_guardian.restart_process(), and the next loop
                    # iteration calls start_process() again - confirm that
                    # MemoryGuardian tolerates this sequence.
                    await self._perform_controlled_restart()
                    self.restart_count += 1

                    # Clear initial context after first run
                    initial_context = None

                except Exception as e:
                    self.logger.error(f"Unexpected error in monitored session: {e}")
                    break

        finally:
            # Cleanup
            await self._cleanup_monitoring_services()

    async def _initialize_monitoring_services(self, enable_state_preservation: bool):
        """Initialize memory guardian and state manager services.

        Args:
            enable_state_preservation: When true, a StateManager rooted at
                ``self.state_dir`` is created, initialized, and attached to
                the memory guardian.
        """
        self.logger.info("Initializing monitoring services")

        # Initialize state manager if enabled
        if enable_state_preservation:
            self.state_manager = StateManager(self.state_dir)
            await self.state_manager.initialize()
            self.logger.info("State preservation enabled")

        # Initialize memory guardian
        self.memory_guardian = MemoryGuardian(self.memory_config)
        if self.state_manager:
            self.memory_guardian.set_state_manager(self.state_manager)
        await self.memory_guardian.initialize()

        self.logger.info("Monitoring services initialized")

    async def _cleanup_monitoring_services(self):
        """Cleanup monitoring services and temporary context files.

        Each step runs even if the previous one raised, so a failing guardian
        shutdown no longer prevents the state manager from shutting down or
        the temporary files from being removed.
        """
        self.logger.info("Cleaning up monitoring services")

        try:
            if self.memory_guardian:
                await self.memory_guardian.shutdown()
        finally:
            try:
                if self.state_manager:
                    await self.state_manager.shutdown()
            finally:
                self._remove_context_files()

    def _remove_context_files(self):
        """Delete the temporary context files created for this session."""
        pending = getattr(self, "_context_files", None) or []
        while pending:
            context_file = pending.pop()
            try:
                os.unlink(context_file)
            except OSError as e:
                # Best effort: a file that is already gone or locked must not
                # break session teardown.
                self.logger.debug(
                    f"Could not remove temporary context file {context_file}: {e}"
                )

    async def _run_monitored_subprocess(self, initial_context: Optional[str]):
        """Run Claude subprocess with memory monitoring.

        This method launches Claude as a subprocess and monitors its memory usage,
        triggering restarts when thresholds are exceeded.

        Args:
            initial_context: Optional context forwarded to the command line.

        Raises:
            RuntimeError: If the memory guardian fails to start the process.
            MemoryThresholdExceeded: If the guardian reports a "critical" or
                "emergency" memory state while the process is running.
        """
        # Set up the process command
        cmd = self._build_claude_command(initial_context)

        # Configure memory guardian with our command
        self.memory_config.process_command = cmd
        self.memory_config.working_directory = os.getcwd()

        # Start the process through memory guardian
        success = await self.memory_guardian.start_process()
        if not success:
            raise RuntimeError("Failed to start Claude process")

        # Start memory monitoring
        self.memory_guardian.start_monitoring()

        # Wait for process to complete or be restarted
        while self.memory_guardian.process_state.value == "running":
            await asyncio.sleep(1)

            # Check for memory threshold exceeded
            if self.memory_guardian.memory_state.value in ["critical", "emergency"]:
                raise MemoryThresholdExceeded(
                    f"Memory state: {self.memory_guardian.memory_state.value}, "
                    f"Current: {self.memory_guardian.memory_stats.current_mb:.2f}MB"
                )

    async def _perform_controlled_restart(self):
        """Perform a controlled restart with state preservation.

        When a state manager is present, state is captured and persisted
        before the restart and restored afterwards.

        Raises:
            RuntimeError: If the memory guardian fails to restart the process.
        """
        self.logger.info(f"Performing controlled restart (attempt {self.restart_count + 1}/{self.max_restarts})")

        # Capture state before restart
        if self.state_manager:
            state = await self.state_manager.capture_state(
                restart_reason=f"Memory threshold exceeded (restart {self.restart_count + 1})"
            )
            if state:
                await self.state_manager.persist_state(state)
                self.logger.info("State captured and persisted")

        # Restart through memory guardian
        success = await self.memory_guardian.restart_process(
            reason=f"Memory threshold exceeded (automatic restart {self.restart_count + 1})"
        )

        if not success:
            raise RuntimeError("Failed to restart Claude process")

        # Restore state after restart
        if self.state_manager:
            restored = await self.state_manager.restore_state()
            if restored:
                self.logger.info("State restored successfully")

    def _build_claude_command(self, initial_context: Optional[str]) -> List[str]:
        """Build the Claude command line.

        Args:
            initial_context: Optional context text. When non-empty it is
                written to a UTF-8 temporary file whose path is passed via
                ``--context-file``.

        Returns:
            List of command arguments
        """
        # Find Claude CLI executable
        claude_cli = "claude"  # Assume it's in PATH

        # Build command
        cmd = [claude_cli]

        # Add context if provided
        if initial_context:
            # Save context to temp file. The encoding is explicit so that
            # non-ASCII context does not depend on the platform locale.
            import tempfile
            with tempfile.NamedTemporaryFile(
                mode='w', suffix='.txt', delete=False, encoding='utf-8'
            ) as f:
                f.write(initial_context)
                context_file = f.name

            # Track the file so it is deleted during cleanup (delete=False
            # would otherwise leave it behind indefinitely).
            tracked = getattr(self, "_context_files", None)
            if tracked is None:
                tracked = []
                self._context_files = tracked
            tracked.append(context_file)

            # NOTE(review): confirm that the Claude CLI accepts --context-file.
            cmd.extend(['--context-file', context_file])

        # Add any additional Claude arguments
        if self.claude_args:
            cmd.extend(self.claude_args)

        return cmd

    def _display_monitoring_info(self):
        """Display memory monitoring configuration to user."""
        print("\n" + "="*60)
        print("🛡️ Memory Guardian Active")
        print("="*60)
        print("Memory Thresholds:")
        print(f" • Warning: {self.memory_config.thresholds.warning:,.0f} MB")
        print(f" • Critical: {self.memory_config.thresholds.critical:,.0f} MB")
        print(f" • Emergency: {self.memory_config.thresholds.emergency:,.0f} MB")
        print("Monitoring:")
        print(f" • Check Interval: {self.memory_config.monitoring.normal_interval} seconds")
        print(f" • Max Restarts: {self.max_restarts}")
        print(f" • State Preservation: {'Enabled' if self.state_manager else 'Disabled'}")
        print("="*60 + "\n")

    def get_monitoring_status(self) -> Dict[str, Any]:
        """Get current monitoring status.

        Returns:
            Dictionary containing monitoring status. Before the memory
            guardian has been initialized this is a placeholder with
            ``enabled`` set to False.
        """
        if not self.memory_guardian:
            return {
                'enabled': False,
                'message': 'Memory monitoring not initialized'
            }

        return self.memory_guardian.get_status()
349
+
350
+
351
class MemoryThresholdExceeded(Exception):
    """Signal that the monitored process reached a restart-worthy memory state.

    Raised while the Claude subprocess is running and the memory guardian
    reports a "critical" or "emergency" memory state; the monitoring loop
    catches it to trigger a controlled restart.
    """