claude-mpm 3.9.9__py3-none-any.whl → 3.9.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. claude_mpm/VERSION +1 -1
  2. claude_mpm/agents/templates/memory_manager.json +155 -0
  3. claude_mpm/cli/__init__.py +15 -2
  4. claude_mpm/cli/commands/__init__.py +3 -0
  5. claude_mpm/cli/commands/mcp.py +280 -134
  6. claude_mpm/cli/commands/run_guarded.py +511 -0
  7. claude_mpm/cli/parser.py +8 -2
  8. claude_mpm/config/experimental_features.py +219 -0
  9. claude_mpm/config/memory_guardian_yaml.py +335 -0
  10. claude_mpm/constants.py +1 -0
  11. claude_mpm/core/memory_aware_runner.py +353 -0
  12. claude_mpm/services/infrastructure/context_preservation.py +537 -0
  13. claude_mpm/services/infrastructure/graceful_degradation.py +616 -0
  14. claude_mpm/services/infrastructure/health_monitor.py +775 -0
  15. claude_mpm/services/infrastructure/memory_dashboard.py +479 -0
  16. claude_mpm/services/infrastructure/memory_guardian.py +189 -15
  17. claude_mpm/services/infrastructure/restart_protection.py +642 -0
  18. claude_mpm/services/infrastructure/state_manager.py +774 -0
  19. claude_mpm/services/mcp_gateway/__init__.py +11 -11
  20. claude_mpm/services/mcp_gateway/core/__init__.py +2 -2
  21. claude_mpm/services/mcp_gateway/core/interfaces.py +10 -9
  22. claude_mpm/services/mcp_gateway/main.py +35 -5
  23. claude_mpm/services/mcp_gateway/manager.py +334 -0
  24. claude_mpm/services/mcp_gateway/registry/service_registry.py +4 -8
  25. claude_mpm/services/mcp_gateway/server/__init__.py +2 -2
  26. claude_mpm/services/mcp_gateway/server/{mcp_server.py → mcp_gateway.py} +60 -59
  27. claude_mpm/services/mcp_gateway/tools/base_adapter.py +1 -2
  28. claude_mpm/services/ticket_manager.py +8 -8
  29. claude_mpm/services/ticket_manager_di.py +5 -5
  30. claude_mpm/storage/__init__.py +9 -0
  31. claude_mpm/storage/state_storage.py +556 -0
  32. {claude_mpm-3.9.9.dist-info → claude_mpm-3.9.11.dist-info}/METADATA +25 -2
  33. {claude_mpm-3.9.9.dist-info → claude_mpm-3.9.11.dist-info}/RECORD +37 -24
  34. claude_mpm/services/mcp_gateway/server/mcp_server_simple.py +0 -444
  35. {claude_mpm-3.9.9.dist-info → claude_mpm-3.9.11.dist-info}/WHEEL +0 -0
  36. {claude_mpm-3.9.9.dist-info → claude_mpm-3.9.11.dist-info}/entry_points.txt +0 -0
  37. {claude_mpm-3.9.9.dist-info → claude_mpm-3.9.11.dist-info}/licenses/LICENSE +0 -0
  38. {claude_mpm-3.9.9.dist-info → claude_mpm-3.9.11.dist-info}/top_level.txt +0 -0
claude_mpm/services/infrastructure/memory_guardian.py
@@ -37,6 +37,10 @@ from claude_mpm.utils.platform_memory import (
     check_memory_pressure,
     MemoryInfo
 )
+from claude_mpm.services.infrastructure.state_manager import StateManager
+from claude_mpm.services.infrastructure.restart_protection import RestartProtection
+from claude_mpm.services.infrastructure.health_monitor import HealthMonitor
+from claude_mpm.services.infrastructure.graceful_degradation import GracefulDegradation


 class MemoryState(Enum):
@@ -156,6 +160,14 @@ class MemoryGuardian(BaseService):
         self.state_save_hooks: List[Callable[[Dict[str, Any]], None]] = []
         self.state_restore_hooks: List[Callable[[Dict[str, Any]], None]] = []

+        # State manager integration
+        self.state_manager: Optional[StateManager] = None
+
+        # Safety services integration
+        self.restart_protection: Optional[RestartProtection] = None
+        self.health_monitor: Optional[HealthMonitor] = None
+        self.graceful_degradation: Optional[GracefulDegradation] = None
+
         # Statistics
         self.start_time = time.time()
         self.total_restarts = 0
@@ -175,6 +187,13 @@ class MemoryGuardian(BaseService):
         try:
             self.log_info("Initializing Memory Guardian service")

+            # Initialize state manager
+            self.state_manager = StateManager()
+            await self.state_manager.initialize()
+
+            # Initialize safety services
+            await self._initialize_safety_services()
+
             # Load persisted state if available
             if self.config.persist_state and self.config.state_file:
                 self._load_state()
@@ -210,6 +229,18 @@ class MemoryGuardian(BaseService):
         if self.config.persist_state and self.config.state_file:
             self._save_state()

+        # Shutdown state manager
+        if self.state_manager:
+            await self.state_manager.shutdown()
+
+        # Shutdown safety services
+        if self.restart_protection:
+            await self.restart_protection.shutdown()
+        if self.health_monitor:
+            await self.health_monitor.shutdown()
+        if self.graceful_degradation:
+            await self.graceful_degradation.shutdown()
+
         # Terminate process if running
         if self.process and self.process_state == ProcessState.RUNNING:
             await self.terminate_process()
@@ -254,6 +285,10 @@ class MemoryGuardian(BaseService):
             self.process_pid = self.process.pid
             self.process_state = ProcessState.RUNNING

+            # Update health monitor with new process
+            if self.health_monitor:
+                self.health_monitor.set_monitored_process(self.process_pid)
+
             # Reset failure counter on successful start
             self.consecutive_failures = 0

@@ -290,24 +325,56 @@ class MemoryGuardian(BaseService):
         """
         self.log_info(f"Initiating process restart: {reason}")

-        # Check restart attempts
-        if not self._can_restart():
-            self.log_error("Maximum restart attempts exceeded")
-            self.process_state = ProcessState.FAILED
-            return False
-
-        # Apply cooldown if needed
-        cooldown = self._get_restart_cooldown()
-        if cooldown > 0:
-            self.log_info(f"Applying restart cooldown of {cooldown} seconds")
-            await asyncio.sleep(cooldown)
+        # Use restart protection if available
+        if self.restart_protection:
+            allowed, protection_reason = self.restart_protection.should_allow_restart(
+                self.memory_stats.current_mb
+            )
+            if not allowed:
+                self.log_error(f"Restart blocked by protection: {protection_reason}")
+
+                # Trigger graceful degradation if available
+                if self.graceful_degradation:
+                    await self.graceful_degradation.disable_feature(
+                        "automated_monitoring",
+                        f"Restart protection triggered: {protection_reason}"
+                    )
+
+                self.process_state = ProcessState.FAILED
+                return False
+
+            # Get backoff from restart protection
+            backoff = self.restart_protection.get_backoff_seconds(
+                self.restart_protection.statistics.consecutive_failures + 1
+            )
+            if backoff > 0:
+                self.log_info(f"Applying restart backoff of {backoff:.1f} seconds")
+                await asyncio.sleep(backoff)
+        else:
+            # Fallback to original logic
+            if not self._can_restart():
+                self.log_error("Maximum restart attempts exceeded")
+                self.process_state = ProcessState.FAILED
+                return False
+
+            # Apply cooldown if needed
+            cooldown = self._get_restart_cooldown()
+            if cooldown > 0:
+                self.log_info(f"Applying restart cooldown of {cooldown} seconds")
+                await asyncio.sleep(cooldown)

         # Record restart attempt
         memory_mb = self.memory_stats.current_mb
         self.process_state = ProcessState.RESTARTING

-        # Save state before restart
-        await self._trigger_state_save()
+        # Save state before restart using StateManager
+        if self.state_manager:
+            state = await self.state_manager.capture_state(restart_reason=reason)
+            if state:
+                await self.state_manager.persist_state(state)
+        else:
+            # Fallback to hook-based preservation
+            await self._trigger_state_save()

         # Terminate existing process
         if self.process:
@@ -325,13 +392,29 @@ class MemoryGuardian(BaseService):
         )
         self.restart_attempts.append(attempt)

+        # Record in restart protection service
+        if self.restart_protection:
+            backoff_applied = self.restart_protection.get_backoff_seconds(
+                self.restart_protection.statistics.consecutive_failures + 1
+            ) if not success else 0
+            self.restart_protection.record_restart(
+                reason=reason,
+                memory_mb=memory_mb,
+                success=success,
+                backoff_applied=backoff_applied
+            )
+
         if success:
             self.total_restarts += 1
             self.last_restart_time = time.time()
             self.log_info("Process restarted successfully")

-            # Restore state after restart
-            await self._trigger_state_restore()
+            # Restore state after restart using StateManager
+            if self.state_manager:
+                await self.state_manager.restore_state()
+            else:
+                # Fallback to hook-based restoration
+                await self._trigger_state_restore()
         else:
             self.consecutive_failures += 1
             self.log_error("Process restart failed")
@@ -457,6 +540,10 @@ class MemoryGuardian(BaseService):
         # Update statistics
         self.memory_stats.update(memory_mb)

+        # Record memory sample for trend analysis
+        if self.restart_protection:
+            self.restart_protection.record_memory_sample(memory_mb)
+
         # Determine memory state
         old_state = self.memory_state

@@ -702,6 +789,15 @@ class MemoryGuardian(BaseService):
         self.state_restore_hooks.append(hook)
         self.log_debug(f"Added state restore hook: {hook.__name__}")

+    def set_state_manager(self, state_manager: StateManager) -> None:
+        """Set the state manager for state preservation.
+
+        Args:
+            state_manager: StateManager instance to use
+        """
+        self.state_manager = state_manager
+        self.log_info("State manager configured for Memory Guardian")
+
     def get_state(self) -> Dict[str, Any]:
         """Get current service state.

@@ -721,6 +817,84 @@ class MemoryGuardian(BaseService):
             'monitoring': self.monitoring
         }

+    async def _initialize_safety_services(self) -> None:
+        """Initialize safety and protection services."""
+        try:
+            # Initialize restart protection
+            self.restart_protection = RestartProtection(
+                max_restarts_per_hour=5,
+                max_consecutive_failures=3,
+                base_backoff_seconds=1,
+                max_backoff_seconds=60,
+                state_file=Path(self.config.state_file).parent / "restart_protection.json" if self.config.state_file else None
+            )
+            await self.restart_protection.initialize()
+            self.log_info("Restart protection service initialized")
+
+            # Initialize health monitor
+            self.health_monitor = HealthMonitor(
+                cpu_threshold_percent=80,
+                memory_threshold_percent=90,
+                disk_threshold_percent=90,
+                min_disk_space_gb=1.0,
+                check_interval_seconds=30
+            )
+            await self.health_monitor.initialize()
+            self.log_info("Health monitor service initialized")
+
+            # Initialize graceful degradation
+            self.graceful_degradation = GracefulDegradation(
+                enable_notifications=True,
+                log_degradation_events=True,
+                state_file=Path(self.config.state_file).parent / "degradation.json" if self.config.state_file else None
+            )
+            await self.graceful_degradation.initialize()
+            self.log_info("Graceful degradation service initialized")
+
+            # Check initial health
+            valid, message = await self.health_monitor.validate_before_start()
+            if not valid:
+                self.log_warning(f"System health check warning: {message}")
+                await self.graceful_degradation.degrade_feature(
+                    "automated_monitoring",
+                    message,
+                    "reduced monitoring frequency"
+                )
+
+        except Exception as e:
+            self.log_error(f"Failed to initialize safety services: {e}")
+            # Continue without safety services - graceful degradation
+            self.log_warning("Continuing without safety services")
+
+    def set_restart_protection(self, restart_protection: RestartProtection) -> None:
+        """Set the restart protection service.
+
+        Args:
+            restart_protection: RestartProtection instance to use
+        """
+        self.restart_protection = restart_protection
+        self.log_info("Restart protection configured for Memory Guardian")
+
+    def set_health_monitor(self, health_monitor: HealthMonitor) -> None:
+        """Set the health monitor service.
+
+        Args:
+            health_monitor: HealthMonitor instance to use
+        """
+        self.health_monitor = health_monitor
+        if self.process_pid:
+            self.health_monitor.set_monitored_process(self.process_pid)
+        self.log_info("Health monitor configured for Memory Guardian")
+
+    def set_graceful_degradation(self, graceful_degradation: GracefulDegradation) -> None:
+        """Set the graceful degradation service.
+
+        Args:
+            graceful_degradation: GracefulDegradation instance to use
+        """
+        self.graceful_degradation = graceful_degradation
+        self.log_info("Graceful degradation configured for Memory Guardian")
+
     def get_status(self) -> Dict[str, Any]:
         """Get current service status.
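The setters added in this hunk let a caller inject pre-configured safety services instead of relying on the defaults created by _initialize_safety_services(). A minimal sketch of that wiring, using only constructors, parameters, and setters that appear in this diff; the `guardian` instance and the `state_dir` path are placeholders, not part of this release:

from pathlib import Path

from claude_mpm.services.infrastructure.health_monitor import HealthMonitor
from claude_mpm.services.infrastructure.restart_protection import RestartProtection
from claude_mpm.services.infrastructure.state_manager import StateManager

async def wire_safety_services(guardian, state_dir: Path) -> None:
    # State preservation across restarts
    state_manager = StateManager()
    await state_manager.initialize()
    guardian.set_state_manager(state_manager)

    # Restart rate limiting and exponential backoff
    restart_protection = RestartProtection(
        max_restarts_per_hour=5,
        max_consecutive_failures=3,
        base_backoff_seconds=1,
        max_backoff_seconds=60,
        state_file=state_dir / "restart_protection.json",
    )
    await restart_protection.initialize()
    guardian.set_restart_protection(restart_protection)

    # System resource checks before and during runs
    health_monitor = HealthMonitor(
        cpu_threshold_percent=80,
        memory_threshold_percent=90,
        disk_threshold_percent=90,
        min_disk_space_gb=1.0,
        check_interval_seconds=30,
    )
    await health_monitor.initialize()
    guardian.set_health_monitor(health_monitor)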