claude-mpm 4.13.1__py3-none-any.whl → 4.14.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of claude-mpm might be problematic. Click here for more details.

Files changed (50) hide show
  1. claude_mpm/VERSION +1 -1
  2. claude_mpm/agents/PM_INSTRUCTIONS.md +68 -0
  3. claude_mpm/cli/__init__.py +10 -0
  4. claude_mpm/cli/commands/local_deploy.py +536 -0
  5. claude_mpm/cli/parsers/base_parser.py +7 -0
  6. claude_mpm/cli/parsers/local_deploy_parser.py +227 -0
  7. claude_mpm/commands/mpm-agents-detect.md +168 -0
  8. claude_mpm/commands/mpm-agents-recommend.md +214 -0
  9. claude_mpm/commands/mpm-agents.md +75 -1
  10. claude_mpm/commands/mpm-auto-configure.md +217 -0
  11. claude_mpm/commands/mpm-help.md +160 -0
  12. claude_mpm/config/model_config.py +428 -0
  13. claude_mpm/core/interactive_session.py +3 -0
  14. claude_mpm/services/core/interfaces/__init__.py +74 -2
  15. claude_mpm/services/core/interfaces/health.py +172 -0
  16. claude_mpm/services/core/interfaces/model.py +281 -0
  17. claude_mpm/services/core/interfaces/process.py +372 -0
  18. claude_mpm/services/core/interfaces/restart.py +307 -0
  19. claude_mpm/services/core/interfaces/stability.py +260 -0
  20. claude_mpm/services/core/models/__init__.py +35 -0
  21. claude_mpm/services/core/models/health.py +189 -0
  22. claude_mpm/services/core/models/process.py +258 -0
  23. claude_mpm/services/core/models/restart.py +302 -0
  24. claude_mpm/services/core/models/stability.py +264 -0
  25. claude_mpm/services/local_ops/__init__.py +163 -0
  26. claude_mpm/services/local_ops/crash_detector.py +257 -0
  27. claude_mpm/services/local_ops/health_checks/__init__.py +28 -0
  28. claude_mpm/services/local_ops/health_checks/http_check.py +223 -0
  29. claude_mpm/services/local_ops/health_checks/process_check.py +235 -0
  30. claude_mpm/services/local_ops/health_checks/resource_check.py +254 -0
  31. claude_mpm/services/local_ops/health_manager.py +430 -0
  32. claude_mpm/services/local_ops/log_monitor.py +396 -0
  33. claude_mpm/services/local_ops/memory_leak_detector.py +294 -0
  34. claude_mpm/services/local_ops/process_manager.py +595 -0
  35. claude_mpm/services/local_ops/resource_monitor.py +331 -0
  36. claude_mpm/services/local_ops/restart_manager.py +401 -0
  37. claude_mpm/services/local_ops/restart_policy.py +387 -0
  38. claude_mpm/services/local_ops/state_manager.py +371 -0
  39. claude_mpm/services/local_ops/unified_manager.py +600 -0
  40. claude_mpm/services/model/__init__.py +147 -0
  41. claude_mpm/services/model/base_provider.py +365 -0
  42. claude_mpm/services/model/claude_provider.py +412 -0
  43. claude_mpm/services/model/model_router.py +453 -0
  44. claude_mpm/services/model/ollama_provider.py +415 -0
  45. {claude_mpm-4.13.1.dist-info → claude_mpm-4.14.0.dist-info}/METADATA +1 -1
  46. {claude_mpm-4.13.1.dist-info → claude_mpm-4.14.0.dist-info}/RECORD +50 -15
  47. {claude_mpm-4.13.1.dist-info → claude_mpm-4.14.0.dist-info}/WHEEL +0 -0
  48. {claude_mpm-4.13.1.dist-info → claude_mpm-4.14.0.dist-info}/entry_points.txt +0 -0
  49. {claude_mpm-4.13.1.dist-info → claude_mpm-4.14.0.dist-info}/licenses/LICENSE +0 -0
  50. {claude_mpm-4.13.1.dist-info → claude_mpm-4.14.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,371 @@
1
+ """
2
+ Deployment State Manager for Claude MPM Framework
3
+ =================================================
4
+
5
+ WHY: Provides persistent state tracking for local deployments with atomic
6
+ operations, file locking, and corruption recovery. Critical for preventing
7
+ orphaned processes and ensuring deployment reliability.
8
+
9
+ DESIGN DECISION: Uses JSON file storage with filelock for simplicity and
10
+ portability. File-based storage is sufficient for local deployments and
11
+ doesn't require external dependencies.
12
+
13
+ ARCHITECTURE:
14
+ - Thread-safe operations with file locking
15
+ - Atomic read-modify-write cycles
16
+ - Automatic corruption detection and recovery
17
+ - Process validation using psutil
18
+
19
+ USAGE:
20
+ manager = DeploymentStateManager(state_file_path)
21
+ manager.add_deployment(deployment_state)
22
+ deployments = manager.get_all_deployments()
23
+ manager.cleanup_dead_pids()
24
+ """
25
+
26
+ import json
27
+ from pathlib import Path
28
+ from typing import Dict, List, Optional
29
+
30
+ import psutil
31
+ from filelock import FileLock
32
+
33
+ from claude_mpm.services.core.base import SyncBaseService
34
+ from claude_mpm.services.core.interfaces.process import IDeploymentStateManager
35
+ from claude_mpm.services.core.models.process import DeploymentState, ProcessStatus
36
+
37
+
38
class StateCorruptionError(Exception):
    """
    Signals that the deployment state file could not be loaded.

    WHY: Gives callers a single, specific exception type to catch when the
    persisted state is unusable, instead of a mix of low-level I/O and
    parsing errors.
    """
40
+
41
+
42
class DeploymentStateManager(SyncBaseService, IDeploymentStateManager):
    """
    Manages persistent deployment state with atomic operations.

    WHY: Deployment state must survive restarts and be accessible to
    multiple processes. This manager ensures consistency with file locking
    and provides corruption recovery.

    STORAGE: A single JSON object mapping deployment_id to the dict form of
    a DeploymentState. Writes go to a sibling temp file which is then moved
    over the real file, so readers never observe a half-written document.

    Thread Safety: All public methods use file locking for atomicity.
    """

    def __init__(self, state_file_path: str):
        """
        Initialize state manager.

        Args:
            state_file_path: Path to JSON state file

        Raises:
            ValueError: If state_file_path is None or empty
        """
        super().__init__("DeploymentStateManager")

        # Enforce the documented contract up front instead of failing later
        # with an unrelated error when the path is first used.
        if state_file_path is None or str(state_file_path) == "":
            raise ValueError("state_file_path must be a non-empty path")

        self.state_file = Path(state_file_path)
        self.lock_file = Path(str(state_file_path) + ".lock")

        # Create single FileLock instance for re-entrant locking
        # WHY: Using the same lock instance allows re-entrant calls
        # (e.g., add_deployment -> load_state) without deadlock
        self._file_lock = FileLock(str(self.lock_file), timeout=10)

        # Ensure parent directory exists
        self.state_file.parent.mkdir(parents=True, exist_ok=True)

        # Initialize empty state if file doesn't exist.
        # WHY: The write happens under the lock and existence is re-checked
        # once the lock is held, so another process that created and
        # populated the file in the meantime is never overwritten with {}.
        if not self.state_file.exists():
            with self._file_lock:
                if not self.state_file.exists():
                    self._write_state({})

        self.log_info(f"Initialized state manager with file: {self.state_file}")

    def initialize(self) -> bool:
        """
        Initialize the state manager.

        Returns:
            True if the state file could be loaded, False otherwise
        """
        try:
            # Validate state file can be read
            self.load_state()
            self._initialized = True
            return True
        except Exception as e:
            self.log_error(f"Failed to initialize: {e}")
            return False

    def shutdown(self) -> None:
        """Shutdown state manager (no resources to clean up)."""
        self._shutdown = True
        self.log_info("State manager shutdown complete")

    def load_state(self) -> Dict[str, DeploymentState]:
        """
        Load all deployment states from file.

        A missing file yields an empty mapping. A file that is not valid
        JSON, cannot be decoded as UTF-8, or whose top-level value is not a
        JSON object is treated as corrupted: it is moved aside to
        "<name>.corrupted" and replaced with an empty state. Individual
        entries that cannot be converted are skipped with a warning.

        Returns:
            Dictionary mapping deployment_id to DeploymentState

        Raises:
            StateCorruptionError: If the file cannot be read, or if a
                corrupted file cannot be backed up and reset
        """
        with self._file_lock:
            try:
                with self.state_file.open(encoding="utf-8") as f:
                    data = json.load(f)
            except FileNotFoundError:
                # Nothing persisted yet.
                return {}
            except (json.JSONDecodeError, UnicodeDecodeError) as e:
                self.log_error(f"State file corrupted: {e}")
                return self._recover_from_corruption()
            except Exception as e:
                raise StateCorruptionError(f"Failed to load state: {e}") from e

            # Valid JSON that is not an object (e.g. [] or null) cannot hold
            # deployments; handle it like any other corruption.
            if not isinstance(data, dict):
                self.log_error(
                    "State file corrupted: expected a JSON object, "
                    f"got {type(data).__name__}"
                )
                return self._recover_from_corruption()

            # Convert dict entries to DeploymentState objects
            states: Dict[str, DeploymentState] = {}
            for deployment_id, state_dict in data.items():
                try:
                    states[deployment_id] = DeploymentState.from_dict(state_dict)
                except Exception as e:
                    self.log_warning(
                        f"Skipping corrupted state entry {deployment_id}: {e}"
                    )

            return states

    def _recover_from_corruption(self) -> Dict[str, DeploymentState]:
        """
        Move a corrupted state file aside and start over with empty state.

        WHY: Path.replace overwrites an existing backup, so a second
        corruption event does not fail on platforms where rename refuses to
        clobber its target. Caller must hold the file lock.

        Returns:
            An empty state mapping

        Raises:
            StateCorruptionError: If the backup or the reset write fails
        """
        backup_path = self.state_file.with_name(self.state_file.name + ".corrupted")
        try:
            self.state_file.replace(backup_path)
            self.log_warning(f"Backed up corrupted state to {backup_path}")
            self._write_state({})
        except OSError as e:
            raise StateCorruptionError(
                f"Failed to recover corrupted state file: {e}"
            ) from e
        return {}

    def save_state(self, states: Dict[str, DeploymentState]) -> None:
        """
        Save all deployment states to file.

        Args:
            states: Dictionary mapping deployment_id to DeploymentState

        Raises:
            IOError: If state file cannot be written
        """
        with self._file_lock:
            self._write_state(states)

    def _write_state(self, states: Dict[str, DeploymentState]) -> None:
        """
        Internal method to write state without locking.

        WHY: Allows caller to handle locking for atomic operations.

        Args:
            states: States to write; values may be DeploymentState objects
                or already-serialized dicts

        Raises:
            OSError: If the temp file cannot be written or moved into place
        """
        # Convert DeploymentState objects to dicts; pass anything else through
        data = {
            deployment_id: (
                state.to_dict() if isinstance(state, DeploymentState) else state
            )
            for deployment_id, state in states.items()
        }

        # Atomic write: write to temp file then rename.
        # WHY: The temp name is derived by appending to the full file name so
        # it can never be the state file itself, whatever its suffix is.
        temp_file = self.state_file.with_name(self.state_file.name + ".tmp")
        try:
            with temp_file.open("w", encoding="utf-8") as f:
                json.dump(data, f, indent=2)
            temp_file.replace(self.state_file)
        except Exception as e:
            # Best-effort cleanup; a failure here must not hide the real error.
            try:
                temp_file.unlink()
            except OSError:
                pass
            raise OSError(f"Failed to write state: {e}") from e

    def get_deployment(self, deployment_id: str) -> Optional[DeploymentState]:
        """
        Get a specific deployment by ID.

        Args:
            deployment_id: Unique deployment identifier

        Returns:
            DeploymentState if found, None otherwise
        """
        return self.load_state().get(deployment_id)

    def get_all_deployments(self) -> List[DeploymentState]:
        """
        Get all tracked deployments.

        Returns:
            List of all DeploymentState objects
        """
        return list(self.load_state().values())

    def get_deployments_by_status(self, status: ProcessStatus) -> List[DeploymentState]:
        """
        Get all deployments with a specific status.

        Args:
            status: ProcessStatus to filter by

        Returns:
            List of matching DeploymentState objects
        """
        return [s for s in self.load_state().values() if s.status == status]

    def get_deployment_by_port(self, port: int) -> Optional[DeploymentState]:
        """
        Get deployment using a specific port.

        Args:
            port: Port number to search for

        Returns:
            First DeploymentState whose port matches, None otherwise
        """
        states = self.load_state()
        return next((s for s in states.values() if s.port == port), None)

    def get_deployments_by_project(
        self, working_directory: str
    ) -> List[DeploymentState]:
        """
        Get all deployments for a specific project directory.

        Paths are compared in absolute form; symlinks are not resolved.

        Args:
            working_directory: Project directory path

        Returns:
            List of matching DeploymentState objects
        """
        # Normalize path for comparison
        normalized_dir = str(Path(working_directory).absolute())
        states = self.load_state()
        return [
            s
            for s in states.values()
            if str(Path(s.working_directory).absolute()) == normalized_dir
        ]

    def add_deployment(self, deployment: DeploymentState) -> None:
        """
        Add or update a deployment in state.

        Args:
            deployment: DeploymentState to add/update

        Raises:
            IOError: If state cannot be persisted
        """
        # Hold the lock across load and write so the read-modify-write
        # cycle is a single atomic step.
        with self._file_lock:
            states = self.load_state()
            states[deployment.deployment_id] = deployment
            self._write_state(states)
            self.log_debug(f"Added deployment: {deployment.deployment_id}")

    def remove_deployment(self, deployment_id: str) -> bool:
        """
        Remove a deployment from state.

        Args:
            deployment_id: Unique deployment identifier

        Returns:
            True if deployment was removed, False if not found

        Raises:
            IOError: If state cannot be persisted
        """
        with self._file_lock:
            states = self.load_state()
            if deployment_id not in states:
                return False

            del states[deployment_id]
            self._write_state(states)
            self.log_debug(f"Removed deployment: {deployment_id}")
            return True

    def update_deployment_status(
        self, deployment_id: str, status: ProcessStatus
    ) -> bool:
        """
        Update the status of a deployment.

        Args:
            deployment_id: Unique deployment identifier
            status: New ProcessStatus

        Returns:
            True if updated, False if deployment not found

        Raises:
            IOError: If state cannot be persisted
        """
        with self._file_lock:
            states = self.load_state()
            if deployment_id not in states:
                return False

            states[deployment_id].status = status
            self._write_state(states)
            self.log_debug(f"Updated status for {deployment_id}: {status.value}")
            return True

    def cleanup_dead_pids(self) -> int:
        """
        Remove deployments with dead process IDs.

        WHY: Processes may crash or be killed externally. This method cleans
        up stale state entries for processes that no longer exist.

        Returns:
            Number of dead PIDs cleaned up

        Raises:
            IOError: If state cannot be persisted
        """
        with self._file_lock:
            states = self.load_state()

            # Decide what to drop before mutating the mapping.
            dead_ids = [
                deployment_id
                for deployment_id, state in states.items()
                if not self._is_pid_alive(state.process_id)
            ]

            for deployment_id in dead_ids:
                self.log_info(
                    f"Cleaning dead PID {states[deployment_id].process_id} "
                    f"for {deployment_id}"
                )
                del states[deployment_id]

            cleaned_count = len(dead_ids)

            # Skip the write entirely when nothing changed.
            if cleaned_count > 0:
                self._write_state(states)
                self.log_info(f"Cleaned up {cleaned_count} dead PIDs")

            return cleaned_count

    def _is_pid_alive(self, pid: int) -> bool:
        """
        Check if a process ID is alive.

        Args:
            pid: Process ID to check

        Returns:
            True if process exists, is running and is not a zombie
        """
        # WHY: psutil.Process(None) refers to the *current* process, which
        # would make an entry without a PID look alive forever, and a
        # non-positive PID makes psutil raise ValueError. Neither can be a
        # tracked deployment process.
        if pid is None or (isinstance(pid, int) and pid <= 0):
            return False

        try:
            process = psutil.Process(pid)
            # Check if process still exists and is not a zombie
            return process.is_running() and process.status() != psutil.STATUS_ZOMBIE
        except (psutil.NoSuchProcess, psutil.AccessDenied):
            # NOTE(review): AccessDenied means the PID exists but cannot be
            # inspected; it is reported as not alive here — confirm this is
            # the intended handling (e.g. PID reused by another user).
            return False
369
+
370
+
371
# Public API of this module: the state manager and the exception it raises.
__all__ = ["DeploymentStateManager", "StateCorruptionError"]