claude-mpm 4.13.2__py3-none-any.whl → 4.14.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of claude-mpm might be problematic. Click here for more details.

Files changed (44) hide show
  1. claude_mpm/VERSION +1 -1
  2. claude_mpm/cli/__init__.py +10 -0
  3. claude_mpm/cli/commands/local_deploy.py +536 -0
  4. claude_mpm/cli/parsers/base_parser.py +7 -0
  5. claude_mpm/cli/parsers/local_deploy_parser.py +227 -0
  6. claude_mpm/config/model_config.py +428 -0
  7. claude_mpm/core/interactive_session.py +3 -0
  8. claude_mpm/services/core/interfaces/__init__.py +74 -2
  9. claude_mpm/services/core/interfaces/health.py +172 -0
  10. claude_mpm/services/core/interfaces/model.py +281 -0
  11. claude_mpm/services/core/interfaces/process.py +372 -0
  12. claude_mpm/services/core/interfaces/restart.py +307 -0
  13. claude_mpm/services/core/interfaces/stability.py +260 -0
  14. claude_mpm/services/core/models/__init__.py +35 -0
  15. claude_mpm/services/core/models/health.py +189 -0
  16. claude_mpm/services/core/models/process.py +258 -0
  17. claude_mpm/services/core/models/restart.py +302 -0
  18. claude_mpm/services/core/models/stability.py +264 -0
  19. claude_mpm/services/local_ops/__init__.py +163 -0
  20. claude_mpm/services/local_ops/crash_detector.py +257 -0
  21. claude_mpm/services/local_ops/health_checks/__init__.py +28 -0
  22. claude_mpm/services/local_ops/health_checks/http_check.py +223 -0
  23. claude_mpm/services/local_ops/health_checks/process_check.py +235 -0
  24. claude_mpm/services/local_ops/health_checks/resource_check.py +254 -0
  25. claude_mpm/services/local_ops/health_manager.py +430 -0
  26. claude_mpm/services/local_ops/log_monitor.py +396 -0
  27. claude_mpm/services/local_ops/memory_leak_detector.py +294 -0
  28. claude_mpm/services/local_ops/process_manager.py +595 -0
  29. claude_mpm/services/local_ops/resource_monitor.py +331 -0
  30. claude_mpm/services/local_ops/restart_manager.py +401 -0
  31. claude_mpm/services/local_ops/restart_policy.py +387 -0
  32. claude_mpm/services/local_ops/state_manager.py +371 -0
  33. claude_mpm/services/local_ops/unified_manager.py +600 -0
  34. claude_mpm/services/model/__init__.py +147 -0
  35. claude_mpm/services/model/base_provider.py +365 -0
  36. claude_mpm/services/model/claude_provider.py +412 -0
  37. claude_mpm/services/model/model_router.py +453 -0
  38. claude_mpm/services/model/ollama_provider.py +415 -0
  39. {claude_mpm-4.13.2.dist-info → claude_mpm-4.14.0.dist-info}/METADATA +1 -1
  40. {claude_mpm-4.13.2.dist-info → claude_mpm-4.14.0.dist-info}/RECORD +44 -12
  41. {claude_mpm-4.13.2.dist-info → claude_mpm-4.14.0.dist-info}/WHEEL +0 -0
  42. {claude_mpm-4.13.2.dist-info → claude_mpm-4.14.0.dist-info}/entry_points.txt +0 -0
  43. {claude_mpm-4.13.2.dist-info → claude_mpm-4.14.0.dist-info}/licenses/LICENSE +0 -0
  44. {claude_mpm-4.13.2.dist-info → claude_mpm-4.14.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,258 @@
1
+ """
2
+ Process Management Data Models for Claude MPM Framework
3
+ ========================================================
4
+
5
+ WHY: This module defines data structures for process management operations,
6
+ including process status, deployment state, and runtime information.
7
+
8
+ DESIGN DECISION: Uses plain (mutable) dataclasses for concise, typed records. Provides
9
+ serialization methods for state persistence.
10
+
11
+ ARCHITECTURE:
12
+ - ProcessStatus: Enum of process lifecycle states
13
+ - DeploymentState: Complete deployment information for persistence
14
+ - ProcessInfo: Runtime process information
15
+ - StartConfig: Configuration for spawning new processes
16
+ """
17
+
18
+ import json
19
+ from dataclasses import asdict, dataclass, field
20
+ from datetime import datetime
21
+ from enum import Enum
22
+ from pathlib import Path
23
+ from typing import Any, Dict, List, Optional
24
+
25
+
26
class ProcessStatus(Enum):
    """
    Lifecycle states a managed process can be in.

    WHY: Giving every state an explicit name lets callers treat the process
    as a state machine instead of comparing raw strings.

    States:
        STARTING: Process is being spawned
        RUNNING: Process is actively running
        STOPPING: Process is shutting down
        STOPPED: Process has stopped cleanly
        CRASHED: Process terminated unexpectedly
        UNKNOWN: Process state cannot be determined
    """

    STARTING = "starting"
    RUNNING = "running"
    STOPPING = "stopping"
    STOPPED = "stopped"
    CRASHED = "crashed"
    UNKNOWN = "unknown"

    def is_active(self) -> bool:
        """Return True for STARTING or RUNNING, False for any other state."""
        return self is ProcessStatus.STARTING or self is ProcessStatus.RUNNING

    def is_terminal(self) -> bool:
        """Return True for STOPPED or CRASHED, False for any other state."""
        return self is ProcessStatus.STOPPED or self is ProcessStatus.CRASHED
56
+
57
+
58
@dataclass
class DeploymentState:
    """
    Complete deployment state for persistence.

    WHY: Holds the information needed to track, manage, and restart a
    deployment, and can be round-tripped through JSON for state files.

    Attributes:
        deployment_id: Unique identifier for this deployment
        process_id: OS process ID (PID)
        command: Command and arguments used to start process
        working_directory: Working directory for the process
        environment: Environment variables (beyond inherited ones)
        port: Primary port used by the process (if applicable)
        started_at: Timestamp when process was started
        status: Current ProcessStatus
        metadata: Additional deployment-specific information
    """

    deployment_id: str
    process_id: int
    command: List[str]
    working_directory: str
    environment: Dict[str, str] = field(default_factory=dict)
    port: Optional[int] = None
    # NOTE: datetime.now() with no tz argument produces a naive local timestamp.
    started_at: datetime = field(default_factory=datetime.now)
    status: ProcessStatus = ProcessStatus.STARTING
    metadata: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """
        Convert to dictionary for JSON serialization.

        Returns:
            Dictionary of all fields, with ``started_at`` as an ISO-8601
            string and ``status`` as the enum's string value
        """
        data = asdict(self)
        # asdict() leaves datetime and Enum objects as-is; json cannot encode them
        data["started_at"] = self.started_at.isoformat()
        data["status"] = self.status.value
        return data

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "DeploymentState":
        """
        Create DeploymentState from dictionary.

        The input dictionary is not modified; conversions are applied to a
        shallow copy.

        Args:
            data: Dictionary from JSON deserialization

        Returns:
            DeploymentState instance

        Raises:
            ValueError: If ``started_at`` is not a valid ISO string or
                ``status`` is not a known ProcessStatus value
            TypeError: If required keys are missing or unknown keys are present
        """
        # Copy first so the caller's dictionary keeps its original string values
        fields = dict(data)

        # Convert ISO string to datetime
        started_at = fields.get("started_at")
        if isinstance(started_at, str):
            fields["started_at"] = datetime.fromisoformat(started_at)

        # Convert status string to enum
        status = fields.get("status")
        if isinstance(status, str):
            fields["status"] = ProcessStatus(status)

        return cls(**fields)

    def to_json(self) -> str:
        """
        Serialize to JSON string.

        Returns:
            JSON string representation (2-space indented)
        """
        return json.dumps(self.to_dict(), indent=2)

    @classmethod
    def from_json(cls, json_str: str) -> "DeploymentState":
        """
        Deserialize from JSON string.

        Args:
            json_str: JSON string

        Returns:
            DeploymentState instance
        """
        return cls.from_dict(json.loads(json_str))
142
+
143
+
144
@dataclass
class ProcessInfo:
    """
    Snapshot of a running process.

    WHY: Carries transient runtime data (resource usage, responsiveness)
    and is kept apart from DeploymentState so persisted state and
    point-in-time measurements are not mixed.

    Attributes:
        deployment_id: Unique deployment identifier
        process_id: OS process ID
        status: Current ProcessStatus
        port: Port the process is using
        uptime_seconds: How long the process has been running
        memory_mb: Current memory usage in megabytes
        cpu_percent: Current CPU usage percentage
        is_responding: Whether the process responds to health checks
        error_message: Error message if status is CRASHED
    """

    deployment_id: str
    process_id: int
    status: ProcessStatus
    port: Optional[int] = None
    uptime_seconds: float = 0.0
    memory_mb: float = 0.0
    cpu_percent: float = 0.0
    is_responding: bool = False
    error_message: Optional[str] = None

    def to_dict(self) -> Dict[str, Any]:
        """Return all fields as a dictionary, with ``status`` as its string value."""
        return {**asdict(self), "status": self.status.value}
180
+
181
+
182
@dataclass
class StartConfig:
    """
    Parameters for spawning a new process.

    WHY: Bundles everything needed to start a process in one place and
    validates it up front.

    Attributes:
        command: Command and arguments to execute
        working_directory: Working directory for the process
        environment: Environment variables to set (beyond inherited)
        port: Preferred port for the process
        auto_find_port: If True, find alternative port if preferred is unavailable
        metadata: Additional deployment metadata
        deployment_id: Optional explicit deployment ID (generated if not provided)
    """

    command: List[str]
    working_directory: str
    environment: Dict[str, str] = field(default_factory=dict)
    port: Optional[int] = None
    auto_find_port: bool = True
    metadata: Dict[str, Any] = field(default_factory=dict)
    deployment_id: Optional[str] = None

    def __post_init__(self):
        """
        Validate the configuration and normalise the working directory.

        Raises:
            ValueError: If the command or working directory is empty, or the
                port lies outside 1024-65535
        """
        if not self.command:
            raise ValueError("Command cannot be empty")
        if not self.working_directory:
            raise ValueError("Working directory must be specified")

        # Store the working directory as an absolute path string
        absolute_dir = Path(self.working_directory).absolute()
        self.working_directory = str(absolute_dir)

        # No port requested: nothing further to validate
        if self.port is None:
            return
        if not 1024 <= self.port <= 65535:
            raise ValueError(f"Port must be between 1024-65535, got {self.port}")

    def to_dict(self) -> Dict[str, Any]:
        """Return all fields as a dictionary."""
        return asdict(self)
227
+
228
+
229
# Port range constants: inclusive (first_port, last_port) pairs
PROTECTED_PORT_RANGES = [
    (8765, 8785),  # Claude MPM services (WebSocket, SocketIO, monitors)
]


def is_port_protected(port: int) -> bool:
    """
    Tell whether a port falls inside one of the protected ranges.

    WHY: Prevents local-ops-agent from interfering with Claude MPM
    system services.

    Args:
        port: Port number to check

    Returns:
        True if the port lies within any range in PROTECTED_PORT_RANGES
        (both endpoints included), False otherwise
    """
    for first_port, last_port in PROTECTED_PORT_RANGES:
        if first_port <= port <= last_port:
            return True
    return False
249
+
250
+
251
# Names exported by a star-import of this module.
__all__ = [
    "PROTECTED_PORT_RANGES",
    "DeploymentState",
    "ProcessInfo",
    "ProcessStatus",
    "StartConfig",
    "is_port_protected",
]
@@ -0,0 +1,302 @@
1
+ """
2
+ Restart Management Data Models for Claude MPM Framework
3
+ ========================================================
4
+
5
+ WHY: This module defines data structures for auto-restart operations,
6
+ including restart attempts, history tracking, and circuit breaker states.
7
+
8
+ DESIGN DECISION: Uses plain (mutable) dataclasses for concise, typed records. Provides
9
+ serialization methods for state persistence across service restarts.
10
+
11
+ ARCHITECTURE:
12
+ - CircuitBreakerState: Enum of circuit breaker states
13
+ - RestartAttempt: Single restart attempt record
14
+ - RestartHistory: Complete restart history for a deployment
15
+ - RestartConfig: Configuration for restart policies
16
+ """
17
+
18
+ from dataclasses import asdict, dataclass, field
19
+ from datetime import datetime, timezone
20
+ from enum import Enum
21
+ from typing import Any, Dict, List, Optional
22
+
23
+
24
class CircuitBreakerState(Enum):
    """
    States of the restart circuit breaker.

    WHY: The breaker stops endless restart loops by blocking restarts
    after repeated failures within a time window.

    States:
        CLOSED: Normal operation, restarts allowed
        OPEN: Circuit breaker tripped, restarts blocked
        HALF_OPEN: Testing if service recovered (allows one restart)
    """

    CLOSED = "closed"
    OPEN = "open"
    HALF_OPEN = "half_open"

    def allows_restart(self) -> bool:
        """Return True for CLOSED or HALF_OPEN, False for any other state."""
        return (
            self is CircuitBreakerState.CLOSED
            or self is CircuitBreakerState.HALF_OPEN
        )
44
+
45
+
46
@dataclass
class RestartAttempt:
    """
    Record of a single restart attempt.

    WHY: Tracks detailed information about each restart attempt to enable
    debugging and policy decisions (exponential backoff, circuit breaker).

    Attributes:
        attempt_number: Sequential attempt number (1-based)
        deployment_id: Unique deployment identifier
        started_at: When the restart was initiated (defaults to the current
            UTC time, timezone-aware)
        completed_at: When the restart finished (None if in progress)
        success: Whether the restart succeeded
        failure_reason: Optional reason for failure
        backoff_seconds: Backoff time before this attempt
    """

    attempt_number: int
    deployment_id: str
    started_at: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
    completed_at: Optional[datetime] = None
    success: bool = False
    failure_reason: Optional[str] = None
    backoff_seconds: float = 0.0

    def to_dict(self) -> Dict[str, Any]:
        """
        Convert to dictionary for JSON serialization.

        Returns:
            Dictionary representation with datetimes converted to ISO format
            (``completed_at`` stays None while the attempt is unfinished)
        """
        return {
            "attempt_number": self.attempt_number,
            "deployment_id": self.deployment_id,
            "started_at": self.started_at.isoformat(),
            "completed_at": (
                self.completed_at.isoformat() if self.completed_at else None
            ),
            "success": self.success,
            "failure_reason": self.failure_reason,
            "backoff_seconds": self.backoff_seconds,
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "RestartAttempt":
        """
        Create RestartAttempt from dictionary.

        The input dictionary is not modified; conversions are applied to a
        shallow copy.

        Args:
            data: Dictionary from JSON deserialization

        Returns:
            RestartAttempt instance

        Raises:
            ValueError: If a timestamp string is not valid ISO format
            TypeError: If required keys are missing or unknown keys are present
        """
        # Copy first so the caller's dictionary keeps its original string values
        fields = dict(data)

        # Convert ISO strings to datetime
        started_at = fields.get("started_at")
        if isinstance(started_at, str):
            fields["started_at"] = datetime.fromisoformat(started_at)

        # Falsy values (None, "") are passed through unconverted
        completed_at = fields.get("completed_at")
        if completed_at and isinstance(completed_at, str):
            fields["completed_at"] = datetime.fromisoformat(completed_at)

        return cls(**fields)
110
+
111
+
112
@dataclass
class RestartHistory:
    """
    Complete restart history for a deployment.

    WHY: Maintains restart attempt history, circuit breaker state, and
    failure window tracking to enable intelligent restart policies.

    Attributes:
        deployment_id: Unique deployment identifier
        attempts: List of restart attempts (newest first)
        circuit_breaker_state: Current circuit breaker state
        last_failure_window_start: Start of current failure window
        failure_count_in_window: Number of failures in current window
    """

    deployment_id: str
    attempts: List[RestartAttempt] = field(default_factory=list)
    circuit_breaker_state: CircuitBreakerState = CircuitBreakerState.CLOSED
    last_failure_window_start: Optional[datetime] = None
    failure_count_in_window: int = 0

    def to_dict(self) -> Dict[str, Any]:
        """
        Convert to dictionary for JSON serialization.

        Returns:
            Dictionary representation with attempts serialized via
            ``RestartAttempt.to_dict``, the breaker state as its string
            value, and the window start as an ISO string (or None)
        """
        return {
            "deployment_id": self.deployment_id,
            "attempts": [attempt.to_dict() for attempt in self.attempts],
            "circuit_breaker_state": self.circuit_breaker_state.value,
            "last_failure_window_start": (
                self.last_failure_window_start.isoformat()
                if self.last_failure_window_start
                else None
            ),
            "failure_count_in_window": self.failure_count_in_window,
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "RestartHistory":
        """
        Create RestartHistory from dictionary.

        Neither the input dictionary nor the nested attempt dictionaries are
        modified; conversions are applied to shallow copies.

        Args:
            data: Dictionary from JSON deserialization

        Returns:
            RestartHistory instance

        Raises:
            ValueError: If the breaker state or a timestamp string is invalid
            TypeError: If required keys are missing or unknown keys are present
        """
        # Copy first so the caller's dictionary keeps its original values
        fields = dict(data)

        # Convert circuit breaker state string to enum
        breaker_state = fields.get("circuit_breaker_state")
        if isinstance(breaker_state, str):
            fields["circuit_breaker_state"] = CircuitBreakerState(breaker_state)

        # Convert ISO string to datetime (falsy values pass through unconverted)
        window_start = fields.get("last_failure_window_start")
        if window_start and isinstance(window_start, str):
            fields["last_failure_window_start"] = datetime.fromisoformat(
                window_start
            )

        # Convert attempt dicts to RestartAttempt objects; each dict is copied
        # so the nested input is untouched whatever the callee does with it
        raw_attempts = fields.get("attempts")
        if isinstance(raw_attempts, list):
            fields["attempts"] = [
                (
                    RestartAttempt.from_dict(dict(attempt))
                    if isinstance(attempt, dict)
                    else attempt
                )
                for attempt in raw_attempts
            ]

        return cls(**fields)

    def get_latest_attempt(self) -> Optional[RestartAttempt]:
        """
        Get the most recent restart attempt.

        Returns:
            First entry of ``attempts`` (the list is kept newest first),
            or None when there are no attempts
        """
        return self.attempts[0] if self.attempts else None

    def get_attempt_count(self) -> int:
        """
        Get total number of restart attempts.

        Returns:
            Number of attempts
        """
        return len(self.attempts)

    def get_consecutive_failures(self) -> int:
        """
        Get number of consecutive failures from the most recent attempt.

        Returns:
            Count of leading unsuccessful entries in ``attempts``; counting
            stops at the first successful attempt
        """
        count = 0
        for attempt in self.attempts:
            if attempt.success:
                break
            count += 1
        return count
223
+
224
+
225
@dataclass
class RestartConfig:
    """
    Tunable parameters of the restart policy.

    WHY: Keeping every policy knob in one validated object makes the
    policy easy to configure and to test.

    Attributes:
        max_attempts: Maximum restart attempts before giving up
        initial_backoff_seconds: Initial backoff time (doubles each attempt)
        max_backoff_seconds: Maximum backoff cap (default: 5 minutes)
        backoff_multiplier: Backoff multiplier for exponential backoff
        circuit_breaker_threshold: Failures to trip circuit breaker
        circuit_breaker_window_seconds: Time window for failure counting
        circuit_breaker_reset_seconds: Cooldown before resetting breaker
        health_check_timeout_seconds: Time to wait for health check after restart
    """

    max_attempts: int = 5
    initial_backoff_seconds: float = 2.0
    max_backoff_seconds: float = 300.0  # 5 minutes
    backoff_multiplier: float = 2.0
    circuit_breaker_threshold: int = 3  # failures to trip breaker
    circuit_breaker_window_seconds: int = 300  # 5 minute window
    circuit_breaker_reset_seconds: int = 600  # 10 minute cooldown
    health_check_timeout_seconds: int = 30  # wait for health check

    def __post_init__(self):
        """
        Validate the configuration after initialization.

        Raises:
            ValueError: For the first rule (in declaration order) that the
                configured values violate
        """
        # Each rule is (violation predicate, message). Predicates are lambdas
        # so they are evaluated one at a time, in order, only until one fires.
        rules = (
            (lambda: self.max_attempts < 1, "max_attempts must be >= 1"),
            (
                lambda: self.initial_backoff_seconds < 0,
                "initial_backoff_seconds must be >= 0",
            ),
            (
                lambda: self.max_backoff_seconds < self.initial_backoff_seconds,
                "max_backoff_seconds must be >= initial_backoff_seconds",
            ),
            (
                lambda: self.backoff_multiplier < 1.0,
                "backoff_multiplier must be >= 1.0",
            ),
            (
                lambda: self.circuit_breaker_threshold < 1,
                "circuit_breaker_threshold must be >= 1",
            ),
            (
                lambda: self.circuit_breaker_window_seconds < 1,
                "circuit_breaker_window_seconds must be >= 1",
            ),
            (
                lambda: self.circuit_breaker_reset_seconds < 1,
                "circuit_breaker_reset_seconds must be >= 1",
            ),
            (
                lambda: self.health_check_timeout_seconds < 1,
                "health_check_timeout_seconds must be >= 1",
            ),
        )
        for is_violated, message in rules:
            if is_violated():
                raise ValueError(message)

    def to_dict(self) -> Dict[str, Any]:
        """Return all fields as a dictionary."""
        return asdict(self)

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "RestartConfig":
        """
        Build a RestartConfig from a dictionary of field values.

        Args:
            data: Dictionary from JSON deserialization

        Returns:
            RestartConfig instance
        """
        return cls(**data)
295
+
296
+
297
# Names exported by a star-import of this module.
__all__ = [
    "CircuitBreakerState",
    "RestartAttempt",
    "RestartConfig",
    "RestartHistory",
]
+ ]