claude-mpm 4.5.6__py3-none-any.whl → 4.5.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. claude_mpm/VERSION +1 -1
  2. claude_mpm/__init__.py +20 -5
  3. claude_mpm/agents/BASE_OPS.md +10 -0
  4. claude_mpm/agents/PM_INSTRUCTIONS.md +28 -4
  5. claude_mpm/agents/agent_loader.py +19 -2
  6. claude_mpm/agents/base_agent_loader.py +5 -5
  7. claude_mpm/agents/templates/agent-manager.json +3 -3
  8. claude_mpm/agents/templates/agentic-coder-optimizer.json +3 -3
  9. claude_mpm/agents/templates/api_qa.json +1 -1
  10. claude_mpm/agents/templates/clerk-ops.json +3 -3
  11. claude_mpm/agents/templates/code_analyzer.json +3 -3
  12. claude_mpm/agents/templates/dart_engineer.json +294 -0
  13. claude_mpm/agents/templates/data_engineer.json +3 -3
  14. claude_mpm/agents/templates/documentation.json +2 -2
  15. claude_mpm/agents/templates/engineer.json +2 -2
  16. claude_mpm/agents/templates/gcp_ops_agent.json +2 -2
  17. claude_mpm/agents/templates/imagemagick.json +1 -1
  18. claude_mpm/agents/templates/local_ops_agent.json +363 -49
  19. claude_mpm/agents/templates/memory_manager.json +2 -2
  20. claude_mpm/agents/templates/nextjs_engineer.json +2 -2
  21. claude_mpm/agents/templates/ops.json +2 -2
  22. claude_mpm/agents/templates/php-engineer.json +1 -1
  23. claude_mpm/agents/templates/project_organizer.json +1 -1
  24. claude_mpm/agents/templates/prompt-engineer.json +6 -4
  25. claude_mpm/agents/templates/python_engineer.json +2 -2
  26. claude_mpm/agents/templates/qa.json +1 -1
  27. claude_mpm/agents/templates/react_engineer.json +3 -3
  28. claude_mpm/agents/templates/refactoring_engineer.json +3 -3
  29. claude_mpm/agents/templates/research.json +2 -2
  30. claude_mpm/agents/templates/security.json +2 -2
  31. claude_mpm/agents/templates/ticketing.json +2 -2
  32. claude_mpm/agents/templates/typescript_engineer.json +2 -2
  33. claude_mpm/agents/templates/vercel_ops_agent.json +2 -2
  34. claude_mpm/agents/templates/version_control.json +2 -2
  35. claude_mpm/agents/templates/web_qa.json +6 -6
  36. claude_mpm/agents/templates/web_ui.json +3 -3
  37. claude_mpm/cli/__init__.py +49 -19
  38. claude_mpm/cli/commands/configure.py +591 -7
  39. claude_mpm/cli/parsers/configure_parser.py +5 -0
  40. claude_mpm/core/__init__.py +53 -17
  41. claude_mpm/core/config.py +1 -1
  42. claude_mpm/core/log_manager.py +7 -0
  43. claude_mpm/hooks/claude_hooks/response_tracking.py +16 -11
  44. claude_mpm/hooks/claude_hooks/services/connection_manager_http.py +9 -11
  45. claude_mpm/services/__init__.py +140 -156
  46. claude_mpm/services/agents/deployment/deployment_config_loader.py +21 -0
  47. claude_mpm/services/agents/loading/base_agent_manager.py +12 -2
  48. claude_mpm/services/async_session_logger.py +112 -96
  49. claude_mpm/services/claude_session_logger.py +63 -61
  50. claude_mpm/services/mcp_config_manager.py +328 -38
  51. claude_mpm/services/mcp_gateway/__init__.py +98 -94
  52. claude_mpm/services/monitor/event_emitter.py +1 -1
  53. claude_mpm/services/orphan_detection.py +791 -0
  54. claude_mpm/services/project_port_allocator.py +601 -0
  55. claude_mpm/services/response_tracker.py +17 -6
  56. claude_mpm/services/session_manager.py +176 -0
  57. {claude_mpm-4.5.6.dist-info → claude_mpm-4.5.11.dist-info}/METADATA +1 -1
  58. {claude_mpm-4.5.6.dist-info → claude_mpm-4.5.11.dist-info}/RECORD +62 -58
  59. {claude_mpm-4.5.6.dist-info → claude_mpm-4.5.11.dist-info}/WHEEL +0 -0
  60. {claude_mpm-4.5.6.dist-info → claude_mpm-4.5.11.dist-info}/entry_points.txt +0 -0
  61. {claude_mpm-4.5.6.dist-info → claude_mpm-4.5.11.dist-info}/licenses/LICENSE +0 -0
  62. {claude_mpm-4.5.6.dist-info → claude_mpm-4.5.11.dist-info}/top_level.txt +0 -0
claude_mpm/services/orphan_detection.py (new file)
@@ -0,0 +1,791 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Orphan Detection Service
4
+ ========================
5
+
6
+ Detects and manages orphaned deployment processes across different deployment methods.
7
+ Provides safe cleanup capabilities with multiple safety checks to prevent accidental
8
+ termination of active services.
9
+
10
+ Part of local-ops agent improvements for process lifecycle management.
11
+
12
+ WHY: Deployments can leave orphaned processes when:
13
+ - PM2 processes outlive their parent
14
+ - Docker containers keep running after deployment fails
15
+ - State files reference dead processes
16
+ - Projects are deleted but processes remain
17
+
18
+ SAFETY PHILOSOPHY:
19
+ - Never kill processes without verification
20
+ - Require manual confirmation for high-severity cases
21
+ - Preserve Claude MPM/MCP services at all costs
22
+ - Respect process ownership boundaries
23
+ - Implement multiple safety checks before any action
24
+
25
+ DESIGN DECISIONS:
26
+ - Multi-method support: PM2, Docker, native processes
27
+ - Severity levels: low, medium, high (affects confirmation requirements)
28
+ - Age-based protection: Never touch processes < 1 minute old
29
+ - Protected port ranges: Claude Code services (8765-8785)
30
+ - Ownership verification: Cross-reference with state files
31
+ """
32
+
33
+ import json
34
+ import subprocess
35
+ import time
36
+ from datetime import datetime, timezone
37
+ from enum import Enum
38
+ from pathlib import Path
39
+ from typing import Any, Dict, List, Optional, Set, Tuple
40
+
41
+ import psutil
42
+
43
+ from .core.base import SyncBaseService
44
+ from .port_manager import PortManager
45
+
46
+
47
class OrphanSeverity(Enum):
    """How risky an orphaned resource is to clean up.

    The severity decides how much confirmation a cleanup needs before it
    is allowed to proceed.
    """

    # Safe to auto-cleanup (e.g., old test processes)
    LOW = "low"

    # Needs user awareness (e.g., untracked deployments)
    MEDIUM = "medium"

    # Requires explicit confirmation (e.g., running production services)
    HIGH = "high"
53
+
54
+
55
class OrphanType(Enum):
    """Categories of orphaned resources the detection service can report."""

    # State file references dead process
    DEAD_PID = "dead_pid"

    # Process for non-existent project
    DELETED_PROJECT = "deleted_project"

    # Process on managed port without state
    UNTRACKED_PROCESS = "untracked_process"

    # PM2 process not in any state file
    PM2_ORPHAN = "pm2_orphan"

    # Docker container not in any state file
    DOCKER_ORPHAN = "docker_orphan"

    # Deployment hasn't been updated in days
    STALE_DEPLOYMENT = "stale_deployment"
64
+
65
+
66
class OrphanInfo:
    """Record describing a single orphaned resource found by a scan."""

    def __init__(
        self,
        orphan_type: OrphanType,
        severity: OrphanSeverity,
        description: str,
        details: Dict[str, Any],
        cleanup_action: Optional[str] = None,
    ):
        """
        Build an orphan record and timestamp it.

        Args:
            orphan_type: Category of the orphaned resource
            severity: How risky the orphan is to clean up
            description: Human-readable summary of the finding
            details: Free-form facts about the orphan (PID, port, etc.)
            cleanup_action: Suggested remediation, if any
        """
        # Timestamp is taken at construction time, timezone-aware (UTC).
        self.detected_at = datetime.now(timezone.utc)

        self.orphan_type = orphan_type
        self.severity = severity
        self.description = description
        self.details = details
        self.cleanup_action = cleanup_action

    def to_dict(self) -> Dict[str, Any]:
        """Return a JSON-friendly dictionary view of this record."""
        serialized: Dict[str, Any] = {}
        # Enum members are flattened to their string values.
        serialized["type"] = self.orphan_type.value
        serialized["severity"] = self.severity.value
        serialized["description"] = self.description
        serialized["details"] = self.details
        serialized["cleanup_action"] = self.cleanup_action
        # ISO-8601 string so the timestamp survives json.dumps().
        serialized["detected_at"] = self.detected_at.isoformat()
        return serialized
104
+
105
+
106
class OrphanDetectionService(SyncBaseService):
    """
    Service for detecting and managing orphaned deployment processes.

    Capabilities:
    - Scan for orphaned PM2 processes
    - Scan for orphaned Docker containers
    - Detect untracked processes on managed ports
    - Verify state file integrity
    - Safe cleanup with multiple safety checks

    The scan_* methods are best-effort: any failure is logged and the
    orphans collected so far (possibly none) are returned instead of raising.
    """

    # Minimum process age before considering for cleanup (safety measure)
    MIN_PROCESS_AGE_SECONDS = 60  # 1 minute

    # Protected port ranges (Claude Code services), inclusive on both ends
    PROTECTED_PORT_RANGES = [(8765, 8785)]

    # Protected process patterns (matched as lowercase substrings)
    PROTECTED_PATTERNS = [
        "claude-mpm",
        "claude_mpm",
        "socketio_daemon",
        "mcp-",
        "monitor",
    ]

    # Port range for user projects (inclusive)
    USER_PORT_RANGE_START = 3000
    USER_PORT_RANGE_END = 9999

    def __init__(self, project_root: Optional[Path] = None):
        """
        Initialize the orphan detection service.

        Args:
            project_root: Project directory (default: current working directory)
        """
        super().__init__(service_name="OrphanDetectionService")

        self.project_root = (project_root or Path.cwd()).resolve()
        self.state_dir = self.project_root / ".claude-mpm"
        self.state_file = self.state_dir / "deployment-state.json"

        # Global registry
        self.global_registry_dir = Path.home() / ".claude-mpm"
        self.global_registry_file = (
            self.global_registry_dir / "global-port-registry.json"
        )

        # Port manager for process checks
        self.port_manager = PortManager(project_root=self.project_root)

    def initialize(self) -> bool:
        """
        Initialize the service.

        Returns:
            True if initialization successful
        """
        try:
            self._initialized = True
            self.log_info("OrphanDetectionService initialized successfully")
            return True
        except Exception as e:
            self.log_error(f"Failed to initialize: {e}")
            return False

    def shutdown(self) -> None:
        """Shutdown the service gracefully."""
        self._shutdown = True
        self.log_info("OrphanDetectionService shutdown")

    @staticmethod
    def _load_json_file(path: Path) -> Any:
        """Read and parse a JSON file; I/O and parse errors propagate to the caller."""
        with open(path, encoding="utf-8") as f:
            return json.load(f)

    def _is_protected_process(self, cmdline: str) -> bool:
        """
        Check if process is protected (Claude MPM services).

        Args:
            cmdline: Process command line (None or empty is treated as
                "nothing to match" rather than raising)

        Returns:
            True if any protected pattern occurs in the command line
        """
        cmdline_lower = (cmdline or "").lower()
        return any(pattern in cmdline_lower for pattern in self.PROTECTED_PATTERNS)

    def _is_protected_port(self, port: int) -> bool:
        """
        Check if port is in protected range.

        Args:
            port: Port number

        Returns:
            True if port is protected
        """
        return any(start <= port <= end for start, end in self.PROTECTED_PORT_RANGES)

    def _get_process_age(self, pid: int) -> Optional[float]:
        """
        Get process age in seconds.

        Args:
            pid: Process ID

        Returns:
            Age in seconds or None if the process cannot be inspected
        """
        # psutil.Process(None) silently means "the current process", which
        # would report our own age for an unknown PID.
        if pid is None:
            return None

        try:
            process = psutil.Process(pid)
            create_time = process.create_time()
            return time.time() - create_time
        except (psutil.NoSuchProcess, psutil.AccessDenied):
            return None

    def _is_process_safe_to_kill(self, pid: int, cmdline: str) -> Tuple[bool, str]:
        """
        Check if a process is safe to kill.

        Args:
            pid: Process ID
            cmdline: Process command line

        Returns:
            Tuple of (is_safe, reason)
        """
        # Check if protected
        if self._is_protected_process(cmdline):
            return False, "Protected Claude MPM/MCP service"

        # Check process age
        age = self._get_process_age(pid)
        if age is None:
            return False, "Cannot determine process age"

        if age < self.MIN_PROCESS_AGE_SECONDS:
            return (
                False,
                f"Process too young ({age:.1f}s < {self.MIN_PROCESS_AGE_SECONDS}s)",
            )

        return True, "Safe to cleanup"

    def scan_dead_pids(self) -> List[OrphanInfo]:
        """
        Scan for dead PIDs in state files.

        Returns:
            List of orphaned state entries
        """
        orphans: List[OrphanInfo] = []

        try:
            if not self.state_file.exists():
                return orphans

            state = self._load_json_file(self.state_file)
            deployments = state.get("deployments", {})

            for service_name, deployment in deployments.items():
                pid = deployment.get("pid")

                # Entries without a recorded PID cannot be checked
                if not pid:
                    continue

                if psutil.pid_exists(pid):
                    continue

                orphans.append(
                    OrphanInfo(
                        orphan_type=OrphanType.DEAD_PID,
                        severity=OrphanSeverity.LOW,
                        description=f"State file references dead process (PID: {pid})",
                        details={
                            "service_name": service_name,
                            "pid": pid,
                            "port": deployment.get("port"),
                            "state_file": str(self.state_file),
                        },
                        cleanup_action="Remove from state file",
                    )
                )

        except Exception as e:
            self.log_error(f"Error scanning dead PIDs: {e}")

        return orphans

    def scan_deleted_projects(self) -> List[OrphanInfo]:
        """
        Scan global registry for projects that no longer exist.

        Returns:
            List of orphaned project entries
        """
        orphans: List[OrphanInfo] = []

        try:
            if not self.global_registry_file.exists():
                return orphans

            registry = self._load_json_file(self.global_registry_file)
            allocations = registry.get("allocations", {})

            for port_str, allocation in allocations.items():
                project_path = Path(allocation.get("project_path", ""))

                # Only allocations whose project directory is gone are orphans
                if project_path.exists():
                    continue

                orphans.append(
                    OrphanInfo(
                        orphan_type=OrphanType.DELETED_PROJECT,
                        severity=OrphanSeverity.MEDIUM,
                        description="Port allocated to deleted project",
                        details={
                            "port": int(port_str),
                            "project_path": str(project_path),
                            "service_name": allocation.get("service_name"),
                        },
                        cleanup_action="Remove from global registry",
                    )
                )

        except Exception as e:
            self.log_error(f"Error scanning deleted projects: {e}")

        return orphans

    def scan_untracked_processes(self) -> List[OrphanInfo]:
        """
        Scan for processes on managed ports without state tracking.

        A listener is reported when its port is inside the user port range,
        outside the protected ranges, absent from the global registry, and
        its command line does not match a protected pattern.

        Returns:
            List of untracked processes
        """
        orphans: List[OrphanInfo] = []

        try:
            # Load global registry to know which ports are managed
            managed_ports: Set[int] = set()
            if self.global_registry_file.exists():
                registry = self._load_json_file(self.global_registry_file)
                managed_ports = {int(p) for p in registry.get("allocations", {})}

            # A process bound to both IPv4 and IPv6 shows up once per socket;
            # report each (pid, port) pair only once.
            seen: Set[Tuple[int, int]] = set()

            # Scan all network connections
            for conn in psutil.net_connections(kind="inet"):
                if conn.status != "LISTEN":
                    continue

                port = conn.laddr.port

                # Skip if not in user port range
                if not (self.USER_PORT_RANGE_START <= port <= self.USER_PORT_RANGE_END):
                    continue

                # Skip protected ports
                if self._is_protected_port(port):
                    continue

                # Skip ports tracked in the global registry
                if port in managed_ports:
                    continue

                pid = conn.pid
                # psutil leaves pid as None when the owner cannot be
                # determined; psutil.Process(None) would then inspect *this*
                # process and report it as an orphan.
                if pid is None:
                    continue

                if (pid, port) in seen:
                    continue
                seen.add((pid, port))

                try:
                    process = psutil.Process(pid)
                    cmdline = " ".join(process.cmdline())

                    # Skip protected processes
                    if self._is_protected_process(cmdline):
                        continue

                    orphans.append(
                        OrphanInfo(
                            orphan_type=OrphanType.UNTRACKED_PROCESS,
                            severity=OrphanSeverity.MEDIUM,
                            description=f"Process on port {port} not tracked in state files",
                            details={
                                "pid": pid,
                                "port": port,
                                "process_name": process.name(),
                                "cmdline": cmdline[:100],
                            },
                            cleanup_action="Investigate and add to state or cleanup",
                        )
                    )

                except (psutil.NoSuchProcess, psutil.AccessDenied):
                    # Process exited or is not inspectable: deliberately
                    # skipped, the scan is best-effort.
                    continue

        except Exception as e:
            self.log_error(f"Error scanning untracked processes: {e}")

        return orphans

    def scan_pm2_orphans(self) -> List[OrphanInfo]:
        """
        Scan for orphaned PM2 processes.

        Returns:
            List of orphaned PM2 processes
        """
        orphans: List[OrphanInfo] = []

        try:
            # Get all PM2 processes
            result = subprocess.run(
                ["pm2", "jlist"],
                capture_output=True,
                text=True,
                timeout=5,
                check=False,
            )

            if result.returncode != 0:
                self.log_debug("PM2 not available or no processes")
                return orphans

            pm2_processes = json.loads(result.stdout)

            # Load all state files to find tracked PM2 processes
            tracked_pm2_names = self._get_tracked_pm2_processes()

            for proc in pm2_processes:
                name = proc.get("name")
                pid = proc.get("pid")
                pm2_env = proc.get("pm2_env", {})

                # Skip if tracked in any state file
                if name in tracked_pm2_names:
                    continue

                # Skip protected processes
                script = pm2_env.get("pm_exec_path", "")
                if self._is_protected_process(script):
                    continue

                orphans.append(
                    OrphanInfo(
                        orphan_type=OrphanType.PM2_ORPHAN,
                        severity=OrphanSeverity.HIGH,  # High severity - running service
                        description=f"PM2 process '{name}' not tracked in any state file",
                        details={
                            "pm2_name": name,
                            "pid": pid,
                            "status": pm2_env.get("status"),
                            "restart_count": pm2_env.get("restart_time", 0),
                        },
                        # Must be an f-string so the suggestion names the
                        # actual process instead of the literal "{name}".
                        cleanup_action=f"pm2 delete {name}",
                    )
                )

        except FileNotFoundError:
            # pm2 executable is not installed: expected on many machines
            self.log_debug("PM2 not available or no processes")
        except subprocess.TimeoutExpired:
            self.log_warning("PM2 command timed out")
        except json.JSONDecodeError:
            self.log_warning("Failed to parse PM2 output")
        except Exception as e:
            self.log_error(f"Error scanning PM2 orphans: {e}")

        return orphans

    def scan_docker_orphans(self) -> List[OrphanInfo]:
        """
        Scan for orphaned Docker containers.

        Returns:
            List of orphaned Docker containers
        """
        orphans: List[OrphanInfo] = []

        try:
            # Get all running Docker containers (one JSON object per line)
            result = subprocess.run(
                ["docker", "ps", "--format", "{{json .}}"],
                capture_output=True,
                text=True,
                timeout=5,
                check=False,
            )

            if result.returncode != 0:
                self.log_debug("Docker not available or no containers")
                return orphans

            # Load tracked Docker containers
            tracked_containers = self._get_tracked_docker_containers()

            for line in result.stdout.strip().split("\n"):
                if not line:
                    continue

                try:
                    container = json.loads(line)
                except json.JSONDecodeError:
                    # Ignore malformed lines and keep scanning the rest
                    continue

                container_id = container.get("ID")
                container_name = container.get("Names")

                # Skip if tracked
                if (
                    container_id in tracked_containers
                    or container_name in tracked_containers
                ):
                    continue

                # Skip protected containers (None-safe pattern match)
                if self._is_protected_process(container_name):
                    continue

                orphans.append(
                    OrphanInfo(
                        orphan_type=OrphanType.DOCKER_ORPHAN,
                        severity=OrphanSeverity.HIGH,
                        description=f"Docker container '{container_name}' not tracked in any state file",
                        details={
                            "container_id": container_id,
                            "container_name": container_name,
                            "image": container.get("Image"),
                            "status": container.get("Status"),
                        },
                        cleanup_action=f"docker stop {container_id}",
                    )
                )

        except FileNotFoundError:
            # docker executable is not installed: expected on many machines
            self.log_debug("Docker not available or no containers")
        except subprocess.TimeoutExpired:
            self.log_warning("Docker command timed out")
        except Exception as e:
            self.log_error(f"Error scanning Docker orphans: {e}")

        return orphans

    def _get_tracked_pm2_processes(self) -> Set[str]:
        """
        Get set of PM2 process names tracked in state files.

        Only this project's state file is consulted.

        Returns:
            Set of PM2 process names
        """
        tracked: Set[str] = set()

        # Check project state
        if self.state_file.exists():
            try:
                state = self._load_json_file(self.state_file)

                for deployment in state.get("deployments", {}).values():
                    if deployment.get("method") == "pm2":
                        process_name = deployment.get("process_name")
                        if process_name:
                            tracked.add(process_name)

            except Exception as e:
                self.log_warning(f"Error reading state file: {e}")

        # TODO: Could also scan other projects' state files for comprehensive check

        return tracked

    def _get_tracked_docker_containers(self) -> Set[str]:
        """
        Get set of Docker containers tracked in state files.

        Only this project's state file is consulted.

        Returns:
            Set of container IDs and names
        """
        tracked: Set[str] = set()

        # Check project state
        if self.state_file.exists():
            try:
                state = self._load_json_file(self.state_file)

                for deployment in state.get("deployments", {}).values():
                    if deployment.get("method") == "docker":
                        container_id = deployment.get("container_id")
                        container_name = deployment.get("container_name")

                        if container_id:
                            tracked.add(container_id)
                        if container_name:
                            tracked.add(container_name)

            except Exception as e:
                self.log_warning(f"Error reading state file: {e}")

        return tracked

    def scan_all_orphans(self) -> Dict[str, List[OrphanInfo]]:
        """
        Perform comprehensive orphan scan.

        Returns:
            Dictionary mapping orphan types to lists of orphans
        """
        results = {
            "dead_pids": self.scan_dead_pids(),
            "deleted_projects": self.scan_deleted_projects(),
            "untracked_processes": self.scan_untracked_processes(),
            "pm2_orphans": self.scan_pm2_orphans(),
            "docker_orphans": self.scan_docker_orphans(),
        }

        total = sum(len(orphans) for orphans in results.values())
        self.log_info(f"Orphan scan complete: found {total} potential orphans")

        return results

    def cleanup_orphan(
        self,
        orphan: OrphanInfo,
        force: bool = False,
    ) -> Tuple[bool, str]:
        """
        Clean up a specific orphan.

        Args:
            orphan: Orphan info
            force: Skip safety checks (use with extreme caution)

        Returns:
            Tuple of (success, message)
        """
        # High severity orphans require explicit confirmation
        if orphan.severity == OrphanSeverity.HIGH and not force:
            return False, "High severity orphan requires explicit force=True"

        try:
            if orphan.orphan_type == OrphanType.DEAD_PID:
                return self._cleanup_dead_pid(orphan)

            if orphan.orphan_type == OrphanType.DELETED_PROJECT:
                return self._cleanup_deleted_project(orphan)

            if orphan.orphan_type == OrphanType.UNTRACKED_PROCESS:
                return self._cleanup_untracked_process(orphan, force)

            if orphan.orphan_type == OrphanType.PM2_ORPHAN:
                return self._cleanup_pm2_orphan(orphan, force)

            if orphan.orphan_type == OrphanType.DOCKER_ORPHAN:
                return self._cleanup_docker_orphan(orphan, force)

            return False, f"Unknown orphan type: {orphan.orphan_type}"

        except Exception as e:
            self.log_error(f"Error cleaning up orphan: {e}")
            return False, str(e)

    def _cleanup_dead_pid(self, orphan: OrphanInfo) -> Tuple[bool, str]:
        """Clean up dead PID entry from state file."""
        try:
            state = self._load_json_file(self.state_file)

            service_name = orphan.details.get("service_name")
            deployments = state.get("deployments", {})
            if service_name not in deployments:
                return False, "Entry not found in state file"

            del deployments[service_name]

            with open(self.state_file, "w", encoding="utf-8") as f:
                json.dump(state, f, indent=2)

            return True, f"Removed dead PID entry for {service_name}"

        except Exception as e:
            return False, f"Failed to cleanup: {e}"

    def _cleanup_deleted_project(self, orphan: OrphanInfo) -> Tuple[bool, str]:
        """Clean up deleted project entry from global registry."""
        try:
            registry = self._load_json_file(self.global_registry_file)

            # Registry keys are port numbers stored as strings
            port = str(orphan.details.get("port"))
            allocations = registry.get("allocations", {})
            if port not in allocations:
                return False, "Entry not found in global registry"

            del allocations[port]

            with open(self.global_registry_file, "w", encoding="utf-8") as f:
                json.dump(registry, f, indent=2)

            return True, f"Removed deleted project entry for port {port}"

        except Exception as e:
            return False, f"Failed to cleanup: {e}"

    def _cleanup_untracked_process(
        self,
        orphan: OrphanInfo,
        force: bool,
    ) -> Tuple[bool, str]:
        """
        Clean up untracked process.

        Sends a terminate request and waits up to 5 seconds; escalates to a
        kill only when force is set.

        Args:
            orphan: Orphan info whose details carry "pid" and "cmdline"
            force: Bypass the safety check and allow escalation to kill

        Returns:
            Tuple of (success, message)
        """
        pid = orphan.details.get("pid")
        cmdline = orphan.details.get("cmdline", "")

        # Refuse unusable PIDs even with force: psutil.Process(None) is the
        # *current* process, so continuing would terminate this service.
        if not isinstance(pid, int) or pid <= 0:
            return False, f"Invalid PID for untracked process: {pid!r}"

        # Safety check
        # NOTE(review): the recorded cmdline is truncated to 100 characters
        # at scan time, so this re-check may miss a pattern further along.
        is_safe, reason = self._is_process_safe_to_kill(pid, cmdline)
        if not is_safe and not force:
            return False, f"Safety check failed: {reason}"

        try:
            process = psutil.Process(pid)
            process.terminate()

            # Wait for graceful termination
            process.wait(timeout=5)

            return True, f"Terminated untracked process {pid}"

        except psutil.TimeoutExpired:
            if force:
                process.kill()
                return True, f"Force killed untracked process {pid}"
            return False, "Process did not terminate gracefully"

        except Exception as e:
            return False, f"Failed to terminate process: {e}"

    def _cleanup_pm2_orphan(
        self,
        orphan: OrphanInfo,
        force: bool,
    ) -> Tuple[bool, str]:
        """Clean up orphaned PM2 process."""
        if not force:
            return False, "PM2 cleanup requires force=True"

        pm2_name = orphan.details.get("pm2_name")

        try:
            result = subprocess.run(
                ["pm2", "delete", pm2_name],
                capture_output=True,
                text=True,
                timeout=10,
                check=False,
            )

            if result.returncode == 0:
                return True, f"Deleted PM2 process '{pm2_name}'"
            return False, f"PM2 delete failed: {result.stderr}"

        except Exception as e:
            return False, f"Failed to delete PM2 process: {e}"

    def _cleanup_docker_orphan(
        self,
        orphan: OrphanInfo,
        force: bool,
    ) -> Tuple[bool, str]:
        """Clean up orphaned Docker container."""
        if not force:
            return False, "Docker cleanup requires force=True"

        container_id = orphan.details.get("container_id")

        try:
            result = subprocess.run(
                ["docker", "stop", container_id],
                capture_output=True,
                text=True,
                timeout=30,
                check=False,
            )

            if result.returncode == 0:
                return True, f"Stopped Docker container {container_id}"
            return False, f"Docker stop failed: {result.stderr}"

        except Exception as e:
            return False, f"Failed to stop Docker container: {e}"