autoforge-ai 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84)
  1. package/.claude/commands/check-code.md +32 -0
  2. package/.claude/commands/checkpoint.md +40 -0
  3. package/.claude/commands/create-spec.md +613 -0
  4. package/.claude/commands/expand-project.md +234 -0
  5. package/.claude/commands/gsd-to-autoforge-spec.md +10 -0
  6. package/.claude/commands/review-pr.md +75 -0
  7. package/.claude/templates/app_spec.template.txt +331 -0
  8. package/.claude/templates/coding_prompt.template.md +265 -0
  9. package/.claude/templates/initializer_prompt.template.md +354 -0
  10. package/.claude/templates/testing_prompt.template.md +146 -0
  11. package/.env.example +64 -0
  12. package/LICENSE.md +676 -0
  13. package/README.md +423 -0
  14. package/agent.py +444 -0
  15. package/api/__init__.py +10 -0
  16. package/api/database.py +536 -0
  17. package/api/dependency_resolver.py +449 -0
  18. package/api/migration.py +156 -0
  19. package/auth.py +83 -0
  20. package/autoforge_paths.py +315 -0
  21. package/autonomous_agent_demo.py +293 -0
  22. package/bin/autoforge.js +3 -0
  23. package/client.py +607 -0
  24. package/env_constants.py +27 -0
  25. package/examples/OPTIMIZE_CONFIG.md +230 -0
  26. package/examples/README.md +531 -0
  27. package/examples/org_config.yaml +172 -0
  28. package/examples/project_allowed_commands.yaml +139 -0
  29. package/lib/cli.js +791 -0
  30. package/mcp_server/__init__.py +1 -0
  31. package/mcp_server/feature_mcp.py +988 -0
  32. package/package.json +53 -0
  33. package/parallel_orchestrator.py +1800 -0
  34. package/progress.py +247 -0
  35. package/prompts.py +427 -0
  36. package/pyproject.toml +17 -0
  37. package/rate_limit_utils.py +132 -0
  38. package/registry.py +614 -0
  39. package/requirements-prod.txt +14 -0
  40. package/security.py +959 -0
  41. package/server/__init__.py +17 -0
  42. package/server/main.py +261 -0
  43. package/server/routers/__init__.py +32 -0
  44. package/server/routers/agent.py +177 -0
  45. package/server/routers/assistant_chat.py +327 -0
  46. package/server/routers/devserver.py +309 -0
  47. package/server/routers/expand_project.py +239 -0
  48. package/server/routers/features.py +746 -0
  49. package/server/routers/filesystem.py +514 -0
  50. package/server/routers/projects.py +524 -0
  51. package/server/routers/schedules.py +356 -0
  52. package/server/routers/settings.py +127 -0
  53. package/server/routers/spec_creation.py +357 -0
  54. package/server/routers/terminal.py +453 -0
  55. package/server/schemas.py +593 -0
  56. package/server/services/__init__.py +36 -0
  57. package/server/services/assistant_chat_session.py +496 -0
  58. package/server/services/assistant_database.py +304 -0
  59. package/server/services/chat_constants.py +57 -0
  60. package/server/services/dev_server_manager.py +557 -0
  61. package/server/services/expand_chat_session.py +399 -0
  62. package/server/services/process_manager.py +657 -0
  63. package/server/services/project_config.py +475 -0
  64. package/server/services/scheduler_service.py +683 -0
  65. package/server/services/spec_chat_session.py +502 -0
  66. package/server/services/terminal_manager.py +756 -0
  67. package/server/utils/__init__.py +1 -0
  68. package/server/utils/process_utils.py +134 -0
  69. package/server/utils/project_helpers.py +32 -0
  70. package/server/utils/validation.py +54 -0
  71. package/server/websocket.py +903 -0
  72. package/start.py +456 -0
  73. package/ui/dist/assets/index-8W_wmZzz.js +168 -0
  74. package/ui/dist/assets/index-B47Ubhox.css +1 -0
  75. package/ui/dist/assets/vendor-flow-CVNK-_lx.js +7 -0
  76. package/ui/dist/assets/vendor-query-BUABzP5o.js +1 -0
  77. package/ui/dist/assets/vendor-radix-DTNNCg2d.js +45 -0
  78. package/ui/dist/assets/vendor-react-qkC6yhPU.js +1 -0
  79. package/ui/dist/assets/vendor-utils-COeKbHgx.js +2 -0
  80. package/ui/dist/assets/vendor-xterm-DP_gxef0.js +16 -0
  81. package/ui/dist/index.html +23 -0
  82. package/ui/dist/ollama.png +0 -0
  83. package/ui/dist/vite.svg +6 -0
  84. package/ui/package.json +57 -0
@@ -0,0 +1,657 @@
1
+ """
2
+ Agent Process Manager
3
+ =====================
4
+
5
+ Manages the lifecycle of agent subprocesses per project.
6
+ Provides start/stop/pause/resume functionality with cross-platform support.
7
+ """
8
+
9
+ import asyncio
10
+ import logging
11
+ import os
12
+ import re
13
+ import subprocess
14
+ import sys
15
+ import threading
16
+ from datetime import datetime
17
+ from pathlib import Path
18
+ from typing import Any, Awaitable, Callable, Literal, Set
19
+
20
+ import psutil
21
+
22
+ # Add parent directory to path for shared module imports
23
+ sys.path.insert(0, str(Path(__file__).parent.parent.parent))
24
+ from auth import AUTH_ERROR_HELP_SERVER as AUTH_ERROR_HELP # noqa: E402
25
+ from auth import is_auth_error
26
+ from server.utils.process_utils import kill_process_tree
27
+
28
+ logger = logging.getLogger(__name__)
29
+
30
# Patterns for sensitive data that should be redacted from output.
# Each entry is a regular expression applied case-insensitively by
# sanitize_output(); entries are applied in order.
SENSITIVE_PATTERNS = [
    # Anthropic API keys ("sk-ant-..."): the body contains '-' and '_', which
    # the generic "sk-" pattern below does not allow, so match them explicitly.
    r'sk-ant-[a-zA-Z0-9_-]{20,}',
    r'sk-[a-zA-Z0-9]{20,}',  # Generic "sk-" style API keys
    r'ANTHROPIC_API_KEY=[^\s]+',
    r'api[_-]?key[=:][^\s]+',
    r'token[=:][^\s]+',
    r'password[=:][^\s]+',
    r'secret[=:][^\s]+',
    r'ghp_[a-zA-Z0-9]{36,}',  # GitHub personal access tokens
    r'gho_[a-zA-Z0-9]{36,}',  # GitHub OAuth tokens
    r'ghs_[a-zA-Z0-9]{36,}',  # GitHub server tokens
    r'ghr_[a-zA-Z0-9]{36,}',  # GitHub refresh tokens
    r'aws[_-]?access[_-]?key[=:][^\s]+',  # AWS keys
    r'aws[_-]?secret[=:][^\s]+',
]


def sanitize_output(line: str) -> str:
    """Remove sensitive information from an output line.

    Every match of every pattern in ``SENSITIVE_PATTERNS`` is replaced with
    the literal ``[REDACTED]``. Matching is case-insensitive.

    Args:
        line: A single line of process output.

    Returns:
        The line with all matched secrets replaced.
    """
    for pattern in SENSITIVE_PATTERNS:
        line = re.sub(pattern, '[REDACTED]', line, flags=re.IGNORECASE)
    return line
52
+
53
+
54
+ class AgentProcessManager:
55
+ """
56
+ Manages agent subprocess lifecycle for a single project.
57
+
58
+ Provides start/stop/pause/resume with cross-platform support via psutil.
59
+ Supports multiple output callbacks for WebSocket clients.
60
+ """
61
+
62
+ def __init__(
63
+ self,
64
+ project_name: str,
65
+ project_dir: Path,
66
+ root_dir: Path,
67
+ ):
68
+ """
69
+ Initialize the process manager.
70
+
71
+ Args:
72
+ project_name: Name of the project
73
+ project_dir: Absolute path to the project directory
74
+ root_dir: Root directory of the autonomous-coding-ui project
75
+ """
76
+ self.project_name = project_name
77
+ self.project_dir = project_dir
78
+ self.root_dir = root_dir
79
+ self.process: subprocess.Popen | None = None
80
+ self._status: Literal["stopped", "running", "paused", "crashed"] = "stopped"
81
+ self.started_at: datetime | None = None
82
+ self._output_task: asyncio.Task | None = None
83
+ self.yolo_mode: bool = False # YOLO mode for rapid prototyping
84
+ self.model: str | None = None # Model being used
85
+ self.parallel_mode: bool = False # Parallel execution mode
86
+ self.max_concurrency: int | None = None # Max concurrent agents
87
+ self.testing_agent_ratio: int = 1 # Regression testing agents (0-3)
88
+
89
+ # Support multiple callbacks (for multiple WebSocket clients)
90
+ self._output_callbacks: Set[Callable[[str], Awaitable[None]]] = set()
91
+ self._status_callbacks: Set[Callable[[str], Awaitable[None]]] = set()
92
+ self._callbacks_lock = threading.Lock()
93
+
94
+ # Lock file to prevent multiple instances (stored in project directory)
95
+ from autoforge_paths import get_agent_lock_path
96
+ self.lock_file = get_agent_lock_path(self.project_dir)
97
+
98
+ @property
99
+ def status(self) -> Literal["stopped", "running", "paused", "crashed"]:
100
+ return self._status
101
+
102
+ @status.setter
103
+ def status(self, value: Literal["stopped", "running", "paused", "crashed"]):
104
+ old_status = self._status
105
+ self._status = value
106
+ if old_status != value:
107
+ self._notify_status_change(value)
108
+
109
+ def _notify_status_change(self, status: str) -> None:
110
+ """Notify all registered callbacks of status change."""
111
+ with self._callbacks_lock:
112
+ callbacks = list(self._status_callbacks)
113
+
114
+ for callback in callbacks:
115
+ try:
116
+ # Schedule the callback in the event loop
117
+ loop = asyncio.get_running_loop()
118
+ loop.create_task(self._safe_callback(callback, status))
119
+ except RuntimeError:
120
+ # No running event loop
121
+ pass
122
+
123
+ async def _safe_callback(self, callback: Callable, *args) -> None:
124
+ """Safely execute a callback, catching and logging any errors."""
125
+ try:
126
+ await callback(*args)
127
+ except Exception as e:
128
+ logger.warning(f"Callback error: {e}")
129
+
130
+ def add_output_callback(self, callback: Callable[[str], Awaitable[None]]) -> None:
131
+ """Add a callback for output lines."""
132
+ with self._callbacks_lock:
133
+ self._output_callbacks.add(callback)
134
+
135
+ def remove_output_callback(self, callback: Callable[[str], Awaitable[None]]) -> None:
136
+ """Remove an output callback."""
137
+ with self._callbacks_lock:
138
+ self._output_callbacks.discard(callback)
139
+
140
+ def add_status_callback(self, callback: Callable[[str], Awaitable[None]]) -> None:
141
+ """Add a callback for status changes."""
142
+ with self._callbacks_lock:
143
+ self._status_callbacks.add(callback)
144
+
145
+ def remove_status_callback(self, callback: Callable[[str], Awaitable[None]]) -> None:
146
+ """Remove a status callback."""
147
+ with self._callbacks_lock:
148
+ self._status_callbacks.discard(callback)
149
+
150
+ @property
151
+ def pid(self) -> int | None:
152
+ return self.process.pid if self.process else None
153
+
154
+ def _check_lock(self) -> bool:
155
+ """Check if another agent is already running for this project.
156
+
157
+ Uses PID + process creation time to handle PID reuse on Windows.
158
+ """
159
+ if not self.lock_file.exists():
160
+ return True
161
+
162
+ try:
163
+ lock_content = self.lock_file.read_text().strip()
164
+ # Support both legacy format (just PID) and new format (PID:CREATE_TIME)
165
+ if ":" in lock_content:
166
+ pid_str, create_time_str = lock_content.split(":", 1)
167
+ pid = int(pid_str)
168
+ stored_create_time = float(create_time_str)
169
+ else:
170
+ # Legacy format - just PID
171
+ pid = int(lock_content)
172
+ stored_create_time = None
173
+
174
+ if psutil.pid_exists(pid):
175
+ # Check if it's actually our agent process
176
+ try:
177
+ proc = psutil.Process(pid)
178
+ # Verify it's the same process using creation time (handles PID reuse)
179
+ if stored_create_time is not None:
180
+ # Allow 1 second tolerance for creation time comparison
181
+ if abs(proc.create_time() - stored_create_time) > 1.0:
182
+ # Different process reused the PID - stale lock
183
+ self.lock_file.unlink(missing_ok=True)
184
+ return True
185
+ cmdline = " ".join(proc.cmdline())
186
+ if "autonomous_agent_demo.py" in cmdline:
187
+ return False # Another agent is running
188
+ except (psutil.NoSuchProcess, psutil.AccessDenied):
189
+ pass
190
+ # Stale lock file
191
+ self.lock_file.unlink(missing_ok=True)
192
+ return True
193
+ except (ValueError, OSError):
194
+ self.lock_file.unlink(missing_ok=True)
195
+ return True
196
+
197
+ def _create_lock(self) -> bool:
198
+ """Atomically create lock file with current process PID and creation time.
199
+
200
+ Returns:
201
+ True if lock was created successfully, False if lock already exists.
202
+ """
203
+ self.lock_file.parent.mkdir(parents=True, exist_ok=True)
204
+ if not self.process:
205
+ return False
206
+
207
+ try:
208
+ # Get process creation time for PID reuse detection
209
+ create_time = psutil.Process(self.process.pid).create_time()
210
+ lock_content = f"{self.process.pid}:{create_time}"
211
+
212
+ # Atomic lock creation using O_CREAT | O_EXCL
213
+ # This prevents TOCTOU race conditions
214
+ import os
215
+ fd = os.open(str(self.lock_file), os.O_CREAT | os.O_EXCL | os.O_WRONLY)
216
+ os.write(fd, lock_content.encode())
217
+ os.close(fd)
218
+ return True
219
+ except FileExistsError:
220
+ # Another process beat us to it
221
+ return False
222
+ except (psutil.NoSuchProcess, OSError) as e:
223
+ logger.warning(f"Failed to create lock file: {e}")
224
+ return False
225
+
226
+ def _remove_lock(self) -> None:
227
+ """Remove lock file."""
228
+ self.lock_file.unlink(missing_ok=True)
229
+
230
+ async def _broadcast_output(self, line: str) -> None:
231
+ """Broadcast output line to all registered callbacks."""
232
+ with self._callbacks_lock:
233
+ callbacks = list(self._output_callbacks)
234
+
235
+ for callback in callbacks:
236
+ await self._safe_callback(callback, line)
237
+
238
+ async def _stream_output(self) -> None:
239
+ """Stream process output to callbacks."""
240
+ if not self.process or not self.process.stdout:
241
+ return
242
+
243
+ auth_error_detected = False
244
+ output_buffer = [] # Buffer recent lines for auth error detection
245
+
246
+ try:
247
+ loop = asyncio.get_running_loop()
248
+ while True:
249
+ # Use run_in_executor for blocking readline
250
+ line = await loop.run_in_executor(
251
+ None, self.process.stdout.readline
252
+ )
253
+ if not line:
254
+ break
255
+
256
+ decoded = line.decode("utf-8", errors="replace").rstrip()
257
+ sanitized = sanitize_output(decoded)
258
+
259
+ # Buffer recent output for auth error detection
260
+ output_buffer.append(decoded)
261
+ if len(output_buffer) > 20:
262
+ output_buffer.pop(0)
263
+
264
+ # Check for auth errors
265
+ if not auth_error_detected and is_auth_error(decoded):
266
+ auth_error_detected = True
267
+ # Broadcast auth error help message
268
+ for help_line in AUTH_ERROR_HELP.strip().split('\n'):
269
+ await self._broadcast_output(help_line)
270
+
271
+ await self._broadcast_output(sanitized)
272
+
273
+ except asyncio.CancelledError:
274
+ raise
275
+ except Exception as e:
276
+ logger.warning(f"Output streaming error: {e}")
277
+ finally:
278
+ # Check if process ended
279
+ if self.process and self.process.poll() is not None:
280
+ exit_code = self.process.returncode
281
+ if exit_code != 0 and self.status == "running":
282
+ # Check buffered output for auth errors if we haven't detected one yet
283
+ if not auth_error_detected:
284
+ combined_output = '\n'.join(output_buffer)
285
+ if is_auth_error(combined_output):
286
+ for help_line in AUTH_ERROR_HELP.strip().split('\n'):
287
+ await self._broadcast_output(help_line)
288
+ self.status = "crashed"
289
+ elif self.status == "running":
290
+ self.status = "stopped"
291
+ self._remove_lock()
292
+
293
+ async def start(
294
+ self,
295
+ yolo_mode: bool = False,
296
+ model: str | None = None,
297
+ parallel_mode: bool = False,
298
+ max_concurrency: int | None = None,
299
+ testing_agent_ratio: int = 1,
300
+ playwright_headless: bool = True,
301
+ batch_size: int = 3,
302
+ ) -> tuple[bool, str]:
303
+ """
304
+ Start the agent as a subprocess.
305
+
306
+ Args:
307
+ yolo_mode: If True, run in YOLO mode (skip testing agents)
308
+ model: Model to use (e.g., claude-opus-4-5-20251101)
309
+ parallel_mode: DEPRECATED - ignored, always uses unified orchestrator
310
+ max_concurrency: Max concurrent coding agents (1-5, default 1)
311
+ testing_agent_ratio: Number of regression testing agents (0-3, default 1)
312
+ playwright_headless: If True, run browser in headless mode
313
+
314
+ Returns:
315
+ Tuple of (success, message)
316
+ """
317
+ if self.status in ("running", "paused"):
318
+ return False, f"Agent is already {self.status}"
319
+
320
+ if not self._check_lock():
321
+ return False, "Another agent instance is already running for this project"
322
+
323
+ # Store for status queries
324
+ self.yolo_mode = yolo_mode
325
+ self.model = model
326
+ self.parallel_mode = True # Always True now (unified orchestrator)
327
+ self.max_concurrency = max_concurrency or 1
328
+ self.testing_agent_ratio = testing_agent_ratio
329
+
330
+ # Build command - unified orchestrator with --concurrency
331
+ cmd = [
332
+ sys.executable,
333
+ "-u", # Force unbuffered stdout/stderr for real-time output
334
+ str(self.root_dir / "autonomous_agent_demo.py"),
335
+ "--project-dir",
336
+ str(self.project_dir.resolve()),
337
+ ]
338
+
339
+ # Add --model flag if model is specified
340
+ if model:
341
+ cmd.extend(["--model", model])
342
+
343
+ # Add --yolo flag if YOLO mode is enabled
344
+ if yolo_mode:
345
+ cmd.append("--yolo")
346
+
347
+ # Add --concurrency flag (unified orchestrator always uses this)
348
+ cmd.extend(["--concurrency", str(max_concurrency or 1)])
349
+
350
+ # Add testing agent configuration
351
+ cmd.extend(["--testing-ratio", str(testing_agent_ratio)])
352
+
353
+ # Add --batch-size flag for multi-feature batching
354
+ cmd.extend(["--batch-size", str(batch_size)])
355
+
356
+ try:
357
+ # Start subprocess with piped stdout/stderr
358
+ # Use project_dir as cwd so Claude SDK sandbox allows access to project files
359
+ # stdin=DEVNULL prevents blocking if Claude CLI or child process tries to read stdin
360
+ # CREATE_NO_WINDOW on Windows prevents console window pop-ups
361
+ # PYTHONUNBUFFERED ensures output isn't delayed
362
+ popen_kwargs: dict[str, Any] = {
363
+ "stdin": subprocess.DEVNULL,
364
+ "stdout": subprocess.PIPE,
365
+ "stderr": subprocess.STDOUT,
366
+ "cwd": str(self.project_dir),
367
+ "env": {**os.environ, "PYTHONUNBUFFERED": "1", "PLAYWRIGHT_HEADLESS": "true" if playwright_headless else "false"},
368
+ }
369
+ if sys.platform == "win32":
370
+ popen_kwargs["creationflags"] = subprocess.CREATE_NO_WINDOW
371
+
372
+ self.process = subprocess.Popen(cmd, **popen_kwargs)
373
+
374
+ # Atomic lock creation - if it fails, another process beat us
375
+ if not self._create_lock():
376
+ # Kill the process we just started since we couldn't get the lock
377
+ self.process.terminate()
378
+ try:
379
+ self.process.wait(timeout=5)
380
+ except subprocess.TimeoutExpired:
381
+ self.process.kill()
382
+ self.process = None
383
+ return False, "Another agent instance is already running for this project"
384
+
385
+ self.started_at = datetime.now()
386
+ self.status = "running"
387
+
388
+ # Start output streaming task
389
+ self._output_task = asyncio.create_task(self._stream_output())
390
+
391
+ return True, f"Agent started with PID {self.process.pid}"
392
+ except Exception as e:
393
+ logger.exception("Failed to start agent")
394
+ return False, f"Failed to start agent: {e}"
395
+
396
+ async def stop(self) -> tuple[bool, str]:
397
+ """
398
+ Stop the agent and all its child processes (SIGTERM then SIGKILL if needed).
399
+
400
+ CRITICAL: Kills entire process tree to prevent orphaned coding/testing agents.
401
+
402
+ Returns:
403
+ Tuple of (success, message)
404
+ """
405
+ if not self.process or self.status == "stopped":
406
+ return False, "Agent is not running"
407
+
408
+ try:
409
+ # Cancel output streaming
410
+ if self._output_task:
411
+ self._output_task.cancel()
412
+ try:
413
+ await self._output_task
414
+ except asyncio.CancelledError:
415
+ pass
416
+
417
+ # CRITICAL: Kill entire process tree, not just orchestrator
418
+ # This ensures all spawned coding/testing agents are also terminated
419
+ proc = self.process # Capture reference before async call
420
+ loop = asyncio.get_running_loop()
421
+ result = await loop.run_in_executor(None, kill_process_tree, proc, 10.0)
422
+ logger.debug(
423
+ "Process tree kill result: status=%s, children=%d (terminated=%d, killed=%d)",
424
+ result.status, result.children_found,
425
+ result.children_terminated, result.children_killed
426
+ )
427
+
428
+ self._remove_lock()
429
+ self.status = "stopped"
430
+ self.process = None
431
+ self.started_at = None
432
+ self.yolo_mode = False # Reset YOLO mode
433
+ self.model = None # Reset model
434
+ self.parallel_mode = False # Reset parallel mode
435
+ self.max_concurrency = None # Reset concurrency
436
+ self.testing_agent_ratio = 1 # Reset testing ratio
437
+
438
+ return True, "Agent stopped"
439
+ except Exception as e:
440
+ logger.exception("Failed to stop agent")
441
+ return False, f"Failed to stop agent: {e}"
442
+
443
+ async def pause(self) -> tuple[bool, str]:
444
+ """
445
+ Pause the agent using psutil for cross-platform support.
446
+
447
+ Returns:
448
+ Tuple of (success, message)
449
+ """
450
+ if not self.process or self.status != "running":
451
+ return False, "Agent is not running"
452
+
453
+ try:
454
+ proc = psutil.Process(self.process.pid)
455
+ proc.suspend()
456
+ self.status = "paused"
457
+ return True, "Agent paused"
458
+ except psutil.NoSuchProcess:
459
+ self.status = "crashed"
460
+ self._remove_lock()
461
+ return False, "Agent process no longer exists"
462
+ except Exception as e:
463
+ logger.exception("Failed to pause agent")
464
+ return False, f"Failed to pause agent: {e}"
465
+
466
+ async def resume(self) -> tuple[bool, str]:
467
+ """
468
+ Resume a paused agent.
469
+
470
+ Returns:
471
+ Tuple of (success, message)
472
+ """
473
+ if not self.process or self.status != "paused":
474
+ return False, "Agent is not paused"
475
+
476
+ try:
477
+ proc = psutil.Process(self.process.pid)
478
+ proc.resume()
479
+ self.status = "running"
480
+ return True, "Agent resumed"
481
+ except psutil.NoSuchProcess:
482
+ self.status = "crashed"
483
+ self._remove_lock()
484
+ return False, "Agent process no longer exists"
485
+ except Exception as e:
486
+ logger.exception("Failed to resume agent")
487
+ return False, f"Failed to resume agent: {e}"
488
+
489
+ async def healthcheck(self) -> bool:
490
+ """
491
+ Check if the agent process is still alive.
492
+
493
+ Updates status to 'crashed' if process has died unexpectedly.
494
+
495
+ Returns:
496
+ True if healthy, False otherwise
497
+ """
498
+ if not self.process:
499
+ return self.status == "stopped"
500
+
501
+ poll = self.process.poll()
502
+ if poll is not None:
503
+ # Process has terminated
504
+ if self.status in ("running", "paused"):
505
+ self.status = "crashed"
506
+ self._remove_lock()
507
+ return False
508
+
509
+ return True
510
+
511
+ def get_status_dict(self) -> dict:
512
+ """Get current status as a dictionary."""
513
+ return {
514
+ "status": self.status,
515
+ "pid": self.pid,
516
+ "started_at": self.started_at.isoformat() if self.started_at else None,
517
+ "yolo_mode": self.yolo_mode,
518
+ "model": self.model,
519
+ "parallel_mode": self.parallel_mode,
520
+ "max_concurrency": self.max_concurrency,
521
+ "testing_agent_ratio": self.testing_agent_ratio,
522
+ }
523
+
524
+
525
# Process-wide registry of AgentProcessManager instances, guarded by a lock.
# Entries are keyed by (project_name, resolved_project_dir) so that two
# projects sharing a name but living at different paths never share a manager.
_managers: dict[tuple[str, str], AgentProcessManager] = {}
_managers_lock = threading.Lock()


def get_manager(project_name: str, project_dir: Path, root_dir: Path) -> AgentProcessManager:
    """Return the process manager for a project, creating it on first use.

    Lookup and creation happen under the registry lock.

    Args:
        project_name: Name of the project
        project_dir: Absolute path to the project directory
        root_dir: Root directory of the autonomous-coding-ui project
    """
    with _managers_lock:
        # Composite key prevents cross-project UI contamination (#71)
        registry_key = (project_name, str(project_dir.resolve()))
        existing = _managers.get(registry_key)
        if existing is not None:
            return existing
        created = AgentProcessManager(project_name, project_dir, root_dir)
        _managers[registry_key] = created
        return created
546
+
547
+
548
async def cleanup_all_managers() -> None:
    """Stop every agent that is not already stopped, then empty the registry.

    Called on server shutdown. A failure to stop one manager is logged and
    does not prevent the remaining managers from being stopped.
    """
    # Snapshot under the lock so stop() calls run without holding it.
    with _managers_lock:
        snapshot = [*_managers.values()]

    for mgr in snapshot:
        try:
            if mgr.status == "stopped":
                continue
            await mgr.stop()
        except Exception as e:
            logger.warning(f"Error stopping manager for {mgr.project_name}: {e}")

    with _managers_lock:
        _managers.clear()
562
+
563
+
564
def _classify_agent_lock(lock_file: Path) -> tuple[str, int]:
    """Classify a lock file as 'running', 'pid_reused' or 'stale'.

    Returns:
        Tuple of (classification, pid read from the lock file).

    Raises:
        ValueError: If the lock content is not ``PID`` or ``PID:CREATE_TIME``.
        OSError: If the lock file cannot be read.
    """
    lock_content = lock_file.read_text().strip()
    # Support both legacy format (just PID) and new format (PID:CREATE_TIME)
    if ":" in lock_content:
        pid_str, create_time_str = lock_content.split(":", 1)
        pid = int(pid_str)
        stored_create_time = float(create_time_str)
    else:
        # Legacy format - just PID
        pid = int(lock_content)
        stored_create_time = None

    if psutil.pid_exists(pid):
        try:
            proc = psutil.Process(pid)
            # Verify it's the same process using creation time (handles PID reuse)
            if stored_create_time is not None:
                if abs(proc.create_time() - stored_create_time) > 1.0:
                    return "pid_reused", pid
            if "autonomous_agent_demo.py" in " ".join(proc.cmdline()):
                return "running", pid
        except (psutil.NoSuchProcess, psutil.AccessDenied):
            pass

    # Process not running or not our agent
    return "stale", pid


def cleanup_orphaned_locks() -> int:
    """
    Clean up orphaned lock files from previous server runs.

    Scans all registered projects for .agent.lock files (both the legacy
    location in the project root and the autoforge directory) and removes
    each one whose referenced process is no longer a running agent. Lock
    files with invalid content are removed as well.

    Returns:
        Number of orphaned lock files cleaned up
    """
    root = Path(__file__).parent.parent.parent
    if str(root) not in sys.path:
        sys.path.insert(0, str(root))

    from registry import list_registered_projects

    cleaned = 0
    try:
        from autoforge_paths import get_autoforge_dir

        projects = list_registered_projects()
        for name, info in projects.items():
            raw_path = info.get("path", "")
            if not raw_path:
                # Path("") is the current directory, which always "exists";
                # never scan it on behalf of a project without a path.
                continue
            project_path = Path(raw_path)
            if not project_path.exists():
                continue

            # Check both legacy and new locations for lock files; dict.fromkeys
            # drops the duplicate if both resolve to the same path.
            lock_locations = dict.fromkeys([
                project_path / ".agent.lock",
                get_autoforge_dir(project_path) / ".agent.lock",
            ])

            for lock_file in lock_locations:
                if not lock_file.exists():
                    continue

                try:
                    try:
                        verdict, pid = _classify_agent_lock(lock_file)
                    except (ValueError, OSError) as e:
                        # Invalid lock file content - remove it
                        logger.warning(
                            "Removing invalid lock file for project '%s': %s", name, e
                        )
                        lock_file.unlink(missing_ok=True)
                        cleaned += 1
                        continue

                    if verdict == "running":
                        # Process is still running, don't remove
                        logger.info(
                            "Found running agent for project '%s' (PID %d)",
                            name, pid
                        )
                        continue

                    lock_file.unlink(missing_ok=True)
                    cleaned += 1
                    if verdict == "pid_reused":
                        logger.info("Removed orphaned lock file for project '%s' (PID reused)", name)
                    else:
                        logger.info("Removed orphaned lock file for project '%s'", name)
                except OSError as e:
                    # A lock that cannot be deleted must not abort the scan
                    # of the remaining lock files and projects.
                    logger.warning(
                        "Could not remove lock file for project '%s': %s", name, e
                    )

    except Exception as e:
        logger.error("Error during orphan cleanup: %s", e)

    if cleaned:
        logger.info("Cleaned up %d orphaned lock file(s)", cleaned)

    return cleaned