pactown 0.1.4__py3-none-any.whl → 0.1.47__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,20 +1,143 @@
 """Sandbox manager for pactown services."""
 
+import json
+import logging
 import os
+import re
 import shutil
+import stat
+import signal
 import subprocess
+import time
+import socket
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from dataclasses import dataclass, field
+from datetime import datetime, UTC
 from pathlib import Path
-from typing import Optional, Any, Callable
-import signal
-import time
 from threading import Lock
+from typing import Callable, Optional, List, Dict, Any
 
-from markpact import Sandbox, parse_blocks, run_cmd, ensure_venv
+from markpact import Sandbox, ensure_venv
 from markpact.runner import install_deps
 
 from .config import ServiceConfig
+from .markpact_blocks import parse_blocks
+
+# Configure detailed logging
+logger = logging.getLogger("pactown.sandbox")
+logger.setLevel(logging.DEBUG)
+
+# File handler for persistent logs
+LOG_DIR = Path("/tmp/pactown-logs")
+LOG_DIR.mkdir(parents=True, exist_ok=True)
+_log_path = str(LOG_DIR / "sandbox.log")
+if not any(
+    isinstance(h, logging.FileHandler) and getattr(h, "baseFilename", None) == _log_path
+    for h in logger.handlers
+):
+    file_handler = logging.FileHandler(_log_path)
+    file_handler.setFormatter(logging.Formatter(
+        '%(asctime)s [%(levelname)s] %(name)s: %(message)s'
+    ))
+    logger.addHandler(file_handler)
+
+
+def _path_debug(path: Path) -> str:
+    try:
+        st = path.stat() if path.exists() else None
+        mode = oct(st.st_mode & 0o777) if st else "-"
+        uid = st.st_uid if st else "-"
+        gid = st.st_gid if st else "-"
+    except Exception:
+        mode, uid, gid = "?", "?", "?"
+    try:
+        readable = os.access(path, os.R_OK)
+        writable = os.access(path, os.W_OK)
+        executable = os.access(path, os.X_OK)
+    except Exception:
+        readable, writable, executable = False, False, False
+    return (
+        f"path={path} exists={path.exists()} is_dir={path.is_dir()} is_file={path.is_file()} "
+        f"mode={mode} uid={uid} gid={gid} access=r{int(readable)}w{int(writable)}x{int(executable)}"
+    )
+
+
+def _escape_dotenv_value(value: str) -> str:
+    v = str(value)
+    v = v.replace("\\", "\\\\")
+    v = v.replace("\n", "\\n")
+    v = v.replace("\r", "\\r")
+    v = v.replace('"', '\\"')
+    return f'"{v}"'
+
+
+def _write_dotenv_file(sandbox_path: Path, env: dict[str, str]) -> None:
+    lines: list[str] = []
+    for key, value in (env or {}).items():
+        if value is None:
+            continue
+        if not isinstance(key, str):
+            continue
+        if not re.match(r"^[A-Za-z_][A-Za-z0-9_]*$", key):
+            continue
+        lines.append(f"{key}={_escape_dotenv_value(str(value))}")
+
+    dotenv_path = sandbox_path / ".env"
+    dotenv_path.write_text("\n".join(lines) + ("\n" if lines else ""))
+    try:
+        dotenv_path.chmod(0o600)
+    except Exception:
+        pass
+
+
+def _sandbox_fallback_ids() -> tuple[int, int]:
+    try:
+        uid = int(os.environ.get("PACTOWN_SANDBOX_UID", "65534"))
+    except Exception:
+        uid = 65534
+    try:
+        gid = int(os.environ.get("PACTOWN_SANDBOX_GID", str(uid)))
+    except Exception:
+        gid = uid
+    return uid, gid
+
+
+def _chown_sandbox_tree(sandbox_path: Path, uid: int, gid: int) -> None:
+    try:
+        os.chown(sandbox_path, uid, gid)
+    except Exception:
+        pass
+    try:
+        sandbox_path.chmod(0o700)
+    except Exception:
+        pass
+
+    for root, dirnames, filenames in os.walk(sandbox_path):
+        if ".venv" in dirnames:
+            dirnames.remove(".venv")
+
+        root_path = Path(root)
+        try:
+            os.chown(root_path, uid, gid)
+        except Exception:
+            pass
+        try:
+            root_path.chmod(0o700)
+        except Exception:
+            pass
+
+        for name in filenames:
+            p = root_path / name
+            try:
+                st = os.lstat(p)
+                if stat.S_ISLNK(st.st_mode):
+                    continue
+            except Exception:
+                continue
+            try:
+                os.chown(p, uid, gid)
+            except Exception:
+                pass
 
 
 @dataclass
@@ -26,13 +149,19 @@ class ServiceProcess:
     sandbox_path: Path
     process: Optional[subprocess.Popen] = None
     started_at: float = field(default_factory=time.time)
-
+
     @property
     def is_running(self) -> bool:
         if self.process:
             return self.process.poll() is None
         try:
             os.kill(self.pid, 0)
+            if self.port:
+                try:
+                    with socket.create_connection(("127.0.0.1", int(self.port)), timeout=0.2):
+                        return True
+                except OSError:
+                    return False
             return True
         except OSError:
             return False
@@ -40,95 +169,339 @@ class ServiceProcess:
 
 class SandboxManager:
     """Manages sandboxes for multiple services."""
-
+
     def __init__(self, sandbox_root: str | Path):
         self.sandbox_root = Path(sandbox_root)
         self.sandbox_root.mkdir(parents=True, exist_ok=True)
         self._processes: dict[str, ServiceProcess] = {}
-
+
     def get_sandbox_path(self, service_name: str) -> Path:
         """Get sandbox path for a service."""
         return self.sandbox_root / service_name
-
-    def create_sandbox(self, service: ServiceConfig, readme_path: Path) -> Sandbox:
+
+    def create_sandbox(
+        self,
+        service: ServiceConfig,
+        readme_path: Path,
+        install_dependencies: bool = True,
+        on_log: Optional[Callable[[str], None]] = None,
+    ) -> Sandbox:
         """Create a sandbox for a service from its README."""
+        def dbg(msg: str, level: str = "DEBUG"):
+            if on_log:
+                on_log(msg)
+            else:
+                logger.log(getattr(logging, level), f"[{service.name}] {msg}")
+
         sandbox_path = self.get_sandbox_path(service.name)
-
+
+        dbg(f"Sandbox root: {_path_debug(self.sandbox_root)}", "DEBUG")
+        dbg(f"Sandbox path: {_path_debug(sandbox_path)}", "DEBUG")
+        dbg(f"README path: {_path_debug(readme_path)}", "DEBUG")
+        dbg(f"UID/EUID/GID: uid={os.getuid()} euid={os.geteuid()} gid={os.getgid()}", "DEBUG")
+
         if sandbox_path.exists():
+            dbg(f"Removing existing sandbox: {sandbox_path}", "INFO")
             shutil.rmtree(sandbox_path)
-        sandbox_path.mkdir(parents=True)
-
+        sandbox_path.mkdir(parents=True, exist_ok=False)
+        dbg(f"Created sandbox dir: {_path_debug(sandbox_path)}", "DEBUG")
+
         sandbox = Sandbox(sandbox_path)
-
+
         readme_content = readme_path.read_text()
+        dbg(f"Read README bytes={len(readme_content.encode('utf-8', errors='replace'))}", "DEBUG")
         blocks = parse_blocks(readme_content)
-
-        deps = []
-        run_command = None
-
+
+        kind_counts: dict[str, int] = {}
+        for b in blocks:
+            kind_counts[b.kind] = kind_counts.get(b.kind, 0) + 1
+        dbg(f"Parsed markpact blocks: total={len(blocks)} kinds={kind_counts}", "DEBUG")
+
+        deps: list[str] = []
+
         for block in blocks:
             if block.kind == "deps":
                 deps.extend(block.body.strip().split("\n"))
             elif block.kind == "file":
                 file_path = block.get_path() or "main.py"
+                dbg(f"Writing file: {file_path} (chars={len(block.body)})", "DEBUG")
                 sandbox.write_file(file_path, block.body)
             elif block.kind == "run":
-                run_command = block.body.strip()
-
-        if deps:
-            ensure_venv(sandbox, verbose=False)
-            install_deps([d for d in deps if d.strip()], sandbox, verbose=False)
-
+                block.body.strip()
+
+        deps_clean = [d.strip() for d in deps if d.strip()]
+        if deps_clean:
+            # Always write requirements.txt so the sandbox can be used as a container build context
+            dbg(f"Dependencies detected: count={len(deps_clean)}", "INFO")
+            sandbox.write_requirements(deps_clean)
+            dbg(f"Wrote requirements.txt: {_path_debug(sandbox.path / 'requirements.txt')}", "DEBUG")
+
+            if install_dependencies:
+                dbg(f"Creating venv (.venv) in sandbox", "INFO")
+                try:
+                    ensure_venv(sandbox, verbose=False)
+                    dbg(f"Venv status: {_path_debug(sandbox.path / '.venv')}", "DEBUG")
+                except Exception as e:
+                    dbg(f"ensure_venv failed: {e}", "ERROR")
+                    raise
+                dbg("Installing dependencies via pip", "INFO")
+                try:
+                    install_deps(deps_clean, sandbox, verbose=False)
+                    dbg("Dependencies installed", "INFO")
+                except Exception as e:
+                    dbg(f"install_deps failed: {e}", "ERROR")
+                    raise
+        else:
+            dbg("No dependencies block found", "DEBUG")
+
         return sandbox
-
+
     def start_service(
         self,
         service: ServiceConfig,
         readme_path: Path,
         env: dict[str, str],
         verbose: bool = True,
+        restart_if_running: bool = False,
+        on_log: Optional[Callable[[str], None]] = None,
+        user_id: Optional[str] = None,
     ) -> ServiceProcess:
-        """Start a service in its sandbox."""
+        """Start a service in its sandbox.
+
+        Args:
+            service: Service configuration
+            readme_path: Path to README.md with markpact blocks
+            env: Environment variables to pass to the service
+            verbose: Print status messages
+            restart_if_running: If True, stop and restart if already running
+            on_log: Callback for detailed logging
+            user_id: Optional SaaS user ID for process isolation
+        """
+        def log(msg: str, level: str = "INFO"):
+            logger.log(getattr(logging, level), f"[{service.name}] {msg}")
+            if on_log:
+                on_log(msg)
+            if verbose:
+                print(msg)
+
+        log(f"Starting service: {service.name}", "INFO")
+        log(f"Port: {service.port}, README: {readme_path}", "DEBUG")
+        log(f"Runner UID/EUID/GID: uid={os.getuid()} euid={os.geteuid()} gid={os.getgid()}", "DEBUG")
+        log(f"Sandbox root: {_path_debug(self.sandbox_root)}", "DEBUG")
+        log(f"README: {_path_debug(readme_path)}", "DEBUG")
+
         if service.name in self._processes:
             existing = self._processes[service.name]
             if existing.is_running:
-                raise RuntimeError(f"Service {service.name} is already running")
-
-        sandbox = self.create_sandbox(service, readme_path)
-
+                if restart_if_running:
+                    log(f"Restarting {service.name}...", "INFO")
+                    self.stop_service(service.name)
+                    self.clean_sandbox(service.name)
+                else:
+                    log(f"Service {service.name} already running", "ERROR")
+                    raise RuntimeError(f"Service {service.name} is already running")
+
+        # Create sandbox with dependency installation
+        log("Creating sandbox and installing dependencies...", "INFO")
+        try:
+            sandbox = self.create_sandbox(
+                service,
+                readme_path,
+                install_dependencies=True,
+                on_log=log,
+            )
+            log(f"Sandbox created at: {sandbox.path}", "INFO")
+        except Exception as e:
+            log(f"Failed to create sandbox: {e}", "ERROR")
+            logger.exception(f"Sandbox creation failed for {service.name}")
+            raise
+
         readme_content = readme_path.read_text()
         blocks = parse_blocks(readme_content)
-
+
         run_command = None
         for block in blocks:
             if block.kind == "run":
                 run_command = block.body.strip()
                 break
-
+
         if not run_command:
+            log(f"No run command found in README", "ERROR")
             raise ValueError(f"No run command found in {readme_path}")
-
+
+        log(f"Run command: {run_command}", "DEBUG")
+
         full_env = os.environ.copy()
         full_env.update(env)
 
-        if sandbox.has_venv:
+        # Ensure PORT is always set in environment
+        full_env["PORT"] = str(service.port)
+        full_env["MARKPACT_PORT"] = str(service.port)
+
+        dotenv_env = dict(env or {})
+        dotenv_env["PORT"] = str(service.port)
+        dotenv_env["MARKPACT_PORT"] = str(service.port)
+        _write_dotenv_file(sandbox.path, dotenv_env)
+
+        if sandbox.has_venv():
             venv_bin = str(sandbox.venv_bin)
             full_env["PATH"] = f"{venv_bin}:{full_env.get('PATH', '')}"
             full_env["VIRTUAL_ENV"] = str(sandbox.path / ".venv")
+            log(f"Using venv: {sandbox.path / '.venv'}", "DEBUG")
+        else:
+            log("WARNING: No venv found, using system Python", "WARNING")
+
+        # Expand $PORT in command
+        expanded_cmd = run_command.replace("$PORT", str(service.port))
+        expanded_cmd = expanded_cmd.replace("${PORT}", str(service.port))
+        expanded_cmd = expanded_cmd.replace("${MARKPACT_PORT}", str(service.port))
+        expanded_cmd = expanded_cmd.replace("$MARKPACT_PORT", str(service.port))
+
+        # Replace hardcoded ports in run command with the requested port
+        # This handles cases where LLM generates hardcoded ports like --port 8000
+        import re
+        port_patterns = [
+            (r'--port[=\s]+(\d+)', f'--port {service.port}'),  # --port 8000 or --port=8000
+            (r'-p[=\s]+(\d+)', f'-p {service.port}'),  # -p 8000 or -p=8000
+            (r':(\d{4,5})(?=\s|$|")', f':{service.port}'),  # :8000 at end of string
+        ]
 
-        if verbose:
-            print(f"Starting {service.name} on port {service.port}...")
-
+        original_cmd = expanded_cmd
+        for pattern, replacement in port_patterns:
+            match = re.search(pattern, expanded_cmd)
+            if match:
+                old_port = match.group(1) if match.groups() else None
+                if old_port and old_port != str(service.port):
+                    log(f"Replacing hardcoded port {old_port} with {service.port}", "INFO")
+                    expanded_cmd = re.sub(pattern, replacement, expanded_cmd)
+
+        if expanded_cmd != original_cmd:
+            log(f"Port-corrected command: {expanded_cmd}", "INFO")
+        else:
+            log(f"Expanded command: {expanded_cmd}", "DEBUG")
+
+        # Remove --reload flag from uvicorn commands in sandbox environments
+        # --reload uses multiprocessing which can crash in Docker containers
+        if "--reload" in expanded_cmd and "uvicorn" in expanded_cmd:
+            expanded_cmd = re.sub(r'\s*--reload\s*', ' ', expanded_cmd)
+            log(f"Removed --reload flag (not compatible with sandbox): {expanded_cmd}", "INFO")
+
+        log(f"Starting process...", "INFO")
+
+        # Use user isolation if user_id provided
+        preexec = os.setsid
+        if user_id:
+            try:
+                from .user_isolation import get_isolation_manager
+                isolation = get_isolation_manager()
+                try:
+                    can_isolate, reason = isolation.can_isolate()
+                    log(f"Isolation capability: can_isolate={can_isolate} reason={reason}", "DEBUG")
+                except Exception as e:
+                    log(f"Isolation capability check failed: {e}", "WARNING")
+                user = isolation.get_or_create_user(user_id)
+                log(f"🔒 Running as isolated user: {user.linux_username} (uid={user.linux_uid})", "INFO")
+
+                # Update env with user-specific settings
+                full_env["HOME"] = str(user.home_dir)
+                full_env["USER"] = user.linux_username
+                full_env["LOGNAME"] = user.linux_username
+
+                # Create preexec function for user switching
+                def preexec():
+                    os.setsid()
+                    if os.geteuid() == 0:
+                        os.setgid(user.linux_gid)
+                        os.setuid(user.linux_uid)
+            except Exception as e:
+                log(f"⚠️ User isolation not available: {e} - using sandbox uid", "WARNING")
+                if os.geteuid() == 0:
+                    uid, gid = _sandbox_fallback_ids()
+                    log(f"Sandbox uid fallback: uid={uid} gid={gid}", "DEBUG")
+                    try:
+                        dotenv_path = sandbox.path / ".env"
+                        if dotenv_path.exists():
+                            os.chown(dotenv_path, uid, gid)
+                    except Exception:
+                        pass
+                    _chown_sandbox_tree(sandbox.path, uid, gid)
+
+                    def preexec():
+                        os.setsid()
+                        os.setgid(gid)
+                        os.setuid(uid)
+
+        # Always capture stderr for debugging
         process = subprocess.Popen(
-            run_command,
+            expanded_cmd,
             shell=True,
             cwd=str(sandbox.path),
             env=full_env,
-            stdout=subprocess.PIPE if not verbose else None,
-            stderr=subprocess.PIPE if not verbose else None,
-            preexec_fn=os.setsid,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            preexec_fn=preexec,
         )
-
+
+        log(f"Process started with PID: {process.pid}", "INFO")
+
+        # Wait briefly for process to start (but don't block too long)
+        time.sleep(0.2)
+
+        # Check if process died immediately
+        poll_result = process.poll()
+        if poll_result is not None:
+            # Process already died - capture all output
+            exit_code = poll_result
+            stderr = ""
+            stdout = ""
+
+            try:
+                # Read all output with timeout
+                stdout_bytes, stderr_bytes = process.communicate(timeout=2)
+                stderr = stderr_bytes.decode('utf-8', errors='replace') if stderr_bytes else ""
+                stdout = stdout_bytes.decode('utf-8', errors='replace') if stdout_bytes else ""
+            except Exception as e:
+                log(f"Could not read process output: {e}", "WARNING")
+                if process.stderr:
+                    try:
+                        stderr = process.stderr.read().decode('utf-8', errors='replace')
+                    except:
+                        pass
+
+            # Interpret exit code
+            if exit_code < 0:
+                signal_name = {
+                    -9: "SIGKILL",
+                    -15: "SIGTERM",
+                    -11: "SIGSEGV",
+                    -6: "SIGABRT",
+                }.get(exit_code, f"signal {-exit_code}")
+                log(f"Process killed by {signal_name} (exit code: {exit_code})", "ERROR")
+            else:
+                log(f"Process exited with code: {exit_code}", "ERROR")
+
+            if stderr:
+                log(f"STDERR:\n{stderr[:2000]}", "ERROR")
+            if stdout:
+                log(f"STDOUT:\n{stdout[:1000]}", "DEBUG")
+
+            # Write to error log file
+            error_log = LOG_DIR / f"{service.name}_error.log"
+            with open(error_log, "w") as f:
+                f.write(f"Exit code: {exit_code}\n")
+                f.write(f"Command: {expanded_cmd}\n")
+                f.write(f"CWD: {sandbox.path}\n")
+                f.write(f"Venv: {sandbox.path / '.venv'}\n")
+                f.write(f"\n--- STDERR ---\n{stderr}\n")
+                f.write(f"\n--- STDOUT ---\n{stdout}\n")
+                # List files for debugging
+                try:
+                    files = list(sandbox.path.glob("*"))
+                    f.write(f"\n--- FILES ---\n{[str(f) for f in files]}\n")
+                except:
+                    pass
+            log(f"Error log written to: {error_log}", "DEBUG")
+
         svc_process = ServiceProcess(
             name=service.name,
             pid=process.pid,
@@ -136,52 +509,80 @@ class SandboxManager:
             sandbox_path=sandbox.path,
             process=process,
         )
-
+
         self._processes[service.name] = svc_process
+
+        # Log sandbox contents for debugging
+        try:
+            files = list(sandbox.path.glob("*"))
+            log(f"Sandbox files: {[f.name for f in files]}", "DEBUG")
+        except:
+            pass
+
         return svc_process
-
+
     def stop_service(self, service_name: str, timeout: int = 10) -> bool:
         """Stop a running service."""
         if service_name not in self._processes:
+            logger.debug(f"Service {service_name} not in tracked processes")
             return False
-
+
         svc = self._processes[service_name]
-
+        old_pid = svc.pid
+
         if not svc.is_running:
+            logger.debug(f"Service {service_name} (PID {old_pid}) already stopped")
             del self._processes[service_name]
             return True
+
+        logger.info(f"Stopping service {service_name} (PID {old_pid})")
 
         try:
-            os.killpg(os.getpgid(svc.pid), signal.SIGTERM)
+            pgid = os.getpgid(old_pid)
+            os.killpg(pgid, signal.SIGTERM)
+            logger.debug(f"Sent SIGTERM to process group {pgid}")
         except ProcessLookupError:
+            logger.debug(f"Process {old_pid} already gone")
             del self._processes[service_name]
             return True
-
+        except OSError as e:
+            logger.warning(f"Error getting pgid for {old_pid}: {e}")
+            # Try killing just the process
+            try:
+                os.kill(old_pid, signal.SIGTERM)
+            except ProcessLookupError:
+                pass
+
         deadline = time.time() + timeout
         while time.time() < deadline:
             if not svc.is_running:
                 break
             time.sleep(0.1)
-
+
         if svc.is_running:
+            logger.warning(f"Service {service_name} didn't stop gracefully, sending SIGKILL")
             try:
-                os.killpg(os.getpgid(svc.pid), signal.SIGKILL)
-            except ProcessLookupError:
+                os.killpg(os.getpgid(old_pid), signal.SIGKILL)
+            except (ProcessLookupError, OSError):
                 pass
-
+
         del self._processes[service_name]
+
+        # Wait for OS to clean up the process
+        time.sleep(0.3)
+        logger.info(f"Service {service_name} stopped")
         return True
-
+
     def stop_all(self, timeout: int = 10) -> None:
         """Stop all running services."""
         for name in list(self._processes.keys()):
             self.stop_service(name, timeout)
-
+
     def get_status(self, service_name: str) -> Optional[dict]:
         """Get status of a service."""
         if service_name not in self._processes:
             return None
-
+
         svc = self._processes[service_name]
         return {
             "name": svc.name,
@@ -191,7 +592,7 @@ class SandboxManager:
             "uptime": time.time() - svc.started_at,
             "sandbox": str(svc.sandbox_path),
         }
-
+
     def get_all_status(self) -> list[dict]:
         """Get status of all services."""
         return [
@@ -199,19 +600,19 @@ class SandboxManager:
             for name in self._processes
             if self.get_status(name)
         ]
-
+
     def clean_sandbox(self, service_name: str) -> None:
         """Remove sandbox directory for a service."""
         sandbox_path = self.get_sandbox_path(service_name)
         if sandbox_path.exists():
             shutil.rmtree(sandbox_path)
-
+
     def clean_all(self) -> None:
         """Remove all sandbox directories."""
         if self.sandbox_root.exists():
             shutil.rmtree(self.sandbox_root)
         self.sandbox_root.mkdir(parents=True)
-
+
     def create_sandboxes_parallel(
         self,
         services: list[tuple[ServiceConfig, Path]],
@@ -220,36 +621,36 @@ class SandboxManager:
     ) -> dict[str, Sandbox]:
         """
         Create sandboxes for multiple services in parallel.
-
+
         Args:
             services: List of (ServiceConfig, readme_path) tuples
             max_workers: Maximum parallel workers
             on_complete: Callback(name, success, duration)
-
+
         Returns:
             Dict of {service_name: Sandbox}
         """
         results: dict[str, Sandbox] = {}
         errors: dict[str, str] = {}
         lock = Lock()
-
+
         def create_one(service: ServiceConfig, readme_path: Path) -> tuple[str, Sandbox]:
             sandbox = self.create_sandbox(service, readme_path)
             return service.name, sandbox
-
+
         with ThreadPoolExecutor(max_workers=max_workers) as executor:
             futures = {}
             start_times = {}
-
+
             for service, readme_path in services:
                 start_times[service.name] = time.time()
                 future = executor.submit(create_one, service, readme_path)
                 futures[future] = service.name
-
+
             for future in as_completed(futures):
                 name = futures[future]
                 duration = time.time() - start_times[name]
-
+
                 try:
                     _, sandbox = future.result()
                     with lock:
@@ -261,13 +662,13 @@ class SandboxManager:
                         errors[name] = str(e)
                     if on_complete:
                         on_complete(name, False, duration)
-
+
         if errors:
             error_msg = "; ".join(f"{k}: {v}" for k, v in errors.items())
             raise RuntimeError(f"Failed to create sandboxes: {error_msg}")
-
+
         return results
-
+
     def start_services_parallel(
         self,
         services: list[tuple[ServiceConfig, Path, dict[str, str]]],
@@ -276,43 +677,43 @@ class SandboxManager:
     ) -> dict[str, ServiceProcess]:
         """
         Start multiple services in parallel.
-
+
         Note: Should only be used for services with no inter-dependencies.
         For dependent services, use the orchestrator's wave-based approach.
-
+
         Args:
             services: List of (ServiceConfig, readme_path, env) tuples
             max_workers: Maximum parallel workers
             on_complete: Callback(name, success, duration)
-
+
         Returns:
             Dict of {service_name: ServiceProcess}
         """
         results: dict[str, ServiceProcess] = {}
         errors: dict[str, str] = {}
         lock = Lock()
-
+
         def start_one(
-            service: ServiceConfig,
-            readme_path: Path,
+            service: ServiceConfig,
+            readme_path: Path,
             env: dict[str, str]
         ) -> tuple[str, ServiceProcess]:
             proc = self.start_service(service, readme_path, env, verbose=False)
             return service.name, proc
-
+
         with ThreadPoolExecutor(max_workers=max_workers) as executor:
             futures = {}
             start_times = {}
-
+
             for service, readme_path, env in services:
                 start_times[service.name] = time.time()
                 future = executor.submit(start_one, service, readme_path, env)
                 futures[future] = service.name
-
+
             for future in as_completed(futures):
                 name = futures[future]
                 duration = time.time() - start_times[name]
-
+
                 try:
                     _, proc = future.result()
                     with lock:
@@ -324,5 +725,5 @@ class SandboxManager:
                         errors[name] = str(e)
                     if on_complete:
                         on_complete(name, False, duration)
-
+
         return results, errors
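
For orientation, a minimal usage sketch of the start_service surface introduced in 0.1.47 (restart_if_running, on_log, user_id). The module paths, the ServiceConfig constructor fields, the sandbox root, and the README path are assumptions inferred from this diff rather than documented API; markpact must be installed and the README must contain a run block for the call to succeed.

# Hedged sketch: drives SandboxManager as shown in the new version above.
# pactown.sandbox / pactown.config import paths and ServiceConfig(name=..., port=...)
# are assumptions; adjust to the actual package layout.
from pathlib import Path

from pactown.config import ServiceConfig
from pactown.sandbox import SandboxManager

manager = SandboxManager("/tmp/pactown-sandboxes")          # hypothetical sandbox root
service = ServiceConfig(name="web", port=8123)              # hypothetical service definition

proc = manager.start_service(
    service,
    readme_path=Path("README.md"),    # README with markpact deps/file/run blocks (assumed location)
    env={"APP_ENV": "dev"},
    verbose=True,
    restart_if_running=True,          # new flag: stop, clean, and recreate if already running
    on_log=print,                     # new callback: receives detailed progress lines
)
print(manager.get_status("web"))      # status dict includes name, uptime, and sandbox path
manager.stop_service("web")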