pactown 0.1.4__py3-none-any.whl → 0.1.47__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1201 @@
1
+ """
2
+ High-level service runner for markpact projects.
3
+
4
+ Provides a simple API to run services directly from markdown content
5
+ with health checks, restart support, and endpoint testing.
6
+ """
7
+
8
+ import asyncio
9
+ import os
10
+ import shutil
11
+ import signal
12
+ import subprocess
13
+ import sys
14
+ import time
15
+ from dataclasses import dataclass, field
16
+ from enum import Enum
17
+ from pathlib import Path
18
+ from typing import Any, Callable, Dict, List, Optional
19
+
20
+ import httpx
21
+
22
+
23
class ErrorCategory(str, Enum):
    """Coarse classification of why a service failed to start or run.

    Members also subclass ``str``, so each member compares equal to its
    plain string value.
    """

    # No error.
    NONE = "none"
    # Markpact content issues.
    VALIDATION = "validation"
    # pip install failures.
    DEPENDENCY = "dependency"
    # Address already in use.
    PORT_CONFLICT = "port_conflict"
    # Server didn't respond in time.
    STARTUP_TIMEOUT = "startup_timeout"
    # Process died unexpectedly.
    PROCESS_CRASH = "process_crash"
    # Python/venv issues.
    ENVIRONMENT = "environment"
    # File/directory access issues.
    PERMISSION = "permission"
    # Anything that fits none of the above.
    UNKNOWN = "unknown"
34
+
35
+
36
@dataclass
class DiagnosticInfo:
    """Environment diagnostics for debugging."""

    python_version: str = ""
    pip_version: str = ""
    disk_space_mb: int = 0
    sandbox_path: str = ""
    venv_exists: bool = False
    # Not populated by collect(); left for callers to fill in.
    installed_packages: List[str] = field(default_factory=list)

    @classmethod
    def collect(cls, sandbox_path: Optional[Path] = None) -> "DiagnosticInfo":
        """Collect diagnostic information on a best-effort basis.

        Each probe that fails leaves its field at the dataclass default
        instead of raising.

        Args:
            sandbox_path: Sandbox directory to inspect. When omitted, free
                disk space is measured on ``/tmp`` and the sandbox fields
                keep their defaults.

        Returns:
            A populated ``DiagnosticInfo``.
        """
        info = cls()

        # Python version of the interpreter running this code.
        info.python_version = ".".join(str(part) for part in sys.version_info[:3])

        # Pip version: the second whitespace-separated token of
        # `python -m pip --version` output.
        try:
            result = subprocess.run(
                [sys.executable, "-m", "pip", "--version"],
                capture_output=True, text=True, timeout=5
            )
        except (OSError, subprocess.SubprocessError):
            # Interpreter not launchable or the probe timed out.
            result = None
        if result is not None and result.returncode == 0:
            tokens = result.stdout.split()
            if len(tokens) > 1:
                info.pip_version = tokens[1]

        # Free disk space, in whole MiB.
        try:
            usage = shutil.disk_usage(str(sandbox_path or Path("/tmp")))
        except OSError:
            # Path missing or not accessible: keep the default of 0.
            pass
        else:
            info.disk_space_mb = usage.free // (1024 * 1024)

        if sandbox_path:
            info.sandbox_path = str(sandbox_path)
            info.venv_exists = (sandbox_path / ".venv").exists()

        return info
78
+
79
+
80
@dataclass
class AutoFixSuggestion:
    """One actionable hint for resolving a reported error."""

    # Machine-readable action id, e.g. "install_dependency", "change_port", "restart".
    action: str
    # Human-readable explanation of the suggestion.
    description: str
    # Optional shell command associated with the suggestion.
    command: Optional[str] = None
    # Whether the suggestion is flagged as automatically fixable.
    auto_fixable: bool = False
87
+
88
+
89
+ # Reserved/system ports that should never be killed by pactown
90
PROTECTED_PORTS = frozenset(
    (
        # Remote access
        22,     # SSH
        # Web front ends (Traefik, nginx, etc.)
        80,     # HTTP
        443,    # HTTPS
        # Data stores
        5432,   # PostgreSQL
        6379,   # Redis
        3306,   # MySQL
        27017,  # MongoDB
        # Commonly used application ports
        8080,   # Common proxy port
        3000,   # Common dev server (Next.js, etc.)
    )
)
101
+
102
+
103
def _socket_inodes_on_port(port: int) -> set:
    """Return inode ids (as strings) of TCP sockets whose local port is *port*.

    Reads ``/proc/net/tcp`` and ``/proc/net/tcp6``; a table that is missing
    or unreadable simply contributes nothing.
    """
    suffix = f":{port:04X}"
    inodes = set()
    for table in ("/proc/net/tcp", "/proc/net/tcp6"):
        try:
            with open(table, "r") as f:
                for line in f:
                    parts = line.split()
                    # parts[1] is the local "ADDR:PORT" in hex, parts[9] the inode.
                    if len(parts) >= 10 and parts[1].endswith(suffix):
                        inodes.add(parts[9])
        except OSError:
            continue
    return inodes


def _pids_holding_sockets(inodes: set) -> set:
    """Return PIDs that have an open fd pointing at one of the socket *inodes*."""
    targets = {f"socket:[{inode}]" for inode in inodes}
    pids = set()
    try:
        entries = os.listdir("/proc")
    except OSError:
        return pids
    for entry in entries:
        if not entry.isdigit():
            continue
        fd_dir = f"/proc/{entry}/fd"
        try:
            fds = os.listdir(fd_dir)
        except OSError:
            # Process vanished or belongs to another user.
            continue
        for fd in fds:
            try:
                link = os.readlink(f"{fd_dir}/{fd}")
            except OSError:
                continue
            if link in targets:
                pids.add(int(entry))
                break
    return pids


def _sigkill(pid: int) -> bool:
    """Send SIGKILL to *pid*; return True if the signal was delivered."""
    try:
        os.kill(pid, signal.SIGKILL)
    except OSError:
        # Covers ProcessLookupError (already gone) and PermissionError.
        return False
    return True


def kill_process_on_port(port: int, force: bool = False) -> bool:
    """Kill any process using the specified port.

    Uses the /proc filesystem first (works in minimal containers), then
    falls back to ``lsof`` and finally ``fuser`` when nothing was killed.
    Returns True if a process was killed, False otherwise.

    Args:
        port: The port number to clear
        force: If True, bypass protected port check (use with caution)

    Note:
        Ports in PROTECTED_PORTS (80, 443, 22, etc.) and all ports below
        1024 are protected by default to prevent accidentally killing
        system services like Traefik. PID 1 and the calling process itself
        are never signalled by the /proc and lsof methods.
    """
    # Safety check: privileged ports and protected system ports are left
    # alone unless the caller explicitly forces it.
    if not force and (port < 1024 or port in PROTECTED_PORTS):
        return False

    own_pid = os.getpid()
    killed = False

    # Method 1: map sockets on this port to their owning processes via /proc.
    # All inodes are collected first so /proc is scanned only once.
    inodes = _socket_inodes_on_port(port)
    if inodes:
        for pid in _pids_holding_sockets(inodes):
            if pid > 1 and pid != own_pid and _sigkill(pid):
                killed = True

    # Method 2: Fallback - try lsof/fuser if available
    if not killed:
        try:
            # Restrict to listeners: a bare `-i :PORT` also matches processes
            # that are merely connected *to* the port (clients, proxies).
            result = subprocess.run(
                ["lsof", "-t", f"-iTCP:{port}", "-sTCP:LISTEN"],
                capture_output=True,
                text=True,
                timeout=5,
            )
        except (OSError, subprocess.SubprocessError):
            # lsof not installed, not executable, or it hung.
            result = None
        if result is not None and result.returncode == 0:
            for token in result.stdout.split():
                try:
                    pid = int(token)
                except ValueError:
                    continue
                if pid > 1 and pid != own_pid and _sigkill(pid):
                    killed = True

    if not killed:
        try:
            result = subprocess.run(
                ["fuser", "-k", f"{port}/tcp"],
                capture_output=True,
                timeout=5,
            )
            killed = result.returncode == 0
        except (OSError, subprocess.SubprocessError):
            pass

    # Give the OS a moment to clean up
    if killed:
        time.sleep(0.5)

    return killed
190
+
191
+ from .config import ServiceConfig
192
+ from .markpact_blocks import parse_blocks, Block
193
+ from .sandbox_manager import SandboxManager, ServiceProcess, _write_dotenv_file
194
+
195
+
196
@dataclass
class RunResult:
    """Outcome of a service operation, with optional failure diagnostics."""

    success: bool
    port: int
    pid: Optional[int] = None
    message: str = ""
    logs: List[str] = field(default_factory=list)
    service_name: Optional[str] = None
    sandbox_path: Optional[Path] = None
    # Failure details
    error_category: ErrorCategory = ErrorCategory.NONE
    diagnostics: Optional[DiagnosticInfo] = None
    suggestions: List[AutoFixSuggestion] = field(default_factory=list)
    # Captured stderr for debugging
    stderr_output: str = ""

    def to_dict(self) -> dict:
        """Return a plain-dict view of this result for JSON serialization."""
        diag = self.diagnostics
        diagnostics_payload = None
        if diag:
            diagnostics_payload = {
                "python_version": diag.python_version,
                "pip_version": diag.pip_version,
                "disk_space_mb": diag.disk_space_mb,
                "sandbox_path": diag.sandbox_path,
                "venv_exists": diag.venv_exists,
            }

        suggestion_payload = []
        for item in self.suggestions:
            suggestion_payload.append({
                "action": item.action,
                "description": item.description,
                "command": item.command,
                "auto_fixable": item.auto_fixable,
            })

        sandbox = str(self.sandbox_path) if self.sandbox_path else None

        payload = {
            "success": self.success,
            "port": self.port,
            "pid": self.pid,
            "message": self.message,
            "logs": self.logs,
            "service_name": self.service_name,
            "sandbox_path": sandbox,
            "error_category": self.error_category.value,
            "diagnostics": diagnostics_payload,
            "suggestions": suggestion_payload,
            "stderr_output": self.stderr_output,
        }
        return payload
237
+
238
+
239
@dataclass
class EndpointTestResult:
    """Outcome of probing a single endpoint."""

    # Endpoint path that was probed.
    endpoint: str
    # Whether the probe is considered successful.
    success: bool
    # HTTP status code, when a response was received.
    status: Optional[int] = None
    # Error text, when the probe failed.
    error: Optional[str] = None
    # Full URL that was requested.
    url: str = ""
    # Elapsed request time in milliseconds, when measured.
    response_time_ms: Optional[float] = None
248
+
249
+
250
@dataclass
class ValidationResult:
    """Outcome of checking markpact content before it is run."""

    # True when no validation errors were recorded.
    valid: bool
    # Human-readable validation error messages.
    errors: List[str] = field(default_factory=list)
    # Number of file blocks found.
    file_count: int = 0
    # Number of dependency entries found.
    deps_count: int = 0
    # Whether a run block is present.
    has_run: bool = False
    # Whether a health-check block is present.
    has_health: bool = False
259
+
260
+
261
+ class ServiceRunner:
262
+ """
263
+ High-level service runner for markpact projects.
264
+
265
+ Usage:
266
+ runner = ServiceRunner("/tmp/sandboxes")
267
+ result = await runner.run_from_content(
268
+ service_id="my-service",
269
+ content="# My API\\n```python markpact:file path=main.py...",
270
+ port=8000,
271
+ user_id="user123", # Optional: for security policy enforcement
272
+ )
273
+ if result.success:
274
+ print(f"Running on port {result.port}")
275
+ """
276
+
277
def __init__(
    self,
    sandbox_root: str | Path = "/tmp/pactown-sandboxes",
    default_health_check: str = "/health",
    health_timeout: int = 10,
    security_policy: Optional["SecurityPolicy"] = None,
    enable_fast_start: bool = True,
):
    """Set up the sandbox directory, sandbox manager, security policy and
    (optionally) the fast starter.

    Args:
        sandbox_root: Directory holding all sandboxes; created if missing.
        default_health_check: Health-check path stored on service configs.
        health_timeout: Default number of seconds to wait for a service.
        security_policy: Policy to enforce; when not given, the one returned
            by ``get_security_policy()`` is used.
        enable_fast_start: When true, build a ``FastServiceStarter`` with
            caching and pooling enabled.
    """
    root = Path(sandbox_root)
    root.mkdir(parents=True, exist_ok=True)
    self.sandbox_root = root
    self.sandbox_manager = SandboxManager(root)

    self.default_health_check = default_health_check
    self.health_timeout = health_timeout

    # external_id -> service_name
    self._services: Dict[str, str] = {}
    # service_id -> user_id
    self._service_users: Dict[str, str] = {}

    # Imported here rather than at module level, as in the rest of this class.
    from .security import get_security_policy
    self.security_policy = security_policy if security_policy else get_security_policy()

    # The fast starter is only imported and built when requested.
    self.enable_fast_start = enable_fast_start
    if not enable_fast_start:
        self.fast_starter = None
    else:
        from .fast_start import FastServiceStarter
        self.fast_starter = FastServiceStarter(
            sandbox_root=self.sandbox_root,
            enable_caching=True,
            enable_pool=True,
        )
308
+
309
def validate_content(self, content: str) -> ValidationResult:
    """Validate markpact content before running.

    The content is valid when it parses, contains at least one ``file``
    block and contains a ``run`` block.

    Args:
        content: Markdown text containing markpact blocks.

    Returns:
        A ``ValidationResult`` with the error list and block statistics.
        ``deps_count`` counts only non-blank, non-comment lines of the
        ``deps`` blocks, so an empty block contributes zero.
    """
    try:
        blocks = parse_blocks(content)
    except Exception as e:
        # Any parser failure is reported as a validation error, not raised.
        return ValidationResult(valid=False, errors=[f"Parse error: {e}"])

    errors: List[str] = []

    file_count = sum(1 for b in blocks if b.kind == "file")
    deps_count = sum(
        1
        for b in blocks
        if b.kind == "deps"
        for line in b.body.splitlines()
        # Blank lines and '#' comments are not dependencies.
        if line.strip() and not line.lstrip().startswith("#")
    )
    has_run = any(b.kind == "run" for b in blocks)
    has_health = any(b.kind in ("health", "healthcheck") for b in blocks)

    if file_count == 0:
        errors.append("No files found. Add ```python markpact:file path=main.py``` blocks.")

    if not has_run:
        errors.append("No run command. Add ```bash markpact:run``` block.")

    return ValidationResult(
        valid=not errors,
        errors=errors,
        file_count=file_count,
        deps_count=deps_count,
        has_run=has_run,
        has_health=has_health,
    )
340
+
341
+ def _prune_stale_user_services(
342
+ self,
343
+ user_id: str,
344
+ on_log: Optional[Callable[[str], None]] = None,
345
+ ) -> None:
346
+ if not user_id or user_id == "anonymous":
347
+ return
348
+
349
+ for service_id, owner_id in list(self._service_users.items()):
350
+ if owner_id != user_id:
351
+ continue
352
+
353
+ service_name = self._services.get(service_id)
354
+ running = False
355
+ if service_name:
356
+ status = self.sandbox_manager.get_status(service_name)
357
+ running = bool(status and status.get("running"))
358
+
359
+ if running:
360
+ continue
361
+
362
+ if on_log:
363
+ on_log(f"Pruning stale service: service_id={service_id} running={running}")
364
+
365
+ try:
366
+ self.security_policy.unregister_service(user_id, service_id)
367
+ except Exception:
368
+ pass
369
+
370
+ if service_name:
371
+ try:
372
+ self.sandbox_manager.stop_service(service_name)
373
+ except Exception:
374
+ pass
375
+
376
+ if service_id in self._services:
377
+ del self._services[service_id]
378
+ if service_id in self._service_users:
379
+ del self._service_users[service_id]
380
+
381
async def run_from_content(
    self,
    service_id: str,
    content: str,
    port: int,
    env: Optional[Dict[str, str]] = None,
    restart_if_running: bool = True,
    wait_for_health: bool = True,
    health_timeout: Optional[int] = None,
    on_log: Optional[Callable[[str], None]] = None,
    user_id: Optional[str] = None,
    user_profile: Optional[Dict[str, Any]] = None,
) -> RunResult:
    """
    Run a service directly from markdown content.

    The call prunes the user's stale services, asks the security policy
    for permission (honouring any throttle delay), stops or rejects an
    already-running instance, clears orphan processes on the port,
    validates the content, starts the sandboxed process and optionally
    waits for it to answer HTTP requests.

    Args:
        service_id: Unique identifier for the service
        content: Markdown content with markpact blocks
        port: Port to run the service on
        env: Additional environment variables
        restart_if_running: Restart if already running
        wait_for_health: Wait for health check to pass
        health_timeout: Health check timeout (default: self.health_timeout)
        on_log: Callback for log messages
        user_id: User ID for security policy enforcement
        user_profile: User profile dict (tier, limits) for security checks

    Returns:
        RunResult with success status, logs, and service info. Failures
        are reported through the result (with an error category where one
        is known); exceptions raised while starting are caught and
        reported as ``ErrorCategory.UNKNOWN``.
    """
    logs: List[str] = []
    service_name = f"service_{service_id}"
    effective_user_id = user_id or "anonymous"

    def log(msg: str) -> None:
        logs.append(msg)
        if on_log:
            on_log(msg)

    def cleanup() -> None:
        self.sandbox_manager.stop_service(service_name)
        self.sandbox_manager.clean_sandbox(service_name)

    # Set user profile if provided
    if user_profile and user_id:
        from .security import UserProfile
        profile = UserProfile.from_dict({**user_profile, "user_id": user_id})
        self.security_policy.set_user_profile(profile)

    # Clean up stale services before enforcing concurrent limits
    self._prune_stale_user_services(effective_user_id, on_log=log)

    # Security check - can this user start a service?
    security_check = await self.security_policy.check_can_start_service(
        user_id=effective_user_id,
        service_id=service_id,
        port=port,
    )

    if not security_check.allowed:
        log(f"🔒 Security: {security_check.reason}")
        return RunResult(
            success=False,
            port=port,
            message=security_check.reason or "Security check failed",
            logs=logs,
            error_category=ErrorCategory.PERMISSION,
        )

    # Apply throttle delay if server is under load
    if security_check.delay_seconds > 0:
        log(f"⏳ Server under load, waiting {security_check.delay_seconds:.1f}s...")
        await asyncio.sleep(security_check.delay_seconds)

    # Check if already running (tracked by sandbox manager)
    status = self.sandbox_manager.get_status(service_name)
    if status and status.get("running"):
        if restart_if_running:
            log(f"Service {service_name} is running (PID: {status.get('pid')}), restarting...")
            try:
                self.sandbox_manager.stop_service(service_name)
                self.sandbox_manager.clean_sandbox(service_name)
                # Wait for cleanup to complete
                await asyncio.sleep(0.5)
                log("Previous instance stopped")
            except Exception as e:
                log(f"Warning: could not stop previous instance: {e}")
        else:
            return RunResult(
                success=False,
                port=port,
                message="Service is already running",
                logs=logs,
                service_name=service_name,
            )

    # Kill any orphan process on the port (handles container restarts)
    killed = kill_process_on_port(port)
    if killed:
        log(f"Killed orphan process on port {port}")
        await asyncio.sleep(0.3)  # Wait for port to be released

    # Validate content
    validation = self.validate_content(content)
    log(f"Found {validation.file_count} files, {validation.deps_count} dependencies")

    if not validation.valid:
        for err in validation.errors:
            log(f"❌ {err}")
        return RunResult(
            success=False,
            port=port,
            message=validation.errors[0] if validation.errors else "Validation failed",
            logs=logs,
            error_category=ErrorCategory.VALIDATION,
        )

    # Create temporary README file
    readme_path = self.sandbox_root / f"{service_name}_README.md"
    readme_path.write_text(content)

    # Create ServiceConfig
    service_env = {"PORT": str(port)}
    if env:
        service_env.update(env)

    service_config = ServiceConfig(
        name=service_name,
        readme=str(readme_path),
        port=port,
        env=service_env,
        health_check=self.default_health_check,
    )

    try:
        log(f"Creating sandbox for {service_name}")

        # Start service with detailed logging and user isolation
        process = self.sandbox_manager.start_service(
            service=service_config,
            readme_path=readme_path,
            env=service_env,
            verbose=False,
            restart_if_running=False,  # Already handled above
            on_log=log,  # Pass log callback for detailed logging
            user_id=effective_user_id if effective_user_id != "anonymous" else None,
        )

        log(f"Sandbox created: {process.sandbox_path}")
        log(f"Process started with PID: {process.pid}")

        # Check if process died immediately after startup. The stderr pipe
        # can only be drained once, so keep what was read for the failure
        # report instead of discarding it after logging.
        early_exit_code = None
        early_stderr = ""
        if process.process and process.process.poll() is not None:
            early_exit_code = process.process.returncode
            if process.process.stderr:
                try:
                    early_stderr = process.process.stderr.read().decode(
                        "utf-8", errors="replace"
                    )[:1000]
                except Exception:
                    # Best effort only: stderr may be closed or not bytes.
                    pass
            log(f"⚠️ Process exited immediately with code {early_exit_code}")
            if early_stderr:
                log(f"STDERR: {early_stderr[:500]}")

        # Wait for health check
        if wait_for_health:
            timeout = health_timeout or self.health_timeout
            log("Waiting for server to start...")

            health_result = await self._wait_for_health(
                process=process,
                port=port,
                timeout=timeout,
                on_log=log,
            )

            if not health_result["success"]:
                # Cleanup
                cleanup()

                error_cat = health_result.get("error_category", ErrorCategory.STARTUP_TIMEOUT)
                # Fall back to the output captured at the immediate-exit check.
                stderr_out = health_result.get("stderr", "") or early_stderr
                suggestions = self._generate_suggestions(error_cat, stderr_out, port)

                log("❌ Server failed to start - check dependencies and code")
                return RunResult(
                    success=False,
                    port=port,
                    message=f"Server failed to start within {timeout} seconds",
                    logs=logs,
                    error_category=error_cat,
                    stderr_output=stderr_out,
                    suggestions=suggestions,
                    diagnostics=DiagnosticInfo.collect(process.sandbox_path),
                )
        elif early_exit_code not in (None, 0):
            # Without a health check nothing else would notice that the
            # process already failed; do not report a dead service as running.
            cleanup()
            log("❌ Server failed to start - check dependencies and code")
            return RunResult(
                success=False,
                port=port,
                message=f"Process exited immediately with code {early_exit_code}",
                logs=logs,
                error_category=ErrorCategory.PROCESS_CRASH,
                stderr_output=early_stderr,
                suggestions=self._generate_suggestions(
                    ErrorCategory.PROCESS_CRASH, early_stderr, port
                ),
                diagnostics=DiagnosticInfo.collect(process.sandbox_path),
            )

        # Track mapping
        self._services[service_id] = service_name
        self._service_users[service_id] = effective_user_id

        # Register with security policy for concurrent service tracking
        self.security_policy.register_service(effective_user_id, service_id)

        log(f"✓ Project running on http://localhost:{port}")
        log(f"   PID: {process.pid}")

        return RunResult(
            success=True,
            port=port,
            pid=process.pid,
            message=f"Service running on port {port}",
            logs=logs,
            service_name=service_name,
            sandbox_path=process.sandbox_path,
        )

    except Exception as e:
        log(f"Error starting service: {e}")
        return RunResult(
            success=False,
            port=port,
            message=f"Failed to start: {e}",
            logs=logs,
            error_category=ErrorCategory.UNKNOWN,
        )
602
+
603
def _generate_suggestions(
    self,
    error_cat: ErrorCategory,
    stderr: str,
    port: int
) -> List[AutoFixSuggestion]:
    """Generate actionable suggestions based on error type.

    Args:
        error_cat: Category assigned to the failure.
        stderr: Captured stderr text (may be empty or truncated).
        port: Port the service was meant to use; embedded in port-related
            suggestions.

    Returns:
        A list of suggestions, possibly empty. Truncated stderr never
        raises: when a package or module name cannot be extracted, a
        generic suggestion is produced instead.
    """
    suggestions: List[AutoFixSuggestion] = []

    def token_after(marker: str) -> Optional[str]:
        """Return the first word following the last *marker* in stderr."""
        _, found, tail = stderr.rpartition(marker)
        if not found:
            return None
        words = tail.split()
        return words[0] if words else None

    def kill_port_suggestion() -> AutoFixSuggestion:
        return AutoFixSuggestion(
            action="kill_port_process",
            description=f"Kill process using port {port}",
            command=f"fuser -k {port}/tcp",
            auto_fixable=True,
        )

    def import_suggestion() -> AutoFixSuggestion:
        module = (token_after("No module named") or "").strip("'\"")
        if module:
            return AutoFixSuggestion(
                action="add_dependency",
                description=f"Add '{module}' to dependencies block",
            )
        return AutoFixSuggestion(
            action="check_imports",
            description="Check that all imported modules are in dependencies",
        )

    def syntax_suggestion() -> AutoFixSuggestion:
        return AutoFixSuggestion(
            action="fix_syntax",
            description="Fix Python syntax error in code",
        )

    missing_import = (
        "ModuleNotFoundError" in stderr
        or "ImportError" in stderr
        or "No module named" in stderr
    )

    if error_cat == ErrorCategory.PORT_CONFLICT:
        suggestions.append(kill_port_suggestion())
        suggestions.append(AutoFixSuggestion(
            action="use_different_port",
            description="Try a different port",
        ))

    elif error_cat == ErrorCategory.DEPENDENCY:
        # Extract failed package from stderr
        if "No matching distribution" in stderr:
            pkg = token_after("No matching distribution found for") or "unknown"
            suggestions.append(AutoFixSuggestion(
                action="check_package_name",
                description=f"Package '{pkg}' not found - check spelling or availability",
            ))
        # A missing module at runtime is also categorized as DEPENDENCY by
        # the health check, so the import hint must be reachable here too.
        if "ModuleNotFoundError" in stderr or "No module named" in stderr:
            suggestions.append(import_suggestion())
        suggestions.append(AutoFixSuggestion(
            action="clear_cache",
            description="Clear pip cache and retry",
            command="pip cache purge",
            auto_fixable=True,
        ))

    elif error_cat == ErrorCategory.VALIDATION:
        # Syntax errors in the service code are categorized as VALIDATION
        # by the health check.
        if "SyntaxError" in stderr:
            suggestions.append(syntax_suggestion())

    elif error_cat == ErrorCategory.PROCESS_CRASH:
        if "SyntaxError" in stderr:
            suggestions.append(syntax_suggestion())
        if missing_import:
            suggestions.append(import_suggestion())
        if "Address already in use" in stderr:
            suggestions.append(kill_port_suggestion())
        if "Traceback" in stderr and not suggestions:
            # Generic crash - suggest checking logs
            suggestions.append(AutoFixSuggestion(
                action="check_code",
                description="Review code for runtime errors - see stderr_output for full traceback",
            ))

    elif error_cat == ErrorCategory.STARTUP_TIMEOUT:
        suggestions.append(AutoFixSuggestion(
            action="increase_timeout",
            description="Increase health check timeout for slow dependencies",
        ))
        suggestions.append(AutoFixSuggestion(
            action="check_run_command",
            description="Verify the run command starts a web server",
        ))

    return suggestions
683
+
684
async def _wait_for_health(
    self,
    process: ServiceProcess,
    port: int,
    timeout: int,
    on_log: Callable[[str], None],
) -> dict:
    """Wait for service to pass health check.

    Polls every 0.5s, ``timeout * 2`` times. On each attempt the process is
    checked first; if it has died, its stderr is read and used to pick an
    error category. Otherwise ``http://localhost:{port}/`` is requested and
    any response with a status below 500 counts as healthy.

    Args:
        process: The started service process.
        port: Port the service is expected to listen on.
        timeout: Nominal number of seconds to keep polling.
        on_log: Callback receiving progress messages.

    Returns dict with: success, error_category, stderr
    """
    attempts = timeout * 2  # Check every 0.5s
    stderr_output = ""

    # One client for all attempts instead of building a new one every 0.5s.
    async with httpx.AsyncClient(timeout=1.0) as client:
        for _ in range(attempts):
            await asyncio.sleep(0.5)

            # Check if process is still running
            if not process.is_running:
                exit_code = process.process.returncode if process.process else "unknown"
                on_log(f"❌ Process died (exit code: {exit_code})")

                # Try to get error output
                if process.process and process.process.stderr:
                    try:
                        stderr_bytes = process.process.stderr.read()
                        if stderr_bytes:
                            # errors="replace": invalid bytes must not cost
                            # us the whole error output.
                            stderr_output = stderr_bytes.decode(
                                "utf-8", errors="replace"
                            )[:1000]
                            on_log(f"Error output: {stderr_output[:500]}")
                    except Exception:
                        # Best effort: the stream may be closed or not bytes.
                        pass

                # Categorize error based on stderr
                error_cat = ErrorCategory.PROCESS_CRASH
                if "Address already in use" in stderr_output:
                    error_cat = ErrorCategory.PORT_CONFLICT
                elif "ModuleNotFoundError" in stderr_output or "No module named" in stderr_output:
                    error_cat = ErrorCategory.DEPENDENCY
                elif "SyntaxError" in stderr_output:
                    error_cat = ErrorCategory.VALIDATION

                return {"success": False, "error_category": error_cat, "stderr": stderr_output}

            # Try to connect. `except Exception` (not a bare except) so that
            # asyncio.CancelledError still cancels this coroutine.
            try:
                resp = await client.get(f"http://localhost:{port}/")
            except Exception:
                continue  # Keep trying

            if resp.status_code < 500:
                on_log(f"✓ Server responding (status {resp.status_code})")
                return {"success": True, "error_category": ErrorCategory.NONE, "stderr": ""}

    # Timeout reached - try to capture any stderr
    if process.is_running and process.process and process.process.stderr:
        try:
            import select
            stream = process.process.stderr
            if select.select([stream], [], [], 0)[0]:
                # os.read returns whatever is available right now; a
                # buffered stream.read(2000) could block until 2000 bytes
                # or EOF while the process keeps running.
                stderr_bytes = os.read(stream.fileno(), 2000)
                if stderr_bytes:
                    stderr_output = stderr_bytes.decode("utf-8", errors="replace")
                    on_log(f"Process output: {stderr_output[:500]}")
        except Exception:
            # select/fileno may be unsupported for this stream or platform.
            pass
    on_log(f"⏱️ Health check timed out after {timeout}s - process still running: {process.is_running}")
    return {"success": False, "error_category": ErrorCategory.STARTUP_TIMEOUT, "stderr": stderr_output}
750
+
751
def stop(self, service_id: str) -> RunResult:
    """Stop a running service.

    Args:
        service_id: External id the service was started with.

    Returns:
        A ``RunResult`` describing the outcome. ``success`` is False when
        the id is unknown, when the sandbox manager has no status for the
        service, or when stopping fails; exceptions from the sandbox
        manager while stopping are caught and reported in the result.
    """
    logs: List[str] = []

    service_name = self._services.get(service_id)
    if not service_name:
        return RunResult(
            success=False,
            port=0,
            message="Service not found",
            logs=["No running service found with this ID"],
        )

    status = self.sandbox_manager.get_status(service_name)
    if not status:
        # The sandbox manager no longer knows the service: drop every trace
        # of it, including the owner mapping and the security-policy
        # registration, so it stops counting against the user's limits.
        del self._services[service_id]
        user_id = self._service_users.pop(service_id, "anonymous")
        logs.append("Service not found in sandbox manager")
        try:
            self.security_policy.unregister_service(user_id, service_id)
        except Exception as e:
            logs.append(f"Warning: could not unregister service: {e}")
        return RunResult(
            success=False,
            port=0,
            message="Service not running",
            logs=logs,
        )

    port = status.get("port", 0)
    logs.append(f"Stopping service on port {port}")

    try:
        success = self.sandbox_manager.stop_service(service_name)

        if not success:
            logs.append("Failed to stop service")
            return RunResult(
                success=False,
                port=port,
                message="Failed to stop service",
                logs=logs,
            )

        logs.append("Process terminated")
        self.sandbox_manager.clean_sandbox(service_name)
        logs.append("Sandbox cleaned up")

        # Unregister from security policy
        user_id = self._service_users.get(service_id, "anonymous")
        self.security_policy.unregister_service(user_id, service_id)
        self._service_users.pop(service_id, None)

        del self._services[service_id]

        return RunResult(
            success=True,
            port=port,
            message="Service stopped",
            logs=logs,
        )
    except Exception as e:
        logs.append(f"Error stopping service: {e}")
        return RunResult(
            success=False,
            port=port,
            message=f"Error: {e}",
            logs=logs,
        )
815
+
816
def get_status(self, service_id: str) -> Optional[Dict]:
    """Return a status summary for *service_id*, or None.

    None is returned when the id is not tracked or when the sandbox
    manager reports no status for the mapped service name.
    """
    name = self._services.get(service_id)
    raw = self.sandbox_manager.get_status(name) if name else None
    if not raw:
        return None

    summary = {
        "service_id": service_id,
        "service_name": name,
        "user_id": self._service_users.get(service_id),
        "running": raw.get("running", False),
    }
    # Remaining fields are copied through, defaulting to None when absent.
    for key in ("port", "pid", "uptime", "sandbox"):
        summary[key] = raw.get(key)
    return summary
836
+
837
def list_services(self) -> List[Dict]:
    """Return one summary dict per tracked service that is currently running."""

    def running_entries():
        for sid, name in self._services.items():
            info = self.sandbox_manager.get_status(name)
            if not info or not info.get("running"):
                continue
            yield {
                "service_id": sid,
                "service_name": name,
                "user_id": self._service_users.get(sid),
                "port": info.get("port"),
                "pid": info.get("pid"),
                "running": info.get("running", False),
            }

    return list(running_entries())
852
+
853
async def test_endpoints(
    self,
    service_id: str,
    endpoints: Optional[List[str]] = None,
    timeout: float = 5.0,
) -> List[EndpointTestResult]:
    """Test endpoints of a running service.

    Args:
        service_id: External id of the service to probe.
        endpoints: Paths to request; defaults to "/", "/health" and "/docs".
        timeout: Per-request timeout passed to the HTTP client, in seconds.

    Returns:
        One result per endpoint, in order. A result is successful whenever
        a response was received (its status code is recorded); request
        errors are recorded as failures. If the service is unknown, not
        running or has no port, a single failed result with endpoint "*"
        is returned.
    """
    if endpoints is None:
        endpoints = ["/", "/health", "/docs"]

    def unavailable(reason: str) -> List[EndpointTestResult]:
        return [EndpointTestResult(
            endpoint="*",
            success=False,
            error=reason,
        )]

    service_name = self._services.get(service_id)
    if not service_name:
        return unavailable("Service not found")

    status = self.sandbox_manager.get_status(service_name)
    if not status or not status.get("running"):
        return unavailable("Service not running")

    port = status.get("port")
    if not port:
        return unavailable("No port assigned")

    results = []

    async with httpx.AsyncClient(timeout=timeout) as client:
        for endpoint in endpoints:
            url = f"http://localhost:{port}{endpoint}"
            # perf_counter is monotonic, so the measured interval cannot be
            # distorted by wall-clock adjustments.
            start = time.perf_counter()

            try:
                response = await client.get(url)
                elapsed = (time.perf_counter() - start) * 1000

                results.append(EndpointTestResult(
                    endpoint=endpoint,
                    success=True,
                    status=response.status_code,
                    url=url,
                    response_time_ms=elapsed,
                ))
            except Exception as e:
                results.append(EndpointTestResult(
                    endpoint=endpoint,
                    success=False,
                    error=str(e),
                    url=url,
                ))

    return results
914
+
915
def stop_all(self) -> None:
    """Call ``stop`` for every currently tracked service id."""
    # Iterate over a snapshot: stop() removes entries from the mapping.
    tracked_ids = tuple(self._services)
    for sid in tracked_ids:
        self.stop(sid)
919
+
920
+ async def fast_run(
921
+ self,
922
+ service_id: str,
923
+ content: str,
924
+ port: int,
925
+ env: Optional[Dict[str, str]] = None,
926
+ user_id: Optional[str] = None,
927
+ user_profile: Optional[Dict[str, Any]] = None,
928
+ skip_health_check: bool = False,
929
+ on_log: Optional[Callable[[str], None]] = None,
930
+ ) -> RunResult:
931
+ """
932
+ Fast service startup with dependency caching.
933
+
934
+ Uses cached venvs to achieve millisecond startup for repeated deps.
935
+ Security checks are still enforced.
936
+
937
+ Args:
938
+ service_id: Unique identifier
939
+ content: Markdown content
940
+ port: Port to run on
941
+ env: Environment variables
942
+ user_id: User ID for security
943
+ user_profile: User profile for limits
944
+ skip_health_check: Return immediately without waiting for health
945
+ on_log: Log callback
946
+
947
+ Returns:
948
+ RunResult with startup time in message
949
+ """
950
+ import time as time_module
951
+ start_time = time_module.time()
952
+ logs: List[str] = []
953
+ service_name = f"service_{service_id}"
954
+ effective_user_id = user_id or "anonymous"
955
+
956
+ def log(msg: str):
957
+ logs.append(msg)
958
+ if on_log:
959
+ on_log(msg)
960
+
961
+ # Security check (same as regular run)
962
+ if user_profile and user_id:
963
+ from .security import UserProfile
964
+ profile = UserProfile.from_dict({**user_profile, "user_id": user_id})
965
+ self.security_policy.set_user_profile(profile)
966
+
967
+ # Clean up stale services before enforcing concurrent limits
968
+ self._prune_stale_user_services(effective_user_id, on_log=log)
969
+
970
+ security_check = await self.security_policy.check_can_start_service(
971
+ user_id=effective_user_id,
972
+ service_id=service_id,
973
+ port=port,
974
+ )
975
+
976
+ if not security_check.allowed:
977
+ log(f"🔒 Security: {security_check.reason}")
978
+ return RunResult(
979
+ success=False,
980
+ port=port,
981
+ message=security_check.reason or "Security check failed",
982
+ logs=logs,
983
+ error_category=ErrorCategory.PERMISSION,
984
+ )
985
+
986
+ # Apply throttle if needed
987
+ if security_check.delay_seconds > 0:
988
+ log(f"⏳ Throttle: {security_check.delay_seconds:.1f}s")
989
+ await asyncio.sleep(security_check.delay_seconds)
990
+
991
+ # Kill any orphan process on port
992
+ if kill_process_on_port(port):
993
+ log(f"Killed orphan on port {port}")
994
+
995
+ # Use fast starter if available
996
+ if self.fast_starter:
997
+ log("⚡ Fast start mode enabled")
998
+ fast_result = await self.fast_starter.fast_create_sandbox(
999
+ service_name=service_name,
1000
+ content=content,
1001
+ on_log=log,
1002
+ )
1003
+
1004
+ if not fast_result.success:
1005
+ return RunResult(
1006
+ success=False,
1007
+ port=port,
1008
+ message=fast_result.message,
1009
+ logs=logs,
1010
+ )
1011
+
1012
+ sandbox_path = fast_result.sandbox_path
1013
+ cache_info = "cached" if fast_result.cache_hit else "fresh"
1014
+ log(f"⚡ Sandbox ready in {fast_result.startup_time_ms:.0f}ms ({cache_info})")
1015
+ else:
1016
+ # Fallback to regular sandbox creation
1017
+ log("Creating sandbox (no cache)...")
1018
+ validation = self.validate_content(content)
1019
+ if not validation.valid:
1020
+ return RunResult(
1021
+ success=False,
1022
+ port=port,
1023
+ message=validation.errors[0] if validation.errors else "Validation failed",
1024
+ logs=logs,
1025
+ )
1026
+
1027
+ import tempfile
1028
+ with tempfile.NamedTemporaryFile(mode='w', suffix='.md', delete=False) as f:
1029
+ f.write(content)
1030
+ readme_path = Path(f.name)
1031
+
1032
+ # Use sandbox manager for regular creation
1033
+ from .config import ServiceConfig
1034
+ config = ServiceConfig(name=service_name, readme=str(readme_path), port=port)
1035
+
1036
+ try:
1037
+ sandbox = self.sandbox_manager.create_sandbox(config, readme_path)
1038
+ sandbox_path = sandbox.path
1039
+ finally:
1040
+ readme_path.unlink()
1041
+
1042
+ # Start the service process
1043
+ blocks = parse_blocks(content)
1044
+ run_cmd = None
1045
+ for block in blocks:
1046
+ if block.kind == "run":
1047
+ run_cmd = block.body.strip()
1048
+ break
1049
+
1050
+ if not run_cmd:
1051
+ return RunResult(
1052
+ success=False,
1053
+ port=port,
1054
+ message="No run command found",
1055
+ logs=logs,
1056
+ )
1057
+
1058
+ # Prepare environment
1059
+ run_env = os.environ.copy()
1060
+ run_env["PORT"] = str(port)
1061
+ run_env["HOST"] = "0.0.0.0"
1062
+ if env:
1063
+ run_env.update(env)
1064
+
1065
+ dotenv_env = dict(env or {})
1066
+ dotenv_env["PORT"] = str(port)
1067
+ dotenv_env["MARKPACT_PORT"] = str(port)
1068
+ _write_dotenv_file(sandbox_path, dotenv_env)
1069
+
1070
+ # Resolve venv path (could be symlink to cache)
1071
+ venv_path = sandbox_path / ".venv"
1072
+ if venv_path.is_symlink():
1073
+ actual_venv = venv_path.resolve()
1074
+ run_env["VIRTUAL_ENV"] = str(actual_venv)
1075
+ run_env["PATH"] = f"{actual_venv}/bin:{run_env.get('PATH', '')}"
1076
+ elif venv_path.exists():
1077
+ run_env["VIRTUAL_ENV"] = str(venv_path)
1078
+ run_env["PATH"] = f"{venv_path}/bin:{run_env.get('PATH', '')}"
1079
+
1080
+ # Expand $PORT in command
1081
+ run_cmd = run_cmd.replace("$PORT", str(port))
1082
+
1083
+ log(f"Starting: {run_cmd[:50]}...")
1084
+
1085
+ try:
1086
+ process = subprocess.Popen(
1087
+ run_cmd,
1088
+ shell=True,
1089
+ cwd=str(sandbox_path),
1090
+ env=run_env,
1091
+ stdout=subprocess.PIPE,
1092
+ stderr=subprocess.PIPE,
1093
+ )
1094
+
1095
+ # Register service
1096
+ self._services[service_id] = service_name
1097
+ self._service_users[service_id] = effective_user_id
1098
+ self.security_policy.register_service(effective_user_id, service_id)
1099
+
1100
+ # Track in sandbox manager
1101
+ from .sandbox_manager import ServiceProcess
1102
+ self.sandbox_manager._processes[service_name] = ServiceProcess(
1103
+ name=service_name,
1104
+ pid=process.pid,
1105
+ port=port,
1106
+ sandbox_path=sandbox_path,
1107
+ process=process,
1108
+ )
1109
+
1110
+ total_time_ms = (time_module.time() - start_time) * 1000
1111
+
1112
+ if skip_health_check:
1113
+ log(f"⚡ Started in {total_time_ms:.0f}ms (health check skipped)")
1114
+ return RunResult(
1115
+ success=True,
1116
+ port=port,
1117
+ pid=process.pid,
1118
+ message=f"Started in {total_time_ms:.0f}ms (async)",
1119
+ logs=logs,
1120
+ service_name=service_name,
1121
+ sandbox_path=sandbox_path,
1122
+ )
1123
+
1124
+ # Quick health check (max 5s for fast mode)
1125
+ log("Quick health check...")
1126
+ health_ok = await self._quick_health_check(process, port, timeout=5)
1127
+
1128
+ total_time_ms = (time_module.time() - start_time) * 1000
1129
+
1130
+ if health_ok:
1131
+ log(f"✓ Running in {total_time_ms:.0f}ms")
1132
+ return RunResult(
1133
+ success=True,
1134
+ port=port,
1135
+ pid=process.pid,
1136
+ message=f"Running on port {port} ({total_time_ms:.0f}ms)",
1137
+ logs=logs,
1138
+ service_name=service_name,
1139
+ sandbox_path=sandbox_path,
1140
+ )
1141
+ else:
1142
+ # Check if process died
1143
+ if process.poll() is not None:
1144
+ stderr = process.stderr.read().decode()[:500] if process.stderr else ""
1145
+ log(f"❌ Process died: {stderr[:200]}")
1146
+ return RunResult(
1147
+ success=False,
1148
+ port=port,
1149
+ message="Process crashed during startup",
1150
+ logs=logs,
1151
+ stderr_output=stderr,
1152
+ )
1153
+ else:
1154
+ log(f"⚠️ Health check timeout, but process running")
1155
+ return RunResult(
1156
+ success=True,
1157
+ port=port,
1158
+ pid=process.pid,
1159
+ message=f"Started (health pending) in {total_time_ms:.0f}ms",
1160
+ logs=logs,
1161
+ service_name=service_name,
1162
+ sandbox_path=sandbox_path,
1163
+ )
1164
+
1165
+ except Exception as e:
1166
+ log(f"Error: {e}")
1167
+ return RunResult(
1168
+ success=False,
1169
+ port=port,
1170
+ message=str(e),
1171
+ logs=logs,
1172
+ )
1173
+
1174
+ async def _quick_health_check(
1175
+ self,
1176
+ process: subprocess.Popen,
1177
+ port: int,
1178
+ timeout: int = 5,
1179
+ ) -> bool:
1180
+ """Quick health check with shorter timeout."""
1181
+ for _ in range(timeout * 4): # Check every 250ms
1182
+ await asyncio.sleep(0.25)
1183
+
1184
+ if process.poll() is not None:
1185
+ return False
1186
+
1187
+ try:
1188
+ async with httpx.AsyncClient(timeout=0.5) as client:
1189
+ resp = await client.get(f"http://localhost:{port}/")
1190
+ if resp.status_code < 500:
1191
+ return True
1192
+ except:
1193
+ pass
1194
+
1195
+ return False
1196
+
1197
def get_cache_stats(self) -> Dict[str, Any]:
    """Report statistics for the fast-start dependency cache.

    Returns:
        Whatever the configured fast starter's ``get_cache_stats()`` returns,
        or ``{"caching_enabled": False}`` when no fast starter is set.
    """
    starter = self.fast_starter
    if not starter:
        return {"caching_enabled": False}
    return starter.get_cache_stats()