mlx-stack 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. mlx_stack/__init__.py +5 -0
  2. mlx_stack/_version.py +24 -0
  3. mlx_stack/cli/__init__.py +5 -0
  4. mlx_stack/cli/bench.py +221 -0
  5. mlx_stack/cli/config.py +166 -0
  6. mlx_stack/cli/down.py +109 -0
  7. mlx_stack/cli/init.py +180 -0
  8. mlx_stack/cli/install.py +165 -0
  9. mlx_stack/cli/logs.py +234 -0
  10. mlx_stack/cli/main.py +187 -0
  11. mlx_stack/cli/models.py +304 -0
  12. mlx_stack/cli/profile.py +65 -0
  13. mlx_stack/cli/pull.py +134 -0
  14. mlx_stack/cli/recommend.py +397 -0
  15. mlx_stack/cli/status.py +111 -0
  16. mlx_stack/cli/up.py +163 -0
  17. mlx_stack/cli/watch.py +252 -0
  18. mlx_stack/core/__init__.py +1 -0
  19. mlx_stack/core/benchmark.py +1182 -0
  20. mlx_stack/core/catalog.py +560 -0
  21. mlx_stack/core/config.py +471 -0
  22. mlx_stack/core/deps.py +323 -0
  23. mlx_stack/core/hardware.py +304 -0
  24. mlx_stack/core/launchd.py +531 -0
  25. mlx_stack/core/litellm_gen.py +188 -0
  26. mlx_stack/core/log_rotation.py +231 -0
  27. mlx_stack/core/log_viewer.py +386 -0
  28. mlx_stack/core/models.py +639 -0
  29. mlx_stack/core/paths.py +79 -0
  30. mlx_stack/core/process.py +887 -0
  31. mlx_stack/core/pull.py +815 -0
  32. mlx_stack/core/scoring.py +611 -0
  33. mlx_stack/core/stack_down.py +317 -0
  34. mlx_stack/core/stack_init.py +524 -0
  35. mlx_stack/core/stack_status.py +229 -0
  36. mlx_stack/core/stack_up.py +856 -0
  37. mlx_stack/core/watchdog.py +744 -0
  38. mlx_stack/data/__init__.py +1 -0
  39. mlx_stack/data/catalog/__init__.py +1 -0
  40. mlx_stack/data/catalog/deepseek-r1-32b.yaml +46 -0
  41. mlx_stack/data/catalog/deepseek-r1-8b.yaml +45 -0
  42. mlx_stack/data/catalog/gemma3-12b.yaml +45 -0
  43. mlx_stack/data/catalog/gemma3-27b.yaml +45 -0
  44. mlx_stack/data/catalog/gemma3-4b.yaml +45 -0
  45. mlx_stack/data/catalog/llama3.3-8b.yaml +44 -0
  46. mlx_stack/data/catalog/nemotron-49b.yaml +41 -0
  47. mlx_stack/data/catalog/nemotron-8b.yaml +44 -0
  48. mlx_stack/data/catalog/qwen3-8b.yaml +45 -0
  49. mlx_stack/data/catalog/qwen3.5-0.8b.yaml +45 -0
  50. mlx_stack/data/catalog/qwen3.5-14b.yaml +46 -0
  51. mlx_stack/data/catalog/qwen3.5-32b.yaml +45 -0
  52. mlx_stack/data/catalog/qwen3.5-3b.yaml +44 -0
  53. mlx_stack/data/catalog/qwen3.5-72b.yaml +42 -0
  54. mlx_stack/data/catalog/qwen3.5-8b.yaml +45 -0
  55. mlx_stack/py.typed +1 -0
  56. mlx_stack/utils/__init__.py +1 -0
  57. mlx_stack-0.1.0.dist-info/METADATA +397 -0
  58. mlx_stack-0.1.0.dist-info/RECORD +61 -0
  59. mlx_stack-0.1.0.dist-info/WHEEL +4 -0
  60. mlx_stack-0.1.0.dist-info/entry_points.txt +2 -0
  61. mlx_stack-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,856 @@
1
+ """Stack startup logic for mlx-stack.
2
+
3
+ Orchestrates starting all services defined in a stack definition:
4
+ reads default.yaml, starts vllm-mlx subprocesses sequentially (largest
5
+ model first), performs HTTP health checks with 120s timeout, starts
6
+ LiteLLM after all healthy servers, creates PID files, and produces a
7
+ summary. Supports dry-run, selective tier start, lockfile, port
8
+ conflict detection, stale PID cleanup, memory warnings, auto-install
9
+ of dependencies, and localhost-only binding. Propagates the OpenRouter
10
+ API key securely via env var.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import shutil
16
+ from dataclasses import dataclass, field
17
+ from pathlib import Path
18
+ from typing import Any
19
+
20
+ import psutil
21
+ import yaml
22
+
23
+ from mlx_stack.core.catalog import CatalogEntry, get_entry_by_id, load_catalog
24
+ from mlx_stack.core.config import ConfigCorruptError, get_value
25
+ from mlx_stack.core.deps import (
26
+ DependencyError,
27
+ DependencyInstallError,
28
+ ensure_dependency,
29
+ )
30
+ from mlx_stack.core.paths import get_data_home, get_stacks_dir
31
+ from mlx_stack.core.process import (
32
+ HealthCheckError,
33
+ LockError,
34
+ ProcessError,
35
+ acquire_lock,
36
+ check_port_conflict,
37
+ cleanup_stale_pid,
38
+ is_process_alive,
39
+ read_pid_file,
40
+ remove_pid_file,
41
+ start_service,
42
+ wait_for_healthy,
43
+ )
44
+ from mlx_stack.core.stack_init import STACK_SCHEMA_VERSION
45
+
46
+ # --------------------------------------------------------------------------- #
47
+ # Constants
48
+ # --------------------------------------------------------------------------- #
49
+
50
# URL path polled to decide whether a vllm-mlx server is healthy.
VLLM_HEALTH_PATH = "/v1/models"

# URL path polled to decide whether the LiteLLM proxy is healthy.
LITELLM_HEALTH_PATH = "/health/liveliness"

# Service name under which LiteLLM's PID file is read and removed.
LITELLM_SERVICE_NAME = "litellm"
58
+
59
+
60
+ # --------------------------------------------------------------------------- #
61
+ # Exceptions
62
+ # --------------------------------------------------------------------------- #
63
+
64
+
65
class UpError(Exception):
    """Fatal failure of the ``up`` command.

    Raised by the startup code in this module for unrecoverable
    conditions such as a missing or invalid stack definition, an unknown
    tier name, or a dependency that could not be installed.
    """
67
+
68
+
69
+ # --------------------------------------------------------------------------- #
70
+ # Data classes
71
+ # --------------------------------------------------------------------------- #
72
+
73
+
74
@dataclass
class TierStatus:
    """Outcome of a startup attempt for one service.

    Attributes:
        name: Service (tier) name.
        model: Model identifier for the tier; the LiteLLM entry uses
            the literal "proxy".
        port: Port associated with the service.
        status: One of "healthy", "failed", "skipped", "dry-run" or
            "already-running".
        error: Detail message for failed/skipped services, else None.
    """

    name: str
    model: str
    port: int
    status: str
    error: str | None = None
83
+
84
+
85
@dataclass
class UpResult:
    """Aggregate outcome of one ``up`` invocation.

    Attributes:
        tiers: Per-tier statuses, in the order they were recorded.
        litellm: Status of the LiteLLM proxy, or None if not yet set.
        dry_run: True when the run only displayed commands.
        dry_run_commands: One dict per service with the keys
            "service", "command" and "type" (dry-run mode only).
        warnings: Human-readable, non-fatal warnings.
        already_running: True when every requested tier and LiteLLM
            were found running, so nothing was started.
    """

    tiers: list[TierStatus] = field(default_factory=list)
    litellm: TierStatus | None = None
    dry_run: bool = False
    dry_run_commands: list[dict[str, Any]] = field(default_factory=list)
    warnings: list[str] = field(default_factory=list)
    already_running: bool = False
95
+
96
+
97
+ # --------------------------------------------------------------------------- #
98
+ # Stack definition loading & validation
99
+ # --------------------------------------------------------------------------- #
100
+
101
+
102
def load_stack_definition(stack_name: str = "default") -> dict[str, Any]:
    """Load and validate a stack definition from disk.

    The definition is read from ``<stacks dir>/<stack_name>.yaml``. It
    must be a YAML mapping whose ``schema_version`` equals
    ``STACK_SCHEMA_VERSION`` and whose ``tiers`` is a non-empty list of
    mappings. Every tier must carry the ``name`` and ``port`` keys,
    because the startup code in this module indexes them directly.

    Args:
        stack_name: Name of the stack to load.

    Returns:
        The parsed stack definition dict.

    Raises:
        UpError: If the stack file is missing, unreadable, not valid
            YAML, not a mapping, has an unsupported schema version, or
            has missing or malformed tiers.
    """
    stack_path = get_stacks_dir() / f"{stack_name}.yaml"

    if not stack_path.exists():
        msg = (
            f"No stack definition found at {stack_path}.\n"
            "Run 'mlx-stack init' to create a stack configuration."
        )
        raise UpError(msg)

    # ``from None`` keeps the low-level traceback out of the user-facing
    # error; the original exception text is already part of the message.
    try:
        content = stack_path.read_text(encoding="utf-8")
    except OSError as exc:
        msg = f"Could not read stack file: {exc}"
        raise UpError(msg) from None

    try:
        stack = yaml.safe_load(content)
    except yaml.YAMLError as exc:
        msg = f"Invalid YAML in stack file {stack_path}: {exc}"
        raise UpError(msg) from None

    if not isinstance(stack, dict):
        msg = f"Stack file {stack_path} has invalid format: expected a mapping."
        raise UpError(msg)

    # Validate schema version
    schema_version = stack.get("schema_version")
    if schema_version != STACK_SCHEMA_VERSION:
        msg = (
            f"Unsupported stack schema_version: {schema_version} "
            f"(expected {STACK_SCHEMA_VERSION}). "
            "Re-run 'mlx-stack init --force' to regenerate."
        )
        raise UpError(msg)

    # Validate tiers exist
    tiers = stack.get("tiers")
    if not tiers or not isinstance(tiers, list):
        msg = "Stack definition has no tiers."
        raise UpError(msg)

    # Validate each tier up front so a malformed entry is reported as an
    # UpError here rather than as a KeyError/TypeError during startup.
    for position, tier in enumerate(tiers, start=1):
        if not isinstance(tier, dict):
            msg = (
                f"Stack definition tier #{position} has invalid format: "
                "expected a mapping."
            )
            raise UpError(msg)
        missing = [key for key in ("name", "port") if key not in tier]
        if missing:
            label = tier.get("name", f"#{position}")
            msg = (
                f"Stack definition tier {label} is missing required "
                f"key(s): {', '.join(missing)}."
            )
            raise UpError(msg)

    return stack
157
+
158
+
159
+ # --------------------------------------------------------------------------- #
160
+ # Memory estimation
161
+ # --------------------------------------------------------------------------- #
162
+
163
+
164
def estimate_memory_usage(
    tiers: list[dict[str, Any]],
    catalog: list[CatalogEntry] | None = None,
) -> float:
    """Sum a per-model memory estimate over the given tiers.

    For each tier whose ``model`` resolves to a catalog entry, the
    ``memory_gb`` of that entry's first benchmark is used. When there is
    no benchmark, or its value is not positive, the estimate falls back
    to roughly 1 GB per billion parameters (``params_b``). Tiers without
    a catalog entry contribute nothing.

    Args:
        tiers: List of tier dicts from the stack definition.
        catalog: The loaded catalog. If None, it is loaded here; a
            failure to load yields an estimate of 0.0.

    Returns:
        Estimated total memory in GB.
    """
    if catalog is None:
        try:
            catalog = load_catalog()
        except Exception:
            return 0.0

    running_total = 0.0
    for tier in tiers:
        entry = get_entry_by_id(catalog, tier.get("model", ""))
        if entry is None:
            continue

        # Only the first benchmark (in mapping order) is consulted.
        first_bench = next(iter(entry.benchmarks.values()), None)
        tier_gb = 0.0 if first_bench is None else first_bench.memory_gb

        if tier_gb <= 0:
            # Rough estimate: ~1 GB per billion parameters for int4
            tier_gb = entry.params_b * 1.0

        running_total += tier_gb

    return running_total
206
+
207
+
208
def check_memory_warning(estimated_gb: float) -> str | None:
    """Compare an estimate against currently available system memory.

    Available memory is taken from ``psutil.virtual_memory().available``.
    If that lookup fails for any reason, no warning is produced.

    Args:
        estimated_gb: Estimated total memory usage in GB.

    Returns:
        A warning string when the estimate exceeds available memory,
        otherwise None.
    """
    bytes_per_gb = 1024**3
    try:
        available_gb = psutil.virtual_memory().available / bytes_per_gb
    except Exception:
        return None

    if not estimated_gb > available_gb:
        return None

    return (
        f"Estimated memory usage ({estimated_gb:.1f} GB) exceeds "
        f"available system memory ({available_gb:.1f} GB). "
        "Performance may be degraded."
    )
230
+
231
+
232
+ # --------------------------------------------------------------------------- #
233
+ # Preflight local-model existence check
234
+ # --------------------------------------------------------------------------- #
235
+
236
+
237
def _get_models_dir() -> Path:
    """Resolve the models directory from config.

    Reads the ``model-dir`` config value and expands a leading ``~``.
    Falls back to ``<data home>/models`` when the value cannot be read,
    is unset (None), or is blank. Without that check, ``str(None)``
    would name a literal "None" directory and an empty string would
    resolve to the current working directory.

    Returns:
        Path to the models directory.
    """
    try:
        configured = get_value("model-dir")
        if configured is not None and str(configured).strip():
            return Path(str(configured)).expanduser()
    except Exception:
        # Deliberate best-effort: any config or path problem means
        # "use the default location" rather than aborting startup.
        pass
    return get_data_home() / "models"
248
+
249
+
250
def check_local_model_exists(tier: dict[str, Any]) -> str | None:
    """Check if a tier's local model exists on disk.

    Looks in the configured models directory for a directory named
    after the tier's ``model`` ID, or after the last path component of
    its ``source`` (the repo name in an ``org/repo`` source).

    Empty or missing names are never probed. Joining an empty name onto
    the models directory yields the directory itself, which would make
    every tier without a ``model`` look present.

    Args:
        tier: A tier dict from the stack definition.

    Returns:
        An error message string if the model is missing, or None if found.
    """
    models_dir = _get_models_dir()
    # ``or ""`` also covers keys that are present but null in the YAML.
    model_id = tier.get("model") or ""
    source = tier.get("source") or ""

    # Last component of the source; equals the source when it has no "/".
    source_dir_name = source.rsplit("/", 1)[-1]

    candidates = [name for name in (model_id, source_dir_name) if name]
    if any((models_dir / name).exists() for name in candidates):
        return None

    # Model not found — generate diagnostic message
    return (
        f"Model '{model_id}' not found locally. "
        f"Run 'mlx-stack pull {model_id}' to download it."
    )
281
+
282
+
283
+ # --------------------------------------------------------------------------- #
284
+ # vllm-mlx command building
285
+ # --------------------------------------------------------------------------- #
286
+
287
+
288
def build_vllm_command(
    tier: dict[str, Any],
    vllm_binary: str,
) -> list[str]:
    """Build the vllm-mlx command for a tier.

    The command is ``<binary> serve <source> --port <port> --host
    127.0.0.1`` followed by one option per entry in ``vllm_flags``.
    Underscores in flag names become dashes. Boolean flags are emitted
    bare when True and omitted when False; other values are emitted as
    ``--flag value``.

    A ``vllm_flags`` key that is missing or null is treated as "no
    flags", and flags whose value is null are skipped rather than
    passed as the literal string "None".

    Args:
        tier: Tier dict from the stack definition; must contain ``port``.
        vllm_binary: Path to the vllm-mlx binary.

    Returns:
        The command as a list of strings.
    """
    cmd = [
        vllm_binary,
        "serve", tier.get("source") or "",
        "--port", str(tier["port"]),
        "--host", "127.0.0.1",
    ]

    # ``vllm_flags:`` with no value parses to None in YAML.
    vllm_flags = tier.get("vllm_flags") or {}
    for flag_name, flag_value in vllm_flags.items():
        if flag_value is None:
            continue
        flag_key = f"--{str(flag_name).replace('_', '-')}"
        if isinstance(flag_value, bool):
            if flag_value:
                cmd.append(flag_key)
        else:
            cmd.extend([flag_key, str(flag_value)])

    return cmd
322
+
323
+
324
def build_litellm_command(
    litellm_binary: str,
    litellm_port: int,
    litellm_config_path: Path,
) -> list[str]:
    """Assemble the argv used to launch the LiteLLM proxy.

    The proxy is always bound to 127.0.0.1.

    Args:
        litellm_binary: Path to the litellm binary.
        litellm_port: Port for LiteLLM.
        litellm_config_path: Path to litellm.yaml config.

    Returns:
        The command as a list of strings.
    """
    command = [litellm_binary]
    command += ["--config", str(litellm_config_path)]
    command += ["--port", str(litellm_port)]
    command += ["--host", "127.0.0.1"]
    return command
345
+
346
+
347
+ # --------------------------------------------------------------------------- #
348
+ # Dry-run command formatting
349
+ # --------------------------------------------------------------------------- #
350
+
351
+
352
def format_dry_run_command(
    cmd: list[str],
    env_vars: dict[str, str] | None = None,
) -> str:
    """Render a command as a single display line for dry-run output.

    Environment variables, if any, are listed first in sorted key order
    as ``KEY=***``; their real values never appear in the result.

    Args:
        cmd: The command as a list of strings.
        env_vars: Optional environment variables (values hidden).

    Returns:
        A human-readable command string.
    """
    # Every value is masked, not only the ones known to be secret.
    masked_env = [f"{name}=***" for name in sorted(env_vars)] if env_vars else []
    return " ".join([*masked_env, *cmd])
376
+
377
+
378
+ # --------------------------------------------------------------------------- #
379
+ # Sort tiers by params_b descending (largest model first)
380
+ # --------------------------------------------------------------------------- #
381
+
382
+
383
def sort_tiers_by_size(
    tiers: list[dict[str, Any]],
    catalog: list[CatalogEntry] | None = None,
) -> list[dict[str, Any]]:
    """Return the tiers ordered largest model first.

    Size is the catalog entry's ``params_b``. Tiers without a catalog
    entry count as size 0 and therefore sort last. Ties are broken by
    tier name, ascending. Without a catalog the input order is kept.

    Args:
        tiers: Tier entries from the stack definition.
        catalog: Loaded catalog for params_b lookup.

    Returns:
        A new list of tiers sorted largest model first.
    """
    if catalog is None:
        return list(tiers)

    def size_then_name(tier: dict[str, Any]) -> tuple[float, str]:
        entry = get_entry_by_id(catalog, tier.get("model", ""))
        params_b = entry.params_b if entry else 0.0
        # Negated so that an ascending sort puts the largest model first.
        return (-params_b, tier.get("name", ""))

    ordered = list(tiers)
    ordered.sort(key=size_then_name)
    return ordered
409
+
410
+
411
+ # --------------------------------------------------------------------------- #
412
+ # Main startup orchestration
413
+ # --------------------------------------------------------------------------- #
414
+
415
+
416
def run_up(
    dry_run: bool = False,
    tier_filter: str | None = None,
    stack_name: str = "default",
) -> UpResult:
    """Execute the full stack startup flow.

    1. Load and validate the stack definition.
    2. Read the LiteLLM port and OpenRouter key from config.
    3. Apply the optional tier filter.
    4. Load the catalog (best effort) and sort tiers largest first.
    5. In dry-run mode, return the commands that would be run.
    6. Otherwise take the lock and run the real startup sequence.

    Args:
        dry_run: If True, show commands without executing.
        tier_filter: If set, start only this tier (plus LiteLLM).
        stack_name: Stack definition name.

    Returns:
        An UpResult with the outcome.

    Raises:
        UpError: On fatal errors (missing stack, schema mismatch,
            unknown tier name, etc.).
        LockError: Propagated unchanged from ``acquire_lock()``.
    """
    result = UpResult(dry_run=dry_run)

    # --- Load stack definition ---
    stack = load_stack_definition(stack_name)
    tiers = stack["tiers"]

    # --- Read config ---
    # TypeError covers an unset (None) value, which int() rejects.
    try:
        litellm_port = int(get_value("litellm-port"))
    except (ConfigCorruptError, TypeError, ValueError):
        litellm_port = 4000

    # An unset key must stay empty: str(None) would yield the truthy
    # string "None", which would then be exported as an API key.
    try:
        raw_key = get_value("openrouter-key")
        openrouter_key = "" if raw_key is None else str(raw_key)
    except Exception:
        openrouter_key = ""

    litellm_config_path = get_data_home() / "litellm.yaml"

    # --- Validate --tier filter ---
    if tier_filter is not None:
        valid_tier_names = [t["name"] for t in tiers]
        if tier_filter not in valid_tier_names:
            valid_list = ", ".join(sorted(valid_tier_names))
            msg = (
                f"Unknown tier '{tier_filter}'. "
                f"Valid tiers: {valid_list}"
            )
            raise UpError(msg)
        tiers = [t for t in tiers if t["name"] == tier_filter]

    # --- Load catalog for sorting and memory estimation ---
    # Best effort: without a catalog, tiers keep their file order.
    try:
        catalog = load_catalog()
    except Exception:
        catalog = None

    # --- Sort tiers by model size (largest first) ---
    tiers = sort_tiers_by_size(tiers, catalog)

    # --- Dry-run mode ---
    if dry_run:
        return _run_dry_run(
            tiers=tiers,
            litellm_port=litellm_port,
            litellm_config_path=litellm_config_path,
            openrouter_key=openrouter_key,
            catalog=catalog,
            result=result,
        )

    # --- Acquire lockfile ---
    # The context manager releases the lock however the block exits;
    # a LockError raised while acquiring it propagates to the caller.
    with acquire_lock():
        return _run_startup(
            tiers=tiers,
            litellm_port=litellm_port,
            litellm_config_path=litellm_config_path,
            openrouter_key=openrouter_key,
            catalog=catalog,
            tier_filter=tier_filter,
            result=result,
        )
512
+
513
+
514
def _run_dry_run(
    tiers: list[dict[str, Any]],
    litellm_port: int,
    litellm_config_path: Path,
    openrouter_key: str,
    catalog: list[CatalogEntry] | None,
    result: UpResult,
) -> UpResult:
    """Populate *result* with the commands a real run would execute.

    No process is started. Binaries are resolved on PATH when possible,
    otherwise their bare names are shown.

    Args:
        tiers: Tiers to start.
        litellm_port: LiteLLM port.
        litellm_config_path: Path to litellm.yaml.
        openrouter_key: OpenRouter API key; only whether it is set is
            used, and it is shown masked.
        catalog: Loaded catalog (not used by this function).
        result: The UpResult to populate.

    Returns:
        The populated UpResult.
    """
    vllm_binary = shutil.which("vllm-mlx") or "vllm-mlx"
    litellm_binary = shutil.which("litellm") or "litellm"

    for tier in tiers:
        tier_command = format_dry_run_command(build_vllm_command(tier, vllm_binary))
        result.dry_run_commands.append(
            dict(service=tier["name"], command=tier_command, type="vllm-mlx")
        )
        result.tiers.append(
            TierStatus(
                name=tier["name"],
                model=tier.get("model", ""),
                port=tier["port"],
                status="dry-run",
            )
        )

    # LiteLLM command; the key is advertised only as a masked env var.
    masked_env = {"OPENROUTER_API_KEY": "***"} if openrouter_key else None
    proxy_command = format_dry_run_command(
        build_litellm_command(litellm_binary, litellm_port, litellm_config_path),
        masked_env,
    )
    result.dry_run_commands.append(
        dict(service=LITELLM_SERVICE_NAME, command=proxy_command, type="litellm")
    )

    result.litellm = TierStatus(
        name=LITELLM_SERVICE_NAME,
        model="proxy",
        port=litellm_port,
        status="dry-run",
    )

    return result
578
+
579
+
580
def _run_startup(
    tiers: list[dict[str, Any]],
    litellm_port: int,
    litellm_config_path: Path,
    openrouter_key: str,
    catalog: list[CatalogEntry] | None,
    tier_filter: str | None,
    result: UpResult,
) -> UpResult:
    """Execute the actual startup sequence.

    Ensures both dependencies are installed and on PATH, clears stale or
    corrupt PID files, returns early when everything is already running,
    records a memory warning if needed, then starts each remaining tier
    in the given order and finally LiteLLM (only if at least one tier is
    healthy or already running).

    Args:
        tiers: Tiers to start.
        litellm_port: LiteLLM port.
        litellm_config_path: Path to litellm.yaml.
        openrouter_key: OpenRouter API key; passed to LiteLLM through
            the OPENROUTER_API_KEY environment variable when non-empty.
        catalog: Loaded catalog, used for the memory estimate.
        tier_filter: Accepted for interface compatibility; not used
            here because *tiers* is already filtered by the caller.
        result: The UpResult to populate.

    Returns:
        The populated UpResult.

    Raises:
        UpError: If a dependency cannot be installed or its binary is
            not found on PATH afterwards.
    """
    # --- Auto-install dependencies ---
    try:
        ensure_dependency("vllm-mlx")
        ensure_dependency("litellm")
    except (DependencyError, DependencyInstallError) as exc:
        raise UpError(f"Dependency installation failed: {exc}") from None

    # --- Resolve binary paths ---
    vllm_binary = shutil.which("vllm-mlx")
    if vllm_binary is None:
        raise UpError(
            "vllm-mlx not found on PATH after installation. "
            "Install manually: uv tool install vllm-mlx"
        )

    litellm_binary = shutil.which("litellm")
    if litellm_binary is None:
        raise UpError(
            "litellm not found on PATH after installation. "
            "Install manually: uv tool install litellm"
        )

    # --- Check for already-running / stale PIDs ---
    any_stale = False

    for tier in tiers:
        tier_name = tier["name"]
        try:
            pid = read_pid_file(tier_name)
        except ProcessError:
            # Corrupt PID file — treat as stale, clean up gracefully
            remove_pid_file(tier_name)
            any_stale = True
            continue

        if pid is None:
            continue  # No PID file: the tier simply needs to be started.

        if is_process_alive(pid):
            result.tiers.append(_tier_status(tier, "already-running"))
        else:
            # Stale PID — clean up
            cleanup_stale_pid(tier_name)
            any_stale = True

    # Check LiteLLM
    try:
        litellm_pid = read_pid_file(LITELLM_SERVICE_NAME)
    except ProcessError:
        # Corrupt LiteLLM PID file — treat as stale, clean up gracefully
        remove_pid_file(LITELLM_SERVICE_NAME)
        litellm_pid = None
        any_stale = True

    litellm_already_running = False
    if litellm_pid is not None:
        if is_process_alive(litellm_pid):
            litellm_already_running = True
        else:
            cleanup_stale_pid(LITELLM_SERVICE_NAME)
            any_stale = True

    # If all tiers + LiteLLM are already running, report and return
    running_count = sum(
        1 for ts in result.tiers if ts.status == "already-running"
    )
    if running_count == len(tiers) and litellm_already_running:
        result.already_running = True
        result.litellm = _litellm_status(litellm_port, "already-running")
        return result

    if any_stale:
        result.warnings.append("Cleaned up stale PID files from previously crashed services.")

    # --- Memory warning ---
    estimated_gb = estimate_memory_usage(tiers, catalog)
    if estimated_gb > 0:
        warning = check_memory_warning(estimated_gb)
        if warning:
            result.warnings.append(warning)

    # --- Start vllm-mlx instances sequentially ---
    # Tiers that already have a recorded status (already running) are
    # left alone; the name set is built once, not once per tier.
    recorded_names = {ts.name for ts in result.tiers}
    tiers_needing_start = [t for t in tiers if t["name"] not in recorded_names]

    for tier in tiers_needing_start:
        result.tiers.append(_start_tier(tier, vllm_binary))

    # --- Count total healthy (including already-running) ---
    total_healthy = sum(
        1 for t in result.tiers if t.status in ("healthy", "already-running")
    )

    # --- Start LiteLLM if any healthy tiers and not already running ---
    if litellm_already_running:
        result.litellm = _litellm_status(litellm_port, "already-running")
    elif total_healthy == 0:
        result.litellm = _litellm_status(
            litellm_port,
            "skipped",
            "All model servers failed; LiteLLM not started.",
        )
    else:
        result.litellm = _start_litellm(
            litellm_binary, litellm_port, litellm_config_path, openrouter_key,
        )

    return result


def _tier_status(
    tier: dict[str, Any],
    status: str,
    error: str | None = None,
) -> TierStatus:
    """Build the TierStatus record for *tier* with the given outcome."""
    return TierStatus(
        name=tier["name"],
        model=tier.get("model", ""),
        port=tier["port"],
        status=status,
        error=error,
    )


def _litellm_status(
    litellm_port: int,
    status: str,
    error: str | None = None,
) -> TierStatus:
    """Build the TierStatus record for the LiteLLM proxy."""
    return TierStatus(
        name=LITELLM_SERVICE_NAME,
        model="proxy",
        port=litellm_port,
        status=status,
        error=error,
    )


def _start_tier(tier: dict[str, Any], vllm_binary: str) -> TierStatus:
    """Run preflight checks, start one vllm-mlx tier and health-check it."""
    port = tier["port"]

    # Preflight: check local model exists on disk
    missing_msg = check_local_model_exists(tier)
    if missing_msg is not None:
        return _tier_status(tier, "skipped", missing_msg)

    # Check port conflict
    conflict = check_port_conflict(port)
    if conflict is not None:
        conflict_pid, conflict_name = conflict
        return _tier_status(
            tier,
            "skipped",
            f"Port {port} already in use by "
            f"PID {conflict_pid} ({conflict_name})",
        )

    # Start the vllm-mlx subprocess
    cmd = build_vllm_command(tier, vllm_binary)
    try:
        start_service(
            service_name=tier["name"],
            cmd=cmd,
            port=port,
        )
    except Exception as exc:
        # Any launch failure is recorded so the remaining tiers still run.
        return _tier_status(tier, "failed", str(exc))

    # Health check
    try:
        wait_for_healthy(port=port, path=VLLM_HEALTH_PATH)
    except HealthCheckError as exc:
        return _tier_status(tier, "failed", str(exc))

    return _tier_status(tier, "healthy")


def _start_litellm(
    litellm_binary: str,
    litellm_port: int,
    litellm_config_path: Path,
    openrouter_key: str,
) -> TierStatus:
    """Start the LiteLLM proxy, health-check it and return its status."""
    # Check LiteLLM port conflict
    conflict = check_port_conflict(litellm_port)
    if conflict is not None:
        conflict_pid, conflict_name = conflict
        return _litellm_status(
            litellm_port,
            "skipped",
            f"Port {litellm_port} already in use by "
            f"PID {conflict_pid} ({conflict_name})",
        )

    litellm_cmd = build_litellm_command(
        litellm_binary, litellm_port, litellm_config_path,
    )

    # The key travels via the environment, never on the command line.
    litellm_env: dict[str, str] | None = None
    if openrouter_key:
        litellm_env = {"OPENROUTER_API_KEY": openrouter_key}

    try:
        start_service(
            service_name=LITELLM_SERVICE_NAME,
            cmd=litellm_cmd,
            port=litellm_port,
            env=litellm_env,
        )
        wait_for_healthy(
            port=litellm_port,
            path=LITELLM_HEALTH_PATH,
        )
    except Exception as exc:
        # Launch failures and HealthCheckError are reported identically.
        return _litellm_status(litellm_port, "failed", str(exc))

    return _litellm_status(litellm_port, "healthy")