mlx-stack 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. mlx_stack/__init__.py +5 -0
  2. mlx_stack/_version.py +24 -0
  3. mlx_stack/cli/__init__.py +5 -0
  4. mlx_stack/cli/bench.py +221 -0
  5. mlx_stack/cli/config.py +166 -0
  6. mlx_stack/cli/down.py +109 -0
  7. mlx_stack/cli/init.py +180 -0
  8. mlx_stack/cli/install.py +165 -0
  9. mlx_stack/cli/logs.py +234 -0
  10. mlx_stack/cli/main.py +187 -0
  11. mlx_stack/cli/models.py +304 -0
  12. mlx_stack/cli/profile.py +65 -0
  13. mlx_stack/cli/pull.py +134 -0
  14. mlx_stack/cli/recommend.py +397 -0
  15. mlx_stack/cli/status.py +111 -0
  16. mlx_stack/cli/up.py +163 -0
  17. mlx_stack/cli/watch.py +252 -0
  18. mlx_stack/core/__init__.py +1 -0
  19. mlx_stack/core/benchmark.py +1182 -0
  20. mlx_stack/core/catalog.py +560 -0
  21. mlx_stack/core/config.py +471 -0
  22. mlx_stack/core/deps.py +323 -0
  23. mlx_stack/core/hardware.py +304 -0
  24. mlx_stack/core/launchd.py +531 -0
  25. mlx_stack/core/litellm_gen.py +188 -0
  26. mlx_stack/core/log_rotation.py +231 -0
  27. mlx_stack/core/log_viewer.py +386 -0
  28. mlx_stack/core/models.py +639 -0
  29. mlx_stack/core/paths.py +79 -0
  30. mlx_stack/core/process.py +887 -0
  31. mlx_stack/core/pull.py +815 -0
  32. mlx_stack/core/scoring.py +611 -0
  33. mlx_stack/core/stack_down.py +317 -0
  34. mlx_stack/core/stack_init.py +524 -0
  35. mlx_stack/core/stack_status.py +229 -0
  36. mlx_stack/core/stack_up.py +856 -0
  37. mlx_stack/core/watchdog.py +744 -0
  38. mlx_stack/data/__init__.py +1 -0
  39. mlx_stack/data/catalog/__init__.py +1 -0
  40. mlx_stack/data/catalog/deepseek-r1-32b.yaml +46 -0
  41. mlx_stack/data/catalog/deepseek-r1-8b.yaml +45 -0
  42. mlx_stack/data/catalog/gemma3-12b.yaml +45 -0
  43. mlx_stack/data/catalog/gemma3-27b.yaml +45 -0
  44. mlx_stack/data/catalog/gemma3-4b.yaml +45 -0
  45. mlx_stack/data/catalog/llama3.3-8b.yaml +44 -0
  46. mlx_stack/data/catalog/nemotron-49b.yaml +41 -0
  47. mlx_stack/data/catalog/nemotron-8b.yaml +44 -0
  48. mlx_stack/data/catalog/qwen3-8b.yaml +45 -0
  49. mlx_stack/data/catalog/qwen3.5-0.8b.yaml +45 -0
  50. mlx_stack/data/catalog/qwen3.5-14b.yaml +46 -0
  51. mlx_stack/data/catalog/qwen3.5-32b.yaml +45 -0
  52. mlx_stack/data/catalog/qwen3.5-3b.yaml +44 -0
  53. mlx_stack/data/catalog/qwen3.5-72b.yaml +42 -0
  54. mlx_stack/data/catalog/qwen3.5-8b.yaml +45 -0
  55. mlx_stack/py.typed +1 -0
  56. mlx_stack/utils/__init__.py +1 -0
  57. mlx_stack-0.1.0.dist-info/METADATA +397 -0
  58. mlx_stack-0.1.0.dist-info/RECORD +61 -0
  59. mlx_stack-0.1.0.dist-info/WHEEL +4 -0
  60. mlx_stack-0.1.0.dist-info/entry_points.txt +2 -0
  61. mlx_stack-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,397 @@
1
+ """CLI command for model recommendation — `mlx-stack recommend`.
2
+
3
+ Recommends an optimal model stack based on hardware profile and user intent.
4
+ Reads existing profile or auto-detects hardware. Display-only — no files written.
5
+
6
+ Supports --budget, --intent (balanced/agent-fleet), and --show-all flags.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import json
12
+ import re
13
+ from typing import Any
14
+
15
+ import click
16
+ from rich.console import Console
17
+ from rich.table import Table
18
+ from rich.text import Text
19
+
20
+ from mlx_stack.core.catalog import load_catalog
21
+ from mlx_stack.core.config import ConfigCorruptError, get_value
22
+ from mlx_stack.core.hardware import (
23
+ HardwareError,
24
+ HardwareProfile,
25
+ detect_hardware,
26
+ load_profile,
27
+ )
28
+ from mlx_stack.core.paths import get_benchmarks_dir
29
+ from mlx_stack.core.scoring import (
30
+ VALID_INTENTS,
31
+ RecommendationResult,
32
+ ScoringError,
33
+ )
34
+ from mlx_stack.core.scoring import (
35
+ recommend as run_recommend,
36
+ )
37
+
38
+ console = Console(stderr=True)
39
+
40
+
41
+ # --------------------------------------------------------------------------- #
42
+ # Budget parsing
43
+ # --------------------------------------------------------------------------- #
44
+
45
# A decimal number, optional whitespace, then an optional "gb" suffix in any letter case.
_BUDGET_PATTERN = re.compile(r"^(\d+(?:\.\d+)?)\s*(gb|GB|Gb|gB)?$")


def parse_budget(raw: str) -> float:
    """Convert a ``--budget`` argument into a number of gigabytes.

    Accepts a decimal number optionally followed by a ``gb`` suffix in any
    letter case (``'30gb'``, ``'30 GB'``, ``'16'``). Leading and trailing
    whitespace is ignored.

    Args:
        raw: The budget text exactly as given on the command line.

    Returns:
        The budget in GB.

    Raises:
        click.BadParameter: If ``raw`` does not match the expected format,
            or the number it holds is not greater than zero.
    """
    parsed = _BUDGET_PATTERN.match(raw.strip())
    if parsed is None:
        raise click.BadParameter(
            f"Invalid budget format '{raw}'. "
            f"Expected a positive number with optional 'gb' suffix (e.g., '30gb', '16').",
            param_hint="'--budget'",
        )

    gigabytes = float(parsed.group(1))
    if gigabytes > 0:
        return gigabytes

    # The pattern admits "0" and "0.0", which are syntactically fine but useless.
    raise click.BadParameter(
        f"Invalid budget '{raw}'. Budget must be a positive value.",
        param_hint="'--budget'",
    )
76
+
77
+
78
+ # --------------------------------------------------------------------------- #
79
+ # Hardware profile resolution
80
+ # --------------------------------------------------------------------------- #
81
+
82
+
83
def _resolve_profile() -> HardwareProfile:
    """Return the hardware profile that the recommendation is based on.

    A profile returned by ``load_profile()`` is preferred. When that call
    returns ``None`` the hardware is probed with ``detect_hardware()`` and
    the result is handed back directly; this function does not write it
    anywhere.

    Returns:
        The saved or freshly detected HardwareProfile.

    Raises:
        SystemExit: With exit code 1 when detection raises HardwareError.
    """
    saved = load_profile()
    if saved is None:
        # recommend is display-only, so the detected profile stays in memory.
        console.print("[dim]No saved profile found — detecting hardware...[/dim]")
        try:
            detected = detect_hardware()
        except HardwareError as exc:
            console.print(f"[bold red]Error:[/bold red] {exc}")
            raise SystemExit(1) from None
        return detected

    return saved
104
+
105
+
106
+ # --------------------------------------------------------------------------- #
107
+ # Saved benchmarks loading
108
+ # --------------------------------------------------------------------------- #
109
+
110
+
111
def _load_saved_benchmarks(profile_id: str) -> dict[str, Any] | None:
    """Load saved benchmark data for a hardware profile, if any exists.

    Looks for ``<profile_id>.json`` inside the directory returned by
    ``get_benchmarks_dir()``.

    Args:
        profile_id: The hardware profile ID, used as the file stem.

    Returns:
        The decoded JSON document when the file exists, can be read and
        decoded, and its top-level value is a dict; otherwise ``None``.
        An unreadable or undecodable file additionally prints a warning.
    """
    benchmark_file = get_benchmarks_dir() / f"{profile_id}.json"

    if not benchmark_file.exists():
        return None

    try:
        data = json.loads(benchmark_file.read_text(encoding="utf-8"))
    except (ValueError, OSError):
        # ValueError covers json.JSONDecodeError as well as the
        # UnicodeDecodeError that read_text() raises for bytes that are not
        # valid UTF-8; catching only JSONDecodeError let the latter escape
        # and abort the command instead of falling back.
        console.print(
            f"[yellow]⚠ Warning:[/yellow] Could not parse saved benchmarks "
            f"at {benchmark_file}. Falling back to catalog data."
        )
        return None

    # Valid JSON whose top level is not an object is ignored without a warning.
    return data if isinstance(data, dict) else None
139
+
140
+
141
+ # --------------------------------------------------------------------------- #
142
+ # Display helpers
143
+ # --------------------------------------------------------------------------- #
144
+
145
+
146
+ def _format_tps(tps: float, is_estimated: bool) -> str:
147
+ """Format tokens per second with optional estimated label."""
148
+ formatted = f"{tps:.1f} tok/s"
149
+ if is_estimated:
150
+ formatted += " (est.)"
151
+ return formatted
152
+
153
+
154
+ def _format_memory(memory_gb: float) -> str:
155
+ """Format memory usage in GB."""
156
+ return f"{memory_gb:.1f} GB"
157
+
158
+
159
def _display_tier_table(result: RecommendationResult) -> None:
    """Print the recommended tier assignments as a Rich table on stdout.

    Output, in order: a title carrying the intent, a hardware/budget summary
    line, one table row per entry in ``result.tiers``, a cloud-fallback note
    when an ``openrouter-key`` config value is set, a warning when any tier's
    throughput is estimated, and a reminder that nothing was written.

    Args:
        result: The recommendation to render.
    """
    out = Console()
    hardware = result.hardware_profile

    out.print()
    heading = Text("Recommended Stack", style="bold cyan")
    heading.append(f" ({result.intent})")
    out.print(heading)
    out.print(
        f"[dim]Hardware: {hardware.chip} "
        f"({hardware.memory_gb} GB) · "
        f"Budget: {result.memory_budget_gb:.1f} GB[/dim]"
    )
    out.print()

    column_specs = (
        ("Tier", {"style": "bold", "min_width": 10}),
        ("Model", {"min_width": 20}),
        ("Quant", {"min_width": 6}),
        ("Gen TPS", {"justify": "right", "min_width": 15}),
        ("Memory", {"justify": "right", "min_width": 10}),
    )
    table = Table(show_header=True, header_style="bold cyan")
    for header, options in column_specs:
        table.add_column(header, **options)

    for assignment in result.tiers:
        model = assignment.model
        table.add_row(
            assignment.tier,
            model.entry.name,
            assignment.quant,
            _format_tps(model.gen_tps, model.is_estimated),
            _format_memory(model.memory_gb),
        )

    out.print(table)

    # The cloud note is best-effort: any failure reading the key just hides it.
    # NOTE(review): the tuple looks redundant if ConfigCorruptError derives
    # from Exception — confirm before simplifying.
    try:
        openrouter_key = get_value("openrouter-key")
    except (ConfigCorruptError, Exception):
        openrouter_key = ""

    if openrouter_key:
        out.print()
        out.print(
            "[bold green]☁ Cloud Fallback[/bold green] "
            "Premium tier via OpenRouter (GPT-4o / Claude Sonnet)"
        )

    # Point the user at real benchmarking when any figure is an estimate.
    if any(assignment.model.is_estimated for assignment in result.tiers):
        out.print()
        out.print(
            "[yellow]⚠ Some performance values are estimated from bandwidth ratio.[/yellow]"
        )
        out.print(
            " Run [bold]mlx-stack bench --save[/bold] to calibrate with real measurements."
        )

    out.print()
    out.print("[dim]This is a recommendation only — no files were written.[/dim]")
    out.print("[dim]Run [bold]mlx-stack init[/bold] to generate stack configuration.[/dim]")
219
+
220
+
221
def _display_all_models(result: RecommendationResult) -> None:
    """Print every model in ``result.all_scored`` as a numbered Rich table.

    Rows are numbered from 1 in the order ``result.all_scored`` supplies
    them; this function does not sort. After the table it prints a count
    line, a cloud-fallback note when an ``openrouter-key`` config value is
    set, a warning when any throughput figure is estimated, and a reminder
    that nothing was written.

    Args:
        result: The recommendation whose scored models are listed.
    """
    out = Console()
    hardware = result.hardware_profile
    models = result.all_scored

    out.print()
    heading = Text("All Budget-Fitting Models", style="bold cyan")
    heading.append(f" ({result.intent})")
    out.print(heading)
    out.print(
        f"[dim]Hardware: {hardware.chip} "
        f"({hardware.memory_gb} GB) · "
        f"Budget: {result.memory_budget_gb:.1f} GB[/dim]"
    )
    out.print()

    column_specs = (
        ("#", {"justify": "right", "style": "dim", "min_width": 3}),
        ("Model", {"min_width": 20}),
        ("Family", {"min_width": 10}),
        ("Params", {"justify": "right", "min_width": 8}),
        ("Score", {"justify": "right", "min_width": 8}),
        ("Gen TPS", {"justify": "right", "min_width": 15}),
        ("Memory", {"justify": "right", "min_width": 10}),
    )
    table = Table(show_header=True, header_style="bold cyan")
    for header, options in column_specs:
        table.add_column(header, **options)

    for rank, candidate in enumerate(models, start=1):
        entry = candidate.entry
        table.add_row(
            str(rank),
            entry.name,
            entry.family,
            f"{entry.params_b:.1f}B",
            f"{candidate.composite_score:.3f}",
            _format_tps(candidate.gen_tps, candidate.is_estimated),
            _format_memory(candidate.memory_gb),
        )

    out.print(table)
    out.print()
    out.print(
        f"[dim]{len(models)} models fit within the "
        f"{result.memory_budget_gb:.1f} GB budget.[/dim]"
    )

    # The cloud note is best-effort: any failure reading the key just hides it.
    # NOTE(review): the tuple looks redundant if ConfigCorruptError derives
    # from Exception — confirm before simplifying.
    try:
        openrouter_key = get_value("openrouter-key")
    except (ConfigCorruptError, Exception):
        openrouter_key = ""

    if openrouter_key:
        out.print()
        out.print(
            "[bold green]☁ Cloud Fallback[/bold green] "
            "Premium tier via OpenRouter also available."
        )

    # Point the user at real benchmarking when any figure is an estimate.
    if any(candidate.is_estimated for candidate in models):
        out.print()
        out.print(
            "[yellow]⚠ Some performance values are estimated from bandwidth ratio.[/yellow]"
        )
        out.print(
            " Run [bold]mlx-stack bench --save[/bold] to calibrate with real measurements."
        )

    out.print()
    out.print("[dim]This is a recommendation only — no files were written.[/dim]")
288
+
289
+
290
+ # --------------------------------------------------------------------------- #
291
+ # Click command
292
+ # --------------------------------------------------------------------------- #
293
+
294
+
295
@click.command()
@click.option(
    "--budget",
    type=str,
    default=None,
    help="Memory budget override (e.g., '30gb', '16'). Defaults to 40%% of unified memory.",
)
@click.option(
    "--intent",
    type=str,
    default=None,
    help="Recommendation intent: balanced (default) or agent-fleet.",
)
@click.option(
    "--show-all",
    is_flag=True,
    default=False,
    help="Show all budget-fitting models sorted by score instead of tier assignments.",
)
def recommend(budget: str | None, intent: str | None, show_all: bool) -> None:
    """Recommend an optimal model stack for your hardware.

    Analyzes your hardware profile and the model catalog to recommend
    an optimal stack with tier assignments (standard, fast, longctx).

    Uses 40% of unified memory as the default budget. Override with --budget.
    Supports --intent to change optimization strategy (balanced or agent-fleet).
    Use --show-all to see all budget-fitting models ranked by composite score.

    This command is display-only — no configuration files are written.
    """
    # NOTE: the docstring above is rendered by click as the --help text, so it
    # is user-facing output; implementation notes belong in comments.
    #
    # Every failure below is reported on the stderr console and ends the
    # command with exit code 1 via SystemExit.

    # --- Validate intent ---
    if intent is None:
        intent = "balanced"
    elif intent not in VALID_INTENTS:
        valid = ", ".join(sorted(VALID_INTENTS))
        console.print(
            f"[bold red]Error:[/bold red] Invalid intent '{intent}'. "
            f"Valid intents: {valid}"
        )
        raise SystemExit(1)

    # --- Parse budget ---
    budget_gb_override: float | None = None
    if budget is not None:
        try:
            budget_gb_override = parse_budget(budget)
        except click.BadParameter as exc:
            # Reported by hand so the message matches the other errors here.
            console.print(f"[bold red]Error:[/bold red] {exc.format_message()}")
            raise SystemExit(1) from None

    # --- Resolve hardware profile ---
    profile = _resolve_profile()

    # --- Read memory-budget-pct from config (used when no --budget override) ---
    budget_pct = 40
    if budget_gb_override is None:
        try:
            budget_pct = int(get_value("memory-budget-pct"))
        except (ConfigCorruptError, ValueError, TypeError):
            # int() raises TypeError rather than ValueError for None and other
            # non-numeric objects; without catching it such a value would
            # crash the command instead of using the default.
            # NOTE(review): get_value's return type is not visible here —
            # confirm which of these cases can actually occur.
            budget_pct = 40

    # --- Load catalog ---
    try:
        catalog = load_catalog()
    except Exception as exc:
        # Broad on purpose: any catalog failure becomes one clean error line.
        console.print(f"[bold red]Error:[/bold red] Could not load model catalog: {exc}")
        raise SystemExit(1) from None

    # --- Load saved benchmarks ---
    saved_benchmarks = _load_saved_benchmarks(profile.profile_id)

    # --- Run recommendation ---
    try:
        result = run_recommend(
            catalog=catalog,
            profile=profile,
            intent=intent,
            budget_pct=budget_pct,
            budget_gb_override=budget_gb_override,
            saved_benchmarks=saved_benchmarks,
        )
    except ScoringError as exc:
        console.print(f"[bold red]Error:[/bold red] {exc}")
        raise SystemExit(1) from None

    # --- Check for zero results ---
    if not result.all_scored:
        console.print(
            f"[bold red]Error:[/bold red] No models fit within the "
            f"{result.memory_budget_gb:.1f} GB budget."
        )
        console.print(
            "[dim]Try increasing the budget with --budget or "
            "adjusting memory-budget-pct in config.[/dim]"
        )
        raise SystemExit(1)

    # --- Display results ---
    if show_all:
        _display_all_models(result)
    else:
        _display_tier_table(result)
@@ -0,0 +1,111 @@
1
+ """CLI command for service status — `mlx-stack status`.
2
+
3
+ Displays the health and metrics for all managed services in a
4
+ formatted Rich table or as JSON (with --json). Read-only: does not
5
+ modify any files or acquire the lockfile.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import json
11
+
12
+ import click
13
+ from rich.console import Console
14
+ from rich.table import Table
15
+ from rich.text import Text
16
+
17
+ from mlx_stack.core.stack_status import StatusResult, run_status, status_to_dict
18
+
19
+ console = Console(stderr=True)
20
+
21
+ # Status display styling — maps state to Rich markup
22
+ _STATUS_STYLES: dict[str, str] = {
23
+ "healthy": "[bold green]healthy[/bold green]",
24
+ "degraded": "[bold yellow]degraded[/bold yellow]",
25
+ "down": "[bold red]down[/bold red]",
26
+ "crashed": "[bold red]crashed[/bold red]",
27
+ "stopped": "[dim]stopped[/dim]",
28
+ }
29
+
30
+
31
def _display_table(result: StatusResult) -> None:
    """Print one table row per service to stdout.

    The table is titled "Service Status" and has the columns Tier, Model,
    Port, Status and Uptime. A status with no entry in ``_STATUS_STYLES``
    is shown as its raw text.

    Args:
        result: The StatusResult whose ``services`` are listed.
    """
    out = Console()
    out.print()

    column_specs = (
        ("Tier", {"style": "bold", "min_width": 12}),
        ("Model", {"min_width": 20}),
        ("Port", {"justify": "right", "min_width": 6}),
        ("Status", {"min_width": 10}),
        ("Uptime", {"justify": "right", "min_width": 10}),
    )
    table = Table(title="Service Status", show_header=True, header_style="bold cyan")
    for header, options in column_specs:
        table.add_column(header, **options)

    for service in result.services:
        styled_status = _STATUS_STYLES.get(service.status, service.status)
        table.add_row(
            service.tier,
            service.model,
            str(service.port),
            styled_status,
            service.uptime_display,
        )

    out.print(table)
    out.print()
65
+
66
+
67
def _display_json(result: StatusResult) -> None:
    """Write the status result to stdout as JSON indented by two spaces.

    Args:
        result: The StatusResult, converted with ``status_to_dict`` before
            serialization.
    """
    payload = json.dumps(status_to_dict(result), indent=2)
    click.echo(payload)
75
+
76
+
77
@click.command()
@click.option("--json", "json_output", is_flag=True, help="Output in JSON format.")
def status(json_output: bool) -> None:
    """Show health and status of all services.

    Reports the current state of each managed service: healthy, degraded,
    down, crashed, or stopped. Displays a formatted table by default, or
    valid JSON with --json.

    This command is read-only and safe to run concurrently with other
    mlx-stack commands.
    """
    # NOTE: the docstring above is rendered by click as the --help text.
    result = run_status()
    stack_missing = result.no_stack

    # JSON mode emits the same structure whether or not a stack exists.
    if json_output:
        _display_json(result)
        return

    if not stack_missing:
        _display_table(result)
        return

    # No stack configured: print a short notice instead of a table.
    out = Console()
    out.print()
    out.print(
        Text(
            result.message or "No stack configured — run 'mlx-stack init'.",
            style="yellow",
        )
    )
    out.print()
mlx_stack/cli/up.py ADDED
@@ -0,0 +1,163 @@
1
+ """CLI command for starting services — `mlx-stack up`.
2
+
3
+ Starts all services defined in the active stack, or a single tier
4
+ with --tier. Supports --dry-run to preview commands without executing.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import click
10
+ from rich.console import Console
11
+ from rich.table import Table
12
+ from rich.text import Text
13
+
14
+ from mlx_stack.core.process import LockError
15
+ from mlx_stack.core.stack_up import UpError, UpResult, run_up
16
+
17
+ console = Console(stderr=True)
18
+
19
+
20
def _display_dry_run(result: UpResult) -> None:
    """Print the commands a dry run collected, without executing anything.

    For each entry of ``result.dry_run_commands`` a label line is printed,
    followed by the command in green. Entries whose ``type`` is
    ``"vllm-mlx"`` are labelled with their service name; every other entry
    is labelled ``litellm``.

    Args:
        result: The UpResult from the dry-run; each item of its
            ``dry_run_commands`` is read by the keys ``service``,
            ``command`` and ``type``.
    """
    # Imported here so the block needs no change to the module's import list.
    from rich.markup import escape

    out = Console()
    out.print()
    out.print(Text("Dry run — commands that would be executed:", style="bold cyan"))
    out.print()

    for cmd_info in result.dry_run_commands:
        # Service names and shell commands are data, not markup. Escaping
        # keeps bracketed text (e.g. "[x]" inside an argument) from being
        # consumed as a style tag or raising a MarkupError.
        service = escape(str(cmd_info["service"]))
        command = escape(str(cmd_info["command"]))
        svc_type = cmd_info["type"]

        label = f"[bold]{service}[/bold]" if svc_type == "vllm-mlx" else "[bold]litellm[/bold]"
        out.print(f" {label}:")
        out.print(f" [green]{command}[/green]")
        out.print()
43
+
44
+
45
def _display_summary(result: UpResult) -> None:
    """Print the outcome of a start-up run as a Rich table on stdout.

    Output, in order: an "already running" notice when
    ``result.already_running`` is set, every entry of ``result.warnings``,
    a table with one row per tier plus a row for LiteLLM when
    ``result.litellm`` is present, and the endpoint URL when at least one
    tier is ``healthy`` or ``already-running``.

    Args:
        result: The UpResult from startup.
    """
    # Imported here so the block needs no change to the module's import list.
    from rich.markup import escape

    out = Console()
    out.print()

    if result.already_running:
        out.print(
            Text("All services are already running.", style="bold yellow")
        )
        out.print()

    # Warnings are free-form text: escape them so bracketed fragments are
    # shown literally instead of being parsed as Rich markup.
    for warning in result.warnings:
        out.print(f"[yellow]⚠ {escape(str(warning))}[/yellow]")

    if result.warnings:
        out.print()

    table = Table(
        title="Service Summary",
        show_header=True,
        header_style="bold cyan",
    )
    table.add_column("Service", style="bold", min_width=12)
    table.add_column("Model", min_width=20)
    table.add_column("Port", justify="right", min_width=6)
    table.add_column("Status", min_width=10)

    # Known statuses get a colour; anything else is shown as its raw text.
    status_styles = {
        "healthy": "[bold green]healthy[/bold green]",
        "already-running": "[bold green]already-running[/bold green]",
        "failed": "[bold red]failed[/bold red]",
        "skipped": "[yellow]skipped[/yellow]",
        "dry-run": "[cyan]dry-run[/cyan]",
    }

    # Tiers first, then the LiteLLM proxy (if any); all rows render alike.
    services = [*result.tiers]
    if result.litellm:
        services.append(result.litellm)

    for svc in services:
        status_display = status_styles.get(svc.status, svc.status)
        if svc.error:
            # Error text often quotes paths or arguments; escape it for the
            # same reason as the warnings above.
            status_display += f"\n[dim]{escape(str(svc.error))}[/dim]"
        table.add_row(
            svc.name,
            svc.model,
            str(svc.port),
            status_display,
        )

    out.print(table)
    out.print()

    # Show the endpoint only when at least one tier is serving.
    any_healthy = any(
        t.status in ("healthy", "already-running") for t in result.tiers
    )
    if any_healthy:
        # Falls back to port 4000 when no LiteLLM entry was reported.
        litellm_port = result.litellm.port if result.litellm else 4000
        out.print(
            f"[dim]Endpoint: http://localhost:{litellm_port}/v1[/dim]"
        )
        out.print()
127
+
128
+
129
@click.command()
@click.option("--dry-run", is_flag=True, help="Show commands without executing.")
@click.option("--tier", "tier_filter", type=str, help="Start only the specified tier.")
def up(dry_run: bool, tier_filter: str | None) -> None:
    """Start all services in the active stack.

    Reads the stack definition from ~/.mlx-stack/stacks/default.yaml
    and starts one vllm-mlx subprocess per tier plus a LiteLLM proxy.

    Use --dry-run to preview the exact commands without starting anything.
    Use --tier to start only a specific tier (plus LiteLLM if needed).
    """
    # NOTE: the docstring above is rendered by click as the --help text.
    try:
        result = run_up(dry_run=dry_run, tier_filter=tier_filter)
    except (UpError, LockError) as exc:
        # Both failures are reported identically and end with exit code 1.
        console.print(f"[bold red]Error:[/bold red] {exc}")
        raise SystemExit(1) from None

    # A dry run only prints commands and never produces a failing exit code.
    if result.dry_run:
        _display_dry_run(result)
        return

    _display_summary(result)

    # Exit non-zero when no tier ended up in a successful state.
    successful = ("healthy", "already-running", "dry-run")
    if not any(tier.status in successful for tier in result.tiers):
        raise SystemExit(1)