mlx-stack 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. mlx_stack/__init__.py +5 -0
  2. mlx_stack/_version.py +24 -0
  3. mlx_stack/cli/__init__.py +5 -0
  4. mlx_stack/cli/bench.py +221 -0
  5. mlx_stack/cli/config.py +166 -0
  6. mlx_stack/cli/down.py +109 -0
  7. mlx_stack/cli/init.py +180 -0
  8. mlx_stack/cli/install.py +165 -0
  9. mlx_stack/cli/logs.py +234 -0
  10. mlx_stack/cli/main.py +187 -0
  11. mlx_stack/cli/models.py +304 -0
  12. mlx_stack/cli/profile.py +65 -0
  13. mlx_stack/cli/pull.py +134 -0
  14. mlx_stack/cli/recommend.py +397 -0
  15. mlx_stack/cli/status.py +111 -0
  16. mlx_stack/cli/up.py +163 -0
  17. mlx_stack/cli/watch.py +252 -0
  18. mlx_stack/core/__init__.py +1 -0
  19. mlx_stack/core/benchmark.py +1182 -0
  20. mlx_stack/core/catalog.py +560 -0
  21. mlx_stack/core/config.py +471 -0
  22. mlx_stack/core/deps.py +323 -0
  23. mlx_stack/core/hardware.py +304 -0
  24. mlx_stack/core/launchd.py +531 -0
  25. mlx_stack/core/litellm_gen.py +188 -0
  26. mlx_stack/core/log_rotation.py +231 -0
  27. mlx_stack/core/log_viewer.py +386 -0
  28. mlx_stack/core/models.py +639 -0
  29. mlx_stack/core/paths.py +79 -0
  30. mlx_stack/core/process.py +887 -0
  31. mlx_stack/core/pull.py +815 -0
  32. mlx_stack/core/scoring.py +611 -0
  33. mlx_stack/core/stack_down.py +317 -0
  34. mlx_stack/core/stack_init.py +524 -0
  35. mlx_stack/core/stack_status.py +229 -0
  36. mlx_stack/core/stack_up.py +856 -0
  37. mlx_stack/core/watchdog.py +744 -0
  38. mlx_stack/data/__init__.py +1 -0
  39. mlx_stack/data/catalog/__init__.py +1 -0
  40. mlx_stack/data/catalog/deepseek-r1-32b.yaml +46 -0
  41. mlx_stack/data/catalog/deepseek-r1-8b.yaml +45 -0
  42. mlx_stack/data/catalog/gemma3-12b.yaml +45 -0
  43. mlx_stack/data/catalog/gemma3-27b.yaml +45 -0
  44. mlx_stack/data/catalog/gemma3-4b.yaml +45 -0
  45. mlx_stack/data/catalog/llama3.3-8b.yaml +44 -0
  46. mlx_stack/data/catalog/nemotron-49b.yaml +41 -0
  47. mlx_stack/data/catalog/nemotron-8b.yaml +44 -0
  48. mlx_stack/data/catalog/qwen3-8b.yaml +45 -0
  49. mlx_stack/data/catalog/qwen3.5-0.8b.yaml +45 -0
  50. mlx_stack/data/catalog/qwen3.5-14b.yaml +46 -0
  51. mlx_stack/data/catalog/qwen3.5-32b.yaml +45 -0
  52. mlx_stack/data/catalog/qwen3.5-3b.yaml +44 -0
  53. mlx_stack/data/catalog/qwen3.5-72b.yaml +42 -0
  54. mlx_stack/data/catalog/qwen3.5-8b.yaml +45 -0
  55. mlx_stack/py.typed +1 -0
  56. mlx_stack/utils/__init__.py +1 -0
  57. mlx_stack-0.1.0.dist-info/METADATA +397 -0
  58. mlx_stack-0.1.0.dist-info/RECORD +61 -0
  59. mlx_stack-0.1.0.dist-info/WHEEL +4 -0
  60. mlx_stack-0.1.0.dist-info/entry_points.txt +2 -0
  61. mlx_stack-0.1.0.dist-info/licenses/LICENSE +21 -0
mlx_stack/cli/main.py ADDED
@@ -0,0 +1,187 @@
1
+ """Main CLI entry point for mlx-stack.
2
+
3
+ Provides the top-level Click command group with --help, --version,
4
+ Rich-formatted output, and typo suggestions for unknown subcommands.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import difflib
10
+
11
+ import click
12
+ from rich.console import Console
13
+ from rich.table import Table
14
+ from rich.text import Text
15
+
16
+ from mlx_stack import __version__
17
+ from mlx_stack.cli.bench import bench as bench_command
18
+ from mlx_stack.cli.config import config as config_group
19
+ from mlx_stack.cli.down import down as down_command
20
+ from mlx_stack.cli.init import init as init_command
21
+ from mlx_stack.cli.install import install as install_command
22
+ from mlx_stack.cli.install import uninstall as uninstall_command
23
+ from mlx_stack.cli.logs import logs as logs_command
24
+ from mlx_stack.cli.models import models as models_command
25
+ from mlx_stack.cli.profile import profile as profile_command
26
+ from mlx_stack.cli.pull import pull as pull_command
27
+ from mlx_stack.cli.recommend import recommend as recommend_command
28
+ from mlx_stack.cli.status import status as status_command
29
+ from mlx_stack.cli.up import up as up_command
30
+ from mlx_stack.cli.watch import watch as watch_command
31
+
32
+ console = Console(stderr=True)
33
+
34
+
35
class RichGroup(click.Group):
    """Custom Click Group with Rich-formatted help and typo suggestions."""

    def format_help(self, ctx: click.Context, formatter: click.HelpFormatter) -> None:
        """Render the top-level help screen with Rich.

        The help is printed directly to stdout through a fresh Rich console;
        ``formatter`` is accepted for Click compatibility but nothing is
        written to it.

        Args:
            ctx: Active Click context, used to enumerate and look up commands.
            formatter: Click's help formatter (unused).
        """
        console_out = Console()

        # Title
        console_out.print()
        title = Text("mlx-stack", style="bold cyan")
        title.append(" — CLI control plane for local LLM infrastructure on Apple Silicon")
        console_out.print(title)
        console_out.print()

        # Usage
        usage = Text("Usage: ", style="bold") + Text("mlx-stack [OPTIONS] COMMAND [ARGS]...")
        console_out.print(usage)
        console_out.print()

        # Options
        console_out.print(Text("Options:", style="bold yellow"))
        options_table = Table(show_header=False, box=None, padding=(0, 2))
        options_table.add_column(style="green", min_width=20)
        options_table.add_column()
        options_table.add_row("--version", "Show version and exit.")
        options_table.add_row("--help", "Show this message and exit.")
        console_out.print(options_table)
        console_out.print()

        # Commands grouped by category
        commands = self.list_commands(ctx)
        if commands:
            # Pre-seeded keys fix the display order of the known categories.
            categories: dict[str, list[tuple[str, str]]] = {
                "Setup & Configuration": [],
                "Model Management": [],
                "Stack Lifecycle": [],
                "Diagnostics": [],
            }

            command_categories = {
                "profile": "Setup & Configuration",
                "config": "Setup & Configuration",
                "init": "Setup & Configuration",
                "recommend": "Model Management",
                "models": "Model Management",
                "pull": "Model Management",
                "up": "Stack Lifecycle",
                "down": "Stack Lifecycle",
                "status": "Stack Lifecycle",
                "watch": "Stack Lifecycle",
                "install": "Stack Lifecycle",
                "uninstall": "Stack Lifecycle",
                "bench": "Diagnostics",
                "logs": "Diagnostics",
            }

            for cmd_name in commands:
                cmd = self.get_command(ctx, cmd_name)
                if cmd is None:
                    continue
                help_text = cmd.get_short_help_str(limit=80)
                # Commands missing from the mapping land in a trailing
                # "Other" bucket that is created on first use.
                category = command_categories.get(cmd_name, "Other")
                categories.setdefault(category, []).append((cmd_name, help_text))

            for category_name, cmds in categories.items():
                if not cmds:
                    continue
                console_out.print(Text(f"{category_name}:", style="bold yellow"))
                cmd_table = Table(show_header=False, box=None, padding=(0, 2))
                cmd_table.add_column(style="green", min_width=20)
                cmd_table.add_column()
                for cmd_name, help_text in cmds:
                    cmd_table.add_row(cmd_name, help_text)
                console_out.print(cmd_table)
                console_out.print()

    def resolve_command(
        self, ctx: click.Context, args: list[str]
    ) -> tuple[str | None, click.Command | None, list[str]]:
        """Resolve a subcommand, suggesting close matches for unknown names.

        Args:
            ctx: Active Click context.
            args: Remaining command-line arguments; ``args[0]`` is the
                attempted command name.

        Returns:
            Whatever ``click.Group.resolve_command`` returns on success.

        Raises:
            click.UsageError: Re-raised unchanged when ``args`` is empty.
            click.exceptions.Exit: With code 2 (via ``ctx.exit``) after the
                error and any suggestions have been printed to stderr.
        """
        try:
            return super().resolve_command(ctx, args)
        except click.UsageError:
            if not args:
                raise

            # Local import: only needed on this error path.
            from rich.markup import escape

            cmd_name = args[0]
            available = self.list_commands(ctx)
            matches = difflib.get_close_matches(cmd_name, available, n=3, cutoff=0.5)

            error_msg = f"Error: No such command '{cmd_name}'."
            if matches:
                suggestions = ", ".join(f"'{m}'" for m in matches)
                error_msg += f"\n\nDid you mean one of these?\n {suggestions}"
            error_msg += "\n\nRun 'mlx-stack --help' for a list of available commands."

            # cmd_name is raw user input: escape it so text such as "[/x]" is
            # shown literally instead of being parsed as Rich markup.
            console.print(f"[red]{escape(error_msg)}[/red]")
            # ctx.exit() raises, so nothing after this line would run.
            ctx.exit(2)
138
+
139
+
140
def version_callback(ctx: click.Context, _param: click.Parameter, value: bool) -> None:
    """Eager ``--version`` callback: print the version string and exit with code 0.

    Does nothing when the flag was not given or when Click is in
    resilient-parsing mode.
    """
    if value and not ctx.resilient_parsing:
        click.echo(f"mlx-stack, version {__version__}")
        ctx.exit(0)
146
+
147
+
148
@click.group(cls=RichGroup, invoke_without_command=True)
@click.option(
    "--version",
    is_flag=True,
    is_eager=True,
    expose_value=False,
    callback=version_callback,
    help="Show version and exit.",
)
@click.pass_context
def cli(ctx: click.Context) -> None:
    """CLI control plane for local LLM infrastructure on Apple Silicon."""
    # A subcommand was given: Click dispatches to it, nothing to do here.
    if ctx.invoked_subcommand is not None:
        return
    # Bare invocation: fall back to the help screen.
    click.echo(ctx.get_help())
162
+
163
+
164
# --- Subcommand registration ---
# Each pair is (imported Click command object, name it is exposed under on `cli`).
for _command, _exposed_name in (
    (profile_command, "profile"),
    (recommend_command, "recommend"),
    (init_command, "init"),
    (pull_command, "pull"),
    (models_command, "models"),
    (up_command, "up"),
    (down_command, "down"),
    (status_command, "status"),
    (watch_command, "watch"),
    (install_command, "install"),
    (uninstall_command, "uninstall"),
    (bench_command, "bench"),
    (logs_command, "logs"),
    (config_group, "config"),
):
    cli.add_command(_command, _exposed_name)
# Keep the module namespace free of the loop variables.
del _command, _exposed_name
mlx_stack/cli/models.py ADDED
@@ -0,0 +1,304 @@
1
+ """CLI command for model listing — `mlx-stack models`.
2
+
3
+ Lists locally downloaded models with disk size, quantization, and source type.
4
+ Active stack models are marked with a visual indicator. The --catalog flag
5
+ shows all 15 catalog models with hardware-specific benchmark data.
6
+
7
+ Output is formatted as a Rich table with human-readable names.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import click
13
+ from rich.console import Console
14
+ from rich.table import Table
15
+ from rich.text import Text
16
+
17
+ from mlx_stack.core.catalog import (
18
+ CatalogError,
19
+ load_catalog,
20
+ query_by_capability,
21
+ query_by_family,
22
+ query_by_tag,
23
+ )
24
+ from mlx_stack.core.hardware import load_profile
25
+ from mlx_stack.core.models import (
26
+ ModelsError,
27
+ format_size,
28
+ get_models_directory,
29
+ get_remote_stack_models,
30
+ list_catalog_models,
31
+ scan_local_models,
32
+ )
33
+
34
+ console = Console(stderr=True)
35
+
36
+
37
+ # --------------------------------------------------------------------------- #
38
+ # Local models display
39
+ # --------------------------------------------------------------------------- #
40
+
41
+
42
def _display_local_models() -> None:
    """Print the downloaded models, plus stack models that are not downloaded.

    Output goes to stdout as Rich tables. If the catalog cannot be loaded the
    listing continues with an empty catalog.
    """
    out = Console()

    models_dir = get_models_directory()
    try:
        catalog = load_catalog()
    except CatalogError:
        # Best effort: carry on with an empty catalog.
        catalog = []

    downloaded = scan_local_models(models_dir=models_dir, catalog=catalog)
    not_downloaded = get_remote_stack_models(local_models=downloaded, catalog=catalog)

    # Nothing at all to show: print a hint and stop.
    if not (downloaded or not_downloaded):
        out.print()
        out.print(
            "[yellow]No models found.[/yellow] "
            "Run [bold]mlx-stack pull[/bold] to download a model, "
            "or [bold]mlx-stack init[/bold] to set up a stack."
        )
        out.print()
        return

    out.print()
    out.print(Text("Local Models", style="bold cyan"))
    out.print()

    if not downloaded:
        out.print("[dim]No local models downloaded yet.[/dim]")
    else:
        table = Table(show_header=True, header_style="bold cyan")
        # First column holds the "active in stack" marker.
        for heading, options in (
            ("", {"min_width": 2}),
            ("Model", {"min_width": 20}),
            ("Size", {"justify": "right", "min_width": 8}),
            ("Quant", {"min_width": 6}),
            ("Source", {"min_width": 14}),
        ):
            table.add_column(heading, **options)

        for entry in downloaded:
            table.add_row(
                Text(
                    "✓" if entry.is_active else "",
                    style="bold green" if entry.is_active else "",
                ),
                # Prefer the catalog name; fall back to the directory name.
                entry.catalog_name or entry.name,
                format_size(entry.disk_size_bytes),
                entry.quant,
                entry.source_type,
            )

        out.print(table)

    # Stack models that are not downloaded get their own table.
    if not_downloaded:
        out.print()
        out.print(Text("Stack Models (not downloaded)", style="bold yellow"))
        out.print()

        remote_table = Table(show_header=True, header_style="bold yellow")
        for heading, options in (
            ("", {"min_width": 2}),
            ("Model", {"min_width": 20}),
            ("Tier", {"min_width": 10}),
            ("Quant", {"min_width": 6}),
            ("Source", {"min_width": 10}),
            ("Est. Size", {"justify": "right", "min_width": 10}),
        ):
            remote_table.add_column(heading, **options)

        for item in not_downloaded:
            if item.get("est_size_gb"):
                est_size = f"{item['est_size_gb']:.1f} GB"
            else:
                est_size = "—"
            remote_table.add_row(
                Text("✓", style="bold green"),
                item["catalog_name"],
                item["tier"],
                item["quant"],
                "remote",
                est_size,
            )

        out.print(remote_table)

    out.print()
    out.print(f"[dim]Models directory: {models_dir}[/dim]")
    # Only explain the check mark when at least one was printed.
    if any(entry.is_active for entry in downloaded) or not_downloaded:
        out.print("[dim]✓ = active in current stack[/dim]")
    out.print()
132
+
133
+
134
+ # --------------------------------------------------------------------------- #
135
+ # Catalog display
136
+ # --------------------------------------------------------------------------- #
137
+
138
+
139
def _display_catalog(
    family: str | None = None,
    tag: str | None = None,
    tool_calling: bool = False,
) -> None:
    """Display the model catalog with hardware-specific benchmark data.

    Args:
        family: Optional family name filter (case-insensitive).
        tag: Optional tag filter (case-insensitive).
        tool_calling: If True, filter to tool-calling-capable models only.

    Raises:
        SystemExit: With code 1 when the catalog cannot be loaded.
    """
    # Local import: only needed to neutralise user-supplied filter text.
    from rich.markup import escape

    out = Console()

    try:
        catalog = load_catalog()
    except CatalogError as exc:
        console.print(f"[bold red]Error:[/bold red] Could not load catalog: {exc}")
        raise SystemExit(1) from None

    # Apply filters
    filtered = catalog
    if family:
        filtered = query_by_family(filtered, family)
    if tag:
        filtered = query_by_tag(filtered, tag)
    if tool_calling:
        filtered = query_by_capability(filtered, tool_calling=True)

    if not filtered:
        out.print()
        filter_parts: list[str] = []
        if family:
            filter_parts.append(f"family={family}")
        if tag:
            filter_parts.append(f"tag={tag}")
        if tool_calling:
            filter_parts.append("tool-calling")
        filter_desc = ", ".join(filter_parts) if filter_parts else "filters"
        # --family/--tag come straight from the command line; escape them so a
        # value like "[/x]" is shown literally instead of being parsed as Rich
        # markup.
        out.print(
            f"[yellow]No models match the given filters ({escape(filter_desc)}).[/yellow] "
            "Run [bold]mlx-stack models --catalog[/bold] to see all models."
        )
        out.print()
        return

    profile = load_profile()
    # Local models are scanned against the unfiltered catalog.
    local_models = scan_local_models(catalog=catalog)
    catalog_models = list_catalog_models(
        catalog=filtered, profile=profile, local_models=local_models
    )

    out.print()
    out.print(Text("Model Catalog", style="bold cyan"))

    if profile:
        out.print(f"[dim]Hardware: {profile.chip} ({profile.memory_gb} GB)[/dim]")
    else:
        out.print(
            "[dim]No hardware profile — run 'mlx-stack profile' for hardware-specific data[/dim]"
        )

    out.print()

    table = Table(show_header=True, header_style="bold cyan")
    table.add_column("", width=1)  # Local indicator
    table.add_column("Name", min_width=14, no_wrap=True)
    table.add_column("Family", min_width=8)
    table.add_column("Params", justify="right", min_width=5)
    table.add_column("Quants", min_width=10)

    # Benchmark columns are only shown when a hardware profile exists.
    if profile:
        table.add_column("Gen t/s", justify="right", min_width=7)
        table.add_column("Mem GB", justify="right", min_width=6)

    for cm in catalog_models:
        # Local indicator
        local_indicator = "●" if cm.is_local else ""
        local_style = "bold green" if cm.is_local else ""

        # Parameters (the original conditional had two identical branches)
        params_str = f"{cm.params_b:.1f}B"

        # Quantizations
        quants_str = ", ".join(cm.quants)

        row: list[str | Text] = [
            Text(local_indicator, style=local_style),
            cm.name,
            cm.family,
            params_str,
            quants_str,
        ]

        if profile:
            # Gen t/s; "~" marks an estimated value, "—" a missing one.
            if cm.gen_tps is not None:
                tps_str = f"{cm.gen_tps:.0f}"
                if cm.is_estimated:
                    tps_str += "~"
            else:
                tps_str = "—"

            # Memory
            if cm.memory_gb is not None:
                mem_str = f"{cm.memory_gb:.1f}"
                if cm.is_estimated:
                    mem_str += "~"
            else:
                mem_str = "—"

            row.extend([tps_str, mem_str])

        table.add_row(*row)

    out.print(table)

    out.print()
    if profile and any(cm.is_estimated for cm in catalog_models):
        out.print("[dim]~ = estimated values (run 'mlx-stack bench --save' to calibrate)[/dim]")
    out.print("[dim]● = available locally[/dim]")
    out.print()
261
+
262
+
263
+ # --------------------------------------------------------------------------- #
264
+ # Click command
265
+ # --------------------------------------------------------------------------- #
266
+
267
+
268
@click.command()
@click.option("--catalog", is_flag=True, help="Show full catalog with benchmark data.")
@click.option("--family", default=None, help="Filter catalog by model family (e.g., 'qwen3.5').")
@click.option("--tag", default=None, help="Filter catalog by tag (e.g., 'agent-ready').")
@click.option(
    "--tool-calling", "tool_calling", is_flag=True,
    help="Filter catalog to tool-calling-capable models only.",
)
def models(
    catalog: bool,
    family: str | None,
    tag: str | None,
    tool_calling: bool,
) -> None:
    """List local models or browse the catalog.

    Without flags, shows locally downloaded models with disk size,
    quantization, and source type. Active stack models are marked
    with a visual indicator.

    Use --catalog to display all 15 catalog models with hardware-specific
    benchmark data (gen_tps, memory) for your detected hardware profile.

    Filter flags (--family, --tag, --tool-calling) imply --catalog.
    """
    # Any filter flag switches to catalog mode even without --catalog; the
    # help text above now says so instead of claiming --catalog is required.
    show_catalog = catalog or bool(family or tag or tool_calling)

    try:
        if show_catalog:
            _display_catalog(family=family, tag=tag, tool_calling=tool_calling)
        else:
            _display_local_models()
    except ModelsError as exc:
        # Errors go to the stderr console; exit code 1 signals failure.
        console.print(f"[bold red]Error:[/bold red] {exc}")
        raise SystemExit(1) from None
mlx_stack/cli/profile.py ADDED
@@ -0,0 +1,65 @@
1
+ """CLI command for hardware detection — `mlx-stack profile`.
2
+
3
+ Detects Apple Silicon hardware, displays results as a Rich table,
4
+ and writes the profile to ~/.mlx-stack/profile.json.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import click
10
+ from rich.console import Console
11
+ from rich.table import Table
12
+
13
+ from mlx_stack.core.hardware import HardwareError, detect_hardware, save_profile
14
+
15
+ console = Console(stderr=True)
16
+
17
+
18
@click.command()
def profile() -> None:
    """Detect Apple Silicon hardware and write profile."""
    # Detection and saving have separate handlers so the message names the
    # step that failed; both exit with code 1.
    try:
        hw = detect_hardware()
    except HardwareError as exc:
        console.print(f"[bold red]Error:[/bold red] {exc}")
        raise SystemExit(1) from None

    try:
        save_profile(hw)
    except OSError as exc:
        console.print(f"[bold red]Error:[/bold red] Could not write profile: {exc}")
        raise SystemExit(1) from None

    # Results go to stdout as a two-column Rich table.
    out = Console()
    table = Table(title="Hardware Profile", show_header=True, header_style="bold cyan")
    table.add_column("Property", style="bold")
    table.add_column("Value")

    for label, value in (
        ("Chip", hw.chip),
        ("GPU Cores", str(hw.gpu_cores)),
        ("Unified Memory", f"{hw.memory_gb} GB"),
        (
            "Memory Bandwidth",
            f"{hw.bandwidth_gbps} GB/s" + (" (estimate)" if hw.is_estimate else ""),
        ),
        ("Profile ID", hw.profile_id),
    ):
        table.add_row(label, value)

    out.print()
    out.print(table)

    # Estimated bandwidth gets an extra warning with a calibration hint.
    if hw.is_estimate:
        out.print()
        out.print("[yellow]⚠ Bandwidth is estimated for unknown chip.[/yellow]")
        out.print(" Run [bold]mlx-stack bench --save[/bold] to calibrate with real measurements.")

    out.print()
    from mlx_stack.core.paths import get_profile_path

    out.print(f"[dim]Profile saved to {get_profile_path()}[/dim]")
mlx_stack/cli/pull.py ADDED
@@ -0,0 +1,134 @@
1
+ """CLI command for model download — `mlx-stack pull`.
2
+
3
+ Downloads models from the catalog with source resolution, disk space
4
+ checking, progress display, duplicate detection, inventory tracking,
5
+ and optional post-download benchmark.
6
+
7
+ Supports --quant for quantization override, --bench for post-download
8
+ smoke test, and --force for re-downloading existing models.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import click
14
+ from rich.console import Console
15
+
16
+ from mlx_stack.core.catalog import CatalogError
17
+ from mlx_stack.core.pull import (
18
+ ConversionError,
19
+ DiskSpaceError,
20
+ DownloadError,
21
+ InvalidModelError,
22
+ PullError,
23
+ pull_model,
24
+ )
25
+
26
+ console = Console(stderr=True)
27
+
28
+
29
@click.command()
@click.argument("model", required=True)
@click.option(
    "--quant",
    type=str,
    default=None,
    help="Quantization level (int4, int8, bf16). Default from config.",
)
@click.option(
    "--bench",
    is_flag=True,
    default=False,
    help="Run a quick benchmark after download.",
)
@click.option(
    "--force",
    is_flag=True,
    default=False,
    help="Re-download even if model already exists.",
)
def pull(model: str, quant: str | None, bench: bool, force: bool) -> None:
    """Download a model from the catalog.

    MODEL is the catalog model ID (e.g., qwen3.5-8b). Use 'mlx-stack models --catalog'
    to see available models.

    Without --quant, uses the default quantization from config (default: int4).
    Invalid quantization values are rejected with a clear error.

    Downloads are checked against available disk space before starting.
    Already-downloaded models are detected and skipped unless --force is used.

    With --bench, runs a quick benchmark after download completes. This
    auto-installs vllm-mlx if needed.
    """
    # Ordered (exception class, message label) pairs. The first entry that
    # matches the raised exception wins, mirroring a chain of except clauses
    # written in this order.
    labelled_errors = (
        (InvalidModelError, "Error"),
        (DiskSpaceError, "Error"),
        (DownloadError, "Download error"),
        (ConversionError, "Conversion error"),
        (PullError, "Error"),
        (CatalogError, "Catalog error"),
    )

    out = Console()

    try:
        result = pull_model(
            model_id=model,
            quant=quant,
            force=force,
            console=out,
        )

        if bench:
            _run_post_download_bench(model, result.quant, out)

    except tuple(error_type for error_type, _ in labelled_errors) as exc:
        label = next(text for error_type, text in labelled_errors if isinstance(exc, error_type))
        # Failures are reported on the stderr console and exit with code 1.
        console.print(f"[bold red]{label}:[/bold red] {exc}")
        raise SystemExit(1) from None
95
+
96
+
97
+ def _run_post_download_bench(model_id: str, quant: str, out: Console) -> None:
98
+ """Run a quick benchmark after downloading a model.
99
+
100
+ Auto-installs vllm-mlx if needed.
101
+
102
+ Args:
103
+ model_id: The model ID that was pulled.
104
+ quant: The quantization level.
105
+ out: Rich console for output.
106
+ """
107
+ out.print()
108
+ out.print("[bold cyan]Running post-download benchmark...[/bold cyan]")
109
+
110
+ try:
111
+ from mlx_stack.core.benchmark import BenchmarkError, run_benchmark
112
+
113
+ result = run_benchmark(target=model_id, save=True)
114
+ out.print(
115
+ f" Prompt TPS: {result.prompt_tps_mean:.1f} ± {result.prompt_tps_std:.1f} tok/s"
116
+ )
117
+ out.print(
118
+ f" Gen TPS: {result.gen_tps_mean:.1f} ± {result.gen_tps_std:.1f} tok/s"
119
+ )
120
+ out.print()
121
+ out.print(
122
+ "[dim]Results saved for use by 'recommend' and 'init' scoring.[/dim]"
123
+ )
124
+ except BenchmarkError as exc:
125
+ out.print(
126
+ f"[yellow]Benchmark failed: {exc}[/yellow]\n"
127
+ f"Run 'mlx-stack bench {model_id}' to retry."
128
+ )
129
+ except Exception as exc:
130
+ out.print(
131
+ f"[yellow]Could not run benchmark: {exc}[/yellow]\n"
132
+ "Skipping benchmark. Install vllm-mlx manually and run "
133
+ f"'mlx-stack bench {model_id}'."
134
+ )