mlx-stack 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. mlx_stack/__init__.py +5 -0
  2. mlx_stack/_version.py +24 -0
  3. mlx_stack/cli/__init__.py +5 -0
  4. mlx_stack/cli/bench.py +221 -0
  5. mlx_stack/cli/config.py +166 -0
  6. mlx_stack/cli/down.py +109 -0
  7. mlx_stack/cli/init.py +180 -0
  8. mlx_stack/cli/install.py +165 -0
  9. mlx_stack/cli/logs.py +234 -0
  10. mlx_stack/cli/main.py +187 -0
  11. mlx_stack/cli/models.py +304 -0
  12. mlx_stack/cli/profile.py +65 -0
  13. mlx_stack/cli/pull.py +134 -0
  14. mlx_stack/cli/recommend.py +397 -0
  15. mlx_stack/cli/status.py +111 -0
  16. mlx_stack/cli/up.py +163 -0
  17. mlx_stack/cli/watch.py +252 -0
  18. mlx_stack/core/__init__.py +1 -0
  19. mlx_stack/core/benchmark.py +1182 -0
  20. mlx_stack/core/catalog.py +560 -0
  21. mlx_stack/core/config.py +471 -0
  22. mlx_stack/core/deps.py +323 -0
  23. mlx_stack/core/hardware.py +304 -0
  24. mlx_stack/core/launchd.py +531 -0
  25. mlx_stack/core/litellm_gen.py +188 -0
  26. mlx_stack/core/log_rotation.py +231 -0
  27. mlx_stack/core/log_viewer.py +386 -0
  28. mlx_stack/core/models.py +639 -0
  29. mlx_stack/core/paths.py +79 -0
  30. mlx_stack/core/process.py +887 -0
  31. mlx_stack/core/pull.py +815 -0
  32. mlx_stack/core/scoring.py +611 -0
  33. mlx_stack/core/stack_down.py +317 -0
  34. mlx_stack/core/stack_init.py +524 -0
  35. mlx_stack/core/stack_status.py +229 -0
  36. mlx_stack/core/stack_up.py +856 -0
  37. mlx_stack/core/watchdog.py +744 -0
  38. mlx_stack/data/__init__.py +1 -0
  39. mlx_stack/data/catalog/__init__.py +1 -0
  40. mlx_stack/data/catalog/deepseek-r1-32b.yaml +46 -0
  41. mlx_stack/data/catalog/deepseek-r1-8b.yaml +45 -0
  42. mlx_stack/data/catalog/gemma3-12b.yaml +45 -0
  43. mlx_stack/data/catalog/gemma3-27b.yaml +45 -0
  44. mlx_stack/data/catalog/gemma3-4b.yaml +45 -0
  45. mlx_stack/data/catalog/llama3.3-8b.yaml +44 -0
  46. mlx_stack/data/catalog/nemotron-49b.yaml +41 -0
  47. mlx_stack/data/catalog/nemotron-8b.yaml +44 -0
  48. mlx_stack/data/catalog/qwen3-8b.yaml +45 -0
  49. mlx_stack/data/catalog/qwen3.5-0.8b.yaml +45 -0
  50. mlx_stack/data/catalog/qwen3.5-14b.yaml +46 -0
  51. mlx_stack/data/catalog/qwen3.5-32b.yaml +45 -0
  52. mlx_stack/data/catalog/qwen3.5-3b.yaml +44 -0
  53. mlx_stack/data/catalog/qwen3.5-72b.yaml +42 -0
  54. mlx_stack/data/catalog/qwen3.5-8b.yaml +45 -0
  55. mlx_stack/py.typed +1 -0
  56. mlx_stack/utils/__init__.py +1 -0
  57. mlx_stack-0.1.0.dist-info/METADATA +397 -0
  58. mlx_stack-0.1.0.dist-info/RECORD +61 -0
  59. mlx_stack-0.1.0.dist-info/WHEEL +4 -0
  60. mlx_stack-0.1.0.dist-info/entry_points.txt +2 -0
  61. mlx_stack-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,524 @@
1
+ """Stack initialization logic for mlx-stack.
2
+
3
+ Generates stack definition YAML and LiteLLM config files from a
4
+ recommendation result. Handles port allocation, vllm_flags generation,
5
+ cloud fallback, missing model detection, and overwrite protection.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import socket
11
+ from datetime import datetime, timezone
12
+ from pathlib import Path
13
+ from typing import Any
14
+
15
+ import yaml
16
+
17
+ from mlx_stack.core.catalog import CatalogEntry, get_entry_by_id, load_catalog
18
+ from mlx_stack.core.config import ConfigCorruptError, get_value
19
+ from mlx_stack.core.hardware import detect_hardware, load_profile, save_profile
20
+ from mlx_stack.core.litellm_gen import generate_litellm_config, render_litellm_yaml
21
+ from mlx_stack.core.paths import ensure_data_home, get_data_home, get_stacks_dir
22
+ from mlx_stack.core.scoring import (
23
+ VALID_INTENTS,
24
+ RecommendationResult,
25
+ ScoringError,
26
+ TierAssignment,
27
+ )
28
+ from mlx_stack.core.scoring import recommend as run_recommend
29
+
30
+ # --------------------------------------------------------------------------- #
31
+ # Constants
32
+ # --------------------------------------------------------------------------- #
33
+
34
+ # Default starting port for vllm-mlx instances
35
+ _VLLM_BASE_PORT = 8000
36
+
37
+ # Schema version for stack definition files
38
+ STACK_SCHEMA_VERSION = 1
39
+
40
+ # Default stack name
41
+ DEFAULT_STACK_NAME = "default"
42
+
43
+
44
+ # --------------------------------------------------------------------------- #
45
+ # Exceptions
46
+ # --------------------------------------------------------------------------- #
47
+
48
+
49
class InitError(Exception):
    """Error raised when a stack cannot be initialized.

    Carries a human-readable message describing why initialization
    was aborted.
    """
51
+
52
+
53
+ # --------------------------------------------------------------------------- #
54
+ # Port allocation
55
+ # --------------------------------------------------------------------------- #
56
+
57
+
58
+ def _is_port_available(port: int) -> bool:
59
+ """Check if a TCP port is available for binding.
60
+
61
+ Args:
62
+ port: The port number to check.
63
+
64
+ Returns:
65
+ True if the port is available, False otherwise.
66
+ """
67
+ try:
68
+ with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
69
+ sock.settimeout(1)
70
+ sock.bind(("127.0.0.1", port))
71
+ return True
72
+ except OSError:
73
+ return False
74
+
75
+
76
def allocate_ports(
    num_tiers: int,
    litellm_port: int = 4000,
    base_port: int = _VLLM_BASE_PORT,
) -> list[int]:
    """Allocate unique ports for vllm-mlx instances.

    Walks upward from ``base_port``, skipping the LiteLLM port and any
    port that is already in use (detected via socket binding), so the
    result is deterministic for a given set of busy ports.

    Args:
        num_tiers: Number of tiers needing ports.
        litellm_port: The LiteLLM proxy port to avoid.
        base_port: Starting port for allocation.

    Returns:
        List of unique port numbers, one per tier, in ascending order.

    Raises:
        InitError: If not enough ports can be allocated within the
            range ``base_port .. base_port + 100``.
    """
    max_port = base_port + 100  # Documented upper bound of the search range
    # Never probe beyond the highest valid TCP port.
    last_candidate = min(max_port, 65535)

    ports: list[int] = []
    candidate = base_port
    while len(ports) < num_tiers:
        # Enforce the bound on every candidate, not only while skipping
        # busy ports, so the result never leaves the documented range.
        if candidate > last_candidate:
            msg = (
                f"Could not allocate {num_tiers} free ports starting "
                f"from {base_port}. All ports in range "
                f"{base_port}–{max_port} are in use or reserved."
            )
            raise InitError(msg)
        if candidate != litellm_port and _is_port_available(candidate):
            ports.append(candidate)
        candidate += 1

    return ports
118
+
119
+
120
+ # --------------------------------------------------------------------------- #
121
+ # vllm flags generation
122
+ # --------------------------------------------------------------------------- #
123
+
124
+
125
def build_vllm_flags(entry: CatalogEntry) -> dict[str, Any]:
    """Derive the vllm_flags mapping for a model from its capabilities.

    Every model receives ``continuous_batching`` and ``use_paged_cache``
    set to True. A tool-calling model also receives
    ``enable_auto_tool_choice`` and, when the catalog names one, its
    ``tool_call_parser``. A thinking model with a catalog-provided
    reasoning parser also receives ``reasoning_parser``.

    Args:
        entry: The catalog entry for the model.

    Returns:
        A dict of vllm flags.
    """
    caps = entry.capabilities
    flags: dict[str, Any] = dict(continuous_batching=True, use_paged_cache=True)

    if caps.tool_calling:
        tool_flags: dict[str, Any] = {"enable_auto_tool_choice": True}
        parser = caps.tool_call_parser
        if parser:
            tool_flags["tool_call_parser"] = parser
        flags.update(tool_flags)

    # The reasoning parser is only consulted for thinking models.
    reasoning = caps.reasoning_parser if caps.thinking else None
    if reasoning:
        flags["reasoning_parser"] = reasoning

    return flags
159
+
160
+
161
+ # --------------------------------------------------------------------------- #
162
+ # Stack definition generation
163
+ # --------------------------------------------------------------------------- #
164
+
165
+
166
def _build_tier_entry(
    assignment: TierAssignment,
    port: int,
    catalog: list[CatalogEntry],
) -> dict[str, Any]:
    """Assemble the stack-YAML mapping for one tier.

    Args:
        assignment: The tier assignment from the scoring engine.
        port: The allocated port for this tier.
        catalog: The full catalog (accepted for interface parity; not
            read by this function).

    Returns:
        A dict with the tier's name, model id, quant, source repo,
        port and vllm flags.
    """
    model_entry = assignment.model.entry
    chosen_quant = assignment.quant

    # Source HF repo for the chosen quant; empty when the catalog entry
    # has no source listed for it.
    hf_repo = (
        model_entry.sources[chosen_quant].hf_repo
        if chosen_quant in model_entry.sources
        else ""
    )

    tier_entry: dict[str, Any] = {}
    tier_entry["name"] = assignment.tier
    tier_entry["model"] = model_entry.id
    tier_entry["quant"] = chosen_quant
    tier_entry["source"] = hf_repo
    tier_entry["port"] = port
    tier_entry["vllm_flags"] = build_vllm_flags(model_entry)
    return tier_entry
197
+
198
+
199
def generate_stack_definition(
    recommendation: RecommendationResult,
    ports: list[int],
    catalog: list[CatalogEntry],
    stack_name: str = DEFAULT_STACK_NAME,
    cloud_fallback: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Build the dict that is serialized as the stack definition YAML.

    Args:
        recommendation: The recommendation result with tier assignments.
        ports: Allocated ports, one per tier, in tier order.
        catalog: The full catalog, forwarded to the per-tier builder.
        stack_name: Name of the stack (default: 'default').
        cloud_fallback: Optional cloud fallback configuration; only
            included in the result when truthy.

    Returns:
        A dict representing the full stack definition.

    Raises:
        InitError: If the number of ports differs from the number of tiers.
    """
    port_count = len(ports)
    tier_count = len(recommendation.tiers)
    if port_count != tier_count:
        msg = f"Port count ({port_count}) doesn't match tier count ({tier_count})"
        raise InitError(msg)

    tier_entries: list[dict[str, Any]] = [
        _build_tier_entry(tier_assignment, tier_port, catalog)
        for tier_assignment, tier_port in zip(recommendation.tiers, ports)
    ]

    definition: dict[str, Any] = {
        "schema_version": STACK_SCHEMA_VERSION,
        "name": stack_name,
        "hardware_profile": recommendation.hardware_profile.profile_id,
        "intent": recommendation.intent,
        # Timezone-aware UTC creation timestamp in ISO 8601 form.
        "created": datetime.now(timezone.utc).isoformat(),
        "tiers": tier_entries,
    }

    if not cloud_fallback:
        return definition

    definition["cloud_fallback"] = cloud_fallback
    return definition
242
+
243
+
244
+ # --------------------------------------------------------------------------- #
245
+ # Missing model detection
246
+ # --------------------------------------------------------------------------- #
247
+
248
+
249
def detect_missing_models(
    tiers: list[dict[str, Any]],
    models_dir: Path | None = None,
) -> list[str]:
    """Detect models referenced in the stack that are not locally available.

    A model counts as present when ``models_dir`` contains a path named
    after either the model ID or the last path component of the tier's
    ``source`` repo. This is an existence check only; the contents of
    the directory are not inspected.

    Args:
        tiers: List of tier entries from the stack definition. Each must
            have a ``model`` key; ``source`` may be absent, empty or None.
        models_dir: The models directory to check. If None, the
            ``model-dir`` config value is used, falling back to
            ``<data home>/models`` when it cannot be read or is empty.

    Returns:
        List of model IDs that are not found locally, in tier order.
    """
    if models_dir is None:
        try:
            configured = get_value("model-dir")
            # An empty/None setting would otherwise turn into Path("") or
            # Path("None"); treat it as "not configured" instead.
            models_dir = Path(str(configured)).expanduser() if configured else None
        except Exception:
            # Best-effort lookup: any config failure falls back to the default.
            models_dir = None
        if models_dir is None:
            models_dir = get_data_home() / "models"
    else:
        models_dir = Path(models_dir)

    missing: list[str] = []
    for tier in tiers:
        model_id = tier["model"]
        # ``source`` may be missing or explicitly null in the tier entry.
        source = tier.get("source") or ""
        # Directory name derived from the HF repo ("org/name" -> "name").
        source_dir_name = source.rsplit("/", 1)[-1]

        candidates = [models_dir / model_id]
        if source_dir_name:
            candidates.append(models_dir / source_dir_name)

        if not any(candidate.exists() for candidate in candidates):
            missing.append(model_id)

    return missing
284
+
285
+
286
+ # --------------------------------------------------------------------------- #
287
+ # Main init entry point
288
+ # --------------------------------------------------------------------------- #
289
+
290
+
291
def _read_int_setting(key: str, default: int) -> int:
    """Return the config value for ``key`` as an int, or ``default``.

    The default is used when the config is corrupt or the stored value
    cannot be converted to an int (including a None value).
    """
    try:
        return int(get_value(key))
    except (ConfigCorruptError, TypeError, ValueError):
        return default


def _drop_tiers(
    tiers: list[TierAssignment],
    remove_tiers: list[str],
) -> list[TierAssignment]:
    """Return ``tiers`` without the named tiers, rejecting unknown names."""
    valid_tier_names = {t.tier for t in tiers}
    for tier_name in remove_tiers:
        if tier_name not in valid_tier_names:
            valid = ", ".join(sorted(valid_tier_names))
            msg = (
                f"Cannot remove tier '{tier_name}': not in the current stack. "
                f"Valid tiers: {valid}"
            )
            raise InitError(msg)
    # Build the lookup set once rather than once per tier.
    unwanted = set(remove_tiers)
    return [t for t in tiers if t.tier not in unwanted]


def run_init(
    intent: str = "balanced",
    budget_pct: int | None = None,
    add_models: list[str] | None = None,
    remove_tiers: list[str] | None = None,
    force: bool = False,
    stack_name: str = DEFAULT_STACK_NAME,
) -> dict[str, Any]:
    """Run the full init flow: profile -> recommend -> generate configs.

    Args:
        intent: Recommendation intent; must be one of ``VALID_INTENTS``.
        budget_pct: Memory budget percentage override (uses config default if None).
        add_models: Additional model IDs to add as tiers.
        remove_tiers: Tier names to remove from recommendation.
        force: Whether to overwrite an existing stack file.
        stack_name: Name for the stack definition.

    Returns:
        A dict with keys:
        - stack_path: Path to the generated stack YAML
        - litellm_path: Path to the generated LiteLLM config
        - stack: The stack definition dict
        - litellm_config: The LiteLLM config dict
        - missing_models: List of models not found locally
        - warnings: Messages for added models that exceed the memory budget
        - profile: The hardware profile that was used
        - memory_budget_gb: The memory budget of the recommendation
        - total_memory_gb: Summed estimated memory of the selected tiers

    Raises:
        InitError: If the intent is invalid, hardware detection, catalog
            loading or recommendation fails, the stack already exists and
            ``force`` is False, a customization names an unknown tier or
            model, or no tiers remain.
    """
    # --- Validate intent ---
    if intent not in VALID_INTENTS:
        valid = ", ".join(sorted(VALID_INTENTS))
        msg = f"Invalid intent '{intent}'. Valid intents: {valid}"
        raise InitError(msg)

    # --- Resolve hardware profile ---
    profile = load_profile()
    if profile is None:
        try:
            profile = detect_hardware()
            save_profile(profile)
        except Exception as exc:
            msg = f"Hardware detection failed: {exc}"
            raise InitError(msg) from None

    # --- Read config values ---
    litellm_port = _read_int_setting("litellm-port", 4000)
    if budget_pct is None:
        budget_pct = _read_int_setting("memory-budget-pct", 40)

    try:
        raw_key = get_value("openrouter-key")
    except Exception:
        # Best-effort: an unreadable key simply disables cloud fallback.
        raw_key = None
    # A None/empty value must not become the truthy string "None".
    openrouter_key = str(raw_key) if raw_key else ""

    # --- Check for existing stack ---
    stacks_dir = get_stacks_dir()
    stack_path = stacks_dir / f"{stack_name}.yaml"
    litellm_path = get_data_home() / "litellm.yaml"

    if stack_path.exists() and not force:
        msg = (
            f"Stack '{stack_name}' already exists at {stack_path}. "
            f"Use --force to overwrite."
        )
        raise InitError(msg)

    # --- Load catalog ---
    try:
        catalog = load_catalog()
    except Exception as exc:
        msg = f"Could not load model catalog: {exc}"
        raise InitError(msg) from None

    # --- Run recommendation ---
    try:
        recommendation = run_recommend(
            catalog=catalog,
            profile=profile,
            intent=intent,
            budget_pct=budget_pct,
        )
    except ScoringError as exc:
        msg = f"Recommendation failed: {exc}"
        raise InitError(msg) from None

    if not recommendation.tiers:
        msg = (
            f"No models fit within the {recommendation.memory_budget_gb:.1f} GB budget. "
            f"Try increasing memory-budget-pct in config."
        )
        raise InitError(msg)

    # --- Apply --add/--remove customizations ---
    tiers = list(recommendation.tiers)

    if remove_tiers:
        tiers = _drop_tiers(tiers, remove_tiers)

    warnings: list[str] = []

    if add_models:
        # Imported lazily and only once; these names are needed solely
        # when models are added explicitly.
        from mlx_stack.core.scoring import INTENT_WEIGHTS, score_model

        weights = INTENT_WEIGHTS.get(intent, INTENT_WEIGHTS["balanced"])
        budget_gb = recommendation.memory_budget_gb
        assigned_ids = {t.model.entry.id for t in tiers}

        for model_id in add_models:
            entry = get_entry_by_id(catalog, model_id)
            if entry is None:
                msg = (
                    f"Unknown model '{model_id}'. "
                    f"Run 'mlx-stack models --catalog' to see available models."
                )
                raise InitError(msg)

            if model_id in assigned_ids:
                continue  # Skip duplicates silently

            try:
                scored = score_model(entry, profile, weights, budget_gb)
            except ScoringError as exc:
                msg = f"Cannot add model '{model_id}': {exc}"
                raise InitError(msg) from None

            # Warn if exceeding budget (per spec: warn, not block)
            total_memory = sum(t.model.memory_gb for t in tiers) + scored.memory_gb
            if total_memory > budget_gb:
                warnings.append(
                    f"Adding '{model_id}' exceeds memory budget "
                    f"({total_memory:.1f} GB > {budget_gb:.1f} GB)."
                )

            # Explicitly added models get a tier named 'added-<model_id>'.
            tiers.append(TierAssignment(
                tier=f"added-{model_id}",
                model=scored,
                quant="int4",
            ))
            assigned_ids.add(model_id)

    if not tiers:
        msg = "No tiers remaining after customization. Cannot generate stack."
        raise InitError(msg)

    # --- Allocate ports ---
    ports = allocate_ports(len(tiers), litellm_port=litellm_port)

    # --- Generate stack definition ---
    cloud_fallback: dict[str, Any] | None = None
    if openrouter_key:
        cloud_fallback = {
            "provider": "openrouter",
            "models": ["openai/gpt-4o", "anthropic/claude-sonnet-4-20250514"],
        }

    stack = generate_stack_definition(
        recommendation=_with_tiers(recommendation, tiers),
        ports=ports,
        catalog=catalog,
        stack_name=stack_name,
        cloud_fallback=cloud_fallback,
    )

    # --- Generate LiteLLM config ---
    tier_entries = [
        {"name": t["name"], "model": t["model"], "port": t["port"]}
        for t in stack["tiers"]
    ]
    litellm_config = generate_litellm_config(
        tiers=tier_entries,
        litellm_port=litellm_port,
        openrouter_key=openrouter_key,
    )

    # --- Write files ---
    ensure_data_home()
    stacks_dir.mkdir(parents=True, exist_ok=True)

    # sort_keys=False keeps the key order chosen when the dict was built.
    stack_yaml = yaml.dump(stack, default_flow_style=False, sort_keys=False)
    stack_path.write_text(stack_yaml, encoding="utf-8")

    litellm_yaml = render_litellm_yaml(litellm_config)
    litellm_path.write_text(litellm_yaml, encoding="utf-8")

    # --- Detect missing models ---
    missing_models = detect_missing_models(stack["tiers"])

    # --- Compute total estimated memory for selected tiers ---
    total_memory_gb = sum(t.model.memory_gb for t in tiers)

    return {
        "stack_path": stack_path,
        "litellm_path": litellm_path,
        "stack": stack,
        "litellm_config": litellm_config,
        "missing_models": missing_models,
        "warnings": warnings,
        "profile": profile,
        "memory_budget_gb": recommendation.memory_budget_gb,
        "total_memory_gb": total_memory_gb,
    }
511
+
512
+
513
def _with_tiers(result: RecommendationResult, tiers: list[TierAssignment]) -> RecommendationResult:
    """Return a copy of ``result`` whose tiers are replaced by ``tiers``.

    A new RecommendationResult is constructed rather than mutating
    ``result``; every field other than ``tiers`` is carried over as-is.
    """
    carried_fields = ("all_scored", "memory_budget_gb", "intent", "hardware_profile")
    carried = {field_name: getattr(result, field_name) for field_name in carried_fields}
    return RecommendationResult(tiers=tiers, **carried)