mlx-stack 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mlx_stack/__init__.py +5 -0
- mlx_stack/_version.py +24 -0
- mlx_stack/cli/__init__.py +5 -0
- mlx_stack/cli/bench.py +221 -0
- mlx_stack/cli/config.py +166 -0
- mlx_stack/cli/down.py +109 -0
- mlx_stack/cli/init.py +180 -0
- mlx_stack/cli/install.py +165 -0
- mlx_stack/cli/logs.py +234 -0
- mlx_stack/cli/main.py +187 -0
- mlx_stack/cli/models.py +304 -0
- mlx_stack/cli/profile.py +65 -0
- mlx_stack/cli/pull.py +134 -0
- mlx_stack/cli/recommend.py +397 -0
- mlx_stack/cli/status.py +111 -0
- mlx_stack/cli/up.py +163 -0
- mlx_stack/cli/watch.py +252 -0
- mlx_stack/core/__init__.py +1 -0
- mlx_stack/core/benchmark.py +1182 -0
- mlx_stack/core/catalog.py +560 -0
- mlx_stack/core/config.py +471 -0
- mlx_stack/core/deps.py +323 -0
- mlx_stack/core/hardware.py +304 -0
- mlx_stack/core/launchd.py +531 -0
- mlx_stack/core/litellm_gen.py +188 -0
- mlx_stack/core/log_rotation.py +231 -0
- mlx_stack/core/log_viewer.py +386 -0
- mlx_stack/core/models.py +639 -0
- mlx_stack/core/paths.py +79 -0
- mlx_stack/core/process.py +887 -0
- mlx_stack/core/pull.py +815 -0
- mlx_stack/core/scoring.py +611 -0
- mlx_stack/core/stack_down.py +317 -0
- mlx_stack/core/stack_init.py +524 -0
- mlx_stack/core/stack_status.py +229 -0
- mlx_stack/core/stack_up.py +856 -0
- mlx_stack/core/watchdog.py +744 -0
- mlx_stack/data/__init__.py +1 -0
- mlx_stack/data/catalog/__init__.py +1 -0
- mlx_stack/data/catalog/deepseek-r1-32b.yaml +46 -0
- mlx_stack/data/catalog/deepseek-r1-8b.yaml +45 -0
- mlx_stack/data/catalog/gemma3-12b.yaml +45 -0
- mlx_stack/data/catalog/gemma3-27b.yaml +45 -0
- mlx_stack/data/catalog/gemma3-4b.yaml +45 -0
- mlx_stack/data/catalog/llama3.3-8b.yaml +44 -0
- mlx_stack/data/catalog/nemotron-49b.yaml +41 -0
- mlx_stack/data/catalog/nemotron-8b.yaml +44 -0
- mlx_stack/data/catalog/qwen3-8b.yaml +45 -0
- mlx_stack/data/catalog/qwen3.5-0.8b.yaml +45 -0
- mlx_stack/data/catalog/qwen3.5-14b.yaml +46 -0
- mlx_stack/data/catalog/qwen3.5-32b.yaml +45 -0
- mlx_stack/data/catalog/qwen3.5-3b.yaml +44 -0
- mlx_stack/data/catalog/qwen3.5-72b.yaml +42 -0
- mlx_stack/data/catalog/qwen3.5-8b.yaml +45 -0
- mlx_stack/py.typed +1 -0
- mlx_stack/utils/__init__.py +1 -0
- mlx_stack-0.1.0.dist-info/METADATA +397 -0
- mlx_stack-0.1.0.dist-info/RECORD +61 -0
- mlx_stack-0.1.0.dist-info/WHEEL +4 -0
- mlx_stack-0.1.0.dist-info/entry_points.txt +2 -0
- mlx_stack-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,317 @@
|
|
|
1
|
+
"""Stack shutdown logic for mlx-stack.
|
|
2
|
+
|
|
3
|
+
Orchestrates stopping all managed services: terminates processes in
|
|
4
|
+
correct order (LiteLLM first, then model servers in reverse startup
|
|
5
|
+
order), SIGTERM with 10s grace period then SIGKILL, cleans up PID
|
|
6
|
+
files, acquires lockfile during operation. Supports --tier for
|
|
7
|
+
selective stop. Handles stale/corrupt PID files gracefully.
|
|
8
|
+
Reports 'Nothing to stop' when idle.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
from dataclasses import dataclass, field
|
|
14
|
+
from typing import Any
|
|
15
|
+
|
|
16
|
+
from mlx_stack.core.catalog import get_entry_by_id, load_catalog
|
|
17
|
+
from mlx_stack.core.process import (
|
|
18
|
+
LockError,
|
|
19
|
+
ProcessError,
|
|
20
|
+
acquire_lock,
|
|
21
|
+
is_process_alive,
|
|
22
|
+
list_pid_files,
|
|
23
|
+
read_pid_file,
|
|
24
|
+
remove_pid_file,
|
|
25
|
+
stop_service,
|
|
26
|
+
)
|
|
27
|
+
from mlx_stack.core.stack_up import (
|
|
28
|
+
LITELLM_SERVICE_NAME,
|
|
29
|
+
load_stack_definition,
|
|
30
|
+
)
|
|
31
|
+
|
|
32
|
+
# --------------------------------------------------------------------------- #
|
|
33
|
+
# Exceptions
|
|
34
|
+
# --------------------------------------------------------------------------- #
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class DownError(Exception):
    """Fatal failure of the ``down`` command.

    Raised when shutdown cannot proceed at all, e.g. when the requested
    tier is not one of the tiers defined by the stack.
    """
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
# --------------------------------------------------------------------------- #
|
|
42
|
+
# Data classes
|
|
43
|
+
# --------------------------------------------------------------------------- #
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
@dataclass
class ServiceStopResult:
    """Outcome of an attempt to stop one managed service.

    One instance is produced per service that the shutdown flow looks at,
    whether or not a process actually had to be terminated.
    """

    # Service name (a tier name or the LiteLLM service name).
    name: str
    # PID associated with the service, or None when no usable PID was found.
    pid: int | None
    # One of: "stopped", "stale", "corrupt", "not-running".
    status: str
    # Whether the process exited gracefully; None when that does not apply.
    graceful: bool | None = None
    # Human-readable detail for non-trivial outcomes (stale / corrupt PID file).
    error: str | None = None
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
@dataclass
class DownResult:
    """Aggregate outcome of one ``down`` invocation."""

    # Per-service outcomes, in the order the services were processed.
    services: list[ServiceStopResult] = field(default_factory=list)
    # True when no PID files were found, i.e. there was nothing to shut down.
    nothing_to_stop: bool = False
    # Non-fatal messages collected while shutting down.
    warnings: list[str] = field(default_factory=list)
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
# --------------------------------------------------------------------------- #
|
|
67
|
+
# Tier ordering
|
|
68
|
+
# --------------------------------------------------------------------------- #
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def _get_tier_names_from_stack(stack_name: str = "default") -> list[str]:
    """Get tier names from the stack definition in startup order.

    Tiers are ordered by catalog model size, largest first, with the tier
    name as a tie-breaker. This is intended to mirror the startup order of
    ``mlx-stack up``. If the catalog cannot be loaded, the order from the
    stack definition is kept as-is.

    The lookup is deliberately best-effort so that shutdown keeps working
    with a missing or damaged stack file: a stack that fails to load, a
    null/absent ``tiers`` key, and tier entries that are not mappings or
    have no ``"name"`` are all tolerated rather than raised.

    Args:
        stack_name: Stack definition name.

    Returns:
        List of tier names in startup order (largest model first).
        Empty list if the stack cannot be loaded or defines no usable tiers.
    """
    # Broad catch is intentional: any load failure means "no known tiers".
    try:
        stack = load_stack_definition(stack_name)
    except Exception:
        return []

    # ``tiers`` may be missing or explicitly null; skip malformed entries
    # instead of failing later with TypeError/KeyError.
    tiers = [
        tier
        for tier in (stack.get("tiers") or [])
        if isinstance(tier, dict) and "name" in tier
    ]

    # Sorting needs model sizes from the catalog; without it keep file order.
    try:
        catalog = load_catalog()
    except Exception:
        catalog = None

    if catalog is not None:

        def sort_key(tier: dict[str, Any]) -> tuple[float, str]:
            entry = get_entry_by_id(catalog, tier.get("model", ""))
            # Models unknown to the catalog sort as size 0 (i.e. last).
            params_b = entry.params_b if entry else 0.0
            return (-params_b, tier["name"])

        tiers.sort(key=sort_key)

    return [tier["name"] for tier in tiers]
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def _get_valid_tier_names(stack_name: str = "default") -> list[str]:
    """Get valid tier names from the stack definition.

    Best-effort: a stack that cannot be loaded, a null/absent ``tiers``
    key, and tier entries that are not mappings or lack a ``"name"`` are
    tolerated instead of raising, so callers can treat an empty result as
    "tier names unknown".

    Args:
        stack_name: Stack definition name.

    Returns:
        List of tier names in stack-definition order (not sorted).
        Empty if the stack can't be loaded or defines no usable tiers.
    """
    # Broad catch is intentional: any load failure means "no known tiers".
    try:
        stack = load_stack_definition(stack_name)
    except Exception:
        return []

    # ``or []`` covers an explicit ``tiers: null``; malformed entries are
    # skipped rather than surfacing as a KeyError/TypeError.
    return [
        tier["name"]
        for tier in (stack.get("tiers") or [])
        if isinstance(tier, dict) and "name" in tier
    ]
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
# --------------------------------------------------------------------------- #
|
|
128
|
+
# Single service shutdown with stale/corrupt handling
|
|
129
|
+
# --------------------------------------------------------------------------- #
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def _stop_single_service(service_name: str) -> ServiceStopResult:
    """Stop one service and describe what happened.

    Outcomes, by ``status``:

    * ``"corrupt"`` - reading the PID file raised ``ProcessError``; the
      file is removed.
    * ``"not-running"`` - there is no PID for the service.
    * ``"stale"`` - the recorded process is no longer alive; the PID file
      is removed.
    * ``"stopped"`` - the process was alive and ``stop_service`` was called.

    Args:
        service_name: Name of the service to stop.

    Returns:
        A ServiceStopResult describing the outcome.
    """
    try:
        recorded_pid = read_pid_file(service_name)
    except ProcessError:
        # Unreadable PID file: nothing to signal, just clean it up.
        remove_pid_file(service_name)
        return ServiceStopResult(
            service_name,
            None,
            "corrupt",
            error="PID file contained non-numeric content; removed.",
        )

    if recorded_pid is None:
        return ServiceStopResult(service_name, None, "not-running")

    if is_process_alive(recorded_pid):
        outcome = stop_service(service_name)
        if outcome is None:
            # Not expected for a live process; reported as a graceful stop.
            return ServiceStopResult(
                service_name, recorded_pid, "stopped", graceful=True
            )
        return ServiceStopResult(
            service_name, outcome.pid, "stopped", graceful=outcome.graceful
        )

    # The process is already gone, so only the leftover PID file needs removal.
    remove_pid_file(service_name)
    return ServiceStopResult(
        service_name,
        recorded_pid,
        "stale",
        error=f"Process {recorded_pid} already dead; cleaned up stale PID file.",
    )
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
# --------------------------------------------------------------------------- #
|
|
193
|
+
# Main shutdown orchestration
|
|
194
|
+
# --------------------------------------------------------------------------- #
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
def run_down(
    tier_filter: str | None = None,
    stack_name: str = "default",
) -> DownResult:
    """Execute the full stack shutdown flow.

    1. Validate ``tier_filter`` against the stack definition (skipped when
       the stack's tier names cannot be determined).
    2. Return early, without taking the lock, if no PID files exist.
    3. Acquire the lockfile.
    4. If a tier is specified, stop only that tier; otherwise stop LiteLLM
       first, then model servers in reverse startup order, then any
       remaining services that have PID files.
    5. Release the lockfile (on leaving the ``with`` block).

    Args:
        tier_filter: If set, stop only this tier.
        stack_name: Stack definition name.

    Returns:
        A DownResult with the outcome.

    Raises:
        DownError: On fatal errors (invalid tier filter).
        LockError: If the lockfile is held by another process.
    """
    result = DownResult()

    # --- Validate --tier filter ---
    if tier_filter is not None:
        valid_tiers = _get_valid_tier_names(stack_name)
        # An empty list means the tiers are unknown, so validation is skipped.
        if valid_tiers and tier_filter not in valid_tiers:
            valid_list = ", ".join(sorted(valid_tiers))
            msg = (
                f"Unknown tier '{tier_filter}'. "
                f"Valid tiers: {valid_list}"
            )
            raise DownError(msg)

    # --- Check if anything is running ---
    if not list_pid_files():
        result.nothing_to_stop = True
        return result

    # --- Acquire lockfile and shut down ---
    # LockError from acquire_lock() propagates to the caller unchanged.
    with acquire_lock():
        return _run_shutdown(
            tier_filter=tier_filter,
            stack_name=stack_name,
            result=result,
        )
|
|
251
|
+
|
|
252
|
+
|
|
253
|
+
def _run_shutdown(
    tier_filter: str | None,
    stack_name: str,
    result: DownResult,
) -> DownResult:
    """Perform the shutdown sequence; expected to run while the lock is held.

    PID files are listed again here rather than reusing an earlier listing.
    With a tier filter only that service is handled. Otherwise the order is:
    LiteLLM, then the stack's tiers in reverse startup order, then every
    other service that has a PID file (alphabetically).

    Args:
        tier_filter: If set, stop only this tier.
        stack_name: Stack definition name.
        result: The DownResult to populate.

    Returns:
        The populated DownResult (the same object passed in).
    """
    running = list_pid_files()
    if not running:
        result.nothing_to_stop = True
        return result

    # --- Selective stop: a single tier, nothing else is touched ---
    if tier_filter is not None:
        if tier_filter not in running:
            outcome = ServiceStopResult(
                name=tier_filter,
                pid=None,
                status="not-running",
            )
        else:
            outcome = _stop_single_service(tier_filter)
        result.services.append(outcome)
        return result

    # --- Full shutdown: plan the order first, then stop in that order ---
    startup_order = _get_tier_names_from_stack(stack_name)
    pending = set(running.keys())
    plan: list[str] = []

    # LiteLLM goes first.
    if LITELLM_SERVICE_NAME in pending:
        plan.append(LITELLM_SERVICE_NAME)
        pending.discard(LITELLM_SERVICE_NAME)

    # Model servers follow in reverse startup order.
    for tier_name in reversed(startup_order):
        if tier_name in pending:
            plan.append(tier_name)
            pending.discard(tier_name)

    # Whatever is left has a PID file but is not part of the stack
    # definition (e.g. leftovers from a previous stack).
    plan.extend(sorted(pending))

    for service_name in plan:
        result.services.append(_stop_single_service(service_name))

    return result
|