mlx-stack 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mlx_stack/__init__.py +5 -0
- mlx_stack/_version.py +24 -0
- mlx_stack/cli/__init__.py +5 -0
- mlx_stack/cli/bench.py +221 -0
- mlx_stack/cli/config.py +166 -0
- mlx_stack/cli/down.py +109 -0
- mlx_stack/cli/init.py +180 -0
- mlx_stack/cli/install.py +165 -0
- mlx_stack/cli/logs.py +234 -0
- mlx_stack/cli/main.py +187 -0
- mlx_stack/cli/models.py +304 -0
- mlx_stack/cli/profile.py +65 -0
- mlx_stack/cli/pull.py +134 -0
- mlx_stack/cli/recommend.py +397 -0
- mlx_stack/cli/status.py +111 -0
- mlx_stack/cli/up.py +163 -0
- mlx_stack/cli/watch.py +252 -0
- mlx_stack/core/__init__.py +1 -0
- mlx_stack/core/benchmark.py +1182 -0
- mlx_stack/core/catalog.py +560 -0
- mlx_stack/core/config.py +471 -0
- mlx_stack/core/deps.py +323 -0
- mlx_stack/core/hardware.py +304 -0
- mlx_stack/core/launchd.py +531 -0
- mlx_stack/core/litellm_gen.py +188 -0
- mlx_stack/core/log_rotation.py +231 -0
- mlx_stack/core/log_viewer.py +386 -0
- mlx_stack/core/models.py +639 -0
- mlx_stack/core/paths.py +79 -0
- mlx_stack/core/process.py +887 -0
- mlx_stack/core/pull.py +815 -0
- mlx_stack/core/scoring.py +611 -0
- mlx_stack/core/stack_down.py +317 -0
- mlx_stack/core/stack_init.py +524 -0
- mlx_stack/core/stack_status.py +229 -0
- mlx_stack/core/stack_up.py +856 -0
- mlx_stack/core/watchdog.py +744 -0
- mlx_stack/data/__init__.py +1 -0
- mlx_stack/data/catalog/__init__.py +1 -0
- mlx_stack/data/catalog/deepseek-r1-32b.yaml +46 -0
- mlx_stack/data/catalog/deepseek-r1-8b.yaml +45 -0
- mlx_stack/data/catalog/gemma3-12b.yaml +45 -0
- mlx_stack/data/catalog/gemma3-27b.yaml +45 -0
- mlx_stack/data/catalog/gemma3-4b.yaml +45 -0
- mlx_stack/data/catalog/llama3.3-8b.yaml +44 -0
- mlx_stack/data/catalog/nemotron-49b.yaml +41 -0
- mlx_stack/data/catalog/nemotron-8b.yaml +44 -0
- mlx_stack/data/catalog/qwen3-8b.yaml +45 -0
- mlx_stack/data/catalog/qwen3.5-0.8b.yaml +45 -0
- mlx_stack/data/catalog/qwen3.5-14b.yaml +46 -0
- mlx_stack/data/catalog/qwen3.5-32b.yaml +45 -0
- mlx_stack/data/catalog/qwen3.5-3b.yaml +44 -0
- mlx_stack/data/catalog/qwen3.5-72b.yaml +42 -0
- mlx_stack/data/catalog/qwen3.5-8b.yaml +45 -0
- mlx_stack/py.typed +1 -0
- mlx_stack/utils/__init__.py +1 -0
- mlx_stack-0.1.0.dist-info/METADATA +397 -0
- mlx_stack-0.1.0.dist-info/RECORD +61 -0
- mlx_stack-0.1.0.dist-info/WHEEL +4 -0
- mlx_stack-0.1.0.dist-info/entry_points.txt +2 -0
- mlx_stack-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,229 @@
|
|
|
1
|
+
"""Stack status logic for mlx-stack.
|
|
2
|
+
|
|
3
|
+
Orchestrates health-checking all managed services and producing a
|
|
4
|
+
unified status report. Read-only: does not modify any files, clean up
|
|
5
|
+
PID files, restart services, or acquire the lockfile. Can run
|
|
6
|
+
concurrently with ``up`` or ``down``.
|
|
7
|
+
|
|
8
|
+
Implements 5-state reporting per service:
|
|
9
|
+
- healthy: PID alive and HTTP 200 within 2s
|
|
10
|
+
- degraded: PID alive and HTTP 200 but response time > 2s and <= 5s
|
|
11
|
+
- down: PID alive but no HTTP response within 5s
|
|
12
|
+
- crashed: PID file exists but process is dead
|
|
13
|
+
- stopped: No PID file
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
from dataclasses import dataclass, field
|
|
19
|
+
from typing import Any
|
|
20
|
+
|
|
21
|
+
from mlx_stack.core.paths import get_stacks_dir
|
|
22
|
+
from mlx_stack.core.process import (
|
|
23
|
+
format_uptime,
|
|
24
|
+
get_service_status,
|
|
25
|
+
)
|
|
26
|
+
from mlx_stack.core.stack_up import LITELLM_HEALTH_PATH, LITELLM_SERVICE_NAME
|
|
27
|
+
|
|
28
|
+
# --------------------------------------------------------------------------- #
|
|
29
|
+
# Constants
|
|
30
|
+
# --------------------------------------------------------------------------- #
|
|
31
|
+
|
|
32
|
+
# Health check path for vllm-mlx model servers
|
|
33
|
+
VLLM_HEALTH_PATH = "/v1/models"
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
# --------------------------------------------------------------------------- #
|
|
37
|
+
# Data classes
|
|
38
|
+
# --------------------------------------------------------------------------- #
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
@dataclass(frozen=True)
class ServiceStatus:
    """Immutable health snapshot for one managed service.

    Attributes:
        tier: Name of the tier the service is reported under.
        model: Model label shown for the service.
        port: Port the service was checked on.
        status: One of "healthy", "degraded", "down", "crashed", "stopped".
        uptime: Uptime in seconds; None for stopped/crashed services.
        uptime_display: Human-readable uptime string, or "-".
        response_time: HTTP response time in seconds; None if there was
            no HTTP response.
        pid: PID recorded for the service, or None.
    """

    tier: str
    model: str
    port: int
    status: str
    uptime: float | None
    uptime_display: str
    response_time: float | None
    pid: int | None
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
@dataclass
class StatusResult:
    """Outcome of one status check.

    Attributes:
        services: Per-service statuses; every instance starts with its
            own empty list.
        no_stack: False by default; set when no usable stack definition
            is available.
        message: Optional message accompanying the result; None by default.
    """

    services: list[ServiceStatus] = field(default_factory=list)
    no_stack: bool = False
    message: str | None = None
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
# --------------------------------------------------------------------------- #
|
|
65
|
+
# Stack definition reading (read-only, no exceptions propagated)
|
|
66
|
+
# --------------------------------------------------------------------------- #
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def _load_stack_for_status(stack_name: str = "default") -> dict[str, Any] | None:
    """Read a stack definition without ever raising on bad content.

    The file ``<stacks dir>/<stack_name>.yaml`` is read and parsed. Any
    failure while reading or parsing, or a document that is not a mapping
    with a list under ``"tiers"``, yields None so the status command can
    report "no stack configured" instead of failing.

    Args:
        stack_name: Name of the stack to load.

    Returns:
        The parsed stack definition dict, or None if unavailable.
    """
    import yaml

    definition_file = get_stacks_dir() / f"{stack_name}.yaml"
    if not definition_file.exists():
        return None

    try:
        parsed = yaml.safe_load(definition_file.read_text(encoding="utf-8"))
    except Exception:
        # Deliberately broad: an unreadable or malformed file is treated
        # exactly like a missing one.
        return None

    has_expected_shape = isinstance(parsed, dict) and isinstance(
        parsed.get("tiers"), list
    )
    return parsed if has_expected_shape else None
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def _get_litellm_port() -> int:
|
|
100
|
+
"""Read the configured LiteLLM port.
|
|
101
|
+
|
|
102
|
+
Returns:
|
|
103
|
+
The configured port, or 4000 as default.
|
|
104
|
+
"""
|
|
105
|
+
try:
|
|
106
|
+
from mlx_stack.core.config import get_value
|
|
107
|
+
|
|
108
|
+
port = get_value("litellm-port")
|
|
109
|
+
if isinstance(port, int):
|
|
110
|
+
return port
|
|
111
|
+
return int(port)
|
|
112
|
+
except Exception:
|
|
113
|
+
return 4000
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
# --------------------------------------------------------------------------- #
|
|
117
|
+
# Main status orchestration
|
|
118
|
+
# --------------------------------------------------------------------------- #
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def _to_service_status(
    tier: str,
    model: str,
    port: int,
    probe: dict[str, Any],
) -> ServiceStatus:
    """Build a ServiceStatus from one ``get_service_status`` result.

    Args:
        tier: Tier name to report.
        model: Model label to report.
        port: Port the service was checked on.
        probe: Result of ``get_service_status``; the keys "status",
            "uptime", "response_time" and "pid" are read from it.

    Returns:
        The populated ServiceStatus.
    """
    uptime = probe["uptime"]
    return ServiceStatus(
        tier=tier,
        model=model,
        port=port,
        status=probe["status"],
        uptime=uptime,
        uptime_display=format_uptime(uptime),
        response_time=probe["response_time"],
        pid=probe["pid"],
    )


def run_status(stack_name: str = "default") -> StatusResult:
    """Execute the full status check flow.

    1. Load the stack definition (read-only).
    2. For each tier, check its status via ``get_service_status``.
    3. Check the LiteLLM service.
    4. Return a StatusResult with all service statuses.

    When the stack cannot be loaded or declares no tiers, the result has
    ``no_stack`` set, carries an explanatory message, and contains no
    services. Tier entries that are not mappings (for example a bare
    string in a hand-edited stack file) are skipped rather than aborting
    the whole report.

    Args:
        stack_name: Stack definition name.

    Returns:
        A StatusResult with the outcome.
    """
    # --- Load stack definition ---
    stack = _load_stack_for_status(stack_name)
    tiers = stack.get("tiers", []) if stack is not None else []

    if not tiers:
        # A missing definition and a definition without tiers are
        # reported identically.
        return StatusResult(
            no_stack=True,
            message=(
                "No stack configured — run 'mlx-stack init' to create a stack configuration."
            ),
        )

    result = StatusResult()

    # --- Check each tier ---
    for tier in tiers:
        if not isinstance(tier, dict):
            # The loader only guarantees that "tiers" is a list; an entry
            # that is not a mapping has no name/model/port to inspect.
            continue

        tier_name = tier.get("name", "unknown")
        port = tier.get("port", 0)
        probe = get_service_status(
            service_name=tier_name,
            port=port,
            health_path=VLLM_HEALTH_PATH,
        )
        result.services.append(
            _to_service_status(
                tier=tier_name,
                model=tier.get("model", "unknown"),
                port=port,
                probe=probe,
            )
        )

    # --- Check LiteLLM ---
    litellm_port = _get_litellm_port()
    litellm_probe = get_service_status(
        service_name=LITELLM_SERVICE_NAME,
        port=litellm_port,
        health_path=LITELLM_HEALTH_PATH,
    )
    result.services.append(
        _to_service_status(
            tier="litellm",
            model="proxy",
            port=litellm_port,
            probe=litellm_probe,
        )
    )

    return result
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
def status_to_dict(result: StatusResult) -> dict[str, Any]:
    """Flatten a StatusResult into plain dicts and lists.

    Each service becomes a dict with the keys "tier", "model", "port",
    "status", "uptime", "uptime_display", "pid" and "response_time"; the
    top-level dict carries "services", "no_stack" and "message".

    Args:
        result: The StatusResult to convert.

    Returns:
        A dict suitable for ``json.dumps``.
    """
    serialised_services: list[dict[str, Any]] = [
        {
            "tier": entry.tier,
            "model": entry.model,
            "port": entry.port,
            "status": entry.status,
            "uptime": entry.uptime,
            "uptime_display": entry.uptime_display,
            "pid": entry.pid,
            "response_time": entry.response_time,
        }
        for entry in result.services
    ]

    return {
        "services": serialised_services,
        "no_stack": result.no_stack,
        "message": result.message,
    }
|