admina-framework 0.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- admina/__init__.py +34 -0
- admina/cli/__init__.py +14 -0
- admina/cli/commands/__init__.py +14 -0
- admina/cli/main.py +1522 -0
- admina/cli/templates/admina.yaml.j2 +77 -0
- admina/cli/templates/docker-compose.yml.j2 +254 -0
- admina/cli/templates/env.j2 +10 -0
- admina/cli/templates/main.py.j2 +95 -0
- admina/cli/templates/plugin.py.j2 +145 -0
- admina/cli/templates/plugin_pyproject.toml.j2 +15 -0
- admina/cli/templates/plugin_readme.md.j2 +27 -0
- admina/cli/templates/plugin_test.py.j2 +48 -0
- admina/core/__init__.py +14 -0
- admina/core/config.py +497 -0
- admina/core/event_bus.py +112 -0
- admina/core/secrets.py +257 -0
- admina/core/types.py +146 -0
- admina/dashboard/__init__.py +8 -0
- admina/dashboard/static/heimdall.png +0 -0
- admina/dashboard/static/index.html +1045 -0
- admina/dashboard/static/vendor/alpinejs.min.js +5 -0
- admina/domains/__init__.py +14 -0
- admina/domains/agent_security/__init__.py +41 -0
- admina/domains/agent_security/firewall.py +634 -0
- admina/domains/agent_security/loop_breaker.py +176 -0
- admina/domains/ai_infra/__init__.py +79 -0
- admina/domains/ai_infra/llm_engine.py +477 -0
- admina/domains/ai_infra/rag.py +817 -0
- admina/domains/ai_infra/webui.py +292 -0
- admina/domains/compliance/__init__.py +109 -0
- admina/domains/compliance/cross_regulation.py +314 -0
- admina/domains/compliance/eu_ai_act.py +367 -0
- admina/domains/compliance/forensic.py +380 -0
- admina/domains/compliance/gdpr.py +331 -0
- admina/domains/compliance/nis2.py +258 -0
- admina/domains/compliance/oisg.py +658 -0
- admina/domains/compliance/otel.py +101 -0
- admina/domains/data_sovereignty/__init__.py +42 -0
- admina/domains/data_sovereignty/classification.py +102 -0
- admina/domains/data_sovereignty/pii.py +260 -0
- admina/domains/data_sovereignty/residency.py +121 -0
- admina/integrations/__init__.py +14 -0
- admina/integrations/_engines.py +63 -0
- admina/integrations/cheshirecat/__init__.py +13 -0
- admina/integrations/cheshirecat/admina-plugin/admina_governance.py +207 -0
- admina/integrations/crewai/__init__.py +13 -0
- admina/integrations/crewai/callbacks.py +347 -0
- admina/integrations/langchain/__init__.py +13 -0
- admina/integrations/langchain/callbacks.py +341 -0
- admina/integrations/n8n/__init__.py +14 -0
- admina/integrations/openclaw/__init__.py +14 -0
- admina/plugins/__init__.py +49 -0
- admina/plugins/base.py +633 -0
- admina/plugins/builtin/__init__.py +14 -0
- admina/plugins/builtin/adapters/__init__.py +14 -0
- admina/plugins/builtin/adapters/ollama.py +120 -0
- admina/plugins/builtin/adapters/openai.py +138 -0
- admina/plugins/builtin/alerts/__init__.py +14 -0
- admina/plugins/builtin/alerts/log.py +66 -0
- admina/plugins/builtin/alerts/webhook.py +102 -0
- admina/plugins/builtin/auth/__init__.py +14 -0
- admina/plugins/builtin/auth/apikey.py +138 -0
- admina/plugins/builtin/compliance/__init__.py +14 -0
- admina/plugins/builtin/compliance/eu_ai_act.py +202 -0
- admina/plugins/builtin/connectors/__init__.py +14 -0
- admina/plugins/builtin/connectors/chromadb.py +137 -0
- admina/plugins/builtin/connectors/filesystem.py +111 -0
- admina/plugins/builtin/forensic/__init__.py +14 -0
- admina/plugins/builtin/forensic/filesystem.py +163 -0
- admina/plugins/builtin/forensic/minio.py +180 -0
- admina/plugins/builtin/guards/__init__.py +0 -0
- admina/plugins/builtin/guards/guardrailsai_guard.py +172 -0
- admina/plugins/builtin/pii/__init__.py +14 -0
- admina/plugins/builtin/pii/spacy_regex.py +160 -0
- admina/plugins/builtin/transports/__init__.py +14 -0
- admina/plugins/builtin/transports/http_rest.py +97 -0
- admina/plugins/builtin/transports/mcp.py +173 -0
- admina/plugins/registry.py +356 -0
- admina/proxy/__init__.py +15 -0
- admina/proxy/api/__init__.py +17 -0
- admina/proxy/api/dashboard.py +925 -0
- admina/proxy/api/integration.py +153 -0
- admina/proxy/config.py +214 -0
- admina/proxy/engine_bridge.py +306 -0
- admina/proxy/governance.py +232 -0
- admina/proxy/main.py +1484 -0
- admina/proxy/multi_upstream.py +156 -0
- admina/proxy/state.py +97 -0
- admina/py.typed +0 -0
- admina/sdk/__init__.py +34 -0
- admina/sdk/_compat.py +43 -0
- admina/sdk/compliance_kit.py +359 -0
- admina/sdk/governed_agent.py +391 -0
- admina/sdk/governed_data.py +434 -0
- admina/sdk/governed_model.py +241 -0
- admina_framework-0.9.0.dist-info/METADATA +575 -0
- admina_framework-0.9.0.dist-info/RECORD +102 -0
- admina_framework-0.9.0.dist-info/WHEEL +5 -0
- admina_framework-0.9.0.dist-info/entry_points.txt +2 -0
- admina_framework-0.9.0.dist-info/licenses/LICENSE +191 -0
- admina_framework-0.9.0.dist-info/licenses/NOTICE +16 -0
- admina_framework-0.9.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,477 @@
|
|
|
1
|
+
# Copyright © 2025–2026 Stefano Noferi & Admina contributors
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
"""Admina — LLM engine module.
|
|
16
|
+
|
|
17
|
+
GPU auto-detection, Ollama/vLLM container configuration, model management,
|
|
18
|
+
and hot model switching without downtime. All heavy operations (container
|
|
19
|
+
start, model pull) are expressed as *descriptions* — the actual Docker work
|
|
20
|
+
is done by the CLI ``admina dev`` command that renders the Jinja2
|
|
21
|
+
docker-compose template.
|
|
22
|
+
|
|
23
|
+
This module is pure Python with no runtime dependency on Docker or GPU
|
|
24
|
+
drivers — it only *inspects* the host and returns structured results that
|
|
25
|
+
other layers (CLI, SDK) consume.
|
|
26
|
+
"""
|
|
27
|
+
|
|
28
|
+
from __future__ import annotations
|
|
29
|
+
|
|
30
|
+
import asyncio
|
|
31
|
+
import logging
|
|
32
|
+
import shutil
|
|
33
|
+
import subprocess
|
|
34
|
+
from dataclasses import dataclass, field
|
|
35
|
+
from enum import Enum
|
|
36
|
+
from typing import Any
|
|
37
|
+
|
|
38
|
+
logger = logging.getLogger("admina.ai_infra.llm_engine")
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
# ── GPU detection ────────────────────────────────────────────
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class GPUVendor(str, Enum):
    """GPU vendors the detector can report.

    The enum also subclasses ``str``, so members compare equal to their
    plain string values (``GPUVendor.NVIDIA == "nvidia"``).
    """

    NVIDIA = "nvidia"
    AMD = "amd"
    NONE = "none"  # no supported GPU was found, or probing was skipped
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
@dataclass(frozen=True)
class GPUInfo:
    """Result of a GPU probe on the host.

    Instances are frozen: fields cannot be reassigned after construction
    (the ``devices`` list object itself remains mutable).
    """

    # Vendor of the detected GPUs.
    vendor: GPUVendor
    # Number of detected devices.
    device_count: int = 0
    # One dict per device; the keys present depend on the probe used.
    devices: list[dict[str, Any]] = field(default_factory=list)
    # Driver version string; empty when the probe does not report one.
    driver_version: str = ""
    # Sum of per-device VRAM in MB; 0 when the probe does not report it.
    vram_total_mb: int = 0
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def _run_cmd(cmd: list[str], *, timeout: int = 10) -> str | None:
    """Run *cmd* and return its stripped stdout, or ``None`` on failure.

    Failure means any of: the executable is missing, the OS refuses to
    start it, the process runs longer than *timeout* seconds, or it exits
    with a non-zero status.  These errors are swallowed on purpose: the
    callers in this module use the helper for best-effort host probing.

    Args:
        cmd: Argument vector; executed directly, without a shell.
        timeout: Maximum run time in seconds.

    Returns:
        The command's standard output with surrounding whitespace removed,
        or ``None`` if the command could not be run successfully.
    """
    try:
        completed = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            timeout=timeout,
        )
    except (FileNotFoundError, subprocess.TimeoutExpired, OSError):
        return None

    if completed.returncode != 0:
        return None
    return completed.stdout.strip()
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def _detect_nvidia() -> GPUInfo | None:
    """Probe NVIDIA GPUs via ``nvidia-smi``.

    Queries index, name, total memory and driver version for every device
    in CSV form and aggregates the rows.

    Returns:
        A :class:`GPUInfo` with one entry per parsed device, or ``None``
        when ``nvidia-smi`` is not on ``PATH``, the command fails, or no
        row could be parsed.
    """
    if shutil.which("nvidia-smi") is None:
        return None

    raw = _run_cmd(
        [
            "nvidia-smi",
            "--query-gpu=index,name,memory.total,driver_version",
            "--format=csv,noheader,nounits",
        ]
    )
    if raw is None:
        return None

    devices: list[dict[str, Any]] = []
    total_vram = 0
    driver = ""
    for line in raw.splitlines():
        parts = [p.strip() for p in line.split(",")]
        if len(parts) < 4:
            continue

        # Index is the first field, memory and driver are the last two;
        # whatever sits in between is the name, so a comma inside the
        # product name cannot shift the numeric columns.
        name = ", ".join(parts[1:-2])
        try:
            index = int(parts[0])
        except ValueError:
            # Not a device row (unexpected banner or warning text).
            continue
        try:
            mem_mb = int(parts[-2])
        except ValueError:
            # Memory reported as a placeholder such as "[N/A]": keep the
            # device so the count stays right, but contribute no VRAM.
            mem_mb = 0

        devices.append(
            {
                "index": index,
                "name": name,
                "vram_mb": mem_mb,
            }
        )
        total_vram += mem_mb
        driver = parts[-1]

    if not devices:
        return None

    return GPUInfo(
        vendor=GPUVendor.NVIDIA,
        device_count=len(devices),
        devices=devices,
        driver_version=driver,
        vram_total_mb=total_vram,
    )
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def _detect_amd() -> GPUInfo | None:
    """Probe AMD GPUs via ``rocm-smi``.

    Only the number of devices is derived from the tool's CSV output: every
    non-empty line that does not look like the header row counts as one
    device.  Driver version and VRAM are not parsed and are reported as
    ``""`` and ``0``.

    Returns:
        A :class:`GPUInfo` for the counted devices, or ``None`` when
        ``rocm-smi`` is not on ``PATH``, the command fails, or no device
        line is found.
    """
    if shutil.which("rocm-smi") is None:
        return None

    raw = _run_cmd(["rocm-smi", "--showid", "--showmeminfo", "vram", "--csv"])
    if raw is None:
        return None

    # Simple heuristic: every non-blank, non-header line is one device.
    stripped = (ln.strip() for ln in raw.splitlines())
    rows = [ln for ln in stripped if ln and not ln.lower().startswith("device")]
    if not rows:
        return None

    return GPUInfo(
        vendor=GPUVendor.AMD,
        device_count=len(rows),
        devices=[{"index": i} for i in range(len(rows))],
        driver_version="",
        vram_total_mb=0,
    )
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def detect_gpu() -> GPUInfo:
    """Auto-detect available GPU hardware.

    Checks NVIDIA first (via ``nvidia-smi``), then AMD (via ``rocm-smi``),
    and logs the outcome at INFO level.

    Returns:
        The :class:`GPUInfo` from the first probe that succeeds.  When
        neither probe finds anything, a :class:`GPUInfo` whose ``vendor``
        is :attr:`GPUVendor.NONE` and whose other fields are defaults.
    """
    nvidia = _detect_nvidia()
    if nvidia is not None:
        logger.info(
            "Detected %d NVIDIA GPU(s), %d MB VRAM total",
            nvidia.device_count,
            nvidia.vram_total_mb,
        )
        return nvidia

    amd = _detect_amd()
    if amd is not None:
        logger.info("Detected %d AMD GPU(s) via ROCm", amd.device_count)
        return amd

    logger.info("No GPU detected — LLM will run on CPU")
    return GPUInfo(vendor=GPUVendor.NONE)
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
# ── LLM backend configuration ───────────────────────────────
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
class LLMBackend(str, Enum):
    """Supported LLM serving backends.

    The enum also subclasses ``str``, so members compare equal to their
    plain string values and can be built from them
    (``LLMBackend("vllm")``).
    """

    OLLAMA = "ollama"
    VLLM = "vllm"
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
@dataclass
class OllamaConfig:
    """Container configuration for an Ollama service.

    Holds the values that describe the service and renders them as a
    docker-compose service fragment via :meth:`to_compose_dict`.
    """

    image: str = "ollama/ollama:latest"
    container_name: str = "admina-ollama"
    # Host port; the container side of the mapping is always 11434.
    port: int = 11434
    # Model tag; not included in the rendered compose fragment.
    model: str = "llama3.1:8b"
    gpu_vendor: GPUVendor = GPUVendor.NONE
    # When > 0, emitted as the OLLAMA_MAX_VRAM environment variable.
    # NOTE(review): the value is passed through as-is; confirm the unit
    # Ollama expects for this variable.
    vram_limit_mb: int = 0
    # Extra environment variables for the container.
    environment: dict[str, str] = field(default_factory=dict)

    def to_compose_dict(self) -> dict[str, Any]:
        """Return a docker-compose service fragment.

        The fragment always contains image, container name, port mapping,
        data volume, healthcheck, network and restart policy.  An
        ``environment`` list (sorted by key) is added when there is at
        least one variable, a ``deploy`` GPU reservation for NVIDIA hosts,
        and ``devices`` pass-through for AMD hosts.
        """
        svc: dict[str, Any] = {
            "image": self.image,
            "container_name": self.container_name,
            "ports": [f"{self.port}:11434"],
            "volumes": ["ollama-data:/root/.ollama"],
            # NOTE(review): this check needs ``curl`` inside the image —
            # verify the configured image provides it.
            "healthcheck": {
                "test": ["CMD", "curl", "-f", "http://localhost:11434/api/tags"],
                "interval": "15s",
                "timeout": "5s",
                "retries": 5,
            },
            "networks": ["admina"],
            "restart": "unless-stopped",
        }

        # Work on a copy so rendering never mutates the configured mapping.
        env = dict(self.environment)
        if self.vram_limit_mb > 0:
            env["OLLAMA_MAX_VRAM"] = str(self.vram_limit_mb)
        if env:
            svc["environment"] = [f"{k}={v}" for k, v in sorted(env.items())]

        if self.gpu_vendor == GPUVendor.NVIDIA:
            svc["deploy"] = {
                "resources": {
                    "reservations": {
                        "devices": [
                            {
                                "driver": "nvidia",
                                "count": "all",
                                "capabilities": [["gpu"]],
                            }
                        ],
                    },
                },
            }
        elif self.gpu_vendor == GPUVendor.AMD:
            svc["devices"] = ["/dev/kfd", "/dev/dri"]

        return svc
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
@dataclass
class VLLMConfig:
    """Container configuration for a vLLM service (multi-GPU).

    Holds the values that describe the service and renders them as a
    docker-compose service fragment via :meth:`to_compose_dict`.
    """

    image: str = "vllm/vllm-openai:latest"
    container_name: str = "admina-vllm"
    # Host port; the container side of the mapping is always 8000.
    port: int = 8000
    # Model identifier passed to the server via ``--model``.
    model: str = "meta-llama/Meta-Llama-3.1-8B"
    # Passed to the server via ``--tensor-parallel-size``.
    tensor_parallel_size: int = 1
    gpu_vendor: GPUVendor = GPUVendor.NONE

    def to_compose_dict(self) -> dict[str, Any]:
        """Return a docker-compose service fragment.

        The fragment always contains image, container name, port mapping,
        server command line, healthcheck, network and restart policy.  A
        ``deploy`` GPU reservation is added only for NVIDIA hosts; no
        GPU-specific keys are emitted for any other vendor.
        """
        svc: dict[str, Any] = {
            "image": self.image,
            "container_name": self.container_name,
            "ports": [f"{self.port}:8000"],
            "command": [
                "--model",
                self.model,
                "--tensor-parallel-size",
                str(self.tensor_parallel_size),
            ],
            "healthcheck": {
                "test": ["CMD", "curl", "-f", "http://localhost:8000/health"],
                "interval": "15s",
                "timeout": "5s",
                "retries": 5,
            },
            "networks": ["admina"],
            "restart": "unless-stopped",
        }

        if self.gpu_vendor == GPUVendor.NVIDIA:
            svc["deploy"] = {
                "resources": {
                    "reservations": {
                        "devices": [
                            {
                                "driver": "nvidia",
                                "count": "all",
                                "capabilities": [["gpu"]],
                            }
                        ],
                    },
                },
            }

        return svc
|
|
282
|
+
|
|
283
|
+
|
|
284
|
+
# ── LLM Engine ───────────────────────────────────────────────
|
|
285
|
+
|
|
286
|
+
|
|
287
|
+
@dataclass
class ModelStatus:
    """Runtime status of a model on a given backend."""

    # Model tag / identifier this status refers to.
    model: str
    # Backend the model is (or would be) served by.
    backend: LLMBackend
    # True once the model has been made active.
    loaded: bool = False
    # VRAM used by the model in MB; 0 when unknown.
    vram_used_mb: int = 0
    # Human-readable failure description; empty when there is no error.
    error: str = ""
|
|
296
|
+
|
|
297
|
+
|
|
298
|
+
@dataclass
class LLMEngine:
    """Manages LLM backend configuration and model switching.

    Combines a backend choice, a model tag and the detected GPU information
    and produces Docker Compose configuration fragments from them.  The
    configured backend is always honoured; on hosts with several NVIDIA
    GPUs the engine only *logs a recommendation* to use vLLM.  Model pulls
    are delegated to the ``ollama`` command-line tool.
    """

    backend: LLMBackend = LLMBackend.OLLAMA
    model: str = "llama3.1:8b"
    gpu_info: GPUInfo = field(default_factory=lambda: GPUInfo(vendor=GPUVendor.NONE))
    vram_limit_mb: int = 0
    # Model most recently activated through switch_model(); empty until then.
    _current_model: str = ""

    # ── Factory ──────────────────────────────────────────────

    @classmethod
    def from_config(
        cls,
        *,
        backend: str = "ollama",
        model: str = "llama3.1:8b",
        gpu_autodetect: bool = True,
        vram_limit_mb: int = 0,
    ) -> LLMEngine:
        """Create an engine from admina.yaml values.

        Args:
            backend: ``"ollama"`` or ``"vllm"``.
            model: Default model to pull / serve.
            gpu_autodetect: Run GPU probe on the host.  When false the
                engine is created with ``GPUVendor.NONE``.
            vram_limit_mb: Optional VRAM cap (0 = unlimited).

        Raises:
            ValueError: If *backend* is not a known :class:`LLMBackend`
                value.
        """
        gpu = detect_gpu() if gpu_autodetect else GPUInfo(vendor=GPUVendor.NONE)
        resolved_backend = LLMBackend(backend)

        # The requested backend is kept as-is.  With several NVIDIA GPUs
        # and Ollama selected we only log a hint that vLLM may fit better.
        if (
            resolved_backend == LLMBackend.OLLAMA
            and gpu.device_count > 1
            and gpu.vendor == GPUVendor.NVIDIA
        ):
            logger.info(
                "Multiple NVIDIA GPUs detected (%d) — recommending vLLM",
                gpu.device_count,
            )

        return cls(
            backend=resolved_backend,
            model=model,
            gpu_info=gpu,
            vram_limit_mb=vram_limit_mb,
        )

    # ── Compose generation ───────────────────────────────────

    def compose_service(self, project_name: str = "admina") -> dict[str, Any]:
        """Return the docker-compose service dict for the configured backend.

        For vLLM the tensor-parallel size is set to the detected device
        count (at least 1).  For Ollama the engine's VRAM limit is passed
        through.

        Args:
            project_name: Used for container naming
                (``<project_name>-vllm`` / ``<project_name>-ollama``).
        """
        if self.backend == LLMBackend.VLLM:
            vllm_cfg = VLLMConfig(
                container_name=f"{project_name}-vllm",
                model=self.model,
                tensor_parallel_size=max(1, self.gpu_info.device_count),
                gpu_vendor=self.gpu_info.vendor,
            )
            return vllm_cfg.to_compose_dict()

        ollama_cfg = OllamaConfig(
            container_name=f"{project_name}-ollama",
            model=self.model,
            gpu_vendor=self.gpu_info.vendor,
            vram_limit_mb=self.vram_limit_mb,
        )
        return ollama_cfg.to_compose_dict()

    # ── Model management ─────────────────────────────────────

    async def pull_model(self, model: str | None = None) -> str:
        """Request model pull via Ollama CLI (non-blocking).

        Runs ``ollama pull <tag>`` as a subprocess and waits for it to
        finish without blocking the event loop.

        Args:
            model: Model tag to pull.  Defaults to ``self.model``.

        Returns:
            Output from the pull command on success.  On failure — the
            command exits non-zero or cannot be started at all — a string
            of the form ``"error: <details>"``.
        """
        tag = model or self.model
        logger.info("Pulling model %s", tag)
        try:
            proc = await asyncio.create_subprocess_exec(
                "ollama",
                "pull",
                tag,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE,
            )
        except OSError as exc:
            # Typically the ``ollama`` binary is not installed.  Report it
            # through the same "error: ..." convention as a failed pull so
            # callers such as switch_model() handle it uniformly.
            logger.error("Model pull failed: %s", exc)
            return f"error: {exc}"

        stdout, stderr = await proc.communicate()
        if proc.returncode != 0:
            # errors="replace": tool output must never make error reporting
            # itself fail on undecodable bytes.
            err = stderr.decode(errors="replace").strip()
            logger.error("Model pull failed: %s", err)
            return f"error: {err}"
        return stdout.decode(errors="replace").strip()

    def pull_model_sync(self, model: str | None = None) -> str:
        """Synchronous convenience wrapper for :meth:`pull_model`.

        Runs the coroutine on a fresh event loop via :func:`asyncio.run`,
        so it must not be called from code that is already running inside
        an event loop (``asyncio.run`` raises ``RuntimeError`` there) —
        await :meth:`pull_model` instead.
        """
        return asyncio.run(self.pull_model(model))

    async def switch_model(self, new_model: str) -> ModelStatus:
        """Hot-switch to a different model without container restart.

        For Ollama this works by pulling the new model (Ollama loads it on
        first request and unloads the previous one automatically).  For vLLM
        a container restart is required — this method returns a status
        indicating that and leaves the engine unchanged.

        Args:
            new_model: The model tag to switch to.

        Returns:
            A :class:`ModelStatus` reflecting the new state.  ``loaded`` is
            true only when the pull succeeded; otherwise ``error`` carries
            the reason and the engine keeps its previous model.
        """
        previous = self._current_model or self.model
        logger.info("Switching model %s → %s", previous, new_model)

        if self.backend == LLMBackend.VLLM:
            return ModelStatus(
                model=new_model,
                backend=self.backend,
                loaded=False,
                error="vLLM requires container restart for model switch",
            )

        outcome = await self.pull_model(new_model)
        if outcome.startswith("error:"):
            return ModelStatus(
                model=new_model,
                backend=self.backend,
                loaded=False,
                error=outcome,
            )

        # Commit the switch only after a successful pull.
        self._current_model = new_model
        self.model = new_model
        return ModelStatus(
            model=new_model,
            backend=self.backend,
            loaded=True,
        )

    async def switch_model_sync(self, new_model: str) -> ModelStatus:
        """Alias of :meth:`switch_model`.

        NOTE: despite its name this is a coroutine function and must be
        awaited; it is kept ``async`` so existing ``await`` call sites keep
        working.
        """
        return await self.switch_model(new_model)

    # ── Status ───────────────────────────────────────────────

    def status(self) -> ModelStatus:
        """Return current engine status.

        ``loaded`` is true only after a successful :meth:`switch_model`;
        until then the configured default model is reported as not loaded.
        """
        return ModelStatus(
            model=self._current_model or self.model,
            backend=self.backend,
            loaded=bool(self._current_model),
        )

    def summary(self) -> dict[str, Any]:
        """Return a JSON-serialisable summary of the engine config."""
        gpu = self.gpu_info
        return {
            "backend": self.backend.value,
            "model": self.model,
            "gpu": {
                "vendor": gpu.vendor.value,
                "device_count": gpu.device_count,
                "vram_total_mb": gpu.vram_total_mb,
                "driver_version": gpu.driver_version,
            },
            "vram_limit_mb": self.vram_limit_mb,
        }
|