llama-tui 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_tui-0.1.0/PKG-INFO +18 -0
- llama_tui-0.1.0/README.md +10 -0
- llama_tui-0.1.0/pyproject.toml +16 -0
- llama_tui-0.1.0/src/llama_tui/__init__.py +3 -0
- llama_tui-0.1.0/src/llama_tui/cli.py +159 -0
- llama_tui-0.1.0/src/llama_tui/config.py +44 -0
- llama_tui-0.1.0/src/llama_tui/download.py +63 -0
- llama_tui-0.1.0/src/llama_tui/models.py +52 -0
- llama_tui-0.1.0/src/llama_tui/rpc.py +14 -0
- llama_tui-0.1.0/src/llama_tui/server.py +108 -0
- llama_tui-0.1.0/src/llama_tui/swap.py +47 -0
llama_tui-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: llama-tui
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Lightweight TUI manager for llama.cpp models on Raspberry Pi
|
|
5
|
+
Requires-Dist: click-extra>=7.12
|
|
6
|
+
Requires-Python: >=3.14
|
|
7
|
+
Description-Content-Type: text/markdown
|
|
8
|
+
|
|
9
|
+
src/
|
|
10
|
+
└── llama_tui/
|
|
11
|
+
├── __init__.py # main() entry for the console script
|
|
12
|
+
├── cli.py # Click-extra CLI group & commands
|
|
13
|
+
├── config.py # Slotted dataclasses (server, system, RPC)
|
|
14
|
+
├── models.py # Model catalog (slotted, verified)
|
|
15
|
+
├── download.py # hf download wrapper
|
|
16
|
+
├── server.py # llama-server lifecycle manager
|
|
17
|
+
├── swap.py # swap, governor, hugepages helpers
|
|
18
|
+
└── rpc.py # o7p3 RPC connectivity check
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
src/
|
|
2
|
+
└── llama_tui/
|
|
3
|
+
├── __init__.py # main() entry for the console script
|
|
4
|
+
├── cli.py # Click-extra CLI group & commands
|
|
5
|
+
├── config.py # Slotted dataclasses (server, system, RPC)
|
|
6
|
+
├── models.py # Model catalog (slotted, verified)
|
|
7
|
+
├── download.py # hf download wrapper
|
|
8
|
+
├── server.py # llama-server lifecycle manager
|
|
9
|
+
├── swap.py # swap, governor, hugepages helpers
|
|
10
|
+
└── rpc.py # o7p3 RPC connectivity check
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "llama-tui"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "Lightweight TUI manager for llama.cpp models on Raspberry Pi"
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
requires-python = ">=3.14"
|
|
7
|
+
dependencies = [
|
|
8
|
+
"click-extra>=7.12",
|
|
9
|
+
]
|
|
10
|
+
|
|
11
|
+
[project.scripts]
|
|
12
|
+
llama-tui = "llama_tui:main"
|
|
13
|
+
|
|
14
|
+
[build-system]
|
|
15
|
+
requires = ["uv_build>=0.11.16,<0.12.0"]
|
|
16
|
+
build-backend = "uv_build"
|
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
import click_extra as click
|
|
2
|
+
import sys
|
|
3
|
+
|
|
4
|
+
from .config import Config, DEFAULT_CONFIG
|
|
5
|
+
from .models import registry, ModelRole, MODEL_CATALOG
|
|
6
|
+
from .download import DownloadManager
|
|
7
|
+
from .server import ServerManager
|
|
8
|
+
from .swap import swap_off, swap_on, swap_status, set_governor, enable_hugepages
|
|
9
|
+
from .rpc import rpc_status
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@click.group()
@click.version_option(package_name="llama-tui")
def main() -> None:
    """🦙 Llama TUI — Lightweight model manager for Raspberry Pi LLM clusters."""
    # Root command group: it performs no work of its own. Sub-commands are
    # attached with ``@main.command()``; the docstring above is the text
    # shown by ``--help`` and must not be reworded casually.
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@main.command()
@click.option("--force", is_flag=True, help="Force re-download even if cached")
@click.option("--workers", default=4, help="Parallel download workers")
@click.option("--role", "roles", multiple=True,
              type=click.Choice([r.name.lower() for r in ModelRole], case_sensitive=False),
              help="Filter by model role (repeatable)")
def download(force: bool, workers: int, roles: tuple[str, ...]) -> None:
    """Download all models into the HuggingFace cache."""
    # The docstring above is the command's --help text.
    #
    # Exit status: 0 when every selected model downloaded, 1 when at least
    # one failed, so scripts can detect a partial download.
    click.echo("📥 Downloading models...")
    mgr = DownloadManager(force=force, max_workers=workers)

    # No --role given means "no filter"; download_all() treats None that way.
    role_filter = [ModelRole[r.upper()] for r in roles] if roles else None
    results = mgr.download_all(roles=role_filter)

    ok = sum(1 for r in results if r.success)
    fail = len(results) - ok
    if fail == 0:
        click.secho(f"✅ All {ok} models downloaded successfully.", fg="green")
        return

    click.secho(f"⚠️ {ok} ok, {fail} failed.", fg="yellow")
    # Mirror the other commands, which exit non-zero on failure.
    sys.exit(1)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
@main.command()
@click.option("--role", "role_name", default="chat",
              type=click.Choice([r.name.lower() for r in ModelRole], case_sensitive=False),
              help="Which model role to serve")
@click.option("--port", type=int, help="Override default port")
@click.option("--ctx-size", type=int, default=8192, help="Context window size")
@click.option("--rpc/--no-rpc", default=False, help="Enable RPC offload to o7p3")
def serve(role_name: str, port: int | None, ctx_size: int, rpc: bool) -> None:
    """Start llama-server for a specific model."""
    # The docstring above is the command's --help text.
    #
    # Flow: resolve the catalog entry for the role, build a fresh Config,
    # apply best-effort system tuning, launch the server and wait for its
    # health endpoint. Exits with status 1 if the role has no model, the
    # server cannot be started, or it never becomes healthy.
    role = ModelRole[role_name.upper()]
    entry = registry.get(role)
    if entry is None:
        click.secho(f"❌ No model found for role: {role_name}", fg="red")
        sys.exit(1)

    # A private Config instance keeps these overrides out of DEFAULT_CONFIG.
    cfg = Config()
    cfg.server.model_ref = f"{entry.repo}:{entry.quant}"
    # Precedence: explicit --port, then the catalog port, then 8383.
    # A catalog port of 0 is falsy and therefore falls through to 8383.
    cfg.server.port = port or entry.port or 8383
    cfg.server.ctx_size = ctx_size
    cfg.rpc.enabled = rpc

    click.echo(f"🚀 Starting {entry.name} on port {cfg.server.port}...")

    click.echo("🔧 System prep: swap off, performance governor, hugepages...")
    # Tuning is best-effort: a failed step is reported but never blocks the
    # launch. swap_off() prints its own diagnostic; the other two helpers
    # only return False, so surface that here instead of dropping it.
    swap_off()
    if not set_governor("performance"):
        click.secho("⚠️ Could not set CPU governor to performance.", fg="yellow")
    if not enable_hugepages("madvise"):
        click.secho("⚠️ Could not set transparent hugepages to madvise.", fg="yellow")

    mgr = ServerManager(
        config=cfg.server,
        sys_cfg=cfg.system,
        rpc_cfg=cfg.rpc,
    )
    if not mgr.start():
        click.secho("❌ Failed to start server.", fg="red")
        sys.exit(1)

    click.echo("⏳ Waiting for server to become healthy...")
    if mgr.wait_ready():
        click.secho(f"✅ Server ready at http://127.0.0.1:{cfg.server.port}", fg="green")
    else:
        click.secho("❌ Server did not become healthy.", fg="red")
        # Do not leave a half-started server behind.
        mgr.stop()
        sys.exit(1)
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
@main.command()
def swap() -> None:
    """Show swap status."""
    # swap_status() returns ready-to-print text; just prefix and echo it.
    current = swap_status()
    click.echo(f"Swap: {current}")
|
|
89
|
+
|
|
90
|
+
@main.command()
def prep() -> None:
    """Apply all system optimisations for inference."""
    # The docstring above is the command's --help text.
    #
    # Every step is attempted even if an earlier one fails. The final
    # "ready" line is printed only when all steps succeeded; otherwise the
    # command exits with status 1.
    click.echo("🔧 Applying system optimisations...")

    # (helper, positional args, success line, failure line)
    steps = (
        (swap_off, (), " ✅ Swap disabled", " ❌ Swap disable failed"),
        (set_governor, ("performance",),
         " ✅ CPU governor: performance", " ❌ Governor set failed"),
        (enable_hugepages, ("madvise",),
         " ✅ Transparent hugepages: madvise", " ❌ Hugepages set failed"),
    )

    failures = 0
    for action, args, ok_line, fail_line in steps:
        if action(*args):
            click.secho(ok_line, fg="green")
        else:
            failures += 1
            click.secho(fail_line, fg="red")

    if failures:
        click.secho(f"⚠️ {failures} optimisation(s) could not be applied.", fg="yellow")
        sys.exit(1)
    click.secho("✅ System ready for inference.", fg="green")
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
@main.command()
def restore() -> None:
    """Restore system defaults after inference."""
    # The docstring above is the command's --help text.
    #
    # "Defaults" here are fixed values (swap on, schedutil governor,
    # hugepages "always"), not whatever was active before `prep` ran.
    # Every step is attempted; if any fails the command names the failed
    # steps and exits with status 1 instead of claiming success.
    click.echo("🔧 Restoring system defaults...")

    # (helper, positional args, label used in the failure report)
    steps = (
        (swap_on, (), "swap"),
        (set_governor, ("schedutil",), "CPU governor"),
        (enable_hugepages, ("always",), "transparent hugepages"),
    )

    failed = []
    for action, args, label in steps:
        if not action(*args):
            failed.append(label)

    if failed:
        click.secho(f"❌ Could not restore: {', '.join(failed)}", fg="red")
        sys.exit(1)
    click.secho("✅ System restored.", fg="green")
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
@main.command()
def models() -> None:
    """List all models in the catalog."""
    # The docstring above is the command's --help text.
    headers = ("Role", "Name", "Quant", "Size", "Host")

    # Build the cell text once so both renderers show the same data.
    rows: list[tuple[str, ...]] = []
    for m in MODEL_CATALOG:
        size_str = f"{m.size_mb} MB" if m.size_mb > 0 else "—"
        mm_str = "📷" if m.mmproj else ""
        note = f" [{m.note}]" if m.note else ""
        rows.append((m.role.name, f"{m.name} {mm_str}{note}", m.quant, size_str, m.host))

    try:
        # Imported lazily so the other commands work without rich.
        from rich.console import Console
        from rich.markup import escape
        from rich.table import Table
    except ImportError:
        # rich is unavailable: fall back to a plain, space-aligned listing
        # rather than crashing the command.
        widths = [
            max([len(title), *(len(row[col]) for row in rows)])
            for col, title in enumerate(headers)
        ]
        click.echo("🦙 Model Catalog")
        for line in (headers, *rows):
            padded = (cell.ljust(width) for cell, width in zip(line, widths))
            click.echo("  ".join(padded).rstrip())
        return

    table = Table(title="🦙 Model Catalog")
    column_styles = ("cyan", "green", "yellow", "magenta", "blue")
    for title, style in zip(headers, column_styles):
        table.add_column(title, style=style)
    for row in rows:
        # Escape each cell: text such as "[note]" would otherwise be parsed
        # as a Rich markup tag and disappear from the rendered table.
        table.add_row(*(escape(cell) for cell in row))
    Console().print(table)
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
@main.command()
def rpc() -> None:
    """Check RPC connectivity to o7p3."""
    # rpc_status() probes the default host/port and returns a one-line report.
    report = rpc_status()
    click.echo(report)
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
@main.command()
@click.pass_context
def status(ctx: click.Context) -> None:
    """Show full system + model status."""
    # Banner first, so something is visible before the slower probes run.
    for banner_line in ("🦙 Llama TUI Status", "=" * 40):
        click.echo(banner_line)

    swap_report = swap_status()
    click.echo(f"Swap: {swap_report}")

    rpc_report = rpc_status()
    click.echo(rpc_report)

    catalog_size = len(MODEL_CATALOG)
    click.echo(f"\nModels: {catalog_size} in catalog")

    # Reuse the `models` sub-command to print the catalog table.
    ctx.invoke(models)
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
from dataclasses import dataclass, field
|
|
2
|
+
|
|
3
|
+
@dataclass(slots=True)
|
|
4
|
+
class ServerConfig:
|
|
5
|
+
model_ref: str = ""
|
|
6
|
+
port: int = 8383
|
|
7
|
+
host: str = "0.0.0.0"
|
|
8
|
+
ngl: int = 0
|
|
9
|
+
flash_attn: bool = True
|
|
10
|
+
ctx_size: int = 8192
|
|
11
|
+
threads: int = 3
|
|
12
|
+
threads_batch: int = 3
|
|
13
|
+
parallel: int = 2
|
|
14
|
+
cache_type_k: str = "q8_0"
|
|
15
|
+
cache_type_v: str = "q8_0"
|
|
16
|
+
mlock: bool = True
|
|
17
|
+
cache_reuse: int = 256
|
|
18
|
+
rope_scaling: str = "linear"
|
|
19
|
+
rope_scale: float = 8.0
|
|
20
|
+
priority: int = 2
|
|
21
|
+
|
|
22
|
+
@dataclass(slots=True)
|
|
23
|
+
class SystemConfig:
|
|
24
|
+
manage_swap: bool = True
|
|
25
|
+
cpu_governor: str = "performance"
|
|
26
|
+
transparent_hugepages: str = "madvise"
|
|
27
|
+
vulkan_disabled: bool = True
|
|
28
|
+
opencl_disabled: bool = True
|
|
29
|
+
blis_single_thread: bool = True
|
|
30
|
+
|
|
31
|
+
@dataclass(slots=True)
|
|
32
|
+
class RPCConfig:
|
|
33
|
+
enabled: bool = False
|
|
34
|
+
host: str = "o7p3.stalk-symmetric.ts.net"
|
|
35
|
+
port: int = 50052
|
|
36
|
+
draft_model: str = "MaziyarPanahi/gemma-3-1b-it-GGUF:Q4_K_M"
|
|
37
|
+
|
|
38
|
+
@dataclass(slots=True)
class Config:
    """Aggregate of the server, system and RPC settings.

    Each section uses a default factory, so every Config() gets its own
    independent sub-objects.
    """

    server: ServerConfig = field(default_factory=ServerConfig)   # llama-server launch options
    system: SystemConfig = field(default_factory=SystemConfig)   # host tuning switches
    rpc: RPCConfig = field(default_factory=RPCConfig)            # RPC offload endpoint
|
|
43
|
+
|
|
44
|
+
# Module-level instance built purely from the dataclass defaults. It is a
# single mutable object, so any change made through it is seen by every
# module that imports it.
DEFAULT_CONFIG = Config()
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
import subprocess
|
|
2
|
+
import sys
|
|
3
|
+
from dataclasses import dataclass, field
|
|
4
|
+
|
|
5
|
+
from .models import MODEL_CATALOG, ModelEntry
|
|
6
|
+
|
|
7
|
+
@dataclass(slots=True)
|
|
8
|
+
class DownloadResult:
|
|
9
|
+
model: str
|
|
10
|
+
success: bool
|
|
11
|
+
message: str = ""
|
|
12
|
+
|
|
13
|
+
@dataclass(slots=True)
class DownloadManager:
    """Download catalog models by running the ``hf download`` command.

    Attributes:
        force: When true, ``--force-download`` is added to the model download.
        max_workers: Value passed to ``--max-workers``.
        results: Results of the most recent ``download_all`` call.
    """

    force: bool = False
    max_workers: int = 4
    results: list[DownloadResult] = field(default_factory=list)

    @staticmethod
    def _failure_text(exc: Exception) -> str:
        """Return a printable, never-None description of a failed command.

        ``TimeoutExpired`` carries a ``stderr`` attribute that may be None or
        bytes, so it cannot be used as the message directly.
        """
        detail = getattr(exc, "stderr", None)
        if isinstance(detail, bytes):
            detail = detail.decode(errors="replace")
        detail = (detail or "").strip()
        return detail or str(exc)

    @staticmethod
    def _run(cmd: list[str], timeout: int) -> subprocess.CompletedProcess:
        """Run *cmd*, capturing text output and raising on a non-zero exit."""
        return subprocess.run(cmd, check=True, capture_output=True, text=True, timeout=timeout)

    def download(self, entry: ModelEntry) -> DownloadResult:
        """Download one catalog entry (and its projector file, if flagged).

        Args:
            entry: Catalog entry providing ``repo``, ``quant`` and ``mmproj``.

        Returns:
            A DownloadResult; failures are reported through ``success=False``
            and ``message`` rather than raised. This includes a non-zero exit,
            a timeout, and a missing ``hf`` executable.
        """
        repo_quant = f"{entry.repo}:{entry.quant}"
        # OSError covers the case where the `hf` executable is not installed.
        failures = (subprocess.CalledProcessError, subprocess.TimeoutExpired, OSError)

        cmd = [
            "hf", "download", entry.repo,
            "--include", f"*{entry.quant}.gguf",
            "--max-workers", str(self.max_workers),
        ]
        if self.force:
            cmd.append("--force-download")

        # Multimodal projector: separate call to avoid glob vs explicit conflict
        if entry.mmproj:
            proj_cmd = [
                "hf", "download", entry.repo,
                "--include", "mmproj-*f16.gguf",
                "--max-workers", str(self.max_workers),
            ]
            try:
                self._run(proj_cmd, timeout=300)
            except failures as e:
                return DownloadResult(
                    model=repo_quant, success=False,
                    message=f"Projector download failed: {self._failure_text(e)}",
                )

        try:
            result = self._run(cmd, timeout=600)
        except failures as e:
            return DownloadResult(model=repo_quant, success=False,
                                  message=self._failure_text(e))
        return DownloadResult(model=repo_quant, success=True, message=result.stdout.strip())

    def download_all(self, *, roles=None) -> list[DownloadResult]:
        """Download every catalog entry, optionally restricted to *roles*.

        Args:
            roles: Collection of roles to keep; None or empty means all.

        Returns:
            The manager's ``results`` list, refilled with one entry per model.
            A status line is printed for each model as it finishes.
        """
        self.results.clear()
        entries = MODEL_CATALOG
        if roles:
            entries = [e for e in MODEL_CATALOG if e.role in roles]
        for entry in entries:
            res = self.download(entry)
            self.results.append(res)
            status = "✅" if res.success else "❌"
            # Keep the line short: command output can be very long.
            print(f" {status} {res.model}: {res.message[:120]}")
        return self.results

    @property
    def all_ok(self) -> bool:
        """True only if at least one download ran and all of them succeeded."""
        return all(r.success for r in self.results) if self.results else False
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
from dataclasses import dataclass, field
|
|
2
|
+
from enum import Enum, auto
|
|
3
|
+
|
|
4
|
+
class ModelRole(Enum):
    """Purpose a catalog model serves; member names double as CLI choices."""

    # Values are spelled out; they match what auto() assigns (1, 2, 3, ...).
    CHAT = 1
    VISION = 2
    CODE = 3
    SECURITY = 4
    STEM = 5
    SPEC_DRAFT = 6
    EMBEDDED = 7
|
|
12
|
+
|
|
13
|
+
@dataclass(slots=True)
class ModelEntry:
    """One row of the model catalog."""

    role: ModelRole             # purpose this model is registered under
    name: str                   # human-readable display name
    repo: str                   # HuggingFace user/repo
    quant: str                  # e.g. Q4_K_M
    host: str                   # "o7p5" or "o7p3"
    port: int = 0               # serving port; 0 means none assigned
    size_mb: int = 0            # size in MB; 0 is displayed as unknown
    mmproj: bool = False        # when true, a projector file is downloaded too
    # NOTE(review): needs_mmproj is not read by any code visible here.
    needs_mmproj: bool = False
    note: str = ""              # optional free-text remark shown in listings
|
|
25
|
+
|
|
26
|
+
# Verified catalog — May 2026
MODEL_CATALOG: list[ModelEntry] = [
    ModelEntry(
        role=ModelRole.CHAT,
        name="Llama 3.2 3B",
        repo="unsloth/Llama-3.2-3B-Instruct-GGUF",
        quant="Q4_K_M",
        host="o7p5",
        port=8383,
        size_mb=1900,
    ),
    ModelEntry(
        role=ModelRole.VISION,
        name="MiniCPM-V 4.6",
        repo="openbmb/MiniCPM-V-4.6-gguf",
        quant="Q4_K_M",
        host="o7p5",
        port=8384,
        size_mb=1600,
        mmproj=True,
        needs_mmproj=True,
    ),
    ModelEntry(
        role=ModelRole.CODE,
        name="Agent.Nano.Coder 2B",
        repo="WithinUsAI/Agent.Nano.Coder-2B-gguf",
        quant="Q4_K_M",
        host="o7p5",
        port=8385,
        size_mb=1200,
    ),
    ModelEntry(
        role=ModelRole.SECURITY,
        name="Qwen3 4B SafeRL",
        repo="ShahzebKhoso/Qwen3-4B-SafeRL-GGUF",
        quant="Q4_K_M",
        host="o7p5",
        port=8386,
        size_mb=2500,
    ),
    ModelEntry(
        role=ModelRole.STEM,
        name="LFM2.5 1.2B",
        repo="bartowski/LiquidAI_LFM2.5-1.2B-Instruct-GGUF",
        quant="Q8_0",
        host="o7p5",
        port=8387,
        size_mb=1250,
    ),
    # Port 0: this entry has no serving port of its own.
    ModelEntry(
        role=ModelRole.SPEC_DRAFT,
        name="Qwen2.5 Coder 0.5B",
        repo="featherless-ai-quants/unsloth-Qwen2.5-Coder-0.5B-GGUF",
        quant="Q4_K_M",
        host="o7p5",
        port=0,
        size_mb=398,
    ),
    ModelEntry(
        role=ModelRole.EMBEDDED,
        name="Gemma 3 1B",
        repo="MaziyarPanahi/gemma-3-1b-it-GGUF",
        quant="Q4_K_M",
        host="o7p3",
        port=50052,
        size_mb=800,
    ),
]
|
|
36
|
+
|
|
37
|
+
@dataclass(slots=True)
class ModelRegistry:
    """Lookup tables over MODEL_CATALOG, keyed by role and by display name."""

    _by_role: dict[ModelRole, ModelEntry] = field(default_factory=dict)
    _by_name: dict[str, ModelEntry] = field(default_factory=dict)

    def __post_init__(self):
        """Index the catalog; on a duplicate key the later entry wins."""
        self._by_role.update({entry.role: entry for entry in MODEL_CATALOG})
        self._by_name.update({entry.name: entry for entry in MODEL_CATALOG})

    def get(self, key: ModelRole | str) -> ModelEntry | None:
        """Return the entry for a role or a display name, or None if absent."""
        table = self._by_role if isinstance(key, ModelRole) else self._by_name
        return table.get(key)
|
|
51
|
+
|
|
52
|
+
# Shared module-level registry, indexed from MODEL_CATALOG once at import time.
registry = ModelRegistry()
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import socket
|
|
2
|
+
|
|
3
|
+
def check_rpc(host: str = "o7p3.stalk-symmetric.ts.net", port: int = 50052) -> bool:
    """Return True if a TCP connection to ``host:port`` opens within 5 seconds.

    Only connectivity is tested; nothing is sent or received. Any socket
    error (timeout, refusal, name-resolution failure, ...) yields False.
    """
    try:
        conn = socket.create_connection((host, port), timeout=5)
    except OSError:
        # socket.timeout and ConnectionRefusedError are OSError subclasses.
        return False
    conn.close()
    return True
|
|
9
|
+
|
|
10
|
+
def rpc_status(host: str = "o7p3.stalk-symmetric.ts.net", port: int = 50052) -> str:
    """Probe ``host:port`` and return a one-line, human-readable verdict."""
    return (
        f"✅ RPC server reachable at {host}:{port}"
        if check_rpc(host, port)
        else f"❌ RPC server unreachable at {host}:{port}"
    )
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import signal
|
|
3
|
+
import subprocess
|
|
4
|
+
import sys
|
|
5
|
+
import time
|
|
6
|
+
import urllib.request
|
|
7
|
+
import urllib.error
|
|
8
|
+
from dataclasses import dataclass, field
|
|
9
|
+
|
|
10
|
+
from .config import ServerConfig, SystemConfig, RPCConfig, DEFAULT_CONFIG
|
|
11
|
+
|
|
12
|
+
@dataclass(slots=True)
class ServerManager:
    """Start, health-check and stop one ``llama-server`` child process.

    Attributes:
        config: Command-line settings for the server.
        sys_cfg: Switches that become environment variables for the child.
        rpc_cfg: Optional RPC offload endpoint.

    When no argument is given, each attribute defaults to the corresponding
    section of the shared DEFAULT_CONFIG object (not a copy of it).
    """

    config: ServerConfig = field(default_factory=lambda: DEFAULT_CONFIG.server)
    sys_cfg: SystemConfig = field(default_factory=lambda: DEFAULT_CONFIG.system)
    rpc_cfg: RPCConfig = field(default_factory=lambda: DEFAULT_CONFIG.rpc)
    _process: subprocess.Popen | None = None

    def _build_env(self) -> dict[str, str]:
        """Return a copy of the current environment plus the tuning variables."""
        env = os.environ.copy()
        if self.sys_cfg.vulkan_disabled:
            env["GGML_VULKAN"] = "0"
        if self.sys_cfg.opencl_disabled:
            env["GGML_OPENCL"] = "0"
        if self.sys_cfg.blis_single_thread:
            env["BLIS_NUM_THREADS"] = "1"
            env["OMP_NUM_THREADS"] = "1"
        env["GOMP_SPINCOUNT"] = "0"
        env["LLAMA_ARG_THREADS"] = str(self.config.threads)
        env["LLAMA_ARG_THREADS_BATCH"] = str(self.config.threads_batch)
        return env

    def _build_cmd(self) -> list[str]:
        """Return the argv that runs llama-server under ``taskset``."""
        # Pin to CPUs 0..threads-1. Clamp so that threads < 1 cannot produce
        # an invalid range such as "0--1".
        last_cpu = max(self.config.threads, 1) - 1
        cmd = [
            "taskset", "-c", f"0-{last_cpu}",
            "llama-server",
            "-hf", self.config.model_ref,
            "-ngl", str(self.config.ngl),
            "-fa", "on" if self.config.flash_attn else "off",
            "-ctk", self.config.cache_type_k,
            "-ctv", self.config.cache_type_v,
            "-c", str(self.config.ctx_size),
            "-t", str(self.config.threads),
            "-tb", str(self.config.threads_batch),
            "-np", str(self.config.parallel),
            "--prio", str(self.config.priority),
            "--cache-reuse", str(self.config.cache_reuse),
            "--port", str(self.config.port),
            "--host", self.config.host,
            "--rope-scaling", self.config.rope_scaling,
            "--rope-scale", str(self.config.rope_scale),
            "--spec-type", "ngram-mod",
            "--spec-ngram-mod-n-min", "48",
            "--spec-ngram-mod-n-max", "64",
            "--spec-ngram-mod-n-match", "24",
            "--spec-draft-n-max", "3",
        ]
        if self.config.mlock:
            cmd.append("--mlock")
        if self.rpc_cfg.enabled:
            cmd.extend(["--rpc", f"{self.rpc_cfg.host}:{self.rpc_cfg.port}"])
        return cmd

    def start(self) -> bool:
        """Launch the server in its own session.

        Returns:
            True if the process was started (or is already running), False if
            a required executable is missing or the launch failed. Problems
            are reported on stderr, not raised.
        """
        import shutil  # local import: only needed for the PATH pre-check

        if self.is_running:
            return True
        cmd = self._build_cmd()
        env = self._build_env()

        # taskset is what actually gets exec'd; llama-server is only an
        # argument to it. Check both so the error names the missing program.
        for exe in ("taskset", "llama-server"):
            if shutil.which(exe, path=env.get("PATH")) is None:
                print(f"❌ {exe} not found in PATH", file=sys.stderr)
                return False

        try:
            self._process = subprocess.Popen(
                cmd, env=env,
                # Nothing ever reads the child's output. With PIPE the server
                # would block once the pipe buffer filled, and would hit a
                # broken pipe after this CLI process exits.
                stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL,
                start_new_session=True,
            )
        except OSError as exc:
            print(f"❌ Failed to launch {cmd[0]}: {exc}", file=sys.stderr)
            return False
        return True

    def stop(self, timeout: int = 10) -> bool:
        """Terminate the server, killing it if it does not exit in time.

        Args:
            timeout: Seconds to wait after SIGTERM before sending SIGKILL.

        Returns:
            True if the process was not running or exited after terminate();
            False if it had to be killed.
        """
        if self._process is None or self._process.poll() is not None:
            return True
        self._process.terminate()
        try:
            self._process.wait(timeout=timeout)
            return True
        except subprocess.TimeoutExpired:
            self._process.kill()
            self._process.wait()  # reap the child so no zombie is left
            return False

    @property
    def is_running(self) -> bool:
        """True while a started child process has not yet exited."""
        return self._process is not None and self._process.poll() is None

    def wait_ready(self, timeout: int = 300) -> bool:
        """Poll the server's ``/health`` endpoint until it answers 200.

        Args:
            timeout: Overall time budget in seconds.

        Returns:
            True once the endpoint returns HTTP 200; False if the process
            exits first or the budget runs out.
        """
        deadline = time.monotonic() + timeout
        url = f"http://127.0.0.1:{self.config.port}/health"
        while time.monotonic() < deadline:
            if not self.is_running:
                return False
            try:
                with urllib.request.urlopen(url, timeout=3) as r:
                    if r.status == 200:
                        return True
            except (urllib.error.URLError, OSError):
                pass  # not accepting connections yet, or still loading
            # Pause after every unsuccessful probe, including non-200 replies,
            # so the loop never spins.
            time.sleep(1)
        return False
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
import subprocess
|
|
2
|
+
import sys
|
|
3
|
+
|
|
4
|
+
def swap_off() -> bool:
    """Disable all swap with ``sudo swapoff -a``.

    Returns:
        True on success. False if the command exits non-zero or cannot be
        launched at all (for example ``sudo`` is not installed); a diagnostic
        is printed to stderr in both cases.
    """
    try:
        subprocess.run(["sudo", "swapoff", "-a"], check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as e:
        print(f"⚠️ swapoff failed: {e.stderr.strip()}", file=sys.stderr)
        return False
    except OSError as e:
        # The executable itself could not be started.
        print(f"⚠️ swapoff failed: {e}", file=sys.stderr)
        return False
    return True
|
|
11
|
+
|
|
12
|
+
def swap_on() -> bool:
    """Re-enable all configured swap with ``sudo swapon -a``.

    Returns:
        True on success. False if the command exits non-zero or cannot be
        launched at all (for example ``sudo`` is not installed); a diagnostic
        is printed to stderr in both cases.
    """
    try:
        subprocess.run(["sudo", "swapon", "-a"], check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as e:
        print(f"⚠️ swapon failed: {e.stderr.strip()}", file=sys.stderr)
        return False
    except OSError as e:
        # The executable itself could not be started.
        print(f"⚠️ swapon failed: {e}", file=sys.stderr)
        return False
    return True
|
|
19
|
+
|
|
20
|
+
def swap_status() -> str:
    """Return the output of ``swapon --show`` as display text.

    Yields "Swap is off" when the command prints nothing and "Unknown" when
    the command cannot be run.
    """
    try:
        listing = subprocess.run(
            ["swapon", "--show"], capture_output=True, text=True
        ).stdout
    except Exception:
        return "Unknown"
    summary = listing.strip()
    return summary if summary else "Swap is off"
|
|
26
|
+
|
|
27
|
+
def set_governor(governor: str = "performance") -> bool:
    """Write *governor* to every CPU's ``scaling_governor`` file via sudo.

    Args:
        governor: Name of the cpufreq governor to select.

    Returns:
        True on success; False if the command exits non-zero or cannot be
        launched (for example ``sudo`` is not installed).
    """
    # The value travels as a positional argument ("$1") and is never spliced
    # into the script text, so it cannot be interpreted as shell syntax.
    # The glob is expanded by the shell that sudo starts.
    script = 'printf "%s\\n" "$1" | tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor'
    try:
        subprocess.run(
            ["sudo", "sh", "-c", script, "sh", governor],
            check=True, capture_output=True, text=True,
        )
    except (subprocess.CalledProcessError, OSError):
        return False
    return True
|
|
37
|
+
|
|
38
|
+
def enable_hugepages(mode: str = "madvise") -> bool:
    """Write *mode* to the transparent-hugepage ``enabled`` file via sudo.

    Args:
        mode: Value to write, e.g. "madvise" or "always".

    Returns:
        True on success; False if the command exits non-zero or cannot be
        launched (for example ``sudo`` is not installed).
    """
    # The value travels as a positional argument ("$1") and is never spliced
    # into the script text, so it cannot be interpreted as shell syntax.
    script = 'printf "%s\\n" "$1" | tee /sys/kernel/mm/transparent_hugepage/enabled'
    try:
        subprocess.run(
            ["sudo", "sh", "-c", script, "sh", mode],
            check=True, capture_output=True, text=True,
        )
    except (subprocess.CalledProcessError, OSError):
        return False
    return True
|