llama-tui 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,18 @@
1
+ Metadata-Version: 2.3
2
+ Name: llama-tui
3
+ Version: 0.1.0
4
+ Summary: Lightweight TUI manager for llama.cpp models on Raspberry Pi
5
+ Requires-Dist: click-extra>=7.12
6
+ Requires-Python: >=3.14
7
+ Description-Content-Type: text/markdown
8
+
9
+ src/
10
+ └── llama_tui/
11
+ ├── __init__.py # main() entry for the console script
12
+ ├── cli.py # Click-extra CLI group & commands
13
+ ├── config.py # Slotted dataclasses (server, system, RPC)
14
+ ├── models.py # Model catalog (slotted, verified)
15
+ ├── download.py # hf download wrapper
16
+ ├── server.py # llama-server lifecycle manager
17
+ ├── swap.py # swap, governor, hugepages helpers
18
+ └── rpc.py # o7p3 RPC connectivity check
@@ -0,0 +1,10 @@
1
+ src/
2
+ └── llama_tui/
3
+ ├── __init__.py # main() entry for the console script
4
+ ├── cli.py # Click-extra CLI group & commands
5
+ ├── config.py # Slotted dataclasses (server, system, RPC)
6
+ ├── models.py # Model catalog (slotted, verified)
7
+ ├── download.py # hf download wrapper
8
+ ├── server.py # llama-server lifecycle manager
9
+ ├── swap.py # swap, governor, hugepages helpers
10
+ └── rpc.py # o7p3 RPC connectivity check
@@ -0,0 +1,16 @@
1
+ [project]
2
+ name = "llama-tui"
3
+ version = "0.1.0"
4
+ description = "Lightweight TUI manager for llama.cpp models on Raspberry Pi"
5
+ readme = "README.md"
6
+ requires-python = ">=3.14"
7
+ dependencies = [
8
+ "click-extra>=7.12",
9
+ ]
10
+
11
+ [project.scripts]
12
+ llama-tui = "llama_tui:main"
13
+
14
+ [build-system]
15
+ requires = ["uv_build>=0.11.16,<0.12.0"]
16
+ build-backend = "uv_build"
@@ -0,0 +1,3 @@
1
+ """Llama TUI — lightweight LLM manager for Raspberry Pi clusters."""
2
+
3
+ from llama_tui.cli import main
@@ -0,0 +1,159 @@
1
+ import click_extra as click
2
+ import sys
3
+
4
+ from .config import Config, DEFAULT_CONFIG
5
+ from .models import registry, ModelRole, MODEL_CATALOG
6
+ from .download import DownloadManager
7
+ from .server import ServerManager
8
+ from .swap import swap_off, swap_on, swap_status, set_governor, enable_hugepages
9
+ from .rpc import rpc_status
10
+
11
+
12
# Root command group: every sub-command in this module registers itself
# on it through ``@main.command()``.
@click.group()
@click.version_option(package_name="llama-tui")
def main() -> None:
    """🦙 Llama TUI — Lightweight model manager for Raspberry Pi LLM clusters."""
    # Deliberately empty: the group only dispatches to its sub-commands.
    # The docstring above is what ``--help`` prints, so it is kept verbatim.
16
+
17
+
18
@main.command()
@click.option("--force", is_flag=True, help="Force re-download even if cached")
@click.option("--workers", default=4, help="Parallel download workers")
@click.option("--role", "roles", multiple=True,
              type=click.Choice([r.name.lower() for r in ModelRole], case_sensitive=False),
              help="Filter by model role (repeatable)")
def download(force: bool, workers: int, roles: tuple[str, ...]) -> None:
    """Download all models into the HuggingFace cache."""
    # NOTE: the docstring above is the command's --help text; keep it short.
    click.echo("📥 Downloading models...")
    mgr = DownloadManager(force=force, max_workers=workers)
    # No --role given means "no filter": pass None so the whole catalog is used.
    role_set = [ModelRole[r.upper()] for r in roles] if roles else None
    results = mgr.download_all(roles=role_set)

    ok = sum(1 for r in results if r.success)
    fail = len(results) - ok
    if fail == 0:
        click.secho(f"✅ All {ok} models downloaded successfully.", fg="green")
        return

    click.secho(f"⚠️ {ok} ok, {fail} failed.", fg="yellow")
    # Signal the failure to calling scripts; previously the command exited 0
    # even when some downloads had failed.
    sys.exit(1)
37
+
38
+
39
@main.command()
@click.option("--role", "role_name", default="chat",
              type=click.Choice([r.name.lower() for r in ModelRole], case_sensitive=False),
              help="Which model role to serve")
@click.option("--port", type=int, help="Override default port")
@click.option("--ctx-size", type=int, default=8192, help="Context window size")
@click.option("--rpc/--no-rpc", default=False, help="Enable RPC offload to o7p3")
def serve(role_name: str, port: int | None, ctx_size: int, rpc: bool) -> None:
    """Start llama-server for a specific model."""
    role = ModelRole[role_name.upper()]
    entry = registry.get(role)
    if entry is None:
        click.secho(f"❌ No model found for role: {role_name}", fg="red")
        sys.exit(1)

    cfg = Config()
    cfg.server.model_ref = f"{entry.repo}:{entry.quant}"
    # Precedence: explicit --port, then the catalog entry's port, then 8383
    # (a catalog port of 0 is falsy and therefore falls through).
    cfg.server.port = port or entry.port or 8383
    cfg.server.ctx_size = ctx_size
    cfg.rpc.enabled = rpc

    click.echo(f"🚀 Starting {entry.name} on port {cfg.server.port}...")

    click.echo("🔧 System prep: swap off, performance governor, hugepages...")
    # Take the prep values from the system config instead of hard-coding
    # them; with the default SystemConfig this performs the same three steps.
    sys_cfg = cfg.system
    prep_results = {
        "swap off": swap_off() if sys_cfg.manage_swap else True,
        f"governor {sys_cfg.cpu_governor}": set_governor(sys_cfg.cpu_governor),
        f"hugepages {sys_cfg.transparent_hugepages}":
            enable_hugepages(sys_cfg.transparent_hugepages),
    }
    # Prep is best-effort: a failed step is reported but does not stop the
    # server from being started.
    for step, succeeded in prep_results.items():
        if not succeeded:
            click.secho(f"⚠️ System prep step failed: {step}", fg="yellow", err=True)

    mgr = ServerManager(
        config=cfg.server,
        sys_cfg=cfg.system,
        rpc_cfg=cfg.rpc,
    )
    if not mgr.start():
        click.secho("❌ Failed to start server.", fg="red")
        sys.exit(1)

    click.echo("⏳ Waiting for server to become healthy...")
    if mgr.wait_ready():
        click.secho(f"✅ Server ready at http://127.0.0.1:{cfg.server.port}", fg="green")
    else:
        click.secho("❌ Server did not become healthy.", fg="red")
        # Do not leave a half-started server behind.
        mgr.stop()
        sys.exit(1)
83
+
84
+
85
@main.command()
def swap() -> None:
    """Show swap status."""
    # swap_status() returns a ready-to-print string.
    current = swap_status()
    click.echo(f"Swap: {current}")
89
+
90
@main.command()
def prep() -> None:
    """Apply all system optimisations for inference."""
    click.echo("🔧 Applying system optimisations...")
    # (helper, arguments, success line, failure line) for each step, in the
    # order they are applied.
    steps = (
        (swap_off, (), " ✅ Swap disabled", " ❌ Swap disable failed"),
        (set_governor, ("performance",),
         " ✅ CPU governor: performance", " ❌ Governor set failed"),
        (enable_hugepages, ("madvise",),
         " ✅ Transparent hugepages: madvise", " ❌ Hugepages set failed"),
    )
    all_ok = True
    for action, args, ok_msg, fail_msg in steps:
        # Every step runs even if an earlier one failed.
        if action(*args):
            click.secho(ok_msg, fg="green")
        else:
            all_ok = False
            click.secho(fail_msg, fg="red")

    if not all_ok:
        # Previously "System ready" was printed (and 0 returned) even when
        # steps had failed; report the real outcome instead.
        click.secho("❌ System prep incomplete.", fg="red")
        sys.exit(1)
    click.secho("✅ System ready for inference.", fg="green")
107
+
108
+
109
@main.command()
def restore() -> None:
    """Restore system defaults after inference."""
    click.echo("🔧 Restoring system defaults...")
    # Run all three steps (a dict literal evaluates in order), then report
    # any failures together instead of silently ignoring the return values.
    outcomes = {
        "swap on": swap_on(),
        "governor schedutil": set_governor("schedutil"),
        "hugepages always": enable_hugepages("always"),
    }
    failed = [step for step, succeeded in outcomes.items() if not succeeded]
    if failed:
        click.secho(f"❌ Restore incomplete, failed: {', '.join(failed)}", fg="red")
        sys.exit(1)
    click.secho("✅ System restored.", fg="green")
117
+
118
+
119
@main.command()
def models() -> None:
    """List all models in the catalog."""
    # Build the rows first so both renderers below share the same data.
    rows = []
    for m in MODEL_CATALOG:
        size_str = f"{m.size_mb} MB" if m.size_mb > 0 else "—"
        mm_str = "📷" if m.mmproj else ""
        note = f" [{m.note}]" if m.note else ""
        rows.append((m.role.name, f"{m.name} {mm_str}{note}", m.quant, size_str, m.host))

    try:
        from rich.console import Console
        from rich.markup import escape
        from rich.table import Table
    except ImportError:
        # The project metadata declares only click-extra as a dependency, so
        # rich may not be installed; fall back to a plain listing rather
        # than crashing with ModuleNotFoundError.
        click.echo("🦙 Model Catalog")
        for row in rows:
            click.echo("\t".join(row))
        return

    console = Console()
    table = Table(title="🦙 Model Catalog")
    table.add_column("Role", style="cyan")
    table.add_column("Name", style="green")
    table.add_column("Quant", style="yellow")
    table.add_column("Size", style="magenta")
    table.add_column("Host", style="blue")

    for row in rows:
        # Escape the cells: a "[note]" suffix would otherwise be parsed as a
        # Rich markup tag and disappear from the rendered table.
        table.add_row(*(escape(cell) for cell in row))
    console.print(table)
142
+
143
+
144
@main.command()
def rpc() -> None:
    """Check RPC connectivity to o7p3."""
    # rpc_status() probes the default host/port and returns a printable line.
    report = rpc_status()
    click.echo(report)
148
+
149
+
150
@main.command()
@click.pass_context
def status(ctx: click.Context) -> None:
    """Show full system + model status."""
    separator = "=" * 40
    click.echo("🦙 Llama TUI Status")
    click.echo(separator)

    # Each probe is run right before its line is printed, so output appears
    # progressively even if a probe is slow.
    swap_line = f"Swap: {swap_status()}"
    click.echo(swap_line)
    rpc_line = rpc_status()
    click.echo(rpc_line)

    catalog_size = len(MODEL_CATALOG)
    click.echo(f"\nModels: {catalog_size} in catalog")
    # Reuse the `models` sub-command to render the catalog table.
    ctx.invoke(models)
@@ -0,0 +1,44 @@
1
+ from dataclasses import dataclass, field
2
+
3
+ @dataclass(slots=True)
4
+ class ServerConfig:
5
+ model_ref: str = ""
6
+ port: int = 8383
7
+ host: str = "0.0.0.0"
8
+ ngl: int = 0
9
+ flash_attn: bool = True
10
+ ctx_size: int = 8192
11
+ threads: int = 3
12
+ threads_batch: int = 3
13
+ parallel: int = 2
14
+ cache_type_k: str = "q8_0"
15
+ cache_type_v: str = "q8_0"
16
+ mlock: bool = True
17
+ cache_reuse: int = 256
18
+ rope_scaling: str = "linear"
19
+ rope_scale: float = 8.0
20
+ priority: int = 2
21
+
22
+ @dataclass(slots=True)
23
+ class SystemConfig:
24
+ manage_swap: bool = True
25
+ cpu_governor: str = "performance"
26
+ transparent_hugepages: str = "madvise"
27
+ vulkan_disabled: bool = True
28
+ opencl_disabled: bool = True
29
+ blis_single_thread: bool = True
30
+
31
+ @dataclass(slots=True)
32
+ class RPCConfig:
33
+ enabled: bool = False
34
+ host: str = "o7p3.stalk-symmetric.ts.net"
35
+ port: int = 50052
36
+ draft_model: str = "MaziyarPanahi/gemma-3-1b-it-GGUF:Q4_K_M"
37
+
38
@dataclass(slots=True)
class Config:
    """Top-level configuration bundling the three settings groups.

    Each group is built through a default factory, so every ``Config()``
    gets its own independent ServerConfig / SystemConfig / RPCConfig.
    """

    server: ServerConfig = field(default_factory=ServerConfig)
    system: SystemConfig = field(default_factory=SystemConfig)
    rpc: RPCConfig = field(default_factory=RPCConfig)


# Module-wide default instance. It is a single mutable object: anything that
# hands out its sub-configs shares them with every other user.
DEFAULT_CONFIG = Config()
@@ -0,0 +1,63 @@
1
+ import subprocess
2
+ import sys
3
+ from dataclasses import dataclass, field
4
+
5
+ from .models import MODEL_CATALOG, ModelEntry
6
+
7
+ @dataclass(slots=True)
8
+ class DownloadResult:
9
+ model: str
10
+ success: bool
11
+ message: str = ""
12
+
13
@dataclass(slots=True)
class DownloadManager:
    """Download catalog models by shelling out to the ``hf download`` CLI.

    Attributes:
        force: When true, ``--force-download`` is passed to every ``hf`` call.
        max_workers: Value forwarded to ``hf download --max-workers``.
        results: Results of the most recent ``download_all`` run.
    """

    force: bool = False
    max_workers: int = 4
    results: list[DownloadResult] = field(default_factory=list)

    @staticmethod
    def _error_text(exc: Exception) -> str:
        """Return a printable, never-None description of a failed ``hf`` call."""
        stderr = getattr(exc, "stderr", None)
        # TimeoutExpired may carry no stderr at all, or raw bytes.
        if isinstance(stderr, bytes):
            stderr = stderr.decode(errors="replace")
        stderr = (stderr or "").strip()
        return stderr or str(exc)

    def download(self, entry: ModelEntry) -> DownloadResult:
        """Download one catalog entry (and its projector, if it has one).

        Args:
            entry: Catalog entry whose ``repo``/``quant`` select the files.

        Returns:
            A DownloadResult; failures are reported through ``success=False``
            and a message rather than by raising.
        """
        repo_quant = f"{entry.repo}:{entry.quant}"
        cmd = [
            "hf", "download", entry.repo,
            "--include", f"*{entry.quant}.gguf",
            "--max-workers", str(self.max_workers),
        ]
        if self.force:
            cmd.append("--force-download")

        # Multimodal projector: separate call to avoid glob vs explicit conflict
        if entry.mmproj:
            proj_cmd = [
                "hf", "download", entry.repo,
                "--include", "mmproj-*f16.gguf",
                "--max-workers", str(self.max_workers),
            ]
            # Honour --force for the projector as well, like the main file.
            if self.force:
                proj_cmd.append("--force-download")
            try:
                subprocess.run(proj_cmd, check=True, capture_output=True, text=True, timeout=300)
            except (subprocess.CalledProcessError, subprocess.TimeoutExpired, OSError) as e:
                # OSError covers a missing `hf` executable.
                return DownloadResult(model=repo_quant, success=False,
                                      message=f"Projector download failed: {e}")

        try:
            result = subprocess.run(cmd, check=True, capture_output=True, text=True, timeout=600)
        except (subprocess.CalledProcessError, subprocess.TimeoutExpired, OSError) as e:
            # The message must always be a str: download_all slices it.
            return DownloadResult(model=repo_quant, success=False,
                                  message=self._error_text(e))
        return DownloadResult(model=repo_quant, success=True, message=result.stdout.strip())

    def download_all(self, *, roles=None) -> list[DownloadResult]:
        """Download every catalog entry, optionally restricted to some roles.

        Args:
            roles: Iterable of roles to keep; ``None`` or empty means all.

        Returns:
            The list stored in ``self.results`` (cleared and refilled here).
        """
        self.results.clear()
        entries = MODEL_CATALOG
        if roles:
            # Materialise once so a one-shot iterable is not exhausted by the
            # first membership test.
            wanted = tuple(roles)
            entries = [e for e in MODEL_CATALOG if e.role in wanted]
        for entry in entries:
            res = self.download(entry)
            self.results.append(res)
            status = "✅" if res.success else "❌"
            print(f" {status} {res.model}: {res.message[:120]}")
        return self.results

    @property
    def all_ok(self) -> bool:
        """True only if at least one download ran and all of them succeeded."""
        return all(r.success for r in self.results) if self.results else False
@@ -0,0 +1,52 @@
1
+ from dataclasses import dataclass, field
2
+ from enum import Enum, auto
3
+
4
class ModelRole(Enum):
    """Roles a catalog model can fill.

    Values are assigned by ``auto()`` in declaration order, so the member
    order below must not change. The lowercase member names double as the
    CLI ``--role`` choices.
    """

    CHAT = auto()
    VISION = auto()
    CODE = auto()
    SECURITY = auto()
    STEM = auto()
    SPEC_DRAFT = auto()
    EMBEDDED = auto()
12
+
13
@dataclass(slots=True)
class ModelEntry:
    """One row of the model catalog."""

    # --- identity (required) -----------------------------------------------
    role: ModelRole
    name: str             # human-readable display name
    repo: str             # HuggingFace user/repo
    quant: str            # e.g. Q4_K_M
    host: str             # "o7p5" or "o7p3"

    # --- optional details --------------------------------------------------
    port: int = 0         # 0 means "no dedicated port"; callers fall back
    size_mb: int = 0      # 0 is displayed as unknown
    mmproj: bool = False  # when true, a projector file is downloaded too
    needs_mmproj: bool = False
    note: str = ""        # free-text remark shown next to the name
25
+
26
# Verified catalog — May 2026
# One entry per role; ModelRegistry indexes this list by role and by name.
MODEL_CATALOG: list[ModelEntry] = [
    ModelEntry(
        role=ModelRole.CHAT,
        name="Llama 3.2 3B",
        repo="unsloth/Llama-3.2-3B-Instruct-GGUF",
        quant="Q4_K_M",
        host="o7p5",
        port=8383,
        size_mb=1900,
    ),
    ModelEntry(
        role=ModelRole.VISION,
        name="MiniCPM-V 4.6",
        repo="openbmb/MiniCPM-V-4.6-gguf",
        quant="Q4_K_M",
        host="o7p5",
        port=8384,
        size_mb=1600,
        mmproj=True,
        needs_mmproj=True,
    ),
    ModelEntry(
        role=ModelRole.CODE,
        name="Agent.Nano.Coder 2B",
        repo="WithinUsAI/Agent.Nano.Coder-2B-gguf",
        quant="Q4_K_M",
        host="o7p5",
        port=8385,
        size_mb=1200,
    ),
    ModelEntry(
        role=ModelRole.SECURITY,
        name="Qwen3 4B SafeRL",
        repo="ShahzebKhoso/Qwen3-4B-SafeRL-GGUF",
        quant="Q4_K_M",
        host="o7p5",
        port=8386,
        size_mb=2500,
    ),
    ModelEntry(
        role=ModelRole.STEM,
        name="LFM2.5 1.2B",
        repo="bartowski/LiquidAI_LFM2.5-1.2B-Instruct-GGUF",
        quant="Q8_0",
        host="o7p5",
        port=8387,
        size_mb=1250,
    ),
    ModelEntry(
        role=ModelRole.SPEC_DRAFT,
        name="Qwen2.5 Coder 0.5B",
        repo="featherless-ai-quants/unsloth-Qwen2.5-Coder-0.5B-GGUF",
        quant="Q4_K_M",
        host="o7p5",
        port=0,
        size_mb=398,
    ),
    ModelEntry(
        role=ModelRole.EMBEDDED,
        name="Gemma 3 1B",
        repo="MaziyarPanahi/gemma-3-1b-it-GGUF",
        quant="Q4_K_M",
        host="o7p3",
        port=50052,
        size_mb=800,
    ),
]
36
+
37
@dataclass(slots=True)
class ModelRegistry:
    """Lookup tables over MODEL_CATALOG, keyed by role and by display name."""

    _by_role: dict[ModelRole, ModelEntry] = field(default_factory=dict)
    _by_name: dict[str, ModelEntry] = field(default_factory=dict)

    def __post_init__(self):
        """Index every catalog entry; a later duplicate key replaces an earlier one."""
        self._by_role.update({entry.role: entry for entry in MODEL_CATALOG})
        self._by_name.update({entry.name: entry for entry in MODEL_CATALOG})

    def get(self, key: ModelRole | str) -> ModelEntry | None:
        """Return the entry for a role or a display name, or None if unknown."""
        # A ModelRole selects the role index; anything else is treated as a name.
        index = self._by_role if isinstance(key, ModelRole) else self._by_name
        return index.get(key)


# Shared registry instance, built once at import time.
registry = ModelRegistry()
@@ -0,0 +1,14 @@
1
+ import socket
2
+
3
def check_rpc(host: str = "o7p3.stalk-symmetric.ts.net", port: int = 50052) -> bool:
    """Return True if a TCP connection to ``host:port`` succeeds within 5 s.

    The connection is closed again immediately; only reachability is tested.
    """
    try:
        with socket.create_connection((host, port), timeout=5):
            pass
    except OSError:
        # socket.timeout and ConnectionRefusedError are both OSError
        # subclasses, so this single clause covers every case handled before.
        return False
    return True
9
+
10
def rpc_status(host: str = "o7p3.stalk-symmetric.ts.net", port: int = 50052) -> str:
    """Return a one-line, human-readable reachability report for the RPC server."""
    return (
        f"✅ RPC server reachable at {host}:{port}"
        if check_rpc(host, port)
        else f"❌ RPC server unreachable at {host}:{port}"
    )
@@ -0,0 +1,108 @@
1
+ import os
2
+ import signal
3
+ import subprocess
4
+ import sys
5
+ import time
6
+ import urllib.request
7
+ import urllib.error
8
+ from dataclasses import dataclass, field
9
+
10
+ from .config import ServerConfig, SystemConfig, RPCConfig, DEFAULT_CONFIG
11
+
12
@dataclass(slots=True)
class ServerManager:
    """Start, monitor and stop a single ``llama-server`` child process.

    Attributes:
        config: Server launch settings (command-line flags).
        sys_cfg: System switches that shape the child's environment.
        rpc_cfg: RPC offload settings.

    NOTE: the default factories hand out the sub-objects of the shared
    DEFAULT_CONFIG, so managers built without arguments share (and can
    mutate) the same config objects.
    """

    config: ServerConfig = field(default_factory=lambda: DEFAULT_CONFIG.server)
    sys_cfg: SystemConfig = field(default_factory=lambda: DEFAULT_CONFIG.system)
    rpc_cfg: RPCConfig = field(default_factory=lambda: DEFAULT_CONFIG.rpc)
    _process: subprocess.Popen | None = None

    def _build_env(self) -> dict[str, str]:
        """Return a copy of the current environment plus the tuning variables."""
        env = os.environ.copy()
        if self.sys_cfg.vulkan_disabled:
            env["GGML_VULKAN"] = "0"
        if self.sys_cfg.opencl_disabled:
            env["GGML_OPENCL"] = "0"
        if self.sys_cfg.blis_single_thread:
            env["BLIS_NUM_THREADS"] = "1"
        env["OMP_NUM_THREADS"] = "1"
        env["GOMP_SPINCOUNT"] = "0"
        env["LLAMA_ARG_THREADS"] = str(self.config.threads)
        env["LLAMA_ARG_THREADS_BATCH"] = str(self.config.threads_batch)
        return env

    def _build_cmd(self) -> list[str]:
        """Return the full argv: ``taskset`` pinning followed by ``llama-server``."""
        # Pin to CPUs 0..threads-1. Clamp so that threads < 1 still yields a
        # valid range ("0-0") instead of e.g. "0--1".
        last_cpu = max(self.config.threads, 1) - 1
        cmd = [
            "taskset", "-c", f"0-{last_cpu}",
            "llama-server",
            "-hf", self.config.model_ref,
            "-ngl", str(self.config.ngl),
            "-fa", "on" if self.config.flash_attn else "off",
            "-ctk", self.config.cache_type_k,
            "-ctv", self.config.cache_type_v,
            "-c", str(self.config.ctx_size),
            "-t", str(self.config.threads),
            "-tb", str(self.config.threads_batch),
            "-np", str(self.config.parallel),
            "--prio", str(self.config.priority),
            "--cache-reuse", str(self.config.cache_reuse),
            "--port", str(self.config.port),
            "--host", self.config.host,
            "--rope-scaling", self.config.rope_scaling,
            "--rope-scale", str(self.config.rope_scale),
            "--spec-type", "ngram-mod",
            "--spec-ngram-mod-n-min", "48",
            "--spec-ngram-mod-n-max", "64",
            "--spec-ngram-mod-n-match", "24",
            "--spec-draft-n-max", "3",
        ]
        if self.config.mlock:
            cmd.append("--mlock")
        if self.rpc_cfg.enabled:
            cmd.extend(["--rpc", f"{self.rpc_cfg.host}:{self.rpc_cfg.port}"])
        return cmd

    def start(self) -> bool:
        """Launch the server in its own session.

        Returns:
            True if the process was spawned (or is already running), False if
            the launcher executable could not be found. A True result does
            not mean the server is healthy; use ``wait_ready`` for that.
        """
        if self.is_running:
            return True
        cmd = self._build_cmd()
        env = self._build_env()
        try:
            self._process = subprocess.Popen(
                cmd, env=env,
                # Nothing ever reads the child's output. With PIPE the server
                # would block once the pipe buffer filled up, and its writes
                # would start failing as soon as this process exited, so
                # discard the output instead.
                stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL,
                start_new_session=True,
            )
        except FileNotFoundError as exc:
            # Only argv[0] (taskset) can raise here; a missing llama-server
            # makes taskset itself exit, which wait_ready() then observes.
            missing = exc.filename or cmd[0]
            print(f"❌ {missing} not found in PATH", file=sys.stderr)
            return False
        return True

    def stop(self, timeout: int = 10) -> bool:
        """Terminate the server, killing it if it ignores the request.

        Args:
            timeout: Seconds to wait after terminate() before kill().

        Returns:
            True if the process was not running or exited in time, False if
            it had to be killed.
        """
        if self._process is None or self._process.poll() is not None:
            return True
        self._process.terminate()
        try:
            self._process.wait(timeout=timeout)
            return True
        except subprocess.TimeoutExpired:
            self._process.kill()
            self._process.wait()
            return False

    @property
    def is_running(self) -> bool:
        """True while a spawned process exists and has not exited."""
        return self._process is not None and self._process.poll() is None

    def wait_ready(self, timeout: int = 300) -> bool:
        """Poll the local ``/health`` endpoint until it answers 200.

        Args:
            timeout: Maximum number of seconds to keep polling.

        Returns:
            True once the endpoint returns HTTP 200; False if the process
            exits first or the timeout elapses.
        """
        deadline = time.monotonic() + timeout
        url = f"http://127.0.0.1:{self.config.port}/health"
        while time.monotonic() < deadline:
            if not self.is_running:
                return False
            try:
                with urllib.request.urlopen(url, timeout=3) as r:
                    if r.status == 200:
                        return True
            except (urllib.error.URLError, OSError):
                # Expected while the server is still starting up.
                pass
            # Sleep after every unsuccessful probe, including a non-200
            # response, so the loop never spins at full speed.
            time.sleep(1)
        return False
@@ -0,0 +1,47 @@
1
+ import subprocess
2
+ import sys
3
+
4
def swap_off() -> bool:
    """Disable all swap via ``sudo swapoff -a``.

    Returns:
        True on success; False (after printing a warning to stderr) if the
        command fails or cannot be launched at all.
    """
    try:
        subprocess.run(["sudo", "swapoff", "-a"], check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as e:
        print(f"⚠️ swapoff failed: {e.stderr.strip()}", file=sys.stderr)
        return False
    except OSError as e:
        # e.g. sudo is not installed: report it instead of crashing.
        print(f"⚠️ swapoff failed: {e}", file=sys.stderr)
        return False
    return True
11
+
12
def swap_on() -> bool:
    """Re-enable all configured swap via ``sudo swapon -a``.

    Returns:
        True on success; False (after printing a warning to stderr) if the
        command fails or cannot be launched at all.
    """
    try:
        subprocess.run(["sudo", "swapon", "-a"], check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as e:
        print(f"⚠️ swapon failed: {e.stderr.strip()}", file=sys.stderr)
        return False
    except OSError as e:
        # e.g. sudo is not installed: report it instead of crashing.
        print(f"⚠️ swapon failed: {e}", file=sys.stderr)
        return False
    return True
19
+
20
def swap_status() -> str:
    """Describe the active swap areas as reported by ``swapon --show``.

    Returns:
        The command's output, "Swap is off" when it succeeds with no output,
        or "Unknown" when the command fails or cannot be run.
    """
    try:
        r = subprocess.run(["swapon", "--show"], capture_output=True, text=True)
    except (OSError, ValueError):
        # OSError: swapon missing or not executable; ValueError: undecodable output.
        return "Unknown"
    if r.returncode != 0:
        # A failed query also produces empty stdout; do not misreport it as
        # "Swap is off".
        return "Unknown"
    return r.stdout.strip() or "Swap is off"
26
+
27
def set_governor(governor: str = "performance") -> bool:
    """Write ``governor`` to every CPU's cpufreq ``scaling_governor`` file.

    Args:
        governor: Governor name; must consist of letters, digits, "_" or "-".

    Returns:
        True if the write command succeeded; False if the name is rejected,
        the command fails, or it cannot be launched.
    """
    # Reject anything that is not a plain token before it gets near a root shell.
    if not governor or not governor.replace("_", "").replace("-", "").isalnum():
        return False
    try:
        subprocess.run(
            # The shell is still needed to expand the cpu* glob, but the value
            # is passed as positional parameter $1 rather than being spliced
            # into the script text.
            ["sudo", "sh", "-c",
             "printf '%s\\n' \"$1\" | tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor",
             "sh", governor],
            check=True, capture_output=True, text=True,
        )
    except (subprocess.CalledProcessError, OSError):
        # OSError covers a missing sudo executable.
        return False
    return True
37
+
38
def enable_hugepages(mode: str = "madvise") -> bool:
    """Set the transparent hugepage mode via sysfs.

    Args:
        mode: One of "always", "madvise" or "never".

    Returns:
        True if the write command succeeded; False if the mode is not one of
        the accepted values, the command fails, or it cannot be launched.
    """
    # Only these values are accepted; this also keeps arbitrary text out of
    # the root shell below.
    if mode not in ("always", "madvise", "never"):
        return False
    try:
        subprocess.run(
            # The value travels as positional parameter $1, not as script text.
            ["sudo", "sh", "-c",
             "printf '%s\\n' \"$1\" | tee /sys/kernel/mm/transparent_hugepage/enabled",
             "sh", mode],
            check=True, capture_output=True, text=True,
        )
    except (subprocess.CalledProcessError, OSError):
        # OSError covers a missing sudo executable.
        return False
    return True