augint-shell 0.77.0__tar.gz → 0.78.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {augint_shell-0.77.0 → augint_shell-0.78.0}/PKG-INFO +3 -3
- {augint_shell-0.77.0 → augint_shell-0.78.0}/README.md +2 -2
- {augint_shell-0.77.0 → augint_shell-0.78.0}/pyproject.toml +1 -1
- {augint_shell-0.77.0 → augint_shell-0.78.0}/src/ai_shell/__init__.py +1 -1
- augint_shell-0.78.0/src/ai_shell/cli/commands/llm.py +590 -0
- {augint_shell-0.77.0 → augint_shell-0.78.0}/src/ai_shell/cli/commands/tools.py +2 -4
- {augint_shell-0.77.0 → augint_shell-0.78.0}/src/ai_shell/config.py +15 -5
- {augint_shell-0.77.0 → augint_shell-0.78.0}/src/ai_shell/container.py +111 -28
- {augint_shell-0.77.0 → augint_shell-0.78.0}/src/ai_shell/defaults.py +10 -4
- {augint_shell-0.77.0 → augint_shell-0.78.0}/src/ai_shell/templates/ai-shell.yaml +5 -3
- {augint_shell-0.77.0 → augint_shell-0.78.0}/src/ai_shell/tmux.py +6 -2
- augint_shell-0.77.0/src/ai_shell/cli/commands/llm.py +0 -310
- {augint_shell-0.77.0 → augint_shell-0.78.0}/src/ai_shell/cli/__init__.py +0 -0
- {augint_shell-0.77.0 → augint_shell-0.78.0}/src/ai_shell/cli/__main__.py +0 -0
- {augint_shell-0.77.0 → augint_shell-0.78.0}/src/ai_shell/cli/commands/__init__.py +0 -0
- {augint_shell-0.77.0 → augint_shell-0.78.0}/src/ai_shell/cli/commands/manage.py +0 -0
- {augint_shell-0.77.0 → augint_shell-0.78.0}/src/ai_shell/exceptions.py +0 -0
- {augint_shell-0.77.0 → augint_shell-0.78.0}/src/ai_shell/gpu.py +0 -0
- {augint_shell-0.77.0 → augint_shell-0.78.0}/src/ai_shell/interactive.py +0 -0
- {augint_shell-0.77.0 → augint_shell-0.78.0}/src/ai_shell/local_chrome.py +0 -0
- {augint_shell-0.77.0 → augint_shell-0.78.0}/src/ai_shell/scaffold.py +0 -0
- {augint_shell-0.77.0 → augint_shell-0.78.0}/src/ai_shell/selector.py +0 -0
- {augint_shell-0.77.0 → augint_shell-0.78.0}/src/ai_shell/templates/__init__.py +0 -0
- {augint_shell-0.77.0 → augint_shell-0.78.0}/src/ai_shell/templates/ai-shell.toml +0 -0
- {augint_shell-0.77.0 → augint_shell-0.78.0}/src/ai_shell/templates/aider/__init__.py +0 -0
- {augint_shell-0.77.0 → augint_shell-0.78.0}/src/ai_shell/templates/claude/__init__.py +0 -0
- {augint_shell-0.77.0 → augint_shell-0.78.0}/src/ai_shell/templates/claude/settings.json +0 -0
- {augint_shell-0.77.0 → augint_shell-0.78.0}/src/ai_shell/templates/codex/__init__.py +0 -0
- {augint_shell-0.77.0 → augint_shell-0.78.0}/src/ai_shell/templates/opencode/__init__.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: augint-shell
|
|
3
|
-
Version: 0.77.0
|
|
3
|
+
Version: 0.78.0
|
|
4
4
|
Summary: Launch AI coding tools and local LLMs in Docker containers
|
|
5
5
|
Author: svange
|
|
6
6
|
Requires-Dist: docker>=7.0.0
|
|
@@ -76,7 +76,7 @@ ai-shell opencode
|
|
|
76
76
|
|
|
77
77
|
| Command | Description |
|
|
78
78
|
|---|---|
|
|
79
|
-
| `ai-shell llm up` | Start Ollama
|
|
79
|
+
| `ai-shell llm up` | Start Ollama (add `--webui`, `--n8n`, or `--all` for optional stacks) |
|
|
80
80
|
| `ai-shell llm down` | Stop LLM stack |
|
|
81
81
|
| `ai-shell llm pull` | Pull configured models |
|
|
82
82
|
| `ai-shell llm setup` | First-time setup (up + pull + configure) |
|
|
@@ -105,7 +105,7 @@ image_tag = "latest"
|
|
|
105
105
|
extra_env = { MY_VAR = "value" }
|
|
106
106
|
|
|
107
107
|
[llm]
|
|
108
|
-
primary_model = "qwen3-coder:
|
|
108
|
+
primary_model = "qwen3-coder:30b-a3b-q4_K_M"
|
|
109
109
|
fallback_model = "huihui_ai/llama3.3-abliterated"
|
|
110
110
|
context_size = 32768
|
|
111
111
|
ollama_port = 11434
|
|
@@ -63,7 +63,7 @@ ai-shell opencode
|
|
|
63
63
|
|
|
64
64
|
| Command | Description |
|
|
65
65
|
|---|---|
|
|
66
|
-
| `ai-shell llm up` | Start Ollama
|
|
66
|
+
| `ai-shell llm up` | Start Ollama (add `--webui`, `--n8n`, or `--all` for optional stacks) |
|
|
67
67
|
| `ai-shell llm down` | Stop LLM stack |
|
|
68
68
|
| `ai-shell llm pull` | Pull configured models |
|
|
69
69
|
| `ai-shell llm setup` | First-time setup (up + pull + configure) |
|
|
@@ -92,7 +92,7 @@ image_tag = "latest"
|
|
|
92
92
|
extra_env = { MY_VAR = "value" }
|
|
93
93
|
|
|
94
94
|
[llm]
|
|
95
|
-
primary_model = "qwen3-coder:
|
|
95
|
+
primary_model = "qwen3-coder:30b-a3b-q4_K_M"
|
|
96
96
|
fallback_model = "huihui_ai/llama3.3-abliterated"
|
|
97
97
|
context_size = 32768
|
|
98
98
|
ollama_port = 11434
|
|
@@ -0,0 +1,590 @@
|
|
|
1
|
+
"""LLM stack management commands: up, down, pull, setup, status, logs, shell.
|
|
2
|
+
|
|
3
|
+
Stack flags (applied to up/down/clean/setup):
|
|
4
|
+
--webui Open WebUI (OpenAI-style chat UI backed by Ollama). Kokoro
|
|
5
|
+
TTS starts with it by default (wired as WebUI's "read aloud"
|
|
6
|
+
backend); use --no-voice to skip.
|
|
7
|
+
--voice Kokoro-FastAPI (local OpenAI-compatible TTS) standalone.
|
|
8
|
+
--no-voice Opt-out: skip Kokoro even when --webui is set.
|
|
9
|
+
--n8n n8n workflow automation engine (standalone).
|
|
10
|
+
--all Enable every optional stack.
|
|
11
|
+
|
|
12
|
+
``llm up`` with no flags starts only the base Ollama container.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
import socket
|
|
16
|
+
import time
|
|
17
|
+
from http.client import HTTPException, HTTPSConnection
|
|
18
|
+
from pathlib import Path
|
|
19
|
+
|
|
20
|
+
import click
|
|
21
|
+
from rich.console import Console
|
|
22
|
+
|
|
23
|
+
from ai_shell.cli import CONTEXT_SETTINGS
|
|
24
|
+
from ai_shell.config import load_config
|
|
25
|
+
from ai_shell.container import ContainerManager
|
|
26
|
+
from ai_shell.defaults import (
|
|
27
|
+
KOKORO_CONTAINER,
|
|
28
|
+
N8N_CONTAINER,
|
|
29
|
+
N8N_DATA_VOLUME,
|
|
30
|
+
OLLAMA_CONTAINER,
|
|
31
|
+
OLLAMA_DATA_VOLUME,
|
|
32
|
+
WEBUI_CONTAINER,
|
|
33
|
+
WEBUI_DATA_VOLUME,
|
|
34
|
+
)
|
|
35
|
+
from ai_shell.gpu import get_vram_info, get_vram_processes
|
|
36
|
+
|
|
37
|
+
console = Console(stderr=True)
|
|
38
|
+
|
|
39
|
+
_LOW_MEMORY_THRESHOLD_GIB = 30 # 27B+ models need ~30 GiB
|
|
40
|
+
_OLLAMA_REGISTRY_HOST = "registry.ollama.ai"
|
|
41
|
+
_MANIFEST_PROBE_TIMEOUT = 5.0
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def _parse_model_ref(ref: str) -> tuple[str, str, str]:
|
|
45
|
+
"""Parse an Ollama model reference into (namespace, name, tag).
|
|
46
|
+
|
|
47
|
+
- "foo" -> ("library", "foo", "latest")
|
|
48
|
+
- "foo:tag" -> ("library", "foo", "tag")
|
|
49
|
+
- "ns/foo" -> ("ns", "foo", "latest")
|
|
50
|
+
- "ns/foo:tag" -> ("ns", "foo", "tag")
|
|
51
|
+
"""
|
|
52
|
+
tag = "latest"
|
|
53
|
+
if ":" in ref:
|
|
54
|
+
ref, tag = ref.rsplit(":", 1)
|
|
55
|
+
if "/" in ref:
|
|
56
|
+
namespace, name = ref.split("/", 1)
|
|
57
|
+
else:
|
|
58
|
+
namespace, name = "library", ref
|
|
59
|
+
return namespace, name, tag
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def _manifest_exists(model_ref: str) -> bool | None:
|
|
63
|
+
"""Probe the Ollama registry for a model manifest.
|
|
64
|
+
|
|
65
|
+
Returns True if the manifest exists (HTTP 200), False if it
|
|
66
|
+
definitively does not (HTTP 404), or None if the check could not
|
|
67
|
+
be completed (network error, unexpected status). Callers should
|
|
68
|
+
treat None as "don't block" so an unreachable registry never
|
|
69
|
+
prevents a pull that might succeed from a local mirror.
|
|
70
|
+
"""
|
|
71
|
+
namespace, name, tag = _parse_model_ref(model_ref)
|
|
72
|
+
path = f"/v2/{namespace}/{name}/manifests/{tag}"
|
|
73
|
+
connection = HTTPSConnection(_OLLAMA_REGISTRY_HOST, timeout=_MANIFEST_PROBE_TIMEOUT)
|
|
74
|
+
try:
|
|
75
|
+
connection.request(
|
|
76
|
+
"HEAD",
|
|
77
|
+
path,
|
|
78
|
+
headers={"Accept": "application/vnd.docker.distribution.manifest.v2+json"},
|
|
79
|
+
)
|
|
80
|
+
response = connection.getresponse()
|
|
81
|
+
response.read() # drain so the connection is reusable / cleanly closed
|
|
82
|
+
if response.status == 200:
|
|
83
|
+
return True
|
|
84
|
+
if response.status == 404:
|
|
85
|
+
return False
|
|
86
|
+
return None
|
|
87
|
+
except (OSError, HTTPException):
|
|
88
|
+
return None
|
|
89
|
+
finally:
|
|
90
|
+
connection.close()
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def _tag_list_url(model_ref: str) -> str:
    """Build the ollama.com page URL that lists the tags of *model_ref*.

    The tag part of the reference is ignored; only namespace and name are
    used.
    """
    namespace, name, _tag = _parse_model_ref(model_ref)
    # Un-namespaced models are parsed into the "library" namespace, so one
    # template yields the same URL for both the library and the user case.
    return f"https://ollama.com/{namespace}/{name}/tags"
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def _validate_models_or_abort(*model_refs: str) -> None:
|
|
102
|
+
"""Fail fast if any referenced model tag is missing from the registry.
|
|
103
|
+
|
|
104
|
+
Definite 404s abort with a message pointing at the tag list page.
|
|
105
|
+
Network / unexpected errors are ignored so the check never blocks
|
|
106
|
+
a pull when the registry is simply unreachable (offline use, local
|
|
107
|
+
mirror, transient DNS issue, etc.).
|
|
108
|
+
"""
|
|
109
|
+
missing: list[str] = []
|
|
110
|
+
for ref in model_refs:
|
|
111
|
+
if _manifest_exists(ref) is False:
|
|
112
|
+
missing.append(ref)
|
|
113
|
+
if not missing:
|
|
114
|
+
return
|
|
115
|
+
console.print(
|
|
116
|
+
"[bold red]Error:[/bold red] the following model tag(s) were not found "
|
|
117
|
+
"on the Ollama registry:"
|
|
118
|
+
)
|
|
119
|
+
for ref in missing:
|
|
120
|
+
console.print(f" - [cyan]{ref}[/cyan] (tags: {_tag_list_url(ref)})")
|
|
121
|
+
console.print(
|
|
122
|
+
"\nUpdate [bold]primary_model[/bold] / [bold]fallback_model[/bold] in "
|
|
123
|
+
"your ai-shell config to a valid tag and retry."
|
|
124
|
+
)
|
|
125
|
+
raise click.Abort()
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def _lan_ip() -> str | None:
|
|
129
|
+
"""Return the host's primary LAN IPv4 address, or None if undetectable.
|
|
130
|
+
|
|
131
|
+
Uses a UDP socket's routing-table selection without actually sending
|
|
132
|
+
traffic. Works on Linux, Mac, and WSL2. On WSL2 this returns the
|
|
133
|
+
WSL VM's eth0 address (typically 172.x.x.x), which is reachable from
|
|
134
|
+
the Windows host but not the broader LAN unless WSL mirrored mode or
|
|
135
|
+
a Windows portproxy is configured.
|
|
136
|
+
"""
|
|
137
|
+
try:
|
|
138
|
+
with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as s:
|
|
139
|
+
s.connect(("8.8.8.8", 80))
|
|
140
|
+
ip = str(s.getsockname()[0])
|
|
141
|
+
except OSError:
|
|
142
|
+
return None
|
|
143
|
+
if ip.startswith("127."):
|
|
144
|
+
return None
|
|
145
|
+
return ip
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def _warn_if_low_memory() -> None:
    """Warn when RAM + swap may be insufficient for large models.

    Reads ``/proc/meminfo``; when the file cannot be read (e.g. not on
    Linux) the check is skipped silently. If RAM plus swap is below
    ``_LOW_MEMORY_THRESHOLD_GIB`` a warning with ``.wslconfig`` tuning
    hints is printed.
    """
    try:
        meminfo = Path("/proc/meminfo").read_text()
    except OSError:
        return  # Not on Linux, skip silently

    # /proc/meminfo reports values in KiB; convert to GiB.
    totals_gib = {"MemTotal:": 0.0, "SwapTotal:": 0.0}
    for entry in meminfo.splitlines():
        for key in totals_gib:
            if entry.startswith(key):
                totals_gib[key] = int(entry.split()[1]) / (1024 * 1024)
                break

    mem_total_gib = totals_gib["MemTotal:"]
    swap_total_gib = totals_gib["SwapTotal:"]
    total_gib = mem_total_gib + swap_total_gib
    if total_gib >= _LOW_MEMORY_THRESHOLD_GIB:
        return

    console.print(
        f"\n[yellow bold]Warning:[/yellow bold] System has "
        f"{mem_total_gib:.1f} GiB RAM + {swap_total_gib:.1f} GiB swap "
        f"= {total_gib:.1f} GiB total."
    )
    console.print(
        "[yellow]Large models (27B+) need ~30 GiB. "
        "To increase, edit [bold]%UserProfile%\\.wslconfig[/bold] on Windows:[/yellow]"
    )
    # The opening bracket is escaped: Rich would otherwise parse "[wsl2]" as
    # a markup tag and drop the section header from the printed hint.
    console.print("[yellow] \\[wsl2][/yellow]")
    console.print("[yellow] memory=32GB[/yellow]")
    console.print("[yellow] swap=32GB[/yellow]")
    console.print("[yellow]Then run: [bold]wsl --shutdown[/bold]\n[/yellow]")
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def _get_manager(ctx) -> ContainerManager:
    """Build a ContainerManager from the Click context.

    The optional ``project`` entry of ``ctx.obj`` is passed to
    ``load_config`` as the project override; the current working
    directory is used as the project directory.
    """
    project = None
    if ctx.obj:
        project = ctx.obj.get("project")
    return ContainerManager(load_config(project_override=project, project_dir=Path.cwd()))
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
def _resolve_stacks(
|
|
188
|
+
webui: bool, voice: bool, no_voice: bool, n8n: bool, all_: bool
|
|
189
|
+
) -> tuple[bool, bool, bool]:
|
|
190
|
+
"""Resolve stack flags into concrete (webui, voice, n8n) enablement.
|
|
191
|
+
|
|
192
|
+
Rules:
|
|
193
|
+
- ``--all`` turns on every optional stack.
|
|
194
|
+
- ``--webui`` implies ``--voice`` (Kokoro is wired as WebUI's TTS backend).
|
|
195
|
+
- ``--no-voice`` is the opt-out and always wins.
|
|
196
|
+
- ``--n8n`` is standalone with no implied sibling stacks.
|
|
197
|
+
|
|
198
|
+
Extension pattern: when we add ``--libre`` / ``--dify`` / ``--hands``,
|
|
199
|
+
they become additional parameters here with the same ``all_`` expansion.
|
|
200
|
+
"""
|
|
201
|
+
if all_:
|
|
202
|
+
webui = True
|
|
203
|
+
voice = True
|
|
204
|
+
n8n = True
|
|
205
|
+
if webui:
|
|
206
|
+
voice = True
|
|
207
|
+
if no_voice:
|
|
208
|
+
voice = False
|
|
209
|
+
return webui, voice, n8n
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
# Shared decorators for stack flags on up/down/clean/setup.
|
|
213
|
+
def _stack_flags(func):
    """Decorate *func* with the shared ``--webui/--voice/--no-voice/--n8n/--all`` flags.

    The wrapped command receives the keyword arguments ``webui``, ``voice``,
    ``no_voice``, ``n8n`` and ``all_``.
    """
    # Applied top to bottom, i.e. "--all" is attached first and "--webui"
    # last; the order is kept deliberately so option ordering is unchanged.
    option_specs = (
        (("--all", "all_"), "Enable every optional stack."),
        (("--n8n",), "n8n workflow automation engine (port 5678)."),
        (("--no-voice", "no_voice"), "Skip Kokoro TTS even when --webui is set."),
        (("--voice",), "Kokoro local TTS (OpenAI-compatible, port 8880). Implied by --webui."),
        (("--webui",), "Open WebUI (Kokoro TTS wired automatically)."),
    )
    for declarations, help_text in option_specs:
        func = click.option(*declarations, is_flag=True, help=help_text)(func)
    return func
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
# Root Click group for the ``llm`` subcommands; the function body is
# intentionally empty and only carries the group's docstring.
@click.group("llm", context_settings=CONTEXT_SETTINGS)
@click.pass_context
def llm_group(ctx):
    """Manage the local LLM stack (Ollama + optional Open WebUI / TTS)."""
|
|
239
|
+
|
|
240
|
+
|
|
241
|
+
@llm_group.command("up")
@_stack_flags
@click.pass_context
def llm_up(ctx, webui: bool, voice: bool, no_voice: bool, n8n: bool, all_: bool):
    """Start the LLM stack.

    With no flags, starts only Ollama. ``--webui`` brings up Open WebUI and
    (by default) wires Kokoro TTS as its "read aloud" backend; pass
    ``--no-voice`` to skip TTS. ``--voice`` alone runs Kokoro standalone.
    ``--n8n`` brings up n8n workflow automation.
    """
    webui, voice, n8n = _resolve_stacks(webui, voice, no_voice, n8n, all_)
    manager = _get_manager(ctx)
    config = manager.config
    console.print("[bold]Starting LLM stack...[/bold]")
    _warn_if_low_memory()

    # (label, "port[/path]") of every service started so far; reused below
    # to print the LAN variants in the same order.
    endpoints: list[tuple[str, str]] = []

    def _announce(label: str, location: str) -> None:
        endpoints.append((label, location))
        console.print(f" {label} http://localhost:{location}")

    # Each service is announced right after it has been ensured.
    manager.ensure_ollama()
    _announce("Ollama API:", f"{config.ollama_port}")

    if voice:
        manager.ensure_kokoro()
        _announce("Kokoro TTS:", f"{config.kokoro_port}/v1")

    if webui:
        manager.ensure_webui(voice_enabled=voice)
        _announce("Open WebUI:", f"{config.webui_port}")

    if n8n:
        manager.ensure_n8n()
        _announce("n8n:", f"{config.n8n_port}")

    lan = _lan_ip()
    if lan:
        console.print("\n[bold]LAN access[/bold] (bound to 0.0.0.0):")
        for label, location in endpoints:
            console.print(f" {label} http://{lan}:{location}")

    console.print("\n[bold green]LLM stack is running.[/bold green]")
|
|
285
|
+
|
|
286
|
+
|
|
287
|
+
@llm_group.command("down")
@_stack_flags
@click.pass_context
def llm_down(ctx, webui: bool, voice: bool, no_voice: bool, n8n: bool, all_: bool):
    """Stop containers in the LLM stack.

    With no flags, stops only Ollama. Use stack flags or --all to stop
    additional stacks.
    """
    webui, voice, n8n = _resolve_stacks(webui, voice, no_voice, n8n, all_)
    manager = _get_manager(ctx)
    console.print("[bold]Stopping LLM stack...[/bold]")

    # Ollama is always targeted; optional stacks only when selected.
    optional = (
        (webui, WEBUI_CONTAINER),
        (voice, KOKORO_CONTAINER),
        (n8n, N8N_CONTAINER),
    )
    targets = [OLLAMA_CONTAINER, *(name for enabled, name in optional if enabled)]

    for name in targets:
        status = manager.container_status(name)
        if status is None:
            console.print(f" Not found: {name}")
            continue
        if status != "running":
            console.print(f" Already stopped: {name}")
            continue
        manager.stop_container(name)
        console.print(f" Stopped: {name}")

    console.print("[bold green]LLM stack stopped.[/bold green]")
|
|
319
|
+
|
|
320
|
+
|
|
321
|
+
@llm_group.command("clean")
@_stack_flags
@click.option(
    "--wipe",
    is_flag=True,
    help="Also wipe persistent data (models, chat history). Irreversible.",
)
@click.option("--yes", "-y", "assume_yes", is_flag=True, help="Skip the confirmation prompt.")
@click.pass_context
def llm_clean(
    ctx,
    webui: bool,
    voice: bool,
    no_voice: bool,
    n8n: bool,
    all_: bool,
    wipe: bool,
    assume_yes: bool,
):
    """Remove LLM containers and (with --wipe) persistent data.

    With no stack flags, removes the base Ollama container only. Use stack
    flags or --all to also remove other stacks. --wipe additionally deletes
    named Docker volumes.
    """
    webui, voice, n8n = _resolve_stacks(webui, voice, no_voice, n8n, all_)
    manager = _get_manager(ctx)

    # Ollama is always included; the other stacks only when selected.
    selected_containers = (
        (True, OLLAMA_CONTAINER),
        (webui, WEBUI_CONTAINER),
        (voice, KOKORO_CONTAINER),
        (n8n, N8N_CONTAINER),
    )
    targets = [name for enabled, name in selected_containers if enabled]

    # Volumes are only collected with --wipe; the voice stack contributes none.
    selected_volumes = (
        (wipe, OLLAMA_DATA_VOLUME),
        (wipe and webui, WEBUI_DATA_VOLUME),
        (wipe and n8n, N8N_DATA_VOLUME),
    )
    volumes: list[str] = [name for enabled, name in selected_volumes if enabled]

    if not assume_yes:
        scope = (
            "containers + volumes (models and chat history will be deleted)"
            if wipe
            else "containers only (data preserved)"
        )
        console.print(f"[bold]About to remove:[/bold] {scope}")
        if not click.confirm("Continue?", default=False):
            console.print("Aborted.")
            return

    console.print("[bold]Cleaning LLM stack...[/bold]")
    for name in targets:
        if manager.container_status(name) is not None:
            manager.remove_container(name)
            console.print(f" Removed: {name}")
        else:
            console.print(f" Not found: {name}")

    # Empty unless --wipe was given, so no extra guard is needed.
    for volume in volumes:
        if manager.remove_volume(volume):
            console.print(f" Removed volume: {volume}")
        else:
            console.print(f" Volume not found: {volume}")

    console.print("[bold green]LLM stack cleaned.[/bold green]")
    console.print("Run [bold]ai-shell llm up[/bold] to recreate containers.")
|
|
392
|
+
|
|
393
|
+
|
|
394
|
+
@llm_group.command("pull")
@click.pass_context
def llm_pull(ctx):
    """Pull LLM models into Ollama."""
    manager = _get_manager(ctx)
    config = manager.config

    # Abort early on tags the registry definitively does not have.
    _validate_models_or_abort(config.primary_model, config.fallback_model)

    # Primary first, then fallback; only the second heading gets a leading
    # blank line.
    pulls = (
        ("", "primary", config.primary_model),
        ("\n", "fallback", config.fallback_model),
    )
    for lead, role, model in pulls:
        console.print(f"{lead}[bold]Pulling {role} model: {model}...[/bold]")
        console.print(manager.exec_in_ollama(["ollama", "pull", model]))

    console.print("\n[bold]Available models:[/bold]")
    console.print(manager.exec_in_ollama(["ollama", "list"]))
|
|
414
|
+
|
|
415
|
+
|
|
416
|
+
@llm_group.command("setup")
@_stack_flags
@click.pass_context
def llm_setup(ctx, webui: bool, voice: bool, no_voice: bool, n8n: bool, all_: bool):
    """First-time setup: start stack, pull models, configure context.

    Accepts the same stack flags as ``llm up``. With no flags, sets up only
    the base Ollama container and pulls the configured primary/fallback models.
    """
    webui, voice, n8n = _resolve_stacks(webui, voice, no_voice, n8n, all_)
    manager = _get_manager(ctx)
    config = manager.config

    # Validate tags before starting anything.
    _validate_models_or_abort(config.primary_model, config.fallback_model)

    console.print("[bold]Starting LLM stack...[/bold]")
    _warn_if_low_memory()
    manager.ensure_ollama()
    if voice:
        manager.ensure_kokoro()
    if webui:
        manager.ensure_webui(voice_enabled=voice)
    if n8n:
        manager.ensure_n8n()

    console.print("[bold]Waiting for Ollama to be ready...[/bold]")
    ready = False
    for attempt in range(1, 11):
        try:
            ready = manager.exec_in_ollama(["ollama", "list"]) is not None
        except Exception:
            # Best-effort poll: any failure just means "not ready yet".
            ready = False
        if ready:
            break
        console.print(f" Waiting... ({attempt}/10)")
        time.sleep(2)
    if not ready:
        console.print("[bold red]Ollama failed to start after 20s[/bold red]")
        raise click.Abort()

    for role, model in (("primary", config.primary_model), ("fallback", config.fallback_model)):
        console.print(f"\n[bold]Pulling {role} model: {model}...[/bold]")
        console.print(manager.exec_in_ollama(["ollama", "pull", model]))

    banner = "[bold green]============================================[/bold green]"
    console.print(f"\n{banner}")
    console.print("[bold green] Setup complete![/bold green]")
    console.print(f" Ollama API: http://localhost:{config.ollama_port}")
    if voice:
        console.print(f" Kokoro TTS: http://localhost:{config.kokoro_port}/v1")
    if webui:
        console.print(f" Open WebUI: http://localhost:{config.webui_port}")
    if n8n:
        console.print(f" n8n: http://localhost:{config.n8n_port}")
    console.print(f"\n Primary model: {config.primary_model}")
    console.print(f" Fallback model: {config.fallback_model}")
    console.print(f" Context window: {config.context_size} tokens")
    console.print(banner)
|
|
476
|
+
|
|
477
|
+
|
|
478
|
+
def _render_container_row(manager: ContainerManager, name: str, label: str) -> None:
    """Print one `llm status` row for container *name*.

    The row is green when the container is running, yellow (showing the
    reported status) for any other existing container, and dim with
    "absent" when ``container_status`` returns None.
    """
    status = manager.container_status(name)
    if status is None:
        console.print(f" [dim]{label:<20} absent ({name})[/dim]")
        return
    # Running and non-running rows share one layout; only the color differs.
    color = "green" if status == "running" else "yellow"
    console.print(
        f" [{color}]{label:<20}[/{color}] [{color}]{status}[/{color}] [dim]({name})[/dim]"
    )
|
|
489
|
+
|
|
490
|
+
|
|
491
|
+
@llm_group.command("status")
@click.pass_context
def llm_status(ctx):
    """Show status of all known LLM containers, URLs, and loaded models."""
    manager = _get_manager(ctx)
    config = manager.config

    # One heading plus one status row per stack.
    stack_rows = (
        ("[bold]Base stack[/bold]", OLLAMA_CONTAINER, "Ollama"),
        ("\n[bold]WebUI stack[/bold]", WEBUI_CONTAINER, "Open WebUI"),
        ("\n[bold]Voice stack[/bold]", KOKORO_CONTAINER, "Kokoro TTS"),
        ("\n[bold]n8n stack[/bold]", N8N_CONTAINER, "n8n"),
    )
    for heading, container, label in stack_rows:
        console.print(heading)
        _render_container_row(manager, container, label)

    console.print("\n[bold]Access URLs:[/bold]")
    url_rows = (
        ("Ollama API:", OLLAMA_CONTAINER, f"http://localhost:{config.ollama_port}"),
        (" OpenAI-compat:", OLLAMA_CONTAINER, f"http://localhost:{config.ollama_port}/v1"),
        ("Open WebUI:", WEBUI_CONTAINER, f"http://localhost:{config.webui_port}"),
        ("Kokoro TTS:", KOKORO_CONTAINER, f"http://localhost:{config.kokoro_port}/v1"),
        ("n8n:", N8N_CONTAINER, f"http://localhost:{config.n8n_port}"),
    )
    for label, container, url in url_rows:
        # URLs of containers that are not running are dimmed and flagged.
        if manager.container_status(container) == "running":
            console.print(f" {label:<18} [cyan]{url}[/cyan]")
        else:
            console.print(f" {label:<18} [dim]{url}[/dim] (not running)")

    lan = _lan_ip()
    if lan:
        console.print("\n[bold]LAN access[/bold] (bound to 0.0.0.0):")
        lan_rows = (
            ("Ollama API:", f"{config.ollama_port}"),
            ("Open WebUI:", f"{config.webui_port}"),
            ("Kokoro TTS:", f"{config.kokoro_port}/v1"),
            ("n8n:", f"{config.n8n_port}"),
        )
        for label, location in lan_rows:
            console.print(f" {label} http://{lan}:{location}")

    console.print("\n[bold]Configuration:[/bold]")
    console.print(f" Primary model: {config.primary_model}")
    console.print(f" Fallback model: {config.fallback_model}")
    console.print(f" Context window: {config.context_size} tokens")

    vram = get_vram_info()
    if vram is not None:
        gib = 1024**3
        console.print("\n[bold]GPU VRAM:[/bold]")
        console.print(
            f" Total: {vram['total'] / gib:.1f} GiB "
            f"Used: {vram['used'] / gib:.1f} GiB "
            f"Free: {vram['free'] / gib:.1f} GiB"
        )
        processes = get_vram_processes()
        console.print("\n [bold]VRAM consumers:[/bold]")
        if not processes:
            console.print(" (none)")
        else:
            # Largest consumer first.
            ranked = sorted(processes, key=lambda entry: entry[1], reverse=True)
            for pid, vram_mb, name in ranked:
                console.print(f" PID {pid:<8} {name:<20} {vram_mb / 1024:.1f} GiB")

    # The model list can only be queried while Ollama is running.
    if manager.container_status(OLLAMA_CONTAINER) == "running":
        console.print("\n[bold]Available models:[/bold]")
        console.print(manager.exec_in_ollama(["ollama", "list"]))
|
|
557
|
+
|
|
558
|
+
|
|
559
|
+
@llm_group.command("logs")
@click.option("--follow", "-f", is_flag=True, help="Follow log output.")
@click.pass_context
def llm_logs(ctx, follow):
    """Tail logs from the LLM stack."""
    manager = _get_manager(ctx)

    # Follow mode streams the Ollama container only.
    if follow:
        manager.container_logs(OLLAMA_CONTAINER, follow=True)
        return

    # Otherwise print the last 50 lines of every container that exists.
    for name in (OLLAMA_CONTAINER, WEBUI_CONTAINER, KOKORO_CONTAINER, N8N_CONTAINER):
        if manager.container_status(name) is None:
            continue
        console.print(f"\n[bold]--- {name} ---[/bold]")
        manager.container_logs(name, follow=False, tail=50)
|
|
578
|
+
|
|
579
|
+
|
|
580
|
+
@llm_group.command("shell")
@click.pass_context
def llm_shell(ctx):
    """Open a bash shell in the Ollama container."""
    manager = _get_manager(ctx)
    is_running = manager.container_status(OLLAMA_CONTAINER) == "running"
    if not is_running:
        # Nothing to attach to; point the user at the start command.
        console.print("[red]Ollama is not running. Run: ai-shell llm up[/red]")
        raise click.Abort()
    console.print("[bold]Opening shell in Ollama container...[/bold]")
    manager.exec_interactive(OLLAMA_CONTAINER, ["/bin/bash"])
|
|
@@ -37,9 +37,7 @@ def _generate_worktree_name() -> str:
|
|
|
37
37
|
def _print_tmux_quick_start() -> None:
|
|
38
38
|
"""Print a short tmux quick-start before attaching."""
|
|
39
39
|
console.print("[dim]tmux: mouse click=focus drag=resize wheel=scroll[/dim]")
|
|
40
|
-
console.print(
|
|
41
|
-
"[dim] Ctrl-b o=pane c=tab Space=layout p/n=tab z=zoom d=detach &=kill-tab[/dim]"
|
|
42
|
-
)
|
|
40
|
+
console.print("[dim] Ctrl-a o=pane c=tab Space=layout z=zoom d=detach &=kill-tab[/dim]")
|
|
43
41
|
|
|
44
42
|
|
|
45
43
|
def _setup_worktree(container_name: str, container_project_dir: str, name: str) -> str:
|
|
@@ -764,7 +762,7 @@ def _launch_multi(
|
|
|
764
762
|
|
|
765
763
|
# Check for existing tmux session before presenting the selector.
|
|
766
764
|
# The container and session might still be running from a previous
|
|
767
|
-
# invocation (e.g. after the user detached with C-b d or closed
|
|
765
|
+
# invocation (e.g. after the user detached with C-a d or closed
|
|
768
766
|
# the terminal).
|
|
769
767
|
check_cmd = build_check_session_command(container_name, session_name)
|
|
770
768
|
has_session = subprocess.run(check_cmd, capture_output=True).returncode == 0
|