augint-shell 0.77.0__tar.gz → 0.78.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29) hide show
  1. {augint_shell-0.77.0 → augint_shell-0.78.0}/PKG-INFO +3 -3
  2. {augint_shell-0.77.0 → augint_shell-0.78.0}/README.md +2 -2
  3. {augint_shell-0.77.0 → augint_shell-0.78.0}/pyproject.toml +1 -1
  4. {augint_shell-0.77.0 → augint_shell-0.78.0}/src/ai_shell/__init__.py +1 -1
  5. augint_shell-0.78.0/src/ai_shell/cli/commands/llm.py +590 -0
  6. {augint_shell-0.77.0 → augint_shell-0.78.0}/src/ai_shell/cli/commands/tools.py +2 -4
  7. {augint_shell-0.77.0 → augint_shell-0.78.0}/src/ai_shell/config.py +15 -5
  8. {augint_shell-0.77.0 → augint_shell-0.78.0}/src/ai_shell/container.py +111 -28
  9. {augint_shell-0.77.0 → augint_shell-0.78.0}/src/ai_shell/defaults.py +10 -4
  10. {augint_shell-0.77.0 → augint_shell-0.78.0}/src/ai_shell/templates/ai-shell.yaml +5 -3
  11. {augint_shell-0.77.0 → augint_shell-0.78.0}/src/ai_shell/tmux.py +6 -2
  12. augint_shell-0.77.0/src/ai_shell/cli/commands/llm.py +0 -310
  13. {augint_shell-0.77.0 → augint_shell-0.78.0}/src/ai_shell/cli/__init__.py +0 -0
  14. {augint_shell-0.77.0 → augint_shell-0.78.0}/src/ai_shell/cli/__main__.py +0 -0
  15. {augint_shell-0.77.0 → augint_shell-0.78.0}/src/ai_shell/cli/commands/__init__.py +0 -0
  16. {augint_shell-0.77.0 → augint_shell-0.78.0}/src/ai_shell/cli/commands/manage.py +0 -0
  17. {augint_shell-0.77.0 → augint_shell-0.78.0}/src/ai_shell/exceptions.py +0 -0
  18. {augint_shell-0.77.0 → augint_shell-0.78.0}/src/ai_shell/gpu.py +0 -0
  19. {augint_shell-0.77.0 → augint_shell-0.78.0}/src/ai_shell/interactive.py +0 -0
  20. {augint_shell-0.77.0 → augint_shell-0.78.0}/src/ai_shell/local_chrome.py +0 -0
  21. {augint_shell-0.77.0 → augint_shell-0.78.0}/src/ai_shell/scaffold.py +0 -0
  22. {augint_shell-0.77.0 → augint_shell-0.78.0}/src/ai_shell/selector.py +0 -0
  23. {augint_shell-0.77.0 → augint_shell-0.78.0}/src/ai_shell/templates/__init__.py +0 -0
  24. {augint_shell-0.77.0 → augint_shell-0.78.0}/src/ai_shell/templates/ai-shell.toml +0 -0
  25. {augint_shell-0.77.0 → augint_shell-0.78.0}/src/ai_shell/templates/aider/__init__.py +0 -0
  26. {augint_shell-0.77.0 → augint_shell-0.78.0}/src/ai_shell/templates/claude/__init__.py +0 -0
  27. {augint_shell-0.77.0 → augint_shell-0.78.0}/src/ai_shell/templates/claude/settings.json +0 -0
  28. {augint_shell-0.77.0 → augint_shell-0.78.0}/src/ai_shell/templates/codex/__init__.py +0 -0
  29. {augint_shell-0.77.0 → augint_shell-0.78.0}/src/ai_shell/templates/opencode/__init__.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: augint-shell
3
- Version: 0.77.0
3
+ Version: 0.78.0
4
4
  Summary: Launch AI coding tools and local LLMs in Docker containers
5
5
  Author: svange
6
6
  Requires-Dist: docker>=7.0.0
@@ -76,7 +76,7 @@ ai-shell opencode
76
76
 
77
77
  | Command | Description |
78
78
  |---|---|
79
- | `ai-shell llm up` | Start Ollama + Open WebUI |
79
+ | `ai-shell llm up` | Start Ollama (add `--webui`, `--n8n`, or `--all` for optional stacks) |
80
80
  | `ai-shell llm down` | Stop LLM stack |
81
81
  | `ai-shell llm pull` | Pull configured models |
82
82
  | `ai-shell llm setup` | First-time setup (up + pull + configure) |
@@ -105,7 +105,7 @@ image_tag = "latest"
105
105
  extra_env = { MY_VAR = "value" }
106
106
 
107
107
  [llm]
108
- primary_model = "qwen3-coder:32b-a3b-q4_K_M"
108
+ primary_model = "qwen3-coder:30b-a3b-q4_K_M"
109
109
  fallback_model = "huihui_ai/llama3.3-abliterated"
110
110
  context_size = 32768
111
111
  ollama_port = 11434
@@ -63,7 +63,7 @@ ai-shell opencode
63
63
 
64
64
  | Command | Description |
65
65
  |---|---|
66
- | `ai-shell llm up` | Start Ollama + Open WebUI |
66
+ | `ai-shell llm up` | Start Ollama (add `--webui`, `--n8n`, or `--all` for optional stacks) |
67
67
  | `ai-shell llm down` | Stop LLM stack |
68
68
  | `ai-shell llm pull` | Pull configured models |
69
69
  | `ai-shell llm setup` | First-time setup (up + pull + configure) |
@@ -92,7 +92,7 @@ image_tag = "latest"
92
92
  extra_env = { MY_VAR = "value" }
93
93
 
94
94
  [llm]
95
- primary_model = "qwen3-coder:32b-a3b-q4_K_M"
95
+ primary_model = "qwen3-coder:30b-a3b-q4_K_M"
96
96
  fallback_model = "huihui_ai/llama3.3-abliterated"
97
97
  context_size = 32768
98
98
  ollama_port = 11434
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "augint-shell"
3
- version = "0.77.0"
3
+ version = "0.78.0"
4
4
  description = "Launch AI coding tools and local LLMs in Docker containers"
5
5
  authors = [{name = "svange"}]
6
6
  readme = "README.md"
@@ -1,6 +1,6 @@
1
1
  """augint-shell (ai-shell) - Launch AI coding tools and local LLMs in Docker containers."""
2
2
 
3
- __version__ = "0.77.0"
3
+ __version__ = "0.78.0"
4
4
 
5
5
  __all__ = [
6
6
  "__version__",
@@ -0,0 +1,590 @@
1
+ """LLM stack management commands: up, down, pull, setup, status, logs, shell.
2
+
3
+ Stack flags (applied to up/down/clean/setup):
4
+ --webui Open WebUI (OpenAI-style chat UI backed by Ollama). Kokoro
5
+ TTS starts with it by default (wired as WebUI's "read aloud"
6
+ backend); use --no-voice to skip.
7
+ --voice Kokoro-FastAPI (local OpenAI-compatible TTS) standalone.
8
+ --no-voice Opt-out: skip Kokoro even when --webui is set.
9
+ --n8n n8n workflow automation engine (standalone).
10
+ --all Enable every optional stack.
11
+
12
+ ``llm up`` with no flags starts only the base Ollama container.
13
+ """
14
+
15
+ import socket
16
+ import time
17
+ from http.client import HTTPException, HTTPSConnection
18
+ from pathlib import Path
19
+
20
+ import click
21
+ from rich.console import Console
22
+
23
+ from ai_shell.cli import CONTEXT_SETTINGS
24
+ from ai_shell.config import load_config
25
+ from ai_shell.container import ContainerManager
26
+ from ai_shell.defaults import (
27
+ KOKORO_CONTAINER,
28
+ N8N_CONTAINER,
29
+ N8N_DATA_VOLUME,
30
+ OLLAMA_CONTAINER,
31
+ OLLAMA_DATA_VOLUME,
32
+ WEBUI_CONTAINER,
33
+ WEBUI_DATA_VOLUME,
34
+ )
35
+ from ai_shell.gpu import get_vram_info, get_vram_processes
36
+
37
+ console = Console(stderr=True)
38
+
39
+ _LOW_MEMORY_THRESHOLD_GIB = 30 # 27B+ models need ~30 GiB
40
+ _OLLAMA_REGISTRY_HOST = "registry.ollama.ai"
41
+ _MANIFEST_PROBE_TIMEOUT = 5.0
42
+
43
+
44
+ def _parse_model_ref(ref: str) -> tuple[str, str, str]:
45
+ """Parse an Ollama model reference into (namespace, name, tag).
46
+
47
+ - "foo" -> ("library", "foo", "latest")
48
+ - "foo:tag" -> ("library", "foo", "tag")
49
+ - "ns/foo" -> ("ns", "foo", "latest")
50
+ - "ns/foo:tag" -> ("ns", "foo", "tag")
51
+ """
52
+ tag = "latest"
53
+ if ":" in ref:
54
+ ref, tag = ref.rsplit(":", 1)
55
+ if "/" in ref:
56
+ namespace, name = ref.split("/", 1)
57
+ else:
58
+ namespace, name = "library", ref
59
+ return namespace, name, tag
60
+
61
+
62
def _manifest_exists(model_ref: str) -> bool | None:
    """Ask the Ollama registry whether a manifest exists for *model_ref*.

    Issues a single HEAD request against the registry's v2 manifest
    endpoint.

    Returns:
        True on HTTP 200, False on HTTP 404, and None for anything else
        (any other status, or an OSError / HTTPException while talking
        to the registry). None means "unknown": callers should not block
        on it, so an unreachable registry never prevents a pull that
        might still succeed.
    """
    namespace, name, tag = _parse_model_ref(model_ref)
    conn = HTTPSConnection(_OLLAMA_REGISTRY_HOST, timeout=_MANIFEST_PROBE_TIMEOUT)
    try:
        conn.request(
            "HEAD",
            f"/v2/{namespace}/{name}/manifests/{tag}",
            headers={"Accept": "application/vnd.docker.distribution.manifest.v2+json"},
        )
        reply = conn.getresponse()
        reply.read()  # drain the (empty) body before closing
        status = reply.status
    except (OSError, HTTPException):
        return None
    finally:
        conn.close()

    # Only 200 and 404 are definitive answers; everything else is "unknown".
    return {200: True, 404: False}.get(status)
91
+
92
+
93
def _tag_list_url(model_ref: str) -> str:
    """Build the ollama.com page that lists the tags for *model_ref*.

    Official models are parsed into the "library" namespace, so the same
    ``/{namespace}/{name}/tags`` layout covers both official and
    user-namespaced models.
    """
    namespace, name, _unused_tag = _parse_model_ref(model_ref)
    return f"https://ollama.com/{namespace}/{name}/tags"
99
+
100
+
101
def _validate_models_or_abort(*model_refs: str) -> None:
    """Abort early when a configured model tag is definitely not in the registry.

    Only a definitive "not found" answer from ``_manifest_exists`` (False)
    counts. An unknown result (None) is ignored so that offline use, a
    local mirror, or a transient network problem never blocks a pull.

    Raises:
        click.Abort: if at least one reference was reported missing.
    """
    not_found = [ref for ref in model_refs if _manifest_exists(ref) is False]
    if not_found:
        console.print(
            "[bold red]Error:[/bold red] the following model tag(s) were not found "
            "on the Ollama registry:"
        )
        for ref in not_found:
            console.print(f" - [cyan]{ref}[/cyan] (tags: {_tag_list_url(ref)})")
        console.print(
            "\nUpdate [bold]primary_model[/bold] / [bold]fallback_model[/bold] in "
            "your ai-shell config to a valid tag and retry."
        )
        raise click.Abort()
126
+
127
+
128
+ def _lan_ip() -> str | None:
129
+ """Return the host's primary LAN IPv4 address, or None if undetectable.
130
+
131
+ Uses a UDP socket's routing-table selection without actually sending
132
+ traffic. Works on Linux, Mac, and WSL2. On WSL2 this returns the
133
+ WSL VM's eth0 address (typically 172.x.x.x), which is reachable from
134
+ the Windows host but not the broader LAN unless WSL mirrored mode or
135
+ a Windows portproxy is configured.
136
+ """
137
+ try:
138
+ with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as s:
139
+ s.connect(("8.8.8.8", 80))
140
+ ip = str(s.getsockname()[0])
141
+ except OSError:
142
+ return None
143
+ if ip.startswith("127."):
144
+ return None
145
+ return ip
146
+
147
+
148
def _warn_if_low_memory() -> None:
    """Warn when RAM + swap looks too small for large models.

    Reads ``/proc/meminfo`` and sums ``MemTotal`` and ``SwapTotal``. When
    the total is below ``_LOW_MEMORY_THRESHOLD_GIB`` it prints a warning
    with instructions for raising the WSL2 memory limit.

    This check is advisory only, so it never raises. It returns silently
    when the file cannot be read (e.g. not on Linux) or when ``MemTotal``
    cannot be determined, and it skips any line it cannot parse rather
    than failing the surrounding command.
    """
    try:
        meminfo = Path("/proc/meminfo").read_text()
    except OSError:
        return  # Not on Linux, skip silently

    # Totals are converted to GiB by dividing by 1024 * 1024, as before.
    totals_gib = {"MemTotal:": 0.0, "SwapTotal:": 0.0}
    seen: set[str] = set()
    for line in meminfo.splitlines():
        fields = line.split()
        if len(fields) < 2 or fields[0] not in totals_gib:
            continue
        try:
            totals_gib[fields[0]] = int(fields[1]) / (1024 * 1024)
        except ValueError:
            continue  # malformed value: ignore rather than crash the command
        seen.add(fields[0])

    if "MemTotal:" not in seen:
        return  # cannot judge memory without MemTotal; avoid a bogus 0.0 GiB warning

    mem_total_gib = totals_gib["MemTotal:"]
    swap_total_gib = totals_gib["SwapTotal:"]
    total_gib = mem_total_gib + swap_total_gib
    if total_gib < _LOW_MEMORY_THRESHOLD_GIB:
        console.print(
            f"\n[yellow bold]Warning:[/yellow bold] System has "
            f"{mem_total_gib:.1f} GiB RAM + {swap_total_gib:.1f} GiB swap "
            f"= {total_gib:.1f} GiB total."
        )
        console.print(
            "[yellow]Large models (27B+) need ~30 GiB. "
            "To increase, edit [bold]%UserProfile%\\.wslconfig[/bold] on Windows:[/yellow]"
        )
        console.print("[yellow] [wsl2][/yellow]")
        console.print("[yellow] memory=32GB[/yellow]")
        console.print("[yellow] swap=32GB[/yellow]")
        console.print("[yellow]Then run: [bold]wsl --shutdown[/bold]\n[/yellow]")
178
+
179
+
180
def _get_manager(ctx) -> ContainerManager:
    """Build a ContainerManager from the Click context.

    The optional "project" entry of ``ctx.obj`` is passed to
    ``load_config`` as the project override; the current working
    directory is used as the project directory.
    """
    project = None
    if ctx.obj:
        project = ctx.obj.get("project")
    return ContainerManager(load_config(project_override=project, project_dir=Path.cwd()))
185
+
186
+
187
+ def _resolve_stacks(
188
+ webui: bool, voice: bool, no_voice: bool, n8n: bool, all_: bool
189
+ ) -> tuple[bool, bool, bool]:
190
+ """Resolve stack flags into concrete (webui, voice, n8n) enablement.
191
+
192
+ Rules:
193
+ - ``--all`` turns on every optional stack.
194
+ - ``--webui`` implies ``--voice`` (Kokoro is wired as WebUI's TTS backend).
195
+ - ``--no-voice`` is the opt-out and always wins.
196
+ - ``--n8n`` is standalone with no implied sibling stacks.
197
+
198
+ Extension pattern: when we add ``--libre`` / ``--dify`` / ``--hands``,
199
+ they become additional parameters here with the same ``all_`` expansion.
200
+ """
201
+ if all_:
202
+ webui = True
203
+ voice = True
204
+ n8n = True
205
+ if webui:
206
+ voice = True
207
+ if no_voice:
208
+ voice = False
209
+ return webui, voice, n8n
210
+
211
+
212
+ # Shared decorators for stack flags on up/down/clean/setup.
213
def _stack_flags(func):
    """Decorate *func* with the shared stack options.

    Adds ``--webui``, ``--voice``, ``--no-voice``, ``--n8n`` and ``--all``
    (bound to the ``all_`` parameter). The options are applied in the
    order listed below, so the last entry ends up outermost.
    """
    option_decorators = (
        click.option("--all", "all_", is_flag=True, help="Enable every optional stack."),
        click.option("--n8n", is_flag=True, help="n8n workflow automation engine (port 5678)."),
        click.option(
            "--no-voice",
            "no_voice",
            is_flag=True,
            help="Skip Kokoro TTS even when --webui is set.",
        ),
        click.option(
            "--voice",
            is_flag=True,
            help="Kokoro local TTS (OpenAI-compatible, port 8880). Implied by --webui.",
        ),
        click.option(
            "--webui", is_flag=True, help="Open WebUI (Kokoro TTS wired automatically)."
        ),
    )
    for decorate in option_decorators:
        func = decorate(func)
    return func
233
+
234
+
235
@click.group("llm", context_settings=CONTEXT_SETTINGS)
@click.pass_context
def llm_group(ctx):
    """Manage the local LLM stack (Ollama + optional Open WebUI / TTS)."""
    # Intentionally has no body: this is only the parent group. The
    # subcommands in this module attach themselves through
    # ``@llm_group.command(...)``.
239
+
240
+
241
@llm_group.command("up")
@_stack_flags
@click.pass_context
def llm_up(ctx, webui: bool, voice: bool, no_voice: bool, n8n: bool, all_: bool):
    """Start the LLM stack.

    With no flags, starts only Ollama. ``--webui`` brings up Open WebUI and
    (by default) wires Kokoro TTS as its "read aloud" backend; pass
    ``--no-voice`` to skip TTS. ``--voice`` alone runs Kokoro standalone.
    ``--n8n`` brings up n8n workflow automation.
    """
    webui, voice, n8n = _resolve_stacks(webui, voice, no_voice, n8n, all_)
    manager = _get_manager(ctx)
    config = manager.config
    console.print("[bold]Starting LLM stack...[/bold]")
    _warn_if_low_memory()

    # (label, port, url suffix) of each service started, in start order,
    # so the LAN section can list exactly the same endpoints.
    started = []

    def _start(label, port, suffix, ensure):
        # Start one service, then report its localhost URL.
        ensure()
        console.print(f" {label} http://localhost:{port}{suffix}")
        started.append((label, port, suffix))

    _start("Ollama API:", config.ollama_port, "", manager.ensure_ollama)
    if voice:
        _start("Kokoro TTS:", config.kokoro_port, "/v1", manager.ensure_kokoro)
    if webui:
        _start(
            "Open WebUI:",
            config.webui_port,
            "",
            lambda: manager.ensure_webui(voice_enabled=voice),
        )
    if n8n:
        _start("n8n:", config.n8n_port, "", manager.ensure_n8n)

    lan = _lan_ip()
    if lan:
        console.print("\n[bold]LAN access[/bold] (bound to 0.0.0.0):")
        for label, port, suffix in started:
            console.print(f" {label} http://{lan}:{port}{suffix}")

    console.print("\n[bold green]LLM stack is running.[/bold green]")
285
+
286
+
287
@llm_group.command("down")
@_stack_flags
@click.pass_context
def llm_down(ctx, webui: bool, voice: bool, no_voice: bool, n8n: bool, all_: bool):
    """Stop containers in the LLM stack.

    With no flags, stops only Ollama. Use stack flags or --all to stop
    additional stacks.
    """
    webui, voice, n8n = _resolve_stacks(webui, voice, no_voice, n8n, all_)
    manager = _get_manager(ctx)
    console.print("[bold]Stopping LLM stack...[/bold]")

    # Ollama is always targeted; the optional stacks only when selected.
    selection = (
        (OLLAMA_CONTAINER, True),
        (WEBUI_CONTAINER, webui),
        (KOKORO_CONTAINER, voice),
        (N8N_CONTAINER, n8n),
    )
    for name in [container for container, wanted in selection if wanted]:
        state = manager.container_status(name)
        if state is None:
            console.print(f" Not found: {name}")
            continue
        if state != "running":
            console.print(f" Already stopped: {name}")
            continue
        manager.stop_container(name)
        console.print(f" Stopped: {name}")

    console.print("[bold green]LLM stack stopped.[/bold green]")
319
+
320
+
321
@llm_group.command("clean")
@_stack_flags
@click.option(
    "--wipe",
    is_flag=True,
    help="Also wipe persistent data (models, chat history). Irreversible.",
)
@click.option("--yes", "-y", "assume_yes", is_flag=True, help="Skip the confirmation prompt.")
@click.pass_context
def llm_clean(
    ctx,
    webui: bool,
    voice: bool,
    no_voice: bool,
    n8n: bool,
    all_: bool,
    wipe: bool,
    assume_yes: bool,
):
    """Remove LLM containers and (with --wipe) persistent data.

    With no stack flags, removes the base Ollama container only. Use stack
    flags or --all to also remove other stacks. --wipe additionally deletes
    named Docker volumes.
    """
    webui, voice, n8n = _resolve_stacks(webui, voice, no_voice, n8n, all_)
    manager = _get_manager(ctx)

    container_selection = (
        (OLLAMA_CONTAINER, True),
        (WEBUI_CONTAINER, webui),
        (KOKORO_CONTAINER, voice),
        (N8N_CONTAINER, n8n),
    )
    containers = [name for name, wanted in container_selection if wanted]

    # Volumes are only touched with --wipe; the voice stack has no volume
    # in this list.
    doomed_volumes: list[str] = []
    if wipe:
        volume_selection = (
            (OLLAMA_DATA_VOLUME, True),
            (WEBUI_DATA_VOLUME, webui),
            (N8N_DATA_VOLUME, n8n),
        )
        doomed_volumes = [name for name, wanted in volume_selection if wanted]

    if not assume_yes:
        scope = (
            "containers + volumes (models and chat history will be deleted)"
            if wipe
            else "containers only (data preserved)"
        )
        console.print(f"[bold]About to remove:[/bold] {scope}")
        if not click.confirm("Continue?", default=False):
            console.print("Aborted.")
            return

    console.print("[bold]Cleaning LLM stack...[/bold]")
    for name in containers:
        if manager.container_status(name) is not None:
            manager.remove_container(name)
            console.print(f" Removed: {name}")
        else:
            console.print(f" Not found: {name}")

    # Empty unless --wipe was given.
    for volume in doomed_volumes:
        if manager.remove_volume(volume):
            console.print(f" Removed volume: {volume}")
        else:
            console.print(f" Volume not found: {volume}")

    console.print("[bold green]LLM stack cleaned.[/bold green]")
    console.print("Run [bold]ai-shell llm up[/bold] to recreate containers.")
392
+
393
+
394
@llm_group.command("pull")
@click.pass_context
def llm_pull(ctx):
    """Pull LLM models into Ollama."""
    manager = _get_manager(ctx)
    config = manager.config

    # Fail fast on tags the registry definitively does not have.
    _validate_models_or_abort(config.primary_model, config.fallback_model)

    # (leading separator, role, model): only the first heading has no
    # blank line before it.
    pulls = (
        ("", "primary", config.primary_model),
        ("\n", "fallback", config.fallback_model),
    )
    for lead, role, model in pulls:
        console.print(f"{lead}[bold]Pulling {role} model: {model}...[/bold]")
        console.print(manager.exec_in_ollama(["ollama", "pull", model]))

    console.print("\n[bold]Available models:[/bold]")
    console.print(manager.exec_in_ollama(["ollama", "list"]))
414
+
415
+
416
@llm_group.command("setup")
@_stack_flags
@click.pass_context
def llm_setup(ctx, webui: bool, voice: bool, no_voice: bool, n8n: bool, all_: bool):
    """First-time setup: start stack, pull models, configure context.

    Accepts the same stack flags as ``llm up``. With no flags, sets up only
    the base Ollama container and pulls the configured primary/fallback models.
    """
    webui, voice, n8n = _resolve_stacks(webui, voice, no_voice, n8n, all_)
    manager = _get_manager(ctx)
    config = manager.config

    # Validate before starting anything so a bad tag fails immediately.
    _validate_models_or_abort(config.primary_model, config.fallback_model)

    console.print("[bold]Starting LLM stack...[/bold]")
    _warn_if_low_memory()
    manager.ensure_ollama()
    if voice:
        manager.ensure_kokoro()
    if webui:
        manager.ensure_webui(voice_enabled=voice)
    if n8n:
        manager.ensure_n8n()

    console.print("[bold]Waiting for Ollama to be ready...[/bold]")
    ready = False
    for attempt in range(1, 11):
        try:
            ready = manager.exec_in_ollama(["ollama", "list"]) is not None
        except Exception:
            # Best-effort poll: any failure just means "not ready yet".
            ready = False
        if ready:
            break
        console.print(f" Waiting... ({attempt}/10)")
        time.sleep(2)
    if not ready:
        console.print("[bold red]Ollama failed to start after 20s[/bold red]")
        raise click.Abort()

    for role, model in (
        ("primary", config.primary_model),
        ("fallback", config.fallback_model),
    ):
        console.print(f"\n[bold]Pulling {role} model: {model}...[/bold]")
        console.print(manager.exec_in_ollama(["ollama", "pull", model]))

    banner = "[bold green]============================================[/bold green]"
    console.print(f"\n{banner}")
    console.print("[bold green] Setup complete![/bold green]")
    console.print(f" Ollama API: http://localhost:{config.ollama_port}")
    if voice:
        console.print(f" Kokoro TTS: http://localhost:{config.kokoro_port}/v1")
    if webui:
        console.print(f" Open WebUI: http://localhost:{config.webui_port}")
    if n8n:
        console.print(f" n8n: http://localhost:{config.n8n_port}")
    console.print(f"\n Primary model: {config.primary_model}")
    console.print(f" Fallback model: {config.fallback_model}")
    console.print(f" Context window: {config.context_size} tokens")
    console.print(banner)
476
+
477
+
478
def _render_container_row(manager: ContainerManager, name: str, label: str) -> None:
    """Print one `llm status` row for container *name*.

    The row is green when the container is running, yellow for any other
    reported status, and dimmed "absent" when ``container_status`` returns
    None.
    """
    status = manager.container_status(name)
    if status is None:
        console.print(f" [dim]{label:<20} absent ({name})[/dim]")
        return
    color = "green" if status == "running" else "yellow"
    console.print(
        f" [{color}]{label:<20}[/{color}] [{color}]{status}[/{color}] [dim]({name})[/dim]"
    )
489
+
490
+
491
@llm_group.command("status")
@click.pass_context
def llm_status(ctx):
    """Show status of all known LLM containers, URLs, and loaded models."""
    manager = _get_manager(ctx)
    config = manager.config

    # One heading + row per stack; only the first heading has no blank
    # line before it.
    stack_rows = (
        ("[bold]Base stack[/bold]", OLLAMA_CONTAINER, "Ollama"),
        ("\n[bold]WebUI stack[/bold]", WEBUI_CONTAINER, "Open WebUI"),
        ("\n[bold]Voice stack[/bold]", KOKORO_CONTAINER, "Kokoro TTS"),
        ("\n[bold]n8n stack[/bold]", N8N_CONTAINER, "n8n"),
    )
    for heading, container, label in stack_rows:
        console.print(heading)
        _render_container_row(manager, container, label)

    console.print("\n[bold]Access URLs:[/bold]")
    access_urls = (
        ("Ollama API:", OLLAMA_CONTAINER, f"http://localhost:{config.ollama_port}"),
        (" OpenAI-compat:", OLLAMA_CONTAINER, f"http://localhost:{config.ollama_port}/v1"),
        ("Open WebUI:", WEBUI_CONTAINER, f"http://localhost:{config.webui_port}"),
        ("Kokoro TTS:", KOKORO_CONTAINER, f"http://localhost:{config.kokoro_port}/v1"),
        ("n8n:", N8N_CONTAINER, f"http://localhost:{config.n8n_port}"),
    )
    for label, container, url in access_urls:
        # URLs of containers that are not running are dimmed and flagged.
        if manager.container_status(container) == "running":
            color, suffix = "cyan", ""
        else:
            color, suffix = "dim", " (not running)"
        console.print(f" {label:<18} [{color}]{url}[/{color}]{suffix}")

    lan = _lan_ip()
    if lan:
        console.print("\n[bold]LAN access[/bold] (bound to 0.0.0.0):")
        console.print(f" Ollama API: http://{lan}:{config.ollama_port}")
        console.print(f" Open WebUI: http://{lan}:{config.webui_port}")
        console.print(f" Kokoro TTS: http://{lan}:{config.kokoro_port}/v1")
        console.print(f" n8n: http://{lan}:{config.n8n_port}")

    console.print("\n[bold]Configuration:[/bold]")
    console.print(f" Primary model: {config.primary_model}")
    console.print(f" Fallback model: {config.fallback_model}")
    console.print(f" Context window: {config.context_size} tokens")

    vram = get_vram_info()
    if vram is not None:
        console.print("\n[bold]GPU VRAM:[/bold]")
        console.print(
            f" Total: {vram['total'] / 1024**3:.1f} GiB "
            f"Used: {vram['used'] / 1024**3:.1f} GiB "
            f"Free: {vram['free'] / 1024**3:.1f} GiB"
        )
        consumers = get_vram_processes()
        console.print("\n [bold]VRAM consumers:[/bold]")
        if not consumers:
            console.print(" (none)")
        else:
            # Largest consumer first (second tuple field is the usage).
            ranked = sorted(consumers, key=lambda entry: entry[1], reverse=True)
            for pid, vram_mb, name in ranked:
                console.print(f" PID {pid:<8} {name:<20} {vram_mb / 1024:.1f} GiB")

    if manager.container_status(OLLAMA_CONTAINER) == "running":
        console.print("\n[bold]Available models:[/bold]")
        console.print(manager.exec_in_ollama(["ollama", "list"]))
557
+
558
+
559
@llm_group.command("logs")
@click.option("--follow", "-f", is_flag=True, help="Follow log output.")
@click.pass_context
def llm_logs(ctx, follow):
    """Tail logs from the LLM stack."""
    manager = _get_manager(ctx)

    # Follow mode streams the Ollama container only.
    if follow:
        manager.container_logs(OLLAMA_CONTAINER, follow=True)
        return

    # Otherwise show the last 50 lines of every container that exists.
    known_containers = (
        OLLAMA_CONTAINER,
        WEBUI_CONTAINER,
        KOKORO_CONTAINER,
        N8N_CONTAINER,
    )
    for name in known_containers:
        if manager.container_status(name) is None:
            continue
        console.print(f"\n[bold]--- {name} ---[/bold]")
        manager.container_logs(name, follow=False, tail=50)
578
+
579
+
580
@llm_group.command("shell")
@click.pass_context
def llm_shell(ctx):
    """Open a bash shell in the Ollama container."""
    manager = _get_manager(ctx)
    # A shell can only be opened in a running container.
    if manager.container_status(OLLAMA_CONTAINER) != "running":
        console.print("[red]Ollama is not running. Run: ai-shell llm up[/red]")
        raise click.Abort()
    console.print("[bold]Opening shell in Ollama container...[/bold]")
    manager.exec_interactive(OLLAMA_CONTAINER, ["/bin/bash"])
@@ -37,9 +37,7 @@ def _generate_worktree_name() -> str:
37
37
  def _print_tmux_quick_start() -> None:
38
38
  """Print a short tmux quick-start before attaching."""
39
39
  console.print("[dim]tmux: mouse click=focus drag=resize wheel=scroll[/dim]")
40
- console.print(
41
- "[dim] Ctrl-b o=pane c=tab Space=layout p/n=tab z=zoom d=detach &=kill-tab[/dim]"
42
- )
40
+ console.print("[dim] Ctrl-a o=pane c=tab Space=layout z=zoom d=detach &=kill-tab[/dim]")
43
41
 
44
42
 
45
43
  def _setup_worktree(container_name: str, container_project_dir: str, name: str) -> str:
@@ -764,7 +762,7 @@ def _launch_multi(
764
762
 
765
763
  # Check for existing tmux session before presenting the selector.
766
764
  # The container and session might still be running from a previous
767
- # invocation (e.g. after the user detached with C-b d or closed
765
+ # invocation (e.g. after the user detached with C-a d or closed
768
766
  # the terminal).
769
767
  check_cmd = build_check_session_command(container_name, session_name)
770
768
  has_session = subprocess.run(check_cmd, capture_output=True).returncode == 0