augint-shell 0.78.0__tar.gz → 0.80.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {augint_shell-0.78.0 → augint_shell-0.80.0}/PKG-INFO +54 -16
- {augint_shell-0.78.0 → augint_shell-0.80.0}/README.md +53 -15
- {augint_shell-0.78.0 → augint_shell-0.80.0}/pyproject.toml +1 -1
- {augint_shell-0.78.0 → augint_shell-0.80.0}/src/ai_shell/__init__.py +1 -1
- {augint_shell-0.78.0 → augint_shell-0.80.0}/src/ai_shell/cli/commands/llm.py +28 -24
- {augint_shell-0.78.0 → augint_shell-0.80.0}/src/ai_shell/cli/commands/tools.py +6 -1
- {augint_shell-0.78.0 → augint_shell-0.80.0}/src/ai_shell/config.py +96 -11
- {augint_shell-0.78.0 → augint_shell-0.80.0}/src/ai_shell/container.py +5 -0
- {augint_shell-0.78.0 → augint_shell-0.80.0}/src/ai_shell/defaults.py +7 -2
- augint_shell-0.80.0/src/ai_shell/templates/ai-shell.yaml +72 -0
- augint_shell-0.78.0/src/ai_shell/templates/ai-shell.yaml +0 -207
- {augint_shell-0.78.0 → augint_shell-0.80.0}/src/ai_shell/cli/__init__.py +0 -0
- {augint_shell-0.78.0 → augint_shell-0.80.0}/src/ai_shell/cli/__main__.py +0 -0
- {augint_shell-0.78.0 → augint_shell-0.80.0}/src/ai_shell/cli/commands/__init__.py +0 -0
- {augint_shell-0.78.0 → augint_shell-0.80.0}/src/ai_shell/cli/commands/manage.py +0 -0
- {augint_shell-0.78.0 → augint_shell-0.80.0}/src/ai_shell/exceptions.py +0 -0
- {augint_shell-0.78.0 → augint_shell-0.80.0}/src/ai_shell/gpu.py +0 -0
- {augint_shell-0.78.0 → augint_shell-0.80.0}/src/ai_shell/interactive.py +0 -0
- {augint_shell-0.78.0 → augint_shell-0.80.0}/src/ai_shell/local_chrome.py +0 -0
- {augint_shell-0.78.0 → augint_shell-0.80.0}/src/ai_shell/scaffold.py +0 -0
- {augint_shell-0.78.0 → augint_shell-0.80.0}/src/ai_shell/selector.py +0 -0
- {augint_shell-0.78.0 → augint_shell-0.80.0}/src/ai_shell/templates/__init__.py +0 -0
- {augint_shell-0.78.0 → augint_shell-0.80.0}/src/ai_shell/templates/ai-shell.toml +0 -0
- {augint_shell-0.78.0 → augint_shell-0.80.0}/src/ai_shell/templates/aider/__init__.py +0 -0
- {augint_shell-0.78.0 → augint_shell-0.80.0}/src/ai_shell/templates/claude/__init__.py +0 -0
- {augint_shell-0.78.0 → augint_shell-0.80.0}/src/ai_shell/templates/claude/settings.json +0 -0
- {augint_shell-0.78.0 → augint_shell-0.80.0}/src/ai_shell/templates/codex/__init__.py +0 -0
- {augint_shell-0.78.0 → augint_shell-0.80.0}/src/ai_shell/templates/opencode/__init__.py +0 -0
- {augint_shell-0.78.0 → augint_shell-0.80.0}/src/ai_shell/tmux.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: augint-shell
|
|
3
|
-
Version: 0.78.0
|
|
3
|
+
Version: 0.80.0
|
|
4
4
|
Summary: Launch AI coding tools and local LLMs in Docker containers
|
|
5
5
|
Author: svange
|
|
6
6
|
Requires-Dist: docker>=7.0.0
|
|
@@ -96,29 +96,67 @@ ai-shell opencode
|
|
|
96
96
|
|
|
97
97
|
## Configuration
|
|
98
98
|
|
|
99
|
-
Optional
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
99
|
+
Optional `.ai-shell.yaml` in your project root (YAML is the default; TOML is
|
|
100
|
+
also accepted — see `ai-shell init` for the full generated template with
|
|
101
|
+
per-section rationale):
|
|
102
|
+
|
|
103
|
+
```yaml
|
|
104
|
+
container:
|
|
105
|
+
image: svange/augint-shell
|
|
106
|
+
image_tag: latest
|
|
107
|
+
extra_env:
|
|
108
|
+
MY_VAR: value
|
|
109
|
+
|
|
110
|
+
llm:
|
|
111
|
+
primary_chat_model: qwen3.5:27b
|
|
112
|
+
secondary_chat_model: huihui_ai/qwen3.5-abliterated:27b
|
|
113
|
+
primary_coding_model: qwen3-coder:30b-a3b-q4_K_M
|
|
114
|
+
secondary_coding_model: huihui_ai/qwen3-coder-abliterated:30b-a3b-instruct-q4_K_M
|
|
115
|
+
context_size: 32768
|
|
116
|
+
ollama_port: 11434
|
|
117
|
+
webui_port: 3000
|
|
118
|
+
extra_models: [] # additional Ollama tags to pull alongside the 4 slots
|
|
113
119
|
```
|
|
114
120
|
|
|
115
|
-
Global config at `~/.config/ai-shell/config.
|
|
121
|
+
Global config at `~/.ai-shell.yaml` or `~/.config/ai-shell/config.yaml` is
|
|
122
|
+
also supported.
|
|
123
|
+
|
|
124
|
+
> The previous `primary_model` / `fallback_model` keys were removed. They were
|
|
125
|
+
> role-ambiguous (chat vs. coding). If you had them set, move them to the
|
|
126
|
+
> matching slot above. ai-shell will refuse to start with those legacy keys
|
|
127
|
+
> present and print a migration hint.
|
|
116
128
|
|
|
117
129
|
`ai-shell` does not manage tool-specific config files for Codex, OpenCode, or
|
|
118
130
|
Aider. Use `augint-opencodex` or the tools' native config files for those, and
|
|
119
131
|
use `ai-shell` for container/runtime settings such as AWS profiles, local LLM
|
|
120
132
|
ports, and Claude options.
|
|
121
133
|
|
|
134
|
+
### Local LLM stack
|
|
135
|
+
|
|
136
|
+
Four role-specific model slots, each sized for an RTX 4090 (24 GiB VRAM). All
|
|
137
|
+
four defaults together total ~74 GB on disk.
|
|
138
|
+
|
|
139
|
+
| Slot | Default | Size | Role | Routed to |
|
|
140
|
+
|---|---|---|---|---|
|
|
141
|
+
| `primary_chat_model` | `qwen3.5:27b` | 17 GB | Best chat model that fits a 4090 | Open WebUI default |
|
|
142
|
+
| `secondary_chat_model` | `huihui_ai/qwen3.5-abliterated:27b` | 17 GB | Best uncensored chat (abliterated Qwen3.5) | Open WebUI (selectable) |
|
|
143
|
+
| `primary_coding_model` | `qwen3-coder:30b-a3b-q4_K_M` | 19 GB | Best agentic coder with explicit Ollama tools badge | OpenCode / Aider default |
|
|
144
|
+
| `secondary_coding_model` | `huihui_ai/qwen3-coder-abliterated:30b-a3b-instruct-q4_K_M` | 19 GB | Best uncensored coder (abliterated Qwen3-Coder) | OpenCode (selectable) |
|
|
145
|
+
|
|
146
|
+
Each pair shares a base model — primary is the standard aligned release;
|
|
147
|
+
secondary is the huihui.ai abliterated variant (refusal directions neutralized
|
|
148
|
+
via weight surgery, benchmark quality preserved). Switching primary <->
|
|
149
|
+
secondary within a slot keeps tool formats and context semantics identical.
|
|
150
|
+
|
|
151
|
+
`ai-shell llm pull` / `ai-shell llm setup` downloads all 4 slots plus any
|
|
152
|
+
`extra_models` entries, deduped.
|
|
153
|
+
|
|
154
|
+
**Three caveats worth knowing:**
|
|
155
|
+
|
|
156
|
+
1. **Qwen3.5 Ollama tool calling is broken** ([ollama #14493](https://github.com/ollama/ollama/issues/14493), open). This does not affect Open WebUI's default chat with web search and RAG — those run server-side in WebUI without touching Ollama's tools API. It does affect agent CLIs routed through Ollama's `/v1/chat/completions` tools array, which is why the chat slots are Qwen3.5 and the coding slots are Qwen3-Coder (explicit tools badge, working parser).
|
|
157
|
+
2. **Ollama `num_ctx` defaults to 4096** for every model, well below what modern agent prompts need (Claude Code sends ~35K tokens). `context_size` in your config is applied via Modelfile override during `llm setup` — leave it at 32768 unless you have a reason.
|
|
158
|
+
3. **Qwen3-Coder tool-count cliff**: reliable native `tool_calls` emission below ~5 registered tools; above that the model may emit XML inside content and some parsers miss it. Keep agent tool sets tight.
|
|
159
|
+
|
|
122
160
|
## How It Works
|
|
123
161
|
|
|
124
162
|
- Pulls a pre-built Docker image from Docker Hub (`svange/augint-shell`)
|
|
@@ -83,29 +83,67 @@ ai-shell opencode
|
|
|
83
83
|
|
|
84
84
|
## Configuration
|
|
85
85
|
|
|
86
|
-
Optional
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
86
|
+
Optional `.ai-shell.yaml` in your project root (YAML is the default; TOML is
|
|
87
|
+
also accepted — see `ai-shell init` for the full generated template with
|
|
88
|
+
per-section rationale):
|
|
89
|
+
|
|
90
|
+
```yaml
|
|
91
|
+
container:
|
|
92
|
+
image: svange/augint-shell
|
|
93
|
+
image_tag: latest
|
|
94
|
+
extra_env:
|
|
95
|
+
MY_VAR: value
|
|
96
|
+
|
|
97
|
+
llm:
|
|
98
|
+
primary_chat_model: qwen3.5:27b
|
|
99
|
+
secondary_chat_model: huihui_ai/qwen3.5-abliterated:27b
|
|
100
|
+
primary_coding_model: qwen3-coder:30b-a3b-q4_K_M
|
|
101
|
+
secondary_coding_model: huihui_ai/qwen3-coder-abliterated:30b-a3b-instruct-q4_K_M
|
|
102
|
+
context_size: 32768
|
|
103
|
+
ollama_port: 11434
|
|
104
|
+
webui_port: 3000
|
|
105
|
+
extra_models: [] # additional Ollama tags to pull alongside the 4 slots
|
|
100
106
|
```
|
|
101
107
|
|
|
102
|
-
Global config at `~/.config/ai-shell/config.
|
|
108
|
+
Global config at `~/.ai-shell.yaml` or `~/.config/ai-shell/config.yaml` is
|
|
109
|
+
also supported.
|
|
110
|
+
|
|
111
|
+
> The previous `primary_model` / `fallback_model` keys were removed. They were
|
|
112
|
+
> role-ambiguous (chat vs. coding). If you had them set, move them to the
|
|
113
|
+
> matching slot above. ai-shell will refuse to start with those legacy keys
|
|
114
|
+
> present and print a migration hint.
|
|
103
115
|
|
|
104
116
|
`ai-shell` does not manage tool-specific config files for Codex, OpenCode, or
|
|
105
117
|
Aider. Use `augint-opencodex` or the tools' native config files for those, and
|
|
106
118
|
use `ai-shell` for container/runtime settings such as AWS profiles, local LLM
|
|
107
119
|
ports, and Claude options.
|
|
108
120
|
|
|
121
|
+
### Local LLM stack
|
|
122
|
+
|
|
123
|
+
Four role-specific model slots, each sized for an RTX 4090 (24 GiB VRAM). All
|
|
124
|
+
four defaults together total ~74 GB on disk.
|
|
125
|
+
|
|
126
|
+
| Slot | Default | Size | Role | Routed to |
|
|
127
|
+
|---|---|---|---|---|
|
|
128
|
+
| `primary_chat_model` | `qwen3.5:27b` | 17 GB | Best chat model that fits a 4090 | Open WebUI default |
|
|
129
|
+
| `secondary_chat_model` | `huihui_ai/qwen3.5-abliterated:27b` | 17 GB | Best uncensored chat (abliterated Qwen3.5) | Open WebUI (selectable) |
|
|
130
|
+
| `primary_coding_model` | `qwen3-coder:30b-a3b-q4_K_M` | 19 GB | Best agentic coder with explicit Ollama tools badge | OpenCode / Aider default |
|
|
131
|
+
| `secondary_coding_model` | `huihui_ai/qwen3-coder-abliterated:30b-a3b-instruct-q4_K_M` | 19 GB | Best uncensored coder (abliterated Qwen3-Coder) | OpenCode (selectable) |
|
|
132
|
+
|
|
133
|
+
Each pair shares a base model — primary is the standard aligned release;
|
|
134
|
+
secondary is the huihui.ai abliterated variant (refusal directions neutralized
|
|
135
|
+
via weight surgery, benchmark quality preserved). Switching primary <->
|
|
136
|
+
secondary within a slot keeps tool formats and context semantics identical.
|
|
137
|
+
|
|
138
|
+
`ai-shell llm pull` / `ai-shell llm setup` downloads all 4 slots plus any
|
|
139
|
+
`extra_models` entries, deduped.
|
|
140
|
+
|
|
141
|
+
**Three caveats worth knowing:**
|
|
142
|
+
|
|
143
|
+
1. **Qwen3.5 Ollama tool calling is broken** ([ollama #14493](https://github.com/ollama/ollama/issues/14493), open). This does not affect Open WebUI's default chat with web search and RAG — those run server-side in WebUI without touching Ollama's tools API. It does affect agent CLIs routed through Ollama's `/v1/chat/completions` tools array, which is why the chat slots are Qwen3.5 and the coding slots are Qwen3-Coder (explicit tools badge, working parser).
|
|
144
|
+
2. **Ollama `num_ctx` defaults to 4096** for every model, well below what modern agent prompts need (Claude Code sends ~35K tokens). `context_size` in your config is applied via Modelfile override during `llm setup` — leave it at 32768 unless you have a reason.
|
|
145
|
+
3. **Qwen3-Coder tool-count cliff**: reliable native `tool_calls` emission below ~5 registered tools; above that the model may emit XML inside content and some parsers miss it. Keep agent tool sets tight.
|
|
146
|
+
|
|
109
147
|
## How It Works
|
|
110
148
|
|
|
111
149
|
- Pulls a pre-built Docker image from Docker Hub (`svange/augint-shell`)
|
|
@@ -119,8 +119,8 @@ def _validate_models_or_abort(*model_refs: str) -> None:
|
|
|
119
119
|
for ref in missing:
|
|
120
120
|
console.print(f" - [cyan]{ref}[/cyan] (tags: {_tag_list_url(ref)})")
|
|
121
121
|
console.print(
|
|
122
|
-
"\nUpdate [bold]
|
|
123
|
-
"your ai-shell config to a valid tag and retry."
|
|
122
|
+
"\nUpdate the relevant [bold]*_chat_model[/bold] / [bold]*_coding_model[/bold] "
|
|
123
|
+
"entry (or [bold]extra_models[/bold]) in your ai-shell config to a valid tag and retry."
|
|
124
124
|
)
|
|
125
125
|
raise click.Abort()
|
|
126
126
|
|
|
@@ -398,15 +398,13 @@ def llm_pull(ctx):
|
|
|
398
398
|
manager = _get_manager(ctx)
|
|
399
399
|
config = manager.config
|
|
400
400
|
|
|
401
|
-
|
|
401
|
+
models = config.models_to_pull
|
|
402
|
+
_validate_models_or_abort(*models)
|
|
402
403
|
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
console.print(f"\n[bold]Pulling fallback model: {config.fallback_model}...[/bold]")
|
|
408
|
-
output = manager.exec_in_ollama(["ollama", "pull", config.fallback_model])
|
|
409
|
-
console.print(output)
|
|
404
|
+
for model in models:
|
|
405
|
+
console.print(f"[bold]Pulling {model}...[/bold]")
|
|
406
|
+
output = manager.exec_in_ollama(["ollama", "pull", model])
|
|
407
|
+
console.print(output)
|
|
410
408
|
|
|
411
409
|
console.print("\n[bold]Available models:[/bold]")
|
|
412
410
|
output = manager.exec_in_ollama(["ollama", "list"])
|
|
@@ -426,7 +424,8 @@ def llm_setup(ctx, webui: bool, voice: bool, no_voice: bool, n8n: bool, all_: bo
|
|
|
426
424
|
manager = _get_manager(ctx)
|
|
427
425
|
config = manager.config
|
|
428
426
|
|
|
429
|
-
|
|
427
|
+
models = config.models_to_pull
|
|
428
|
+
_validate_models_or_abort(*models)
|
|
430
429
|
|
|
431
430
|
console.print("[bold]Starting LLM stack...[/bold]")
|
|
432
431
|
_warn_if_low_memory()
|
|
@@ -452,13 +451,10 @@ def llm_setup(ctx, webui: bool, voice: bool, no_voice: bool, n8n: bool, all_: bo
|
|
|
452
451
|
console.print("[bold red]Ollama failed to start after 20s[/bold red]")
|
|
453
452
|
raise click.Abort()
|
|
454
453
|
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
console.print(f"\n[bold]Pulling fallback model: {config.fallback_model}...[/bold]")
|
|
460
|
-
output = manager.exec_in_ollama(["ollama", "pull", config.fallback_model])
|
|
461
|
-
console.print(output)
|
|
454
|
+
for model in models:
|
|
455
|
+
console.print(f"\n[bold]Pulling {model}...[/bold]")
|
|
456
|
+
output = manager.exec_in_ollama(["ollama", "pull", model])
|
|
457
|
+
console.print(output)
|
|
462
458
|
|
|
463
459
|
console.print("\n[bold green]============================================[/bold green]")
|
|
464
460
|
console.print("[bold green] Setup complete![/bold green]")
|
|
@@ -469,9 +465,13 @@ def llm_setup(ctx, webui: bool, voice: bool, no_voice: bool, n8n: bool, all_: bo
|
|
|
469
465
|
console.print(f" Open WebUI: http://localhost:{config.webui_port}")
|
|
470
466
|
if n8n:
|
|
471
467
|
console.print(f" n8n: http://localhost:{config.n8n_port}")
|
|
472
|
-
console.print(f"\n Primary
|
|
473
|
-
console.print(f"
|
|
474
|
-
console.print(f"
|
|
468
|
+
console.print(f"\n Primary chat: {config.primary_chat_model}")
|
|
469
|
+
console.print(f" Secondary chat: {config.secondary_chat_model}")
|
|
470
|
+
console.print(f" Primary coding: {config.primary_coding_model}")
|
|
471
|
+
console.print(f" Secondary coding: {config.secondary_coding_model}")
|
|
472
|
+
if config.extra_models:
|
|
473
|
+
console.print(f" Extra models: {', '.join(config.extra_models)}")
|
|
474
|
+
console.print(f" Context window: {config.context_size} tokens")
|
|
475
475
|
console.print("[bold green]============================================[/bold green]")
|
|
476
476
|
|
|
477
477
|
|
|
@@ -530,9 +530,13 @@ def llm_status(ctx):
|
|
|
530
530
|
console.print(f" n8n: http://{lan}:{config.n8n_port}")
|
|
531
531
|
|
|
532
532
|
console.print("\n[bold]Configuration:[/bold]")
|
|
533
|
-
console.print(f" Primary
|
|
534
|
-
console.print(f"
|
|
535
|
-
console.print(f"
|
|
533
|
+
console.print(f" Primary chat: {config.primary_chat_model}")
|
|
534
|
+
console.print(f" Secondary chat: {config.secondary_chat_model}")
|
|
535
|
+
console.print(f" Primary coding: {config.primary_coding_model}")
|
|
536
|
+
console.print(f" Secondary coding: {config.secondary_coding_model}")
|
|
537
|
+
if config.extra_models:
|
|
538
|
+
console.print(f" Extra models: {', '.join(config.extra_models)}")
|
|
539
|
+
console.print(f" Context window: {config.context_size} tokens")
|
|
536
540
|
|
|
537
541
|
vram = get_vram_info()
|
|
538
542
|
if vram is not None:
|
|
@@ -1154,6 +1154,11 @@ def opencode(
|
|
|
1154
1154
|
manager.ensure_tool_fresh(name, "opencode")
|
|
1155
1155
|
|
|
1156
1156
|
cmd = ["/root/.opencode/bin/opencode"]
|
|
1157
|
+
if not use_bedrock:
|
|
1158
|
+
# Default OpenCode to the primary coding slot (benchmark-optimized,
|
|
1159
|
+
# explicit Ollama tools badge). Users can switch to the secondary
|
|
1160
|
+
# (uncensored) slot in the OpenCode model picker at runtime.
|
|
1161
|
+
cmd.extend(["--model", f"ollama/{config.primary_coding_model}"])
|
|
1157
1162
|
console.print(f"[bold]Launching opencode{bedrock_label} in {name}...[/bold]")
|
|
1158
1163
|
manager.exec_interactive(name, cmd, extra_env=exec_env)
|
|
1159
1164
|
|
|
@@ -1165,7 +1170,7 @@ def opencode(
|
|
|
1165
1170
|
def aider(ctx, safe, extra_args):
|
|
1166
1171
|
"""Launch aider with local LLM in the dev container."""
|
|
1167
1172
|
manager, name, exec_env, config = _get_manager(ctx)
|
|
1168
|
-
aider_model = f"ollama_chat/{config.
|
|
1173
|
+
aider_model = f"ollama_chat/{config.primary_coding_model}"
|
|
1169
1174
|
cmd = ["aider", "--model", aider_model]
|
|
1170
1175
|
if not safe:
|
|
1171
1176
|
cmd.append("--yes-always")
|
|
@@ -24,13 +24,15 @@ from ai_shell import __version__
|
|
|
24
24
|
from ai_shell.defaults import (
|
|
25
25
|
DEFAULT_CONTEXT_SIZE,
|
|
26
26
|
DEFAULT_DEV_PORTS,
|
|
27
|
-
DEFAULT_FALLBACK_MODEL,
|
|
28
27
|
DEFAULT_IMAGE,
|
|
29
28
|
DEFAULT_KOKORO_PORT,
|
|
30
29
|
DEFAULT_KOKORO_VOICE,
|
|
31
30
|
DEFAULT_N8N_PORT,
|
|
32
31
|
DEFAULT_OLLAMA_PORT,
|
|
33
|
-
|
|
32
|
+
DEFAULT_PRIMARY_CHAT_MODEL,
|
|
33
|
+
DEFAULT_PRIMARY_CODING_MODEL,
|
|
34
|
+
DEFAULT_SECONDARY_CHAT_MODEL,
|
|
35
|
+
DEFAULT_SECONDARY_CODING_MODEL,
|
|
34
36
|
DEFAULT_WEBUI_PORT,
|
|
35
37
|
)
|
|
36
38
|
|
|
@@ -47,9 +49,15 @@ class AiShellConfig:
|
|
|
47
49
|
project_name: str = ""
|
|
48
50
|
project_dir: Path = field(default_factory=Path.cwd)
|
|
49
51
|
|
|
50
|
-
# LLM
|
|
51
|
-
|
|
52
|
-
|
|
52
|
+
# LLM model slots. Primary = best-available; secondary = best uncensored
|
|
53
|
+
# alternative. Chat slots are routed to Open WebUI, coding slots to
|
|
54
|
+
# OpenCode / Aider. `extra_models` is a free-form list of additional
|
|
55
|
+
# Ollama tags to pull alongside the 4 slots (deduped).
|
|
56
|
+
primary_chat_model: str = DEFAULT_PRIMARY_CHAT_MODEL
|
|
57
|
+
secondary_chat_model: str = DEFAULT_SECONDARY_CHAT_MODEL
|
|
58
|
+
primary_coding_model: str = DEFAULT_PRIMARY_CODING_MODEL
|
|
59
|
+
secondary_coding_model: str = DEFAULT_SECONDARY_CODING_MODEL
|
|
60
|
+
extra_models: list[str] = field(default_factory=list)
|
|
53
61
|
context_size: int = DEFAULT_CONTEXT_SIZE
|
|
54
62
|
ollama_port: int = DEFAULT_OLLAMA_PORT
|
|
55
63
|
webui_port: int = DEFAULT_WEBUI_PORT
|
|
@@ -85,6 +93,28 @@ class AiShellConfig:
|
|
|
85
93
|
"""Return deduplicated, sorted list of dev container ports to expose."""
|
|
86
94
|
return sorted(set(DEFAULT_DEV_PORTS + self.extra_ports))
|
|
87
95
|
|
|
96
|
+
@property
|
|
97
|
+
def models_to_pull(self) -> list[str]:
|
|
98
|
+
"""Return the full deduped list of Ollama model tags to pull.
|
|
99
|
+
|
|
100
|
+
The 4 slots in order, followed by any ``extra_models``. Duplicates
|
|
101
|
+
are removed while preserving first-occurrence order.
|
|
102
|
+
"""
|
|
103
|
+
ordered = [
|
|
104
|
+
self.primary_chat_model,
|
|
105
|
+
self.secondary_chat_model,
|
|
106
|
+
self.primary_coding_model,
|
|
107
|
+
self.secondary_coding_model,
|
|
108
|
+
*self.extra_models,
|
|
109
|
+
]
|
|
110
|
+
seen: set[str] = set()
|
|
111
|
+
deduped: list[str] = []
|
|
112
|
+
for model in ordered:
|
|
113
|
+
if model and model not in seen:
|
|
114
|
+
seen.add(model)
|
|
115
|
+
deduped.append(model)
|
|
116
|
+
return deduped
|
|
117
|
+
|
|
88
118
|
|
|
89
119
|
def load_config(
|
|
90
120
|
project_override: str | None = None,
|
|
@@ -151,6 +181,39 @@ def _load_config_file(path: Path) -> dict:
|
|
|
151
181
|
return tomllib.load(f)
|
|
152
182
|
|
|
153
183
|
|
|
184
|
+
_LEGACY_LLM_KEY_HINT = {
|
|
185
|
+
"primary_model": (
|
|
186
|
+
"renamed to `primary_coding_model` (coding) or `primary_chat_model` "
|
|
187
|
+
"(chat). The new config uses 4 role-specific slots; pick the one "
|
|
188
|
+
"that matches your intent. See the generated .ai-shell.yaml for the "
|
|
189
|
+
"full layout."
|
|
190
|
+
),
|
|
191
|
+
"fallback_model": (
|
|
192
|
+
"removed. The previous `fallback_model` was role-ambiguous. Use "
|
|
193
|
+
"`secondary_chat_model` and `secondary_coding_model` instead "
|
|
194
|
+
"(both default to the best uncensored variants). See the generated "
|
|
195
|
+
".ai-shell.yaml for the full layout."
|
|
196
|
+
),
|
|
197
|
+
}
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def _reject_legacy_llm_keys(llm_section: dict, path: Path) -> None:
|
|
201
|
+
"""Raise on deprecated `primary_model` / `fallback_model` keys.
|
|
202
|
+
|
|
203
|
+
These were removed when the llm config split into 4 role-specific slots
|
|
204
|
+
(primary/secondary x chat/coding). Silently aliasing them would corrupt
|
|
205
|
+
intent — e.g. the old `fallback_model` meant different things to chat and
|
|
206
|
+
coding users. Fail loudly with migration guidance.
|
|
207
|
+
"""
|
|
208
|
+
bad = [k for k in _LEGACY_LLM_KEY_HINT if k in llm_section]
|
|
209
|
+
if not bad:
|
|
210
|
+
return
|
|
211
|
+
lines = [f"\nDeprecated llm key(s) found in {path}:"]
|
|
212
|
+
for key in bad:
|
|
213
|
+
lines.append(f" - `{key}`: {_LEGACY_LLM_KEY_HINT[key]}")
|
|
214
|
+
raise ValueError("\n".join(lines))
|
|
215
|
+
|
|
216
|
+
|
|
154
217
|
def _apply_config(config: AiShellConfig, path: Path) -> None:
|
|
155
218
|
"""Apply settings from a YAML or TOML config file."""
|
|
156
219
|
try:
|
|
@@ -176,10 +239,17 @@ def _apply_config(config: AiShellConfig, path: Path) -> None:
|
|
|
176
239
|
|
|
177
240
|
# [llm] section
|
|
178
241
|
llm = data.get("llm", {})
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
242
|
+
_reject_legacy_llm_keys(llm, path)
|
|
243
|
+
if "primary_chat_model" in llm:
|
|
244
|
+
config.primary_chat_model = llm["primary_chat_model"]
|
|
245
|
+
if "secondary_chat_model" in llm:
|
|
246
|
+
config.secondary_chat_model = llm["secondary_chat_model"]
|
|
247
|
+
if "primary_coding_model" in llm:
|
|
248
|
+
config.primary_coding_model = llm["primary_coding_model"]
|
|
249
|
+
if "secondary_coding_model" in llm:
|
|
250
|
+
config.secondary_coding_model = llm["secondary_coding_model"]
|
|
251
|
+
if "extra_models" in llm:
|
|
252
|
+
config.extra_models.extend(str(m) for m in llm["extra_models"])
|
|
183
253
|
if "context_size" in llm:
|
|
184
254
|
config.context_size = int(llm["context_size"])
|
|
185
255
|
if "ollama_port" in llm:
|
|
@@ -214,14 +284,29 @@ def _apply_config(config: AiShellConfig, path: Path) -> None:
|
|
|
214
284
|
config.skip_updates = bool(container["skip_updates"])
|
|
215
285
|
|
|
216
286
|
|
|
287
|
+
_LEGACY_ENV_VARS = {
|
|
288
|
+
"AI_SHELL_PRIMARY_MODEL": ("AI_SHELL_PRIMARY_CODING_MODEL or AI_SHELL_PRIMARY_CHAT_MODEL"),
|
|
289
|
+
"AI_SHELL_FALLBACK_MODEL": ("AI_SHELL_SECONDARY_CHAT_MODEL or AI_SHELL_SECONDARY_CODING_MODEL"),
|
|
290
|
+
}
|
|
291
|
+
|
|
292
|
+
|
|
217
293
|
def _apply_env_vars(config: AiShellConfig) -> None:
|
|
218
294
|
"""Apply AI_SHELL_* environment variable overrides."""
|
|
295
|
+
bad_env = [k for k in _LEGACY_ENV_VARS if os.environ.get(k) is not None]
|
|
296
|
+
if bad_env:
|
|
297
|
+
lines = ["\nDeprecated AI_SHELL_* env var(s) set:"]
|
|
298
|
+
for key in bad_env:
|
|
299
|
+
lines.append(f" - {key}: use {_LEGACY_ENV_VARS[key]} instead")
|
|
300
|
+
raise ValueError("\n".join(lines))
|
|
301
|
+
|
|
219
302
|
env_map: dict[str, tuple[str, type]] = {
|
|
220
303
|
"AI_SHELL_IMAGE": ("image", str),
|
|
221
304
|
"AI_SHELL_IMAGE_TAG": ("image_tag", str),
|
|
222
305
|
"AI_SHELL_PROJECT": ("project_name", str),
|
|
223
|
-
"
|
|
224
|
-
"
|
|
306
|
+
"AI_SHELL_PRIMARY_CHAT_MODEL": ("primary_chat_model", str),
|
|
307
|
+
"AI_SHELL_SECONDARY_CHAT_MODEL": ("secondary_chat_model", str),
|
|
308
|
+
"AI_SHELL_PRIMARY_CODING_MODEL": ("primary_coding_model", str),
|
|
309
|
+
"AI_SHELL_SECONDARY_CODING_MODEL": ("secondary_coding_model", str),
|
|
225
310
|
"AI_SHELL_CONTEXT_SIZE": ("context_size", int),
|
|
226
311
|
"AI_SHELL_OLLAMA_PORT": ("ollama_port", int),
|
|
227
312
|
"AI_SHELL_WEBUI_PORT": ("webui_port", int),
|
|
@@ -357,6 +357,11 @@ class ContainerManager:
|
|
|
357
357
|
environment = {
|
|
358
358
|
"OLLAMA_BASE_URL": f"http://{OLLAMA_CONTAINER}:11434",
|
|
359
359
|
"WEBUI_AUTH": "false",
|
|
360
|
+
# DEFAULT_MODELS is a PersistentConfig: env seeds the DB on first
|
|
361
|
+
# boot and UI edits win after that. Point new chats at the
|
|
362
|
+
# primary chat slot; users can pick the secondary (uncensored)
|
|
363
|
+
# from the model dropdown.
|
|
364
|
+
"DEFAULT_MODELS": self.config.primary_chat_model,
|
|
360
365
|
}
|
|
361
366
|
if voice_enabled:
|
|
362
367
|
environment.update(
|
|
@@ -57,8 +57,13 @@ WEBUI_IMAGE = "ghcr.io/open-webui/open-webui:main"
|
|
|
57
57
|
KOKORO_IMAGE_CPU = "ghcr.io/remsky/kokoro-fastapi-cpu:latest"
|
|
58
58
|
KOKORO_IMAGE_GPU = "ghcr.io/remsky/kokoro-fastapi-gpu:latest"
|
|
59
59
|
N8N_IMAGE = "docker.n8n.io/n8nio/n8n"
|
|
60
|
-
|
|
61
|
-
|
|
60
|
+
# Model slots (RTX 4090-sized, validated April 2026). Primary = best available for
|
|
61
|
+
# the role; secondary = best uncensored alternative. See README "Local LLM stack"
|
|
62
|
+
# and the generated .ai-shell.yaml for per-slot rationale and caveats.
|
|
63
|
+
DEFAULT_PRIMARY_CHAT_MODEL = "qwen3.5:27b"
|
|
64
|
+
DEFAULT_SECONDARY_CHAT_MODEL = "huihui_ai/qwen3.5-abliterated:27b"
|
|
65
|
+
DEFAULT_PRIMARY_CODING_MODEL = "qwen3-coder:30b-a3b-q4_K_M"
|
|
66
|
+
DEFAULT_SECONDARY_CODING_MODEL = "huihui_ai/qwen3-coder-abliterated:30b-a3b-instruct-q4_K_M"
|
|
62
67
|
DEFAULT_CONTEXT_SIZE = 32768
|
|
63
68
|
DEFAULT_OLLAMA_PORT = 11434
|
|
64
69
|
DEFAULT_WEBUI_PORT = 3000
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
# =============================================================================
|
|
2
|
+
# .ai-shell.yaml - Project configuration for ai-shell
|
|
3
|
+
# =============================================================================
|
|
4
|
+
# Priority (highest wins): CLI flags > env vars > this file > global config > defaults
|
|
5
|
+
# Global config: ~/.ai-shell.yaml (applies to all projects)
|
|
6
|
+
# Full docs: https://github.com/svange/augint-shell#local-llm-stack
|
|
7
|
+
|
|
8
|
+
# -----------------------------------------------------------------------------
|
|
9
|
+
# llm - Local LLM stack (Ollama + Open WebUI + optional TTS / n8n)
|
|
10
|
+
# -----------------------------------------------------------------------------
|
|
11
|
+
# Four role-specific model slots. Primary = best-available; secondary =
|
|
12
|
+
# best uncensored (abliterated) alternative of the same base. Chat slots
|
|
13
|
+
# route to Open WebUI (DEFAULT_MODELS); coding slots route to OpenCode /
|
|
14
|
+
# Aider (--model). `ai-shell llm pull` pulls all 4 slots plus any
|
|
15
|
+
# `extra_models` entries, deduped. See README for per-slot rationale and
|
|
16
|
+
# caveats (Qwen3.5 Ollama tool-call bug, num_ctx trap, tool-count cliff).
|
|
17
|
+
llm:
|
|
18
|
+
primary_chat_model: qwen3.5:27b
|
|
19
|
+
secondary_chat_model: huihui_ai/qwen3.5-abliterated:27b
|
|
20
|
+
primary_coding_model: qwen3-coder:30b-a3b-q4_K_M
|
|
21
|
+
secondary_coding_model: huihui_ai/qwen3-coder-abliterated:30b-a3b-instruct-q4_K_M
|
|
22
|
+
context_size: 32768
|
|
23
|
+
# Additional Ollama tags to pull alongside the 4 slots (deduped).
|
|
24
|
+
# Uncomment any line to enable -- indentation is already correct.
|
|
25
|
+
extra_models:
|
|
26
|
+
# - llama3.1:8b # ~5 GB fast general chat
|
|
27
|
+
# - llama3.2:latest # ~2 GB Llama 3.2 3B, very fast
|
|
28
|
+
# - dolphin3:8b # ~5 GB uncensored Llama 3.1 8B
|
|
29
|
+
# - qwen3:30b-a3b-instruct-2507-q4_K_M # ~19 GB Qwen3 MoE chat alt (~196 tok/s)
|
|
30
|
+
# - qwen2.5-coder:32b-q4_k_m # ~19 GB Qwen2.5-Coder (previous gen)
|
|
31
|
+
# - qwen2.5-coder:14b-instruct # ~9 GB smaller Qwen2.5-Coder
|
|
32
|
+
|
|
33
|
+
# -----------------------------------------------------------------------------
|
|
34
|
+
# aws - AWS profile + region. Uncomment to override defaults.
|
|
35
|
+
# -----------------------------------------------------------------------------
|
|
36
|
+
# ai_profile: AWS profile for infra tools (terraform/cdk). Sets AWS_PROFILE
|
|
37
|
+
# in the container.
|
|
38
|
+
# bedrock_profile: AWS profile for Bedrock LLM calls (--aws mode). Often a
|
|
39
|
+
# different account than ai_profile.
|
|
40
|
+
# region: Region for Bedrock. Default: us-east-1
|
|
41
|
+
# Auth: ~/.aws is bind-mounted read-write. `aws sso login` on the host as needed.
|
|
42
|
+
#
|
|
43
|
+
# aws:
|
|
44
|
+
# ai_profile: my-infra-account
|
|
45
|
+
# bedrock_profile: my-ai-account
|
|
46
|
+
# region: us-east-1
|
|
47
|
+
|
|
48
|
+
# -----------------------------------------------------------------------------
|
|
49
|
+
# claude - Claude Code backend selection. Uncomment to use Bedrock instead of
|
|
50
|
+
# the Anthropic API. Equivalent per-session flag: `ai-shell claude --aws`.
|
|
51
|
+
# -----------------------------------------------------------------------------
|
|
52
|
+
# claude:
|
|
53
|
+
# provider: aws
|
|
54
|
+
|
|
55
|
+
# -----------------------------------------------------------------------------
|
|
56
|
+
# container - Docker image, env, mounts, ports. Uncomment to override.
|
|
57
|
+
# -----------------------------------------------------------------------------
|
|
58
|
+
# image / image_tag: Override the default svange/augint-shell:latest image.
|
|
59
|
+
# extra_env: Additional env vars injected into the dev container.
|
|
60
|
+
# extra_volumes: Additional bind mounts ("/host:/container" or ":/path:ro").
|
|
61
|
+
# ports: Extra host ports to expose on the dev container.
|
|
62
|
+
#
|
|
63
|
+
# container:
|
|
64
|
+
# image: svange/augint-shell
|
|
65
|
+
# image_tag: latest
|
|
66
|
+
# extra_env:
|
|
67
|
+
# MY_VAR: value
|
|
68
|
+
# extra_volumes:
|
|
69
|
+
# - /host/path:/container/path
|
|
70
|
+
# ports:
|
|
71
|
+
# - 9000
|
|
72
|
+
# - 9229
|
|
@@ -1,207 +0,0 @@
|
|
|
1
|
-
# =============================================================================
|
|
2
|
-
# .ai-shell.yaml - Project configuration for ai-shell
|
|
3
|
-
# =============================================================================
|
|
4
|
-
# Uncomment and modify settings you want to override.
|
|
5
|
-
# Priority (highest wins): CLI flags > env vars > this file > global config > defaults
|
|
6
|
-
# Global config: ~/.config/ai-shell/config.yaml (same format, applies to all projects)
|
|
7
|
-
# Docs: https://github.com/svange/augint-shell
|
|
8
|
-
|
|
9
|
-
# =============================================================================
|
|
10
|
-
# aws - Amazon Web Services configuration
|
|
11
|
-
# =============================================================================
|
|
12
|
-
# ai_profile: AWS profile for the AI's working environment (aws cli, terraform,
|
|
13
|
-
# cdk). Sets AWS_PROFILE in the container. This is the account the AI uses
|
|
14
|
-
# when running infrastructure commands.
|
|
15
|
-
# Override with env var: AI_SHELL_AI_PROFILE
|
|
16
|
-
#
|
|
17
|
-
# bedrock_profile: AWS profile for Bedrock LLM API calls. Often a different
|
|
18
|
-
# account than ai_profile. Overrides AWS_PROFILE specifically for AI tool
|
|
19
|
-
# processes launched with `--aws`.
|
|
20
|
-
# Override with env var: AI_SHELL_BEDROCK_PROFILE
|
|
21
|
-
# Override per-session with: --profile <name> on the CLI
|
|
22
|
-
#
|
|
23
|
-
# region: AWS region for Bedrock API calls. Default: us-east-1
|
|
24
|
-
# Override with env var: AI_SHELL_AWS_REGION
|
|
25
|
-
#
|
|
26
|
-
# Authentication: ~/.aws is bind-mounted into the container (read-write).
|
|
27
|
-
# SSO, credential files, and config are available automatically.
|
|
28
|
-
# If SSO tokens expire, run 'aws sso login --profile <name>' on the host.
|
|
29
|
-
#
|
|
30
|
-
# aws:
|
|
31
|
-
# ai_profile: my-infra-account
|
|
32
|
-
# bedrock_profile: my-ai-account
|
|
33
|
-
# region: us-east-1
|
|
34
|
-
|
|
35
|
-
# =============================================================================
|
|
36
|
-
# claude - Claude Code settings
|
|
37
|
-
# =============================================================================
|
|
38
|
-
# provider: API backend for Claude Code.
|
|
39
|
-
# "anthropic" - Direct Anthropic API (default, uses ~/.claude credentials)
|
|
40
|
-
# "aws" - Amazon Bedrock (uses bedrock_profile from aws section)
|
|
41
|
-
#
|
|
42
|
-
# When provider is "aws":
|
|
43
|
-
# - CLAUDE_CODE_USE_BEDROCK=1 is set in the environment
|
|
44
|
-
# - AWS_PROFILE is set to bedrock_profile for Claude's process
|
|
45
|
-
# - Quick switch with CLI: ai-shell claude --aws
|
|
46
|
-
# - Override per-session: ai-shell claude --aws --profile <name>
|
|
47
|
-
# - Tip: pin Bedrock model versions with ANTHROPIC_DEFAULT_SONNET_MODEL env var
|
|
48
|
-
#
|
|
49
|
-
# Override with env var: AI_SHELL_CLAUDE_PROVIDER
|
|
50
|
-
#
|
|
51
|
-
# claude:
|
|
52
|
-
# provider: aws
|
|
53
|
-
|
|
54
|
-
# Codex runtime note:
|
|
55
|
-
# ai-shell does not manage Codex's own config file, but `ai-shell codex --aws`
|
|
56
|
-
# launches Codex with Bedrock by injecting `CLAUDE_CODE_USE_BEDROCK=1` and
|
|
57
|
-
# setting `AWS_PROFILE` to `aws.bedrock_profile` (or `--profile` if passed).
|
|
58
|
-
# Local-LLM Codex configuration is not managed in this file.
|
|
59
|
-
|
|
60
|
-
# =============================================================================
|
|
61
|
-
# container - Docker container settings
|
|
62
|
-
# =============================================================================
|
|
63
|
-
# image: Docker image (default: svange/augint-shell)
|
|
64
|
-
# image_tag: Image tag (default: current ai-shell version)
|
|
65
|
-
# extra_env: Additional environment variables for the container
|
|
66
|
-
# extra_volumes: Additional bind mounts ("/host/path:/container/path" or ":/path:ro")
|
|
67
|
-
# ports: Additional ports to expose (extends the default dev port set)
|
|
68
|
-
#
|
|
69
|
-
# container:
|
|
70
|
-
# image: svange/augint-shell
|
|
71
|
-
# image_tag: latest
|
|
72
|
-
# extra_env:
|
|
73
|
-
# MY_VAR: value
|
|
74
|
-
# extra_volumes:
|
|
75
|
-
# - /host/path:/container/path
|
|
76
|
-
# ports:
|
|
77
|
-
# - 9000
|
|
78
|
-
# - 9229
|
|
79
|
-
|
|
80
|
-
# =============================================================================
|
|
81
|
-
# llm - Local LLM settings (Ollama + Open WebUI)
|
|
82
|
-
# =============================================================================
|
|
83
|
-
# primary_model: Default Ollama model for inference
|
|
84
|
-
# fallback_model: Backup model if primary unavailable
|
|
85
|
-
# context_size: Context window in tokens (default: 32768)
|
|
86
|
-
# ollama_port: Host port for Ollama API (default: 11434)
|
|
87
|
-
# webui_port: Host port for Open WebUI (default: 3000)
|
|
88
|
-
# n8n_port: Host port for n8n workflow automation (default: 5678)
|
|
89
|
-
#
|
|
90
|
-
# Models are downloaded automatically by `ai-shell llm setup`, which:
|
|
91
|
-
# 1. Starts the Ollama container (GPU-enabled if an NVIDIA card is detected)
|
|
92
|
-
# 2. Runs `ollama pull <primary_model>` and `ollama pull <fallback_model>`
|
|
93
|
-
# 3. Applies the context-window Modelfile so both models run with num_ctx set
|
|
94
|
-
#
|
|
95
|
-
# To pull a model manually at any time:
|
|
96
|
-
# ai-shell llm shell # opens a bash shell inside the Ollama container
|
|
97
|
-
# ollama pull <model> # then run any ollama command directly
|
|
98
|
-
#
|
|
99
|
-
# ai-shell only manages the local LLM endpoint and generic runtime settings.
|
|
100
|
-
# Tool-specific config files for Codex, OpenCode, and Aider should be managed
|
|
101
|
-
# separately, for example via augint-opencodex.
|
|
102
|
-
# Keep tool-specific provider, model, auth, and permission settings out of this
|
|
103
|
-
# file. Put those in the generated tool config files instead.
|
|
104
|
-
#
|
|
105
|
-
# ─── RTX 4090 model guide (24 GiB VRAM) ─────────────────────────────────────
|
|
106
|
-
#
|
|
107
|
-
# General-chat / assistant models
|
|
108
|
-
# ─────────────────────────────────
|
|
109
|
-
# qwen3.5:27b ~15 GiB Q4_K_M fits on 4090 with headroom
|
|
110
|
-
# qwen3.5:27b-q5_k_m ~19 GiB Q5_K_M fits, higher quality
|
|
111
|
-
# qwen3.5:32b-q4_k_m ~19 GiB Q4_K_M tight but fits on 4090
|
|
112
|
-
#
|
|
113
|
-
# Uncensored / instruction-following variants
|
|
114
|
-
# ────────────────────────────────────────────
|
|
115
|
-
# dolphin3:8b ~5 GiB Dolphin 3.0 (uncensored Llama 3.1 8B)
|
|
116
|
-
# huihui_ai/llama3.3-abliterated ~16 GiB Llama 3.3 70B abliterated (uncensored chat)
|
|
117
|
-
# llama3.1:8b ~5 GiB Meta Llama 3.1 8B instruct
|
|
118
|
-
# llama3.1:8b-instruct-q4_k_m ~5 GiB Q4_K_M quantized
|
|
119
|
-
# llama3.2:latest ~2 GiB Meta Llama 3.2 3B (fast/small)
|
|
120
|
-
#
|
|
121
|
-
# Coding-heavy models
|
|
122
|
-
# ────────────────────
|
|
123
|
-
# qwen2.5-coder:32b-q4_k_m ~19 GiB Q4_K_M top coding quality on 4090
|
|
124
|
-
# qwen3:14b-q4_k_m ~9 GiB Q4_K_M fast coder with good accuracy
|
|
125
|
-
# qwen3:32b-q4_k_m ~19 GiB Q4_K_M best local coding on 4090
|
|
126
|
-
# qwen3-coder:30b-a3b-q4_K_M ~19 GiB Q4_K_M Qwen3-Coder 30B A3B (Mixture-of-Experts, ~3B active)
|
|
127
|
-
#
|
|
128
|
-
# Quick-start pull commands (run inside `ai-shell llm shell`):
|
|
129
|
-
# ollama pull qwen3.5:27b
|
|
130
|
-
# ollama pull qwen3-coder:30b-a3b-q4_K_M
|
|
131
|
-
# ollama pull huihui_ai/llama3.3-abliterated
|
|
132
|
-
# ollama pull dolphin3:8b
|
|
133
|
-
# ollama pull llama3.1:8b
|
|
134
|
-
# ollama pull qwen2.5-coder:32b-q4_k_m
|
|
135
|
-
# ollama pull qwen3:14b-q4_k_m
|
|
136
|
-
# ollama pull qwen3:32b-q4_k_m
|
|
137
|
-
#
|
|
138
|
-
# ─── RTX 5070 Ti model guide (12 GiB VRAM) ──────────────────────────────────
|
|
139
|
-
#
|
|
140
|
-
# Chat models
|
|
141
|
-
# ────────────
|
|
142
|
-
# qwen3.5:9b ~7 GiB Q4_K_M best chat, 256K ctx, multimodal
|
|
143
|
-
# huihui_ai/qwen3.5-abliterated:9b ~7 GiB Q4_K uncensored Qwen3.5 (abliterated)
|
|
144
|
-
#
|
|
145
|
-
# Coding models
|
|
146
|
-
# ──────────────
|
|
147
|
-
# qwen2.5-coder:14b-instruct ~9 GiB Q4_K_M largest dedicated coder that fits
|
|
148
|
-
# huihui_ai/qwen3.5-abliterated:9b-OmniCoder ~6 GiB Q4_K uncensored coding variant
|
|
149
|
-
#
|
|
150
|
-
# Quick-start pull commands (run inside `ai-shell llm shell`):
|
|
151
|
-
# ollama pull qwen3.5:9b
|
|
152
|
-
# ollama pull huihui_ai/qwen3.5-abliterated:9b
|
|
153
|
-
# ollama pull qwen2.5-coder:14b-instruct
|
|
154
|
-
# ollama pull huihui_ai/qwen3.5-abliterated:9b-OmniCoder
|
|
155
|
-
#
|
|
156
|
-
# After pulling, set primary_model (and fallback_model) below, then run:
|
|
157
|
-
# ai-shell llm setup # pulls models + applies context-window config
|
|
158
|
-
#
|
|
159
|
-
# llm:
|
|
160
|
-
# primary_model: qwen3-coder:30b-a3b-q4_K_M
|
|
161
|
-
# fallback_model: huihui_ai/llama3.3-abliterated
|
|
162
|
-
# context_size: 32768
|
|
163
|
-
# ollama_port: 11434
|
|
164
|
-
# webui_port: 3000
|
|
165
|
-
# n8n_port: 5678
|
|
166
|
-
#
|
|
167
|
-
# --- Example: 4090 coding-focused setup ---
|
|
168
|
-
# llm:
|
|
169
|
-
# primary_model: qwen2.5-coder:32b-q4_k_m
|
|
170
|
-
# fallback_model: qwen3:14b-q4_k_m
|
|
171
|
-
# context_size: 32768
|
|
172
|
-
#
|
|
173
|
-
# --- Example: 4090 general-chat setup ---
|
|
174
|
-
# llm:
|
|
175
|
-
# primary_model: qwen3.5:27b
|
|
176
|
-
# fallback_model: llama3.1:8b
|
|
177
|
-
# context_size: 32768
|
|
178
|
-
#
|
|
179
|
-
# --- Example: 4090 uncensored setup ---
|
|
180
|
-
# llm:
|
|
181
|
-
# primary_model: dolphin3:8b
|
|
182
|
-
# fallback_model: llama3.1:8b
|
|
183
|
-
# context_size: 32768
|
|
184
|
-
#
|
|
185
|
-
# --- Example: 5070 Ti coding-focused setup ---
|
|
186
|
-
# llm:
|
|
187
|
-
# primary_model: qwen2.5-coder:14b-instruct
|
|
188
|
-
# fallback_model: qwen3.5:9b
|
|
189
|
-
# context_size: 32768
|
|
190
|
-
#
|
|
191
|
-
# --- Example: 5070 Ti general-chat setup ---
|
|
192
|
-
# llm:
|
|
193
|
-
# primary_model: qwen3.5:9b
|
|
194
|
-
# fallback_model: huihui_ai/qwen3.5-abliterated:9b
|
|
195
|
-
# context_size: 32768
|
|
196
|
-
#
|
|
197
|
-
# --- Example: 5070 Ti uncensored-chat setup ---
|
|
198
|
-
# llm:
|
|
199
|
-
# primary_model: huihui_ai/qwen3.5-abliterated:9b
|
|
200
|
-
# fallback_model: dolphin3:8b
|
|
201
|
-
# context_size: 32768
|
|
202
|
-
#
|
|
203
|
-
# --- Example: 5070 Ti uncensored-coding setup ---
|
|
204
|
-
# llm:
|
|
205
|
-
# primary_model: huihui_ai/qwen3.5-abliterated:9b-OmniCoder
|
|
206
|
-
# fallback_model: qwen2.5-coder:14b-instruct
|
|
207
|
-
# context_size: 32768
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|