llmstack-cli 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llmstack/__init__.py +3 -0
- llmstack/__main__.py +5 -0
- llmstack/cli/__init__.py +0 -0
- llmstack/cli/app.py +87 -0
- llmstack/cli/commands/__init__.py +0 -0
- llmstack/cli/commands/doctor.py +72 -0
- llmstack/cli/commands/down.py +25 -0
- llmstack/cli/commands/init.py +66 -0
- llmstack/cli/commands/logs.py +25 -0
- llmstack/cli/commands/status.py +45 -0
- llmstack/cli/commands/up.py +30 -0
- llmstack/cli/console.py +13 -0
- llmstack/config/__init__.py +4 -0
- llmstack/config/loader.py +44 -0
- llmstack/config/presets/__init__.py +11 -0
- llmstack/config/presets/agent.py +13 -0
- llmstack/config/presets/chat.py +14 -0
- llmstack/config/presets/rag.py +10 -0
- llmstack/config/schema.py +76 -0
- llmstack/core/__init__.py +0 -0
- llmstack/core/hardware.py +131 -0
- llmstack/core/health.py +23 -0
- llmstack/core/resolver.py +49 -0
- llmstack/core/stack.py +207 -0
- llmstack/docker/__init__.py +0 -0
- llmstack/docker/manager.py +134 -0
- llmstack/gateway/Dockerfile +16 -0
- llmstack/gateway/__init__.py +0 -0
- llmstack/gateway/main.py +52 -0
- llmstack/gateway/middleware/__init__.py +0 -0
- llmstack/gateway/middleware/auth.py +32 -0
- llmstack/gateway/middleware/metrics.py +115 -0
- llmstack/gateway/proxy.py +58 -0
- llmstack/gateway/routes/__init__.py +0 -0
- llmstack/gateway/routes/chat.py +27 -0
- llmstack/gateway/routes/embeddings.py +17 -0
- llmstack/gateway/routes/health.py +55 -0
- llmstack/gateway/routes/models.py +16 -0
- llmstack/plugins/__init__.py +0 -0
- llmstack/plugins/loader.py +5 -0
- llmstack/plugins/spec.py +20 -0
- llmstack/services/__init__.py +0 -0
- llmstack/services/base.py +65 -0
- llmstack/services/cache/__init__.py +0 -0
- llmstack/services/cache/redis.py +33 -0
- llmstack/services/embeddings/__init__.py +0 -0
- llmstack/services/embeddings/tei.py +49 -0
- llmstack/services/gateway/__init__.py +0 -0
- llmstack/services/gateway/service.py +47 -0
- llmstack/services/inference/__init__.py +0 -0
- llmstack/services/inference/ollama.py +60 -0
- llmstack/services/inference/vllm.py +57 -0
- llmstack/services/observe/__init__.py +0 -0
- llmstack/services/observe/prometheus.py +168 -0
- llmstack/services/registry.py +53 -0
- llmstack/services/vectordb/__init__.py +0 -0
- llmstack/services/vectordb/qdrant.py +33 -0
- llmstack_cli-0.1.0.dist-info/METADATA +252 -0
- llmstack_cli-0.1.0.dist-info/RECORD +62 -0
- llmstack_cli-0.1.0.dist-info/WHEEL +4 -0
- llmstack_cli-0.1.0.dist-info/entry_points.txt +2 -0
- llmstack_cli-0.1.0.dist-info/licenses/LICENSE +201 -0
llmstack/__init__.py
ADDED
llmstack/__main__.py
ADDED
llmstack/cli/__init__.py
ADDED
File without changes
llmstack/cli/app.py
ADDED
@@ -0,0 +1,87 @@
+"""CLI entry point — Typer application."""
+
+from __future__ import annotations
+
+import typer
+
+from llmstack import __version__
+
+
+app = typer.Typer(
+    name="llmstack",
+    help="One command. Full LLM stack. Zero config.",
+    no_args_is_help=True,
+    add_completion=False,
+)
+
+
+def version_callback(value: bool) -> None:
+    if value:
+        typer.echo(f"llmstack {__version__}")
+        raise typer.Exit()
+
+
+@app.callback()
+def main(
+    version: bool = typer.Option(
+        False, "--version", "-V",
+        callback=version_callback,
+        is_eager=True,
+        help="Show version and exit.",
+    ),
+) -> None:
+    """LLMStack — One command. Full LLM stack. Zero config."""
+
+
+@app.command()
+def init(
+    preset: str = typer.Option(None, "--preset", "-p", help="Preset: chat, rag, agent"),
+    directory: str = typer.Option(None, "--dir", "-d", help="Target directory"),
+) -> None:
+    """Create a new llmstack.yaml configuration file."""
+    from pathlib import Path
+    from llmstack.cli.commands.init import init as _init
+    _init(preset=preset, directory=Path(directory) if directory else None)
+
+
+@app.command()
+def up(
+    attach: bool = typer.Option(False, "--attach", "-a", help="Stream logs after starting"),
+) -> None:
+    """Start all services defined in llmstack.yaml."""
+    from llmstack.cli.commands.up import up as _up
+    _up(attach=attach)
+
+
+@app.command()
+def down(
+    volumes: bool = typer.Option(False, "--volumes", "-v", help="Also remove data volumes"),
+) -> None:
+    """Stop and remove all llmstack services."""
+    from llmstack.cli.commands.down import down as _down
+    _down(volumes=volumes)
+
+
+@app.command()
+def status() -> None:
+    """Show the status of all running llmstack services."""
+    from llmstack.cli.commands.status import status as _status
+    _status()
+
+
+@app.command()
+def logs(
+    service: str = typer.Argument(help="Service name (ollama, qdrant, redis)"),
+    follow: bool = typer.Option(True, "--follow/--no-follow", "-f"),
+    tail: int = typer.Option(50, "--tail", "-n"),
+) -> None:
+    """Stream logs from a specific service."""
+    from llmstack.cli.commands.logs import logs as _logs
+    _logs(service=service, follow=follow, tail=tail)
+
+
+@app.command()
+def doctor() -> None:
+    """Check system requirements and diagnose issues."""
+    from llmstack.cli.commands.doctor import doctor as _doctor
+    _doctor()
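Each subcommand defers its real import to call time, so `llmstack --help` stays fast even though the commands pull in Docker and YAML machinery. A minimal sketch (not part of the package) of exercising the app with Typer's bundled test runner:

```python
# Hypothetical test sketch; assumes the wheel above is installed.
from typer.testing import CliRunner

from llmstack.cli.app import app

runner = CliRunner()
result = runner.invoke(app, ["--version"])

assert result.exit_code == 0
assert result.output.startswith("llmstack ")  # version_callback echoes "llmstack {__version__}"
```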
llmstack/cli/commands/__init__.py
ADDED
File without changes
llmstack/cli/commands/doctor.py
ADDED
@@ -0,0 +1,72 @@
+"""llmstack doctor — diagnose common issues."""
+
+from __future__ import annotations
+
+import shutil
+import socket
+
+from llmstack.cli.console import console
+from llmstack.core.hardware import detect_hardware
+
+
+def _check_port(port: int) -> bool:
+    """Return True if port is available."""
+    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
+        return s.connect_ex(("127.0.0.1", port)) != 0
+
+
+def doctor() -> None:
+    """Check system requirements and diagnose common issues."""
+    console.print("\n[bold]LLMStack Doctor[/]\n")
+    issues = 0
+
+    # Docker
+    if shutil.which("docker"):
+        console.print(" [green]PASS[/] Docker is installed")
+    else:
+        console.print(" [red]FAIL[/] Docker is not installed")
+        issues += 1
+
+    # Docker daemon
+    try:
+        import docker
+        client = docker.from_env()
+        client.ping()
+        console.print(" [green]PASS[/] Docker daemon is running")
+    except Exception:
+        console.print(" [red]FAIL[/] Docker daemon is not reachable")
+        issues += 1
+
+    # Hardware
+    hw = detect_hardware()
+    if hw.gpu_vendor != "none":
+        console.print(f" [green]PASS[/] GPU detected: {hw.gpu_name}")
+        if hw.gpu_vendor == "nvidia" and hw.docker_runtime != "nvidia":
+            console.print(" [yellow]WARN[/] nvidia-container-toolkit not found (GPU passthrough may not work)")
+    else:
+        console.print(" [yellow]WARN[/] No GPU detected (CPU inference only)")
+
+    console.print(f" [green]INFO[/] RAM: {hw.ram_mb // 1024} GB, CPU: {hw.cpu_cores} cores")
+
+    # Ports
+    for port, service in [(11434, "Ollama"), (6333, "Qdrant"), (6379, "Redis"), (8000, "Gateway")]:
+        if _check_port(port):
+            console.print(f" [green]PASS[/] Port {port} ({service}) is available")
+        else:
+            console.print(f" [yellow]WARN[/] Port {port} ({service}) is in use")
+
+    # Config
+    try:
+        from llmstack.config.loader import load_config
+        load_config()
+        console.print(" [green]PASS[/] llmstack.yaml is valid")
+    except FileNotFoundError:
+        console.print(" [yellow]WARN[/] No llmstack.yaml found (run 'llmstack init')")
+    except SystemExit:
+        console.print(" [red]FAIL[/] llmstack.yaml has validation errors")
+        issues += 1
+
+    if issues:
+        console.print(f"\n[error]{issues} issue(s) found.[/]")
+    else:
+        console.print("\n[success]All checks passed![/]")
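The port probe relies on `connect_ex`, which returns 0 only when something is already accepting connections on the port, so a non-zero result means the port is free. A standalone sketch of the same check against the default ports doctor probes:

```python
# Standalone sketch of doctor's port check (not part of the package).
import socket

def port_is_free(port: int) -> bool:
    # connect_ex() returns 0 when a listener is already bound.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        return s.connect_ex(("127.0.0.1", port)) != 0

for port in (11434, 6333, 6379, 8000):  # Ollama, Qdrant, Redis, Gateway
    print(port, "free" if port_is_free(port) else "in use")
```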
llmstack/cli/commands/down.py
ADDED
@@ -0,0 +1,25 @@
+"""llmstack down — stop all services."""
+
+from __future__ import annotations
+
+import typer
+
+from llmstack.cli.console import console
+from llmstack.docker.manager import DockerManager
+
+
+def down(
+    volumes: bool = typer.Option(False, "--volumes", "-v", help="Also remove data volumes"),
+) -> None:
+    """Stop and remove all llmstack services."""
+    docker = DockerManager()
+    stopped = docker.stop_all(remove_volumes=volumes)
+
+    if stopped:
+        for name in stopped:
+            console.print(f" [info]Stopped {name}[/]")
+        if volumes:
+            console.print("[warning]Volumes removed.[/]")
+        console.print("\n[success]All services stopped.[/]")
+    else:
+        console.print("[info]No llmstack services are running.[/]")
llmstack/cli/commands/init.py
ADDED
@@ -0,0 +1,66 @@
+"""llmstack init — create llmstack.yaml with smart defaults."""
+
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Optional
+
+import typer
+
+from llmstack.cli.console import console
+from llmstack.config.loader import save_config, CONFIG_FILENAME
+from llmstack.config.presets import PRESETS
+from llmstack.config.schema import StackConfig
+from llmstack.core.hardware import detect_hardware
+
+
+def init(
+    preset: Optional[str] = typer.Option(
+        None, "--preset", "-p",
+        help="Preset to use: chat, rag, agent",
+    ),
+    directory: Optional[Path] = typer.Option(
+        None, "--dir", "-d",
+        help="Directory to create llmstack.yaml in",
+    ),
+) -> None:
+    """Initialize a new llmstack.yaml configuration file."""
+    target = directory or Path.cwd()
+
+    if (target / CONFIG_FILENAME).exists():
+        console.print(f"[warning]{CONFIG_FILENAME} already exists. Use --dir to specify another location.[/]")
+        raise typer.Exit(1)
+
+    # Detect hardware
+    hw = detect_hardware()
+    console.print("\n[info]Hardware detected:[/]")
+    console.print(f" CPU: {hw.cpu_cores} cores")
+    console.print(f" RAM: {hw.ram_mb // 1024} GB")
+    if hw.gpu_vendor != "none":
+        console.print(f" GPU: {hw.gpu_name} ({hw.gpu_vram_mb // 1024} GB VRAM)")
+    else:
+        console.print(" GPU: none (will use CPU inference)")
+
+    # Pick config
+    if preset and preset in PRESETS:
+        config = PRESETS[preset].model_copy(deep=True)
+        console.print(f"\n[info]Using preset:[/] {preset}")
+    elif preset:
+        console.print(f"[error]Unknown preset '{preset}'. Available: {', '.join(PRESETS.keys())}[/]")
+        raise typer.Exit(1)
+    else:
+        config = StackConfig()
+        console.print("\n[info]Using default configuration[/]")
+
+    # Auto-resolve backend hint
+    if hw.gpu_vendor == "nvidia" and hw.gpu_vram_mb >= 16_000:
+        config.models.chat.backend = "vllm"
+        console.print(" Backend: [success]vLLM[/] (NVIDIA GPU detected)")
+    else:
+        config.models.chat.backend = "ollama"
+        console.print(" Backend: [success]Ollama[/]")
+
+    # Save
+    path = save_config(config, target)
+    console.print(f"\n[success]Created {path}[/]")
+    console.print("Next: edit the config if needed, then run [bold]llmstack up[/]")
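`init` consumes the result of `detect_hardware()` from `llmstack/core/hardware.py`, whose body is not shown in this diff. A hypothetical sketch of the shape that result must have, inferred purely from the fields `init` and `doctor` read, with the backend rule applied to it:

```python
# Hypothetical reconstruction: field names are inferred from usage in
# init.py and doctor.py; the real type in hardware.py is not shown here.
from dataclasses import dataclass

@dataclass
class HardwareInfo:
    cpu_cores: int
    ram_mb: int
    gpu_vendor: str        # "nvidia" and "none" appear in the shown code
    gpu_name: str | None
    gpu_vram_mb: int
    docker_runtime: str    # "nvidia" when nvidia-container-toolkit is active

hw = HardwareInfo(8, 32_768, "nvidia", "RTX 4090", 24_564, "nvidia")  # made-up values

# init.py's rule: vLLM only for NVIDIA GPUs with at least 16 GB of VRAM.
backend = "vllm" if hw.gpu_vendor == "nvidia" and hw.gpu_vram_mb >= 16_000 else "ollama"
print(backend)  # vllm
```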
llmstack/cli/commands/logs.py
ADDED
@@ -0,0 +1,25 @@
+"""llmstack logs — stream logs from a service."""
+
+from __future__ import annotations
+
+import typer
+
+from llmstack.cli.console import console
+from llmstack.docker.manager import DockerManager
+
+
+def logs(
+    service: str = typer.Argument(help="Service name (e.g., ollama, qdrant, redis)"),
+    follow: bool = typer.Option(True, "--follow/--no-follow", "-f", help="Follow log output"),
+    tail: int = typer.Option(50, "--tail", "-n", help="Number of lines to show"),
+) -> None:
+    """Stream logs from a specific service."""
+    docker = DockerManager()
+    try:
+        for line in docker.stream_logs(service, follow=follow, tail=tail):
+            console.print(line, end="", highlight=False)
+    except ValueError as exc:
+        console.print(f"[error]{exc}[/]")
+        raise typer.Exit(1)
+    except KeyboardInterrupt:
+        pass
llmstack/cli/commands/status.py
ADDED
@@ -0,0 +1,45 @@
+"""llmstack status — show health of all services."""
+
+from __future__ import annotations
+
+from rich.table import Table
+
+from llmstack.cli.console import console
+from llmstack.docker.manager import DockerManager
+
+
+def status() -> None:
+    """Show the status of all running llmstack services."""
+    docker = DockerManager()
+    services = docker.list_services()
+
+    if not services:
+        console.print("[info]No llmstack services are running.[/]")
+        console.print("Run [bold]llmstack up[/] to start.")
+        return
+
+    table = Table(title="LLMStack Status", show_header=True)
+    table.add_column("Service", style="cyan")
+    table.add_column("Container")
+    table.add_column("Status")
+    table.add_column("Ports")
+
+    for svc in services:
+        status_style = "green" if svc["status"] == "running" else "red"
+        ports_str = ""
+        if svc["ports"]:
+            port_list = []
+            for container_port, host_bindings in svc["ports"].items():
+                if host_bindings:
+                    for binding in host_bindings:
+                        port_list.append(f"{binding['HostPort']}->{container_port}")
+            ports_str = ", ".join(port_list)
+
+        table.add_row(
+            svc["name"],
+            svc["container_id"],
+            f"[{status_style}]{svc['status']}[/]",
+            ports_str,
+        )
+
+    console.print(table)
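The `svc["ports"]` mapping that `status` flattens matches the shape of docker-py's `Container.ports` attribute. A sketch of how one entry renders:

```python
# Sample in the shape docker-py's Container.ports returns.
ports = {"6333/tcp": [{"HostIp": "0.0.0.0", "HostPort": "6333"}]}

port_list = [
    f"{binding['HostPort']}->{container_port}"
    for container_port, bindings in ports.items()
    if bindings
    for binding in bindings
]
print(", ".join(port_list))  # 6333->6333/tcp
```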
llmstack/cli/commands/up.py
ADDED
@@ -0,0 +1,30 @@
+"""llmstack up — boot the full stack."""
+
+from __future__ import annotations
+
+import asyncio
+
+import typer
+
+from llmstack.cli.console import console
+from llmstack.config.loader import load_config
+from llmstack.core.stack import Stack
+
+
+def up(
+    attach: bool = typer.Option(False, "--attach", "-a", help="Stream logs after starting"),
+) -> None:
+    """Start all services defined in llmstack.yaml."""
+    config = load_config()
+    stack = Stack(config)
+
+    console.print("\n[bold]Starting LLMStack...[/]\n")
+    asyncio.run(stack.up())
+
+    if attach:
+        console.print("[info]Streaming logs (Ctrl+C to detach)...[/]\n")
+        try:
+            for line in stack.docker.stream_logs("ollama", follow=True, tail=10):
+                console.print(line, end="")
+        except KeyboardInterrupt:
+            console.print("\n[info]Detached.[/]")
llmstack/cli/console.py
ADDED
@@ -0,0 +1,13 @@
+"""Rich console singleton and display helpers."""
+
+from rich.console import Console
+from rich.theme import Theme
+
+theme = Theme({
+    "info": "cyan",
+    "success": "green",
+    "warning": "yellow",
+    "error": "red bold",
+})
+
+console = Console(theme=theme)
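The theme gives every command semantic markup instead of hard-coded colors, which is why the modules above print tags like `[success]` and `[error]`:

```python
from llmstack.cli.console import console

console.print("[info]pulling model...[/]")      # cyan
console.print("[success]stack is up[/]")        # green
console.print("[warning]port 8000 in use[/]")   # yellow
console.print("[error]docker daemon down[/]")   # red bold
```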
llmstack/config/loader.py
ADDED
@@ -0,0 +1,44 @@
+"""Find, read, and validate llmstack.yaml."""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+import yaml
+from pydantic import ValidationError
+
+from llmstack.config.schema import StackConfig
+
+CONFIG_FILENAME = "llmstack.yaml"
+
+
+def find_config(directory: Path | None = None) -> Path:
+    """Locate llmstack.yaml in the given or current directory."""
+    base = directory or Path.cwd()
+    path = base / CONFIG_FILENAME
+    if not path.exists():
+        raise FileNotFoundError(
+            f"{CONFIG_FILENAME} not found in {base}. Run 'llmstack init' first."
+        )
+    return path
+
+
+def load_config(directory: Path | None = None) -> StackConfig:
+    """Load and validate llmstack.yaml, returning a StackConfig."""
+    path = find_config(directory)
+    raw = yaml.safe_load(path.read_text())
+    if raw is None:
+        raw = {}
+    try:
+        return StackConfig(**raw)
+    except ValidationError as exc:
+        raise SystemExit(f"Invalid {CONFIG_FILENAME}:\n{exc}") from exc
+
+
+def save_config(config: StackConfig, directory: Path | None = None) -> Path:
+    """Write a StackConfig to llmstack.yaml."""
+    base = directory or Path.cwd()
+    path = base / CONFIG_FILENAME
+    data = config.model_dump(mode="json", exclude_defaults=False)
+    path.write_text(yaml.dump(data, default_flow_style=False, sort_keys=False, allow_unicode=True))
+    return path
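A round-trip sketch under an assumed scratch directory: save the defaults, then load and validate them again.

```python
# Sketch only; the scratch path is hypothetical.
from pathlib import Path

from llmstack.config.loader import load_config, save_config
from llmstack.config.schema import StackConfig

target = Path("/tmp/llmstack-demo")
target.mkdir(parents=True, exist_ok=True)

path = save_config(StackConfig(), target)   # writes /tmp/llmstack-demo/llmstack.yaml
config = load_config(target)                # re-reads and validates it
assert config.models.chat.name == "llama3.2"
```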
llmstack/config/presets/__init__.py
ADDED
@@ -0,0 +1,11 @@
+from llmstack.config.presets.chat import CHAT_PRESET
+from llmstack.config.presets.rag import RAG_PRESET
+from llmstack.config.presets.agent import AGENT_PRESET
+
+PRESETS = {
+    "chat": CHAT_PRESET,
+    "rag": RAG_PRESET,
+    "agent": AGENT_PRESET,
+}
+
+__all__ = ["PRESETS", "CHAT_PRESET", "RAG_PRESET", "AGENT_PRESET"]
llmstack/config/presets/agent.py
ADDED
@@ -0,0 +1,13 @@
+"""Agent preset — large model + long context for agentic workflows."""
+
+from llmstack.config.schema import (
+    StackConfig, ModelsConfig, ModelSpec, EmbeddingSpec, GatewayConfig,
+)
+
+AGENT_PRESET = StackConfig(
+    models=ModelsConfig(
+        chat=ModelSpec(name="llama3.1:70b", backend="auto", context_length=16384),
+        embeddings=EmbeddingSpec(name="bge-m3"),
+    ),
+    gateway=GatewayConfig(rate_limit="30/min", request_timeout=300),
+)
llmstack/config/presets/chat.py
ADDED
@@ -0,0 +1,14 @@
+"""Chat preset — minimal setup for conversational AI."""
+
+from llmstack.config.schema import (
+    StackConfig, ModelsConfig, ModelSpec, EmbeddingSpec,
+    ObserveConfig,
+)
+
+CHAT_PRESET = StackConfig(
+    models=ModelsConfig(
+        chat=ModelSpec(name="llama3.2", backend="auto"),
+        embeddings=EmbeddingSpec(name="bge-m3"),
+    ),
+    observe=ObserveConfig(metrics=False),
+)
llmstack/config/presets/rag.py
ADDED
@@ -0,0 +1,10 @@
+"""RAG preset — chat + vector search + document ingestion."""
+
+from llmstack.config.schema import StackConfig, ModelsConfig, ModelSpec, EmbeddingSpec
+
+RAG_PRESET = StackConfig(
+    models=ModelsConfig(
+        chat=ModelSpec(name="llama3.2", backend="auto", context_length=8192),
+        embeddings=EmbeddingSpec(name="bge-m3"),
+    ),
+)
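Each preset overrides only a few fields; anything it leaves out falls back to the schema defaults below. For instance:

```python
from llmstack.config.presets import PRESETS

agent = PRESETS["agent"]
assert agent.models.chat.name == "llama3.1:70b"     # overridden
assert agent.models.chat.context_length == 16384    # overridden
assert agent.gateway.rate_limit == "30/min"         # overridden
assert agent.gateway.port == 8000                   # schema default, untouched
```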
llmstack/config/schema.py
ADDED
@@ -0,0 +1,76 @@
+"""Pydantic v2 models for llmstack.yaml configuration."""
+
+from __future__ import annotations
+
+from typing import Literal
+
+from pydantic import BaseModel, Field
+
+
+class ModelSpec(BaseModel):
+    name: str = "llama3.2"
+    backend: Literal["auto", "ollama", "vllm"] = "auto"
+    quantization: str | None = None
+    gpu_layers: int = -1
+    context_length: int = 8192
+    extra_args: dict = Field(default_factory=dict)
+
+
+class EmbeddingSpec(BaseModel):
+    name: str = "bge-m3"
+    backend: Literal["auto", "tei"] = "auto"
+    dimensions: int | None = None
+
+
+class ModelsConfig(BaseModel):
+    chat: ModelSpec = Field(default_factory=ModelSpec)
+    embeddings: EmbeddingSpec = Field(default_factory=EmbeddingSpec)
+
+
+class VectorDBConfig(BaseModel):
+    provider: Literal["qdrant"] = "qdrant"
+    port: int = 6333
+    storage_path: str = "./data/vectors"
+
+
+class CacheConfig(BaseModel):
+    provider: Literal["redis"] = "redis"
+    port: int = 6379
+    max_memory: str = "256mb"
+
+
+class ServicesConfig(BaseModel):
+    vectors: VectorDBConfig = Field(default_factory=VectorDBConfig)
+    cache: CacheConfig = Field(default_factory=CacheConfig)
+
+
+class GatewayConfig(BaseModel):
+    port: int = 8000
+    auth: Literal["none", "api_key"] = "api_key"
+    api_keys: list[str] = Field(default_factory=list)
+    rate_limit: str = "100/min"
+    cors: list[str] = Field(default_factory=lambda: ["*"])
+    request_timeout: int = 120
+
+
+class ObserveConfig(BaseModel):
+    metrics: bool = True
+    dashboard_port: int = 8080
+    retention: str = "7d"
+
+
+class DockerConfig(BaseModel):
+    network: str = "llmstack_net"
+    gpu: Literal["auto", "true", "false"] = "auto"
+    data_dir: str = "~/.llmstack/data"
+
+
+class StackConfig(BaseModel):
+    """Root config — 1:1 mapping with llmstack.yaml."""
+
+    version: str = "1"
+    models: ModelsConfig = Field(default_factory=ModelsConfig)
+    services: ServicesConfig = Field(default_factory=ServicesConfig)
+    gateway: GatewayConfig = Field(default_factory=GatewayConfig)
+    observe: ObserveConfig = Field(default_factory=ObserveConfig)
+    docker: DockerConfig = Field(default_factory=DockerConfig)
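Since every field has a default, `StackConfig()` is already a complete configuration; dumping it shows roughly what `save_config` writes for a default `llmstack init` (output abbreviated):

```python
import yaml

from llmstack.config.schema import StackConfig

print(yaml.dump(StackConfig().model_dump(mode="json"), sort_keys=False))
# version: '1'
# models:
#   chat:
#     name: llama3.2
#     backend: auto
#     quantization: null
#     gpu_layers: -1
#     context_length: 8192
#     extra_args: {}
#   embeddings:
#     name: bge-m3
#     ...
```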
llmstack/core/__init__.py
ADDED
File without changes