sage-ai-cli 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sage_ai_cli-1.0.0/PKG-INFO +34 -0
- sage_ai_cli-1.0.0/README.md +66 -0
- sage_ai_cli-1.0.0/backend/__init__.py +1 -0
- sage_ai_cli-1.0.0/backend/app.py +346 -0
- sage_ai_cli-1.0.0/backend/auto_updater.py +210 -0
- sage_ai_cli-1.0.0/backend/config.py +33 -0
- sage_ai_cli-1.0.0/backend/conversations.py +77 -0
- sage_ai_cli-1.0.0/backend/github_client.py +231 -0
- sage_ai_cli-1.0.0/backend/hardware.py +66 -0
- sage_ai_cli-1.0.0/backend/model_catalog.py +82 -0
- sage_ai_cli-1.0.0/backend/model_registry.py +344 -0
- sage_ai_cli-1.0.0/backend/prompt_engine.py +136 -0
- sage_ai_cli-1.0.0/backend/runtime_manager.py +37 -0
- sage_ai_cli-1.0.0/backend/runtimes/__init__.py +1 -0
- sage_ai_cli-1.0.0/backend/runtimes/base.py +27 -0
- sage_ai_cli-1.0.0/backend/runtimes/llama_cpp_runtime.py +62 -0
- sage_ai_cli-1.0.0/backend/runtimes/onnx_runtime.py +165 -0
- sage_ai_cli-1.0.0/backend/runtimes/transformers_runtime.py +69 -0
- sage_ai_cli-1.0.0/backend/runtimes/vllm_runtime.py +48 -0
- sage_ai_cli-1.0.0/backend/schemas.py +114 -0
- sage_ai_cli-1.0.0/pyproject.toml +59 -0
- sage_ai_cli-1.0.0/sage/__init__.py +3 -0
- sage_ai_cli-1.0.0/sage/config.py +181 -0
- sage_ai_cli-1.0.0/sage/core/__init__.py +1 -0
- sage_ai_cli-1.0.0/sage/core/engine.py +86 -0
- sage_ai_cli-1.0.0/sage/core/renderer.py +150 -0
- sage_ai_cli-1.0.0/sage/core/router.py +154 -0
- sage_ai_cli-1.0.0/sage/main.py +639 -0
- sage_ai_cli-1.0.0/sage/providers/__init__.py +5 -0
- sage_ai_cli-1.0.0/sage/providers/base.py +59 -0
- sage_ai_cli-1.0.0/sage/providers/gemini.py +127 -0
- sage_ai_cli-1.0.0/sage/providers/llama_cpp.py +118 -0
- sage_ai_cli-1.0.0/sage_ai_cli.egg-info/PKG-INFO +34 -0
- sage_ai_cli-1.0.0/sage_ai_cli.egg-info/SOURCES.txt +49 -0
- sage_ai_cli-1.0.0/sage_ai_cli.egg-info/dependency_links.txt +1 -0
- sage_ai_cli-1.0.0/sage_ai_cli.egg-info/entry_points.txt +2 -0
- sage_ai_cli-1.0.0/sage_ai_cli.egg-info/requires.txt +23 -0
- sage_ai_cli-1.0.0/sage_ai_cli.egg-info/top_level.txt +2 -0
- sage_ai_cli-1.0.0/setup.cfg +4 -0
- sage_ai_cli-1.0.0/tests/test_api.py +153 -0
- sage_ai_cli-1.0.0/tests/test_auto_updater.py +65 -0
- sage_ai_cli-1.0.0/tests/test_cli.py +236 -0
- sage_ai_cli-1.0.0/tests/test_config.py +32 -0
- sage_ai_cli-1.0.0/tests/test_conversations.py +67 -0
- sage_ai_cli-1.0.0/tests/test_download_script.py +211 -0
- sage_ai_cli-1.0.0/tests/test_github_client.py +70 -0
- sage_ai_cli-1.0.0/tests/test_hardware.py +19 -0
- sage_ai_cli-1.0.0/tests/test_model_catalog.py +119 -0
- sage_ai_cli-1.0.0/tests/test_model_registry.py +161 -0
- sage_ai_cli-1.0.0/tests/test_prompt_engine.py +119 -0
- sage_ai_cli-1.0.0/tests/test_schemas.py +142 -0
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: sage-ai-cli
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: Sage — a local-first AI coding CLI (like Claude Code, using free/open models)
|
|
5
|
+
Author: Layne Faler
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Repository, https://github.com/laynef/claude-ai-clone
|
|
8
|
+
Keywords: ai,cli,llm,coding,local,gemini,gguf,llama
|
|
9
|
+
Classifier: Development Status :: 4 - Beta
|
|
10
|
+
Classifier: Environment :: Console
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
14
|
+
Classifier: Topic :: Software Development :: Code Generators
|
|
15
|
+
Requires-Python: >=3.11
|
|
16
|
+
Requires-Dist: typer>=0.12.0
|
|
17
|
+
Requires-Dist: rich>=13.0.0
|
|
18
|
+
Requires-Dist: httpx>=0.27.0
|
|
19
|
+
Provides-Extra: local
|
|
20
|
+
Requires-Dist: llama-cpp-python>=0.2.90; extra == "local"
|
|
21
|
+
Provides-Extra: server
|
|
22
|
+
Requires-Dist: fastapi>=0.115.0; extra == "server"
|
|
23
|
+
Requires-Dist: uvicorn[standard]>=0.30.0; extra == "server"
|
|
24
|
+
Requires-Dist: pydantic>=2.9.0; extra == "server"
|
|
25
|
+
Requires-Dist: pydantic-settings>=2.5.0; extra == "server"
|
|
26
|
+
Provides-Extra: all
|
|
27
|
+
Requires-Dist: llama-cpp-python>=0.2.90; extra == "all"
|
|
28
|
+
Requires-Dist: fastapi>=0.115.0; extra == "all"
|
|
29
|
+
Requires-Dist: uvicorn[standard]>=0.30.0; extra == "all"
|
|
30
|
+
Requires-Dist: pydantic>=2.9.0; extra == "all"
|
|
31
|
+
Requires-Dist: pydantic-settings>=2.5.0; extra == "all"
|
|
32
|
+
Provides-Extra: dev
|
|
33
|
+
Requires-Dist: pytest>=8.0; extra == "dev"
|
|
34
|
+
Requires-Dist: httpx>=0.27.0; extra == "dev"
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
# Local AI Platform
|
|
2
|
+
|
|
3
|
+
## File Structure
|
|
4
|
+
|
|
5
|
+
```text
|
|
6
|
+
ai-platform/
|
|
7
|
+
├── backend/
|
|
8
|
+
├── frontend/
|
|
9
|
+
├── cli/
|
|
10
|
+
├── models/
|
|
11
|
+
└── config/
|
|
12
|
+
```
|
|
13
|
+
|
|
14
|
+
## Model Sources
|
|
15
|
+
|
|
16
|
+
| Model | Runtime | Source | License | Approx Size | Suggested Hardware |
|
|
17
|
+
|---|---|---|---|---|---|
|
|
18
|
+
| Llama 3.2 3B Instruct GGUF Q4_K_M | llama.cpp | https://github.com/ggml-org/llama.cpp | Meta Llama 3.2 Community License | ~2.0 GB | 8 GB RAM CPU |
|
|
19
|
+
| Qwen2.5 Coder 7B Instruct GGUF Q4_K_M | llama.cpp | https://github.com/QwenLM/Qwen2.5-Coder | Apache-2.0 | ~4.5 GB | 16 GB RAM CPU |
|
|
20
|
+
| Mistral 7B Instruct v0.3 | transformers | https://github.com/mistralai/mistral-inference | Apache-2.0 | ~13 GB fp16 | 16+ GB VRAM or 32 GB RAM |
|
|
21
|
+
|
|
22
|
+
## Quick Start
|
|
23
|
+
|
|
24
|
+
1. Backend:
|
|
25
|
+
```bash
|
|
26
|
+
cd ai-platform
|
|
27
|
+
python -m venv .venv
|
|
28
|
+
source .venv/bin/activate
|
|
29
|
+
pip install -r backend/requirements.txt
|
|
30
|
+
cp .env.example .env
|
|
31
|
+
PYTHONPATH=ai-platform uvicorn backend.app:app --host 0.0.0.0 --port 8090
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
2. Frontend:
|
|
35
|
+
```bash
|
|
36
|
+
cd ai-platform/frontend
|
|
37
|
+
npm install
|
|
38
|
+
npm run dev
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
3. CLI:
|
|
42
|
+
```bash
|
|
43
|
+
cd ai-platform/cli
|
|
44
|
+
pip install .
|
|
45
|
+
ai --host http://127.0.0.1:8090 list
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
## Model Download Examples
|
|
49
|
+
|
|
50
|
+
```bash
|
|
51
|
+
ai --host http://127.0.0.1:8090 download \
|
|
52
|
+
--model-id llama32-q4 \
|
|
53
|
+
--runtime llama_cpp \
|
|
54
|
+
--url https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.90/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
```bash
|
|
58
|
+
ai --host http://127.0.0.1:8090 run llama32-q4 --threads 8
|
|
59
|
+
ai --host http://127.0.0.1:8090 chat --model llama32-q4 --temperature 0.2 --max-tokens 512
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
## Privacy
|
|
63
|
+
|
|
64
|
+
- All inference is local.
|
|
65
|
+
- No telemetry is implemented.
|
|
66
|
+
- After model download setup, inference requires no external API calls.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1,346 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import logging
|
|
3
|
+
import os
|
|
4
|
+
import uuid
|
|
5
|
+
from dataclasses import asdict
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
from fastapi import FastAPI, HTTPException
|
|
9
|
+
from fastapi.middleware.cors import CORSMiddleware
|
|
10
|
+
from fastapi.responses import StreamingResponse
|
|
11
|
+
from fastapi.staticfiles import StaticFiles
|
|
12
|
+
|
|
13
|
+
from .auto_updater import AutoUpdater
|
|
14
|
+
from .config import runtime_defaults, settings
|
|
15
|
+
from .conversations import ConversationStore
|
|
16
|
+
from .github_client import discover_model_assets, parse_repo_url
|
|
17
|
+
from .hardware import detect_hardware_summary
|
|
18
|
+
from .model_catalog import filter_by_ram, get_recommended_models, list_catalog_models
|
|
19
|
+
from .model_registry import ModelRegistry
|
|
20
|
+
from .prompt_engine import build_messages
|
|
21
|
+
from .runtime_manager import RuntimeManager
|
|
22
|
+
from .schemas import (
|
|
23
|
+
AddSourceReq,
|
|
24
|
+
ChatMessage,
|
|
25
|
+
ChatReq,
|
|
26
|
+
CreateConversationReq,
|
|
27
|
+
DownloadModelReq,
|
|
28
|
+
ImportModelReq,
|
|
29
|
+
LoadModelReq,
|
|
30
|
+
SetActiveVersionReq,
|
|
31
|
+
)
|
|
32
|
+
|
|
33
|
+
# Create the models/config/data directories before anything touches them.
settings.ensure_dirs()
logging.basicConfig(level=getattr(logging, settings.log_level.upper(), logging.INFO))
logger = logging.getLogger("ai-platform")

app = FastAPI(title="Local AI Platform", version="3.0.0")
# Wide-open CORS: credentials are disabled, so "*" origins are acceptable here.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Module-level singletons shared by every request handler below.
registry = ModelRegistry()
runtime_manager = RuntimeManager()
conversation_store = ConversationStore()
auto_updater = AutoUpdater(registry)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
# ── Health & Hardware ──────────────────────────────────────────
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
@app.get("/health")
def health():
    """Liveness probe: reports server version and the currently loaded model."""
    status = {"ok": True, "version": "3.0.0"}
    status["loaded_model_id"] = runtime_manager.loaded_model_id
    status["loaded_runtime"] = runtime_manager.loaded_runtime
    return status


@app.get("/hardware")
def hardware():
    """Report a summary of the host's detected hardware."""
    summary = detect_hardware_summary()
    return summary
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
# ── Model Catalog (free models from GitHub) ────────────────────
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
@app.get("/catalog")
def catalog():
    """List the full curated catalog of free, open-weight models."""
    entries = list_catalog_models()
    return {"ok": True, "models": entries}


@app.get("/catalog/recommended")
def catalog_recommended():
    """List the recommended starter models from the catalog."""
    picks = get_recommended_models()
    return {"ok": True, "models": picks}


@app.get("/catalog/fits-ram")
def catalog_fits_ram(max_gb: float = 8.0):
    """List catalog models whose footprint fits within ``max_gb`` of RAM."""
    fitting = filter_by_ram(max_gb)
    return {"ok": True, "models": fitting}
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
# ── Models ─────────────────────────────────────────────────────
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
@app.get("/models")
def list_models():
    """Return every registered model as a serialized dict."""
    serialized = []
    for record in registry.list_models():
        serialized.append(record.model_dump())
    return {"ok": True, "models": serialized}


@app.get("/models/names")
def model_names():
    """Return all known model IDs (from registry + tracked sources) for autocomplete."""
    known = {record.model_id for record in registry.list_models()}
    for src in auto_updater.list_sources():
        known.add(src.model_id)
    return {"ok": True, "names": sorted(known)}
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
@app.get("/models/{model_id}")
def get_model(model_id: str):
    """Return a single registered model (404 if the ID is unknown)."""
    try:
        record = registry.get_model(model_id)
    except KeyError as exc:
        # `from exc` keeps the original KeyError in the traceback chain (B904).
        raise HTTPException(status_code=404, detail=str(exc)) from exc
    return {"ok": True, "model": record.model_dump()}


@app.get("/models/{model_id}/versions")
def list_versions(model_id: str):
    """List every downloaded version of a registered model (404 if unknown)."""
    try:
        versions = registry.list_versions(model_id)
    except KeyError as exc:
        raise HTTPException(status_code=404, detail=str(exc)) from exc
    return {"ok": True, "versions": [v.model_dump() for v in versions]}
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
@app.post("/models/download")
def download_model(req: DownloadModelReq):
    """Download a model artifact from a URL and register it (400 on bad request)."""
    try:
        record = registry.download_and_register(req)
    except ValueError as exc:
        # Chain the cause so the original validation error is not lost (B904).
        raise HTTPException(status_code=400, detail=str(exc)) from exc
    return {"ok": True, "model": record.model_dump()}


@app.post("/models/import")
def import_model(req: ImportModelReq):
    """Register a model file that already exists on local disk (400 on failure)."""
    try:
        record = registry.import_local(req)
    except (ValueError, FileNotFoundError) as exc:
        raise HTTPException(status_code=400, detail=str(exc)) from exc
    return {"ok": True, "model": record.model_dump()}


@app.post("/models/set-version")
def set_version(req: SetActiveVersionReq):
    """Mark one downloaded version as the active one for a model (404 if unknown)."""
    try:
        record = registry.set_active_version(req.model_id, req.version)
    except KeyError as exc:
        raise HTTPException(status_code=404, detail=str(exc)) from exc
    return {"ok": True, "model": record.model_dump()}
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
@app.post("/models/load")
def load_model(req: LoadModelReq):
    """Load a registered model into the runtime.

    Resolves the requested version (or the model's active version when none
    is given), then hands the file to the runtime manager. Returns 404 when
    the model or the requested version is unknown.
    """
    try:
        record = registry.get_model(req.model_id)
    except KeyError as exc:
        # Chain the cause so the original KeyError survives in tracebacks (B904).
        raise HTTPException(status_code=404, detail=str(exc)) from exc

    if req.version:
        version = next((v for v in record.versions if v.version == req.version), None)
        if not version:
            raise HTTPException(status_code=404, detail=f"Version {req.version} not found")
    else:
        version = record.active()

    # default_threads == 0 means "unset", so fall through to None and let the
    # runtime pick its own default thread count.
    threads = req.threads or runtime_defaults.default_threads or None
    runtime_manager.load(
        record.runtime, req.model_id, version.file_path, threads=threads
    )
    return {
        "ok": True,
        "loaded_model_id": runtime_manager.loaded_model_id,
        "loaded_runtime": runtime_manager.loaded_runtime,
        "loaded_version": version.version,
        "threads": threads,
    }
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
# ── Source Tracking ────────────────────────────────────────────
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
@app.get("/sources")
def list_sources():
    """List all tracked GitHub release sources."""
    sources = auto_updater.list_sources()
    return {"ok": True, "sources": [asdict(s) for s in sources]}


@app.post("/sources/add")
def add_source(req: AddSourceReq):
    """Start tracking a GitHub repository's releases under a model ID (400 on duplicates/bad URLs)."""
    try:
        source = auto_updater.add_source(
            repo_url=req.repo_url,
            model_id=req.model_id,
            runtime=req.runtime,
            asset_pattern=req.asset_pattern,
            license=req.license,
        )
    except ValueError as exc:
        # Chain the cause so the original error is visible in tracebacks (B904).
        raise HTTPException(status_code=400, detail=str(exc)) from exc
    return {"ok": True, "source": asdict(source)}


@app.delete("/sources/{model_id}")
def remove_source(model_id: str):
    """Stop tracking a source. Idempotent: unknown IDs are silently ignored."""
    auto_updater.remove_source(model_id)
    return {"ok": True}
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
# ── Auto-Update ────────────────────────────────────────────────
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
@app.post("/models/update")
def check_all_updates():
    """Check all tracked sources for new model releases."""
    results = auto_updater.check_updates()
    return {
        "ok": True,
        "results": [asdict(r) for r in results],
    }


@app.post("/models/update/{model_id}")
def check_model_updates(model_id: str):
    """Check a specific tracked source for new releases (404 if untracked)."""
    try:
        results = auto_updater.check_updates(model_id=model_id)
    except KeyError as exc:
        # Chain the cause so the original KeyError survives in tracebacks (B904).
        raise HTTPException(status_code=404, detail=str(exc)) from exc
    return {
        "ok": True,
        "results": [asdict(r) for r in results],
    }
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
# ── GitHub Discovery ──────────────────────────────────────────
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
@app.get("/github/discover")
def github_discover(repo_url: str):
    """Discover model assets in a GitHub repository's releases (400 on any failure)."""
    try:
        owner, repo = parse_repo_url(repo_url)
        assets = discover_model_assets(owner, repo)
    # Broad catch is deliberate: URL parsing and the GitHub query can fail in
    # many ways, and all of them surface to the client as a 400 with the cause
    # chained (B904) for server-side debugging.
    except Exception as exc:
        raise HTTPException(status_code=400, detail=str(exc)) from exc
    return {"ok": True, "owner": owner, "repo": repo, "assets": assets}
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
# ── Chat ───────────────────────────────────────────────────────
|
|
250
|
+
|
|
251
|
+
|
|
252
|
+
@app.post("/chat")
def chat(req: ChatReq):
    """Run a chat completion against the currently loaded model.

    Persists all incoming messages plus the assistant reply to the
    conversation store. When ``req.stream`` is true, responds with a
    Server-Sent-Events stream of per-token chunks; otherwise returns the
    full completion in one JSON body.
    """
    runtime_manager.ensure_loaded()
    # Refuse requests that target a model other than the one in memory.
    if runtime_manager.loaded_model_id != req.model_id:
        raise HTTPException(status_code=400, detail="Requested model is not loaded")

    # Reuse the caller's conversation or mint a fresh ID.
    conv_id = req.conversation_id or str(uuid.uuid4())
    for msg in req.messages:
        conversation_store.append_message(conv_id, msg.role, msg.content)

    # Inject system prompt — this is what makes it "your own" coding AI
    has_system = any(m.role == "system" for m in req.messages)
    full_messages = build_messages(
        req.messages,
        include_system=not has_system,
        include_fewshot=False,
    )

    def stream():
        # Accumulate tokens so the full reply can be persisted after the
        # stream completes.
        chunks = []
        for token in runtime_manager.runtime.stream_chat(
            full_messages,
            temperature=req.temperature,
            max_tokens=req.max_tokens,
        ):
            chunks.append(token)
            yield f"data: {json.dumps({'token': token})}\n\n"
        # NOTE(review): the streamed reply is stripped before persisting but the
        # non-streaming path below stores the raw text — confirm this asymmetry
        # is intended.
        final = "".join(chunks).strip()
        conversation_store.append_message(conv_id, "assistant", final)
        yield f"data: {json.dumps({'done': True, 'conversation_id': conv_id})}\n\n"

    if req.stream:
        return StreamingResponse(stream(), media_type="text/event-stream")

    # Non-streaming path: one synchronous completion.
    text = runtime_manager.runtime.chat(
        full_messages,
        temperature=req.temperature,
        max_tokens=req.max_tokens,
    )
    conversation_store.append_message(conv_id, "assistant", text)
    return {"ok": True, "output": text, "conversation_id": conv_id}
|
|
293
|
+
|
|
294
|
+
|
|
295
|
+
# ── Conversations ──────────────────────────────────────────────
|
|
296
|
+
|
|
297
|
+
|
|
298
|
+
@app.post("/conversations")
def create_conversation(req: CreateConversationReq):
    """Create a new, empty conversation with the given title."""
    conv = conversation_store.create(req.title)
    return {"ok": True, "conversation": conv}


@app.get("/conversations")
def list_conversations():
    """List every stored conversation."""
    return {"ok": True, "conversations": conversation_store.list_all()}


@app.get("/conversations/{conversation_id}")
def get_conversation(conversation_id: str):
    """Fetch one conversation by ID (404 if unknown)."""
    try:
        conv = conversation_store.get(conversation_id)
    except KeyError as exc:
        # Chain the cause so the original KeyError survives in tracebacks (B904).
        raise HTTPException(status_code=404, detail="Conversation not found") from exc
    return {"ok": True, "conversation": conv}


@app.delete("/conversations/{conversation_id}")
def delete_conversation(conversation_id: str):
    """Delete a conversation. Presumably idempotent — TODO confirm store semantics."""
    conversation_store.delete(conversation_id)
    return {"ok": True}
|
|
322
|
+
|
|
323
|
+
|
|
324
|
+
# ── Static Frontend (production) ──────────────────────────────
|
|
325
|
+
|
|
326
|
+
# Location of the built frontend bundle, relative to this package.
_FRONTEND_DIST = Path(__file__).resolve().parent.parent / "frontend" / "dist"
if _FRONTEND_DIST.is_dir():
    # Serve index.html for SPA routes (must come after API routes)
    from fastapi.responses import FileResponse

    @app.get("/app/{full_path:path}")
    def spa_fallback(full_path: str):
        # Any /app/* path serves the SPA shell; client-side routing takes over.
        return FileResponse(_FRONTEND_DIST / "index.html")

    @app.get("/")
    def root():
        return FileResponse(_FRONTEND_DIST / "index.html")

    # Registered last so it only catches paths no API route matched.
    app.mount("/", StaticFiles(directory=str(_FRONTEND_DIST), html=True), name="static")
|
|
340
|
+
|
|
341
|
+
|
|
342
|
+
def main():
    """Launch the API server (entry point for `ai-server` and direct execution)."""
    import uvicorn

    # The PORT environment variable overrides the configured port.
    port_override = os.environ.get("PORT")
    port = int(port_override) if port_override is not None else int(settings.port)
    uvicorn.run(app, host=settings.host, port=port)
|
|
@@ -0,0 +1,210 @@
|
|
|
1
|
+
"""Auto-update engine for model discovery and version tracking.
|
|
2
|
+
|
|
3
|
+
Maintains a sources.json of tracked GitHub repositories.
|
|
4
|
+
Periodically checks for new releases, compares against the local
|
|
5
|
+
registry (by version tag + SHA256), and downloads only new versions.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import fnmatch
|
|
9
|
+
import json
|
|
10
|
+
import logging
|
|
11
|
+
from dataclasses import asdict, dataclass, field
|
|
12
|
+
from datetime import datetime, timezone
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
|
|
15
|
+
from .config import settings
|
|
16
|
+
from .github_client import (
|
|
17
|
+
discover_model_assets,
|
|
18
|
+
extract_version_number,
|
|
19
|
+
parse_repo_url,
|
|
20
|
+
)
|
|
21
|
+
from .model_registry import ModelRegistry
|
|
22
|
+
from .schemas import DownloadModelReq
|
|
23
|
+
|
|
24
|
+
logger = logging.getLogger("ai-platform.updater")
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@dataclass
class TrackedSource:
    """A GitHub repository whose releases are watched for new model files."""

    model_id: str  # local registry ID the downloads are filed under
    owner: str  # GitHub repository owner
    repo: str  # GitHub repository name
    runtime: str = "llama_cpp"  # runtime used when registering downloads
    asset_pattern: str = "*.gguf"  # glob pattern to filter assets
    license: str | None = None  # license string recorded with downloads
    last_checked: str | None = None  # ISO-8601 timestamp of the last update check
    auto_update: bool = True  # False excludes this source from bulk update checks
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
@dataclass
class UpdateResult:
    """Outcome of checking one tracked source for new releases."""

    model_id: str
    new_versions: list[str] = field(default_factory=list)  # release tags newly downloaded
    skipped_duplicate: int = 0  # downloaded, but SHA256 matched an existing version
    skipped_existing: int = 0  # tag already present locally; not downloaded
    errors: list[str] = field(default_factory=list)  # human-readable failure notes
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class AutoUpdater:
    """Tracks GitHub release sources and downloads new model versions.

    Sources are persisted to ``config_dir/sources.json``; deduplication is
    done by release tag and, after download, by SHA256.
    """

    def __init__(self, registry: ModelRegistry) -> None:
        self.registry = registry
        self.sources_path = settings.config_dir / "sources.json"
        # Seed an empty JSON list so list_sources() never has to special-case
        # a missing file.
        if not self.sources_path.exists():
            self.sources_path.write_text("[]", encoding="utf-8")

    # ── Source management ──────────────────────────────────────

    def list_sources(self) -> list[TrackedSource]:
        """Load all tracked sources from sources.json."""
        raw = json.loads(self.sources_path.read_text(encoding="utf-8"))
        return [TrackedSource(**s) for s in raw]

    def add_source(
        self,
        repo_url: str,
        model_id: str,
        runtime: str = "llama_cpp",
        asset_pattern: str = "*.gguf",
        license: str | None = None,
    ) -> TrackedSource:
        """Track a new repository; raises ValueError if model_id is already tracked
        or (presumably) if the repo URL cannot be parsed — confirm parse_repo_url's
        failure mode."""
        owner, repo = parse_repo_url(repo_url)

        sources = self.list_sources()
        for s in sources:
            if s.model_id == model_id:
                raise ValueError(f"Source already tracked: {model_id}")

        source = TrackedSource(
            model_id=model_id,
            owner=owner,
            repo=repo,
            runtime=runtime,
            asset_pattern=asset_pattern,
            license=license,
        )
        sources.append(source)
        self._save_sources(sources)
        logger.info("Added source: %s/%s -> %s", owner, repo, model_id)
        return source

    def remove_source(self, model_id: str) -> None:
        """Stop tracking a source. Idempotent: unknown IDs are ignored."""
        sources = self.list_sources()
        sources = [s for s in sources if s.model_id != model_id]
        self._save_sources(sources)
        logger.info("Removed source: %s", model_id)

    # ── Update checking ───────────────────────────────────────

    def check_updates(self, model_id: str | None = None) -> list[UpdateResult]:
        """Check one or all tracked sources for new releases."""
        sources = self.list_sources()
        if model_id:
            sources = [s for s in sources if s.model_id == model_id]
            if not sources:
                raise KeyError(f"No tracked source: {model_id}")

        results = []
        for source in sources:
            # auto_update=False sources are skipped in bulk checks but still
            # honored when the caller names them explicitly.
            if not source.auto_update and model_id is None:
                continue
            result = self._check_source(source)
            results.append(result)

        # Update last_checked timestamps
        # (re-read the file so sources modified during the check are kept).
        all_sources = self.list_sources()
        checked_ids = {r.model_id for r in results}
        for s in all_sources:
            if s.model_id in checked_ids:
                s.last_checked = datetime.now(timezone.utc).isoformat()
        self._save_sources(all_sources)

        return results

    def _check_source(self, source: TrackedSource) -> UpdateResult:
        """Query one source's releases; download and register anything new."""
        result = UpdateResult(model_id=source.model_id)

        # Network failures are reported in the result, never raised.
        try:
            assets = discover_model_assets(source.owner, source.repo)
        except Exception as exc:
            result.errors.append(f"Failed to query GitHub: {exc}")
            return result

        # Filter by asset pattern (case-insensitive on both sides)
        matching = [
            a for a in assets
            if fnmatch.fnmatch(a["asset_name"].lower(), source.asset_pattern.lower())
        ]

        if not matching:
            logger.info("No matching assets for %s (pattern: %s)", source.model_id, source.asset_pattern)
            return result

        # Get existing version tags and hashes for deduplication
        existing_tags = set()
        existing_hashes = set()
        try:
            record = self.registry.get_model(source.model_id)
            for v in record.versions:
                if v.source_url:
                    existing_tags.add(self._tag_from_url_or_version(v))
                if v.sha256:
                    existing_hashes.add(v.sha256.lower())
        except KeyError:
            pass  # Model not yet registered

        for asset in matching:
            tag = asset["tag"]
            normalized_tag = extract_version_number(tag)

            # Dedup by version tag (raw or normalized form)
            if tag in existing_tags or normalized_tag in existing_tags:
                result.skipped_existing += 1
                logger.debug("Skipping existing tag %s for %s", tag, source.model_id)
                continue

            # Download and register
            try:
                req = DownloadModelReq(
                    model_id=source.model_id,
                    source_url=asset["download_url"],
                    runtime=source.runtime,
                    filename=asset["asset_name"],
                    license=source.license,
                    format=asset.get("format"),
                )
                record = self.registry.download_and_register(req)

                # Check SHA256 dedup after download — duplicates are counted
                # but the downloaded version entry is kept.
                latest = record.active()
                if latest.sha256 and latest.sha256.lower() in existing_hashes:
                    result.skipped_duplicate += 1
                    logger.info(
                        "Downloaded %s tag %s but SHA256 matches existing version (kept as new version entry)",
                        source.model_id,
                        tag,
                    )
                else:
                    existing_hashes.add(latest.sha256.lower() if latest.sha256 else "")

                existing_tags.add(tag)
                existing_tags.add(normalized_tag)
                result.new_versions.append(tag)
                logger.info("Downloaded new version %s for %s", tag, source.model_id)
            except Exception as exc:
                # One failed asset must not abort the rest of the batch.
                result.errors.append(f"Failed to download {tag}: {exc}")
                logger.error("Download failed for %s %s: %s", source.model_id, tag, exc)

        return result

    # ── Helpers ────────────────────────────────────────────────

    def _tag_from_url_or_version(self, version) -> str:
        """Extract a tag-like identifier from a version.

        Prefers the tag segment of a GitHub release download URL; otherwise
        falls back to "v" + the version number.
        """
        if version.source_url and "/releases/download/" in version.source_url:
            parts = version.source_url.split("/releases/download/")
            if len(parts) > 1:
                return parts[1].split("/")[0]
        return f"v{version.version}"

    def _save_sources(self, sources: list[TrackedSource]) -> None:
        """Persist the full source list back to sources.json."""
        data = [asdict(s) for s in sources]
        self.sources_path.write_text(json.dumps(data, indent=2), encoding="utf-8")
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
|
|
3
|
+
from pydantic import BaseModel
|
|
4
|
+
from pydantic_settings import BaseSettings, SettingsConfigDict
|
|
5
|
+
|
|
6
|
+
_BASE = Path(__file__).resolve().parent.parent
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class RuntimeConfig(BaseModel):
    """Default generation/runtime parameters used when a request omits them."""

    default_runtime: str = "llama_cpp"
    default_threads: int = 0  # 0 means "unset"; callers fall back to the runtime's own default
    default_temperature: float = 0.3
    default_max_tokens: int = 512
    default_top_p: float = 0.95
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class AppSettings(BaseSettings):
    """Application settings, overridable via AI_PLATFORM_* environment variables."""

    model_config = SettingsConfigDict(env_prefix="AI_PLATFORM_", extra="ignore")
    host: str = "0.0.0.0"  # bind on all interfaces by default
    port: int = 8090
    models_dir: Path = _BASE / "models"  # downloaded model artifacts
    config_dir: Path = _BASE / "config"  # sources.json and other config files
    data_dir: Path = _BASE / "data"
    log_level: str = "INFO"

    def ensure_dirs(self) -> None:
        """Create the models/config/data directories if they do not exist."""
        self.models_dir.mkdir(parents=True, exist_ok=True)
        self.config_dir.mkdir(parents=True, exist_ok=True)
        self.data_dir.mkdir(parents=True, exist_ok=True)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
# Module-level singletons imported throughout the backend.
settings = AppSettings()
runtime_defaults = RuntimeConfig()
|