sage-ai-cli 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. sage_ai_cli-1.0.0/PKG-INFO +34 -0
  2. sage_ai_cli-1.0.0/README.md +66 -0
  3. sage_ai_cli-1.0.0/backend/__init__.py +1 -0
  4. sage_ai_cli-1.0.0/backend/app.py +346 -0
  5. sage_ai_cli-1.0.0/backend/auto_updater.py +210 -0
  6. sage_ai_cli-1.0.0/backend/config.py +33 -0
  7. sage_ai_cli-1.0.0/backend/conversations.py +77 -0
  8. sage_ai_cli-1.0.0/backend/github_client.py +231 -0
  9. sage_ai_cli-1.0.0/backend/hardware.py +66 -0
  10. sage_ai_cli-1.0.0/backend/model_catalog.py +82 -0
  11. sage_ai_cli-1.0.0/backend/model_registry.py +344 -0
  12. sage_ai_cli-1.0.0/backend/prompt_engine.py +136 -0
  13. sage_ai_cli-1.0.0/backend/runtime_manager.py +37 -0
  14. sage_ai_cli-1.0.0/backend/runtimes/__init__.py +1 -0
  15. sage_ai_cli-1.0.0/backend/runtimes/base.py +27 -0
  16. sage_ai_cli-1.0.0/backend/runtimes/llama_cpp_runtime.py +62 -0
  17. sage_ai_cli-1.0.0/backend/runtimes/onnx_runtime.py +165 -0
  18. sage_ai_cli-1.0.0/backend/runtimes/transformers_runtime.py +69 -0
  19. sage_ai_cli-1.0.0/backend/runtimes/vllm_runtime.py +48 -0
  20. sage_ai_cli-1.0.0/backend/schemas.py +114 -0
  21. sage_ai_cli-1.0.0/pyproject.toml +59 -0
  22. sage_ai_cli-1.0.0/sage/__init__.py +3 -0
  23. sage_ai_cli-1.0.0/sage/config.py +181 -0
  24. sage_ai_cli-1.0.0/sage/core/__init__.py +1 -0
  25. sage_ai_cli-1.0.0/sage/core/engine.py +86 -0
  26. sage_ai_cli-1.0.0/sage/core/renderer.py +150 -0
  27. sage_ai_cli-1.0.0/sage/core/router.py +154 -0
  28. sage_ai_cli-1.0.0/sage/main.py +639 -0
  29. sage_ai_cli-1.0.0/sage/providers/__init__.py +5 -0
  30. sage_ai_cli-1.0.0/sage/providers/base.py +59 -0
  31. sage_ai_cli-1.0.0/sage/providers/gemini.py +127 -0
  32. sage_ai_cli-1.0.0/sage/providers/llama_cpp.py +118 -0
  33. sage_ai_cli-1.0.0/sage_ai_cli.egg-info/PKG-INFO +34 -0
  34. sage_ai_cli-1.0.0/sage_ai_cli.egg-info/SOURCES.txt +49 -0
  35. sage_ai_cli-1.0.0/sage_ai_cli.egg-info/dependency_links.txt +1 -0
  36. sage_ai_cli-1.0.0/sage_ai_cli.egg-info/entry_points.txt +2 -0
  37. sage_ai_cli-1.0.0/sage_ai_cli.egg-info/requires.txt +23 -0
  38. sage_ai_cli-1.0.0/sage_ai_cli.egg-info/top_level.txt +2 -0
  39. sage_ai_cli-1.0.0/setup.cfg +4 -0
  40. sage_ai_cli-1.0.0/tests/test_api.py +153 -0
  41. sage_ai_cli-1.0.0/tests/test_auto_updater.py +65 -0
  42. sage_ai_cli-1.0.0/tests/test_cli.py +236 -0
  43. sage_ai_cli-1.0.0/tests/test_config.py +32 -0
  44. sage_ai_cli-1.0.0/tests/test_conversations.py +67 -0
  45. sage_ai_cli-1.0.0/tests/test_download_script.py +211 -0
  46. sage_ai_cli-1.0.0/tests/test_github_client.py +70 -0
  47. sage_ai_cli-1.0.0/tests/test_hardware.py +19 -0
  48. sage_ai_cli-1.0.0/tests/test_model_catalog.py +119 -0
  49. sage_ai_cli-1.0.0/tests/test_model_registry.py +161 -0
  50. sage_ai_cli-1.0.0/tests/test_prompt_engine.py +119 -0
  51. sage_ai_cli-1.0.0/tests/test_schemas.py +142 -0
@@ -0,0 +1,34 @@
1
+ Metadata-Version: 2.4
2
+ Name: sage-ai-cli
3
+ Version: 1.0.0
4
+ Summary: Sage — a local-first AI coding CLI (like Claude Code, using free/open models)
5
+ Author: Layne Faler
6
+ License: MIT
7
+ Project-URL: Repository, https://github.com/laynef/claude-ai-clone
8
+ Keywords: ai,cli,llm,coding,local,gemini,gguf,llama
9
+ Classifier: Development Status :: 4 - Beta
10
+ Classifier: Environment :: Console
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: Programming Language :: Python :: 3.11
13
+ Classifier: Programming Language :: Python :: 3.12
14
+ Classifier: Topic :: Software Development :: Code Generators
15
+ Requires-Python: >=3.11
16
+ Requires-Dist: typer>=0.12.0
17
+ Requires-Dist: rich>=13.0.0
18
+ Requires-Dist: httpx>=0.27.0
19
+ Provides-Extra: local
20
+ Requires-Dist: llama-cpp-python>=0.2.90; extra == "local"
21
+ Provides-Extra: server
22
+ Requires-Dist: fastapi>=0.115.0; extra == "server"
23
+ Requires-Dist: uvicorn[standard]>=0.30.0; extra == "server"
24
+ Requires-Dist: pydantic>=2.9.0; extra == "server"
25
+ Requires-Dist: pydantic-settings>=2.5.0; extra == "server"
26
+ Provides-Extra: all
27
+ Requires-Dist: llama-cpp-python>=0.2.90; extra == "all"
28
+ Requires-Dist: fastapi>=0.115.0; extra == "all"
29
+ Requires-Dist: uvicorn[standard]>=0.30.0; extra == "all"
30
+ Requires-Dist: pydantic>=2.9.0; extra == "all"
31
+ Requires-Dist: pydantic-settings>=2.5.0; extra == "all"
32
+ Provides-Extra: dev
33
+ Requires-Dist: pytest>=8.0; extra == "dev"
34
+ Requires-Dist: httpx>=0.27.0; extra == "dev"
@@ -0,0 +1,66 @@
1
+ # Local AI Platform
2
+
3
+ ## File Structure
4
+
5
+ ```text
6
+ ai-platform/
7
+ ├── backend/
8
+ ├── frontend/
9
+ ├── cli/
10
+ ├── models/
11
+ └── config/
12
+ ```
13
+
14
+ ## Model Sources
15
+
16
+ | Model | Runtime | Source | License | Approx Size | Suggested Hardware |
17
+ |---|---|---|---|---|---|
18
+ | Llama 3.2 3B Instruct GGUF Q4_K_M | llama.cpp | https://github.com/ggml-org/llama.cpp | Meta Llama 3.2 Community License | ~2.0 GB | 8 GB RAM CPU |
19
+ | Qwen2.5 Coder 7B Instruct GGUF Q4_K_M | llama.cpp | https://github.com/QwenLM/Qwen2.5-Coder | Apache-2.0 | ~4.5 GB | 16 GB RAM CPU |
20
+ | Mistral 7B Instruct v0.3 | transformers | https://github.com/mistralai/mistral-inference | Apache-2.0 | ~13 GB fp16 | 16+ GB VRAM or 32 GB RAM |
21
+
22
+ ## Quick Start
23
+
24
+ 1. Backend:
25
+ ```bash
26
+ cd ai-platform
27
+ python -m venv .venv
28
+ source .venv/bin/activate
29
+ pip install -r backend/requirements.txt
30
+ cp .env.example .env
31
+ PYTHONPATH=ai-platform uvicorn backend.app:app --host 0.0.0.0 --port 8090
32
+ ```
33
+
34
+ 2. Frontend:
35
+ ```bash
36
+ cd ai-platform/frontend
37
+ npm install
38
+ npm run dev
39
+ ```
40
+
41
+ 3. CLI:
42
+ ```bash
43
+ cd ai-platform/cli
44
+ pip install .
45
+ ai --host http://127.0.0.1:8090 list
46
+ ```
47
+
48
+ ## Model Download Examples
49
+
50
+ ```bash
51
+ ai --host http://127.0.0.1:8090 download \
52
+ --model-id llama32-q4 \
53
+ --runtime llama_cpp \
54
+ --url https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.90/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf
55
+ ```
56
+
57
+ ```bash
58
+ ai --host http://127.0.0.1:8090 run llama32-q4 --threads 8
59
+ ai --host http://127.0.0.1:8090 chat --model llama32-q4 --temperature 0.2 --max-tokens 512
60
+ ```
61
+
62
+ ## Privacy
63
+
64
+ - All inference is local.
65
+ - No telemetry is implemented.
66
+ - Once models have been downloaded, inference requires no external API calls.
@@ -0,0 +1 @@
1
+
@@ -0,0 +1,346 @@
1
+ import json
2
+ import logging
3
+ import os
4
+ import uuid
5
+ from dataclasses import asdict
6
+ from pathlib import Path
7
+
8
+ from fastapi import FastAPI, HTTPException
9
+ from fastapi.middleware.cors import CORSMiddleware
10
+ from fastapi.responses import StreamingResponse
11
+ from fastapi.staticfiles import StaticFiles
12
+
13
+ from .auto_updater import AutoUpdater
14
+ from .config import runtime_defaults, settings
15
+ from .conversations import ConversationStore
16
+ from .github_client import discover_model_assets, parse_repo_url
17
+ from .hardware import detect_hardware_summary
18
+ from .model_catalog import filter_by_ram, get_recommended_models, list_catalog_models
19
+ from .model_registry import ModelRegistry
20
+ from .prompt_engine import build_messages
21
+ from .runtime_manager import RuntimeManager
22
+ from .schemas import (
23
+ AddSourceReq,
24
+ ChatMessage,
25
+ ChatReq,
26
+ CreateConversationReq,
27
+ DownloadModelReq,
28
+ ImportModelReq,
29
+ LoadModelReq,
30
+ SetActiveVersionReq,
31
+ )
32
+
33
+ settings.ensure_dirs()
34
+ logging.basicConfig(level=getattr(logging, settings.log_level.upper(), logging.INFO))
35
+ logger = logging.getLogger("ai-platform")
36
+
37
+ app = FastAPI(title="Local AI Platform", version="3.0.0")
38
+ app.add_middleware(
39
+ CORSMiddleware,
40
+ allow_origins=["*"],
41
+ allow_credentials=False,
42
+ allow_methods=["*"],
43
+ allow_headers=["*"],
44
+ )
45
+
46
+ registry = ModelRegistry()
47
+ runtime_manager = RuntimeManager()
48
+ conversation_store = ConversationStore()
49
+ auto_updater = AutoUpdater(registry)
50
+
51
+
52
# ── Health & Hardware ──────────────────────────────────────────


@app.get("/health")
def health():
    """Liveness probe: service version plus the currently loaded model/runtime."""
    status = {"ok": True, "version": "3.0.0"}
    status["loaded_model_id"] = runtime_manager.loaded_model_id
    status["loaded_runtime"] = runtime_manager.loaded_runtime
    return status


@app.get("/hardware")
def hardware():
    """Describe the host hardware as reported by the detection helper."""
    return detect_hardware_summary()


# ── Model Catalog (free models from GitHub) ────────────────────


@app.get("/catalog")
def catalog():
    """Browse the curated catalog of free, open-weight models."""
    entries = list_catalog_models()
    return {"ok": True, "models": entries}


@app.get("/catalog/recommended")
def catalog_recommended():
    """Get recommended starter models."""
    entries = get_recommended_models()
    return {"ok": True, "models": entries}


@app.get("/catalog/fits-ram")
def catalog_fits_ram(max_gb: float = 8.0):
    """Get models that fit within a RAM budget."""
    entries = filter_by_ram(max_gb)
    return {"ok": True, "models": entries}
89
+
90
+
91
# ── Models ─────────────────────────────────────────────────────


@app.get("/models")
def list_models():
    """List every registered model with its full metadata."""
    return {"ok": True, "models": [m.model_dump() for m in registry.list_models()]}


@app.get("/models/names")
def model_names():
    """Return all known model IDs (from registry + tracked sources) for autocomplete."""
    ids = {m.model_id for m in registry.list_models()}
    ids |= {s.model_id for s in auto_updater.list_sources()}
    return {"ok": True, "names": sorted(ids)}


@app.get("/models/{model_id}")
def get_model(model_id: str):
    """Fetch a single model record; 404 if the ID is unknown."""
    try:
        record = registry.get_model(model_id)
    except KeyError as exc:
        # Chain the cause so server tracebacks keep the original KeyError.
        raise HTTPException(status_code=404, detail=str(exc)) from exc
    return {"ok": True, "model": record.model_dump()}


@app.get("/models/{model_id}/versions")
def list_versions(model_id: str):
    """List all versions of a model; 404 if the ID is unknown."""
    try:
        versions = registry.list_versions(model_id)
    except KeyError as exc:
        raise HTTPException(status_code=404, detail=str(exc)) from exc
    return {"ok": True, "versions": [v.model_dump() for v in versions]}
123
+
124
+
125
+ @app.post("/models/download")
126
+ def download_model(req: DownloadModelReq):
127
+ try:
128
+ record = registry.download_and_register(req)
129
+ except ValueError as exc:
130
+ raise HTTPException(status_code=400, detail=str(exc))
131
+ return {"ok": True, "model": record.model_dump()}
132
+
133
+
134
+ @app.post("/models/import")
135
+ def import_model(req: ImportModelReq):
136
+ try:
137
+ record = registry.import_local(req)
138
+ except (ValueError, FileNotFoundError) as exc:
139
+ raise HTTPException(status_code=400, detail=str(exc))
140
+ return {"ok": True, "model": record.model_dump()}
141
+
142
+
143
+ @app.post("/models/set-version")
144
+ def set_version(req: SetActiveVersionReq):
145
+ try:
146
+ record = registry.set_active_version(req.model_id, req.version)
147
+ except KeyError as exc:
148
+ raise HTTPException(status_code=404, detail=str(exc))
149
+ return {"ok": True, "model": record.model_dump()}
150
+
151
+
152
+ @app.post("/models/load")
153
+ def load_model(req: LoadModelReq):
154
+ try:
155
+ record = registry.get_model(req.model_id)
156
+ except KeyError as exc:
157
+ raise HTTPException(status_code=404, detail=str(exc))
158
+
159
+ if req.version:
160
+ version = next((v for v in record.versions if v.version == req.version), None)
161
+ if not version:
162
+ raise HTTPException(status_code=404, detail=f"Version {req.version} not found")
163
+ else:
164
+ version = record.active()
165
+
166
+ threads = req.threads or runtime_defaults.default_threads or None
167
+ runtime_manager.load(
168
+ record.runtime, req.model_id, version.file_path, threads=threads
169
+ )
170
+ return {
171
+ "ok": True,
172
+ "loaded_model_id": runtime_manager.loaded_model_id,
173
+ "loaded_runtime": runtime_manager.loaded_runtime,
174
+ "loaded_version": version.version,
175
+ "threads": threads,
176
+ }
177
+
178
+
179
# ── Source Tracking ────────────────────────────────────────────


@app.get("/sources")
def list_sources():
    """List all tracked GitHub release sources."""
    sources = auto_updater.list_sources()
    return {"ok": True, "sources": [asdict(s) for s in sources]}


@app.post("/sources/add")
def add_source(req: AddSourceReq):
    """Track a GitHub repo as a model source; 400 on bad URL or duplicate ID."""
    try:
        source = auto_updater.add_source(
            repo_url=req.repo_url,
            model_id=req.model_id,
            runtime=req.runtime,
            asset_pattern=req.asset_pattern,
            license=req.license,
        )
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc)) from exc
    return {"ok": True, "source": asdict(source)}


@app.delete("/sources/{model_id}")
def remove_source(model_id: str):
    """Stop tracking a source. Idempotent: unknown IDs still return ok."""
    auto_updater.remove_source(model_id)
    return {"ok": True}
207
+
208
+
209
# ── Auto-Update ────────────────────────────────────────────────


@app.post("/models/update")
def check_all_updates():
    """Check all tracked sources for new model releases."""
    results = auto_updater.check_updates()
    return {
        "ok": True,
        "results": [asdict(r) for r in results],
    }


@app.post("/models/update/{model_id}")
def check_model_updates(model_id: str):
    """Check a specific tracked source for new releases; 404 if untracked."""
    try:
        results = auto_updater.check_updates(model_id=model_id)
    except KeyError as exc:
        raise HTTPException(status_code=404, detail=str(exc)) from exc
    return {
        "ok": True,
        "results": [asdict(r) for r in results],
    }
233
+
234
+
235
# ── GitHub Discovery ──────────────────────────────────────────


@app.get("/github/discover")
def github_discover(repo_url: str):
    """Discover model assets in a GitHub repository's releases."""
    try:
        owner, repo = parse_repo_url(repo_url)
        assets = discover_model_assets(owner, repo)
    except Exception as exc:
        # Boundary handler: URL parsing and the GitHub API can fail in many
        # ways; surface them all as a 400 carrying the underlying message.
        raise HTTPException(status_code=400, detail=str(exc)) from exc
    return {"ok": True, "owner": owner, "repo": repo, "assets": assets}
247
+
248
+
249
# ── Chat ───────────────────────────────────────────────────────


@app.post("/chat")
def chat(req: ChatReq):
    """Run a chat completion against the currently loaded model.

    Persists the incoming messages to the conversation store (creating a
    new conversation id when none is supplied), then either streams tokens
    back as Server-Sent Events (req.stream) or returns the full completion.
    Returns 400 when the requested model is not the one currently loaded.
    """
    runtime_manager.ensure_loaded()
    if runtime_manager.loaded_model_id != req.model_id:
        raise HTTPException(status_code=400, detail="Requested model is not loaded")

    conv_id = req.conversation_id or str(uuid.uuid4())
    for msg in req.messages:
        conversation_store.append_message(conv_id, msg.role, msg.content)

    # Inject system prompt — this is what makes it "your own" coding AI
    has_system = any(m.role == "system" for m in req.messages)
    full_messages = build_messages(
        req.messages,
        include_system=not has_system,
        include_fewshot=False,
    )

    def stream():
        # SSE generator: one `data: {"token": ...}` event per token, then a
        # terminal `done` event carrying the conversation id.
        chunks = []
        for token in runtime_manager.runtime.stream_chat(
            full_messages,
            temperature=req.temperature,
            max_tokens=req.max_tokens,
        ):
            chunks.append(token)
            yield f"data: {json.dumps({'token': token})}\n\n"
        # NOTE(review): if the client disconnects mid-stream this code may
        # never run, so the assistant turn is not persisted — confirm intended.
        final = "".join(chunks).strip()
        conversation_store.append_message(conv_id, "assistant", final)
        yield f"data: {json.dumps({'done': True, 'conversation_id': conv_id})}\n\n"

    if req.stream:
        return StreamingResponse(stream(), media_type="text/event-stream")

    # Non-streaming path: block until the full completion is available.
    text = runtime_manager.runtime.chat(
        full_messages,
        temperature=req.temperature,
        max_tokens=req.max_tokens,
    )
    conversation_store.append_message(conv_id, "assistant", text)
    return {"ok": True, "output": text, "conversation_id": conv_id}
293
+
294
+
295
# ── Conversations ──────────────────────────────────────────────


@app.post("/conversations")
def create_conversation(req: CreateConversationReq):
    """Create a new conversation with the given title."""
    conv = conversation_store.create(req.title)
    return {"ok": True, "conversation": conv}


@app.get("/conversations")
def list_conversations():
    """List all stored conversations."""
    return {"ok": True, "conversations": conversation_store.list_all()}


@app.get("/conversations/{conversation_id}")
def get_conversation(conversation_id: str):
    """Fetch one conversation; 404 if the ID is unknown."""
    try:
        conv = conversation_store.get(conversation_id)
    except KeyError as exc:
        raise HTTPException(status_code=404, detail="Conversation not found") from exc
    return {"ok": True, "conversation": conv}


@app.delete("/conversations/{conversation_id}")
def delete_conversation(conversation_id: str):
    """Delete a conversation by ID."""
    conversation_store.delete(conversation_id)
    return {"ok": True}
322
+
323
+
324
# ── Static Frontend (production) ──────────────────────────────

# Serve the built SPA only when a dist/ directory exists next to backend/.
_FRONTEND_DIST = Path(__file__).resolve().parent.parent / "frontend" / "dist"
if _FRONTEND_DIST.is_dir():
    # Serve index.html for SPA routes (must come after API routes)
    from fastapi.responses import FileResponse

    @app.get("/app/{full_path:path}")
    def spa_fallback(full_path: str):
        # Client-side routing: every /app/* path gets the SPA shell.
        return FileResponse(_FRONTEND_DIST / "index.html")

    @app.get("/")
    def root():
        return FileResponse(_FRONTEND_DIST / "index.html")

    # Catch-all static mount registered last so it never shadows the API
    # routes defined above.
    app.mount("/", StaticFiles(directory=str(_FRONTEND_DIST), html=True), name="static")
340
+
341
+
342
def main():
    """Entry point for `ai-server` console script and direct execution."""
    import uvicorn

    # PORT env var (if set) takes precedence over the configured port.
    listen_port = int(os.environ.get("PORT", settings.port))
    uvicorn.run(app, host=settings.host, port=listen_port)
@@ -0,0 +1,210 @@
1
+ """Auto-update engine for model discovery and version tracking.
2
+
3
+ Maintains a sources.json of tracked GitHub repositories.
4
+ Periodically checks for new releases, compares against the local
5
+ registry (by version tag + SHA256), and downloads only new versions.
6
+ """
7
+
8
+ import fnmatch
9
+ import json
10
+ import logging
11
+ from dataclasses import asdict, dataclass, field
12
+ from datetime import datetime, timezone
13
+ from pathlib import Path
14
+
15
+ from .config import settings
16
+ from .github_client import (
17
+ discover_model_assets,
18
+ extract_version_number,
19
+ parse_repo_url,
20
+ )
21
+ from .model_registry import ModelRegistry
22
+ from .schemas import DownloadModelReq
23
+
24
+ logger = logging.getLogger("ai-platform.updater")
25
+
26
+
27
@dataclass
class TrackedSource:
    """A GitHub repository watched for new model release assets."""

    model_id: str  # registry model this source feeds
    owner: str  # GitHub owner/organization
    repo: str  # GitHub repository name
    runtime: str = "llama_cpp"
    asset_pattern: str = "*.gguf"  # glob pattern to filter assets
    license: str | None = None
    last_checked: str | None = None  # ISO timestamp of the last update check
    auto_update: bool = True  # False: skipped on bulk checks, explicit-only
+
38
+
39
@dataclass
class UpdateResult:
    """Outcome of checking one tracked source for updates."""

    model_id: str
    new_versions: list[str] = field(default_factory=list)  # tags downloaded
    skipped_duplicate: int = 0  # downloaded, but SHA256 matched an existing version
    skipped_existing: int = 0  # release tag already known; not re-downloaded
    errors: list[str] = field(default_factory=list)  # per-asset failure messages
+
47
+
48
class AutoUpdater:
    """Tracks GitHub release sources and pulls new model versions.

    Sources persist in ``<config_dir>/sources.json``; downloads and
    registration are delegated to the :class:`ModelRegistry`.
    """

    def __init__(self, registry: ModelRegistry) -> None:
        self.registry = registry
        self.sources_path = settings.config_dir / "sources.json"
        if not self.sources_path.exists():
            # Seed an empty JSON list so list_sources() can always parse it.
            self.sources_path.write_text("[]", encoding="utf-8")

    # ── Source management ──────────────────────────────────────

    def list_sources(self) -> list[TrackedSource]:
        """Load every tracked source from sources.json."""
        raw = json.loads(self.sources_path.read_text(encoding="utf-8"))
        return [TrackedSource(**s) for s in raw]

    def add_source(
        self,
        repo_url: str,
        model_id: str,
        runtime: str = "llama_cpp",
        asset_pattern: str = "*.gguf",
        license: str | None = None,
    ) -> TrackedSource:
        """Register a GitHub repo as a tracked source for *model_id*.

        Raises ValueError when *model_id* is already tracked (and
        parse_repo_url may raise on a malformed URL).
        """
        owner, repo = parse_repo_url(repo_url)

        sources = self.list_sources()
        for s in sources:
            if s.model_id == model_id:
                raise ValueError(f"Source already tracked: {model_id}")

        source = TrackedSource(
            model_id=model_id,
            owner=owner,
            repo=repo,
            runtime=runtime,
            asset_pattern=asset_pattern,
            license=license,
        )
        sources.append(source)
        self._save_sources(sources)
        logger.info("Added source: %s/%s -> %s", owner, repo, model_id)
        return source

    def remove_source(self, model_id: str) -> None:
        """Delete a tracked source; no-op if it was not tracked."""
        sources = self.list_sources()
        sources = [s for s in sources if s.model_id != model_id]
        self._save_sources(sources)
        logger.info("Removed source: %s", model_id)

    # ── Update checking ───────────────────────────────────────

    def check_updates(self, model_id: str | None = None) -> list[UpdateResult]:
        """Check one or all tracked sources for new releases."""
        sources = self.list_sources()
        if model_id:
            sources = [s for s in sources if s.model_id == model_id]
            if not sources:
                raise KeyError(f"No tracked source: {model_id}")

        results = []
        for source in sources:
            # auto_update=False sources are skipped on bulk checks but still
            # honored when requested explicitly by model_id.
            if not source.auto_update and model_id is None:
                continue
            result = self._check_source(source)
            results.append(result)

        # Update last_checked timestamps
        all_sources = self.list_sources()
        checked_ids = {r.model_id for r in results}
        for s in all_sources:
            if s.model_id in checked_ids:
                s.last_checked = datetime.now(timezone.utc).isoformat()
        self._save_sources(all_sources)

        return results

    def _check_source(self, source: TrackedSource) -> UpdateResult:
        """Fetch release assets for one source and download unseen versions.

        Deduplicates first by release tag (cheap, pre-download), then by
        SHA256 (post-download). Failures are collected per-asset in the
        result rather than aborting the whole check.
        """
        result = UpdateResult(model_id=source.model_id)

        try:
            assets = discover_model_assets(source.owner, source.repo)
        except Exception as exc:  # network/API boundary: report, don't raise
            result.errors.append(f"Failed to query GitHub: {exc}")
            return result

        # Filter by asset pattern
        matching = [
            a for a in assets
            if fnmatch.fnmatch(a["asset_name"].lower(), source.asset_pattern.lower())
        ]

        if not matching:
            logger.info("No matching assets for %s (pattern: %s)", source.model_id, source.asset_pattern)
            return result

        # Get existing version tags and hashes for deduplication
        existing_tags = set()
        existing_hashes = set()
        try:
            record = self.registry.get_model(source.model_id)
            for v in record.versions:
                if v.source_url:
                    existing_tags.add(self._tag_from_url_or_version(v))
                if v.sha256:
                    existing_hashes.add(v.sha256.lower())
        except KeyError:
            pass  # Model not yet registered

        for asset in matching:
            tag = asset["tag"]
            normalized_tag = extract_version_number(tag)

            # Dedup by version tag
            if tag in existing_tags or normalized_tag in existing_tags:
                result.skipped_existing += 1
                logger.debug("Skipping existing tag %s for %s", tag, source.model_id)
                continue

            # Download and register
            try:
                req = DownloadModelReq(
                    model_id=source.model_id,
                    source_url=asset["download_url"],
                    runtime=source.runtime,
                    filename=asset["asset_name"],
                    license=source.license,
                    format=asset.get("format"),
                )
                record = self.registry.download_and_register(req)

                # Check SHA256 dedup after download
                latest = record.active()
                if latest.sha256 and latest.sha256.lower() in existing_hashes:
                    # Counted as duplicate but the version entry is kept.
                    result.skipped_duplicate += 1
                    logger.info(
                        "Downloaded %s tag %s but SHA256 matches existing version (kept as new version entry)",
                        source.model_id,
                        tag,
                    )
                else:
                    existing_hashes.add(latest.sha256.lower() if latest.sha256 else "")

                existing_tags.add(tag)
                existing_tags.add(normalized_tag)
                result.new_versions.append(tag)
                logger.info("Downloaded new version %s for %s", tag, source.model_id)
            except Exception as exc:  # keep scanning the remaining assets
                result.errors.append(f"Failed to download {tag}: {exc}")
                logger.error("Download failed for %s %s: %s", source.model_id, tag, exc)

        return result

    # ── Helpers ────────────────────────────────────────────────

    def _tag_from_url_or_version(self, version) -> str:
        """Extract a tag-like identifier from a version."""
        # GitHub release asset URLs embed the tag after /releases/download/.
        if version.source_url and "/releases/download/" in version.source_url:
            parts = version.source_url.split("/releases/download/")
            if len(parts) > 1:
                return parts[1].split("/")[0]
        return f"v{version.version}"

    def _save_sources(self, sources: list[TrackedSource]) -> None:
        """Persist the full source list, overwriting sources.json."""
        data = [asdict(s) for s in sources]
        self.sources_path.write_text(json.dumps(data, indent=2), encoding="utf-8")
@@ -0,0 +1,33 @@
1
+ from pathlib import Path
2
+
3
+ from pydantic import BaseModel
4
+ from pydantic_settings import BaseSettings, SettingsConfigDict
5
+
6
+ _BASE = Path(__file__).resolve().parent.parent
7
+
8
+
9
class RuntimeConfig(BaseModel):
    """Default generation/runtime knobs applied when a request omits them."""

    default_runtime: str = "llama_cpp"
    # 0 means "unset"; callers treat it as falsy and let the runtime decide.
    default_threads: int = 0
    default_temperature: float = 0.3
    default_max_tokens: int = 512
    default_top_p: float = 0.95
15
+
16
+
17
class AppSettings(BaseSettings):
    """Service settings, overridable via ``AI_PLATFORM_*`` environment vars."""

    model_config = SettingsConfigDict(env_prefix="AI_PLATFORM_", extra="ignore")
    host: str = "0.0.0.0"
    port: int = 8090
    models_dir: Path = _BASE / "models"  # downloaded model files
    config_dir: Path = _BASE / "config"  # registry/sources JSON files
    data_dir: Path = _BASE / "data"  # misc persistent state (consumers not visible here)
    log_level: str = "INFO"

    def ensure_dirs(self) -> None:
        """Create all storage directories if missing (idempotent)."""
        self.models_dir.mkdir(parents=True, exist_ok=True)
        self.config_dir.mkdir(parents=True, exist_ok=True)
        self.data_dir.mkdir(parents=True, exist_ok=True)
30
+
31
+
32
# Module-level singletons shared by the rest of the backend.
settings = AppSettings()
runtime_defaults = RuntimeConfig()