parishad 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- parishad/__init__.py +70 -0
- parishad/__main__.py +10 -0
- parishad/checker/__init__.py +25 -0
- parishad/checker/deterministic.py +644 -0
- parishad/checker/ensemble.py +496 -0
- parishad/checker/retrieval.py +546 -0
- parishad/cli/__init__.py +6 -0
- parishad/cli/code.py +3254 -0
- parishad/cli/main.py +1158 -0
- parishad/cli/prarambh.py +99 -0
- parishad/cli/sthapana.py +368 -0
- parishad/config/modes.py +139 -0
- parishad/config/pipeline.core.yaml +128 -0
- parishad/config/pipeline.extended.yaml +172 -0
- parishad/config/pipeline.fast.yaml +89 -0
- parishad/config/user_config.py +115 -0
- parishad/data/catalog.py +118 -0
- parishad/data/models.json +108 -0
- parishad/memory/__init__.py +79 -0
- parishad/models/__init__.py +181 -0
- parishad/models/backends/__init__.py +247 -0
- parishad/models/backends/base.py +211 -0
- parishad/models/backends/huggingface.py +318 -0
- parishad/models/backends/llama_cpp.py +239 -0
- parishad/models/backends/mlx_lm.py +141 -0
- parishad/models/backends/ollama.py +253 -0
- parishad/models/backends/openai_api.py +193 -0
- parishad/models/backends/transformers_hf.py +198 -0
- parishad/models/costs.py +385 -0
- parishad/models/downloader.py +1557 -0
- parishad/models/optimizations.py +871 -0
- parishad/models/profiles.py +610 -0
- parishad/models/reliability.py +876 -0
- parishad/models/runner.py +651 -0
- parishad/models/tokenization.py +287 -0
- parishad/orchestrator/__init__.py +24 -0
- parishad/orchestrator/config_loader.py +210 -0
- parishad/orchestrator/engine.py +1113 -0
- parishad/orchestrator/exceptions.py +14 -0
- parishad/roles/__init__.py +71 -0
- parishad/roles/base.py +712 -0
- parishad/roles/dandadhyaksha.py +163 -0
- parishad/roles/darbari.py +246 -0
- parishad/roles/majumdar.py +274 -0
- parishad/roles/pantapradhan.py +150 -0
- parishad/roles/prerak.py +357 -0
- parishad/roles/raja.py +345 -0
- parishad/roles/sacheev.py +203 -0
- parishad/roles/sainik.py +427 -0
- parishad/roles/sar_senapati.py +164 -0
- parishad/roles/vidushak.py +69 -0
- parishad/tools/__init__.py +7 -0
- parishad/tools/base.py +57 -0
- parishad/tools/fs.py +110 -0
- parishad/tools/perception.py +96 -0
- parishad/tools/retrieval.py +74 -0
- parishad/tools/shell.py +103 -0
- parishad/utils/__init__.py +7 -0
- parishad/utils/hardware.py +122 -0
- parishad/utils/logging.py +79 -0
- parishad/utils/scanner.py +164 -0
- parishad/utils/text.py +61 -0
- parishad/utils/tracing.py +133 -0
- parishad-0.1.0.dist-info/METADATA +256 -0
- parishad-0.1.0.dist-info/RECORD +68 -0
- parishad-0.1.0.dist-info/WHEEL +4 -0
- parishad-0.1.0.dist-info/entry_points.txt +2 -0
- parishad-0.1.0.dist-info/licenses/LICENSE +21 -0
+++ parishad/models/downloader.py
@@ -0,0 +1,1557 @@
"""
Model Download Manager for Parishad.

Provides unified interface for downloading and managing LLM models from:
- HuggingFace Hub (GGUF files)
- Ollama (via ollama pull)
- LM Studio (symlinks to existing models)

Models are stored in a central directory for easy management.
"""

from __future__ import annotations

import hashlib
import json
import logging
import os
import platform
import shutil
import subprocess
import sys
import tempfile
from dataclasses import dataclass, field
from datetime import datetime
from enum import Enum
from pathlib import Path
from typing import Any, Callable, Iterator, Optional
from urllib.parse import urlparse

logger = logging.getLogger(__name__)


# =============================================================================
# Constants and Configuration
# =============================================================================

# Environment variable for custom model directory
PARISHAD_MODELS_DIR_ENV = "PARISHAD_MODELS_DIR"


def get_config_file_path() -> Path:
    """Get the path to the parishad config file."""
    if platform.system() == "Darwin":
        return Path.home() / ".parishad" / "config.json"
    elif platform.system() == "Windows":
        app_data = os.environ.get("LOCALAPPDATA", os.environ.get("APPDATA", ""))
        if app_data:
            return Path(app_data) / "parishad" / "config.json"
        return Path.home() / ".parishad" / "config.json"
    else:
        return Path.home() / ".parishad" / "config.json"


def get_user_configured_model_dir() -> Optional[Path]:
    """
    Get user-configured model directory from environment or config file.

    Priority:
    1. PARISHAD_MODELS_DIR environment variable
    2. model_dir in ~/.parishad/config.json
    3. None (use default)
    """
    # Check environment variable first
    env_dir = os.environ.get(PARISHAD_MODELS_DIR_ENV)
    if env_dir:
        return Path(env_dir)

    # Check config file
    config_file = get_config_file_path()
    if config_file.exists():
        try:
            with open(config_file) as f:
                config = json.load(f)
            if "model_dir" in config and config["model_dir"]:
                return Path(config["model_dir"])
        except (json.JSONDecodeError, IOError):
            pass

    return None


def set_model_dir(path: str | Path) -> None:
    """
    Set the custom model directory in config file.

    Args:
        path: Path to the directory where models should be stored
    """
    config_file = get_config_file_path()
    config_file.parent.mkdir(parents=True, exist_ok=True)

    # Load existing config or create new
    config = {}
    if config_file.exists():
        try:
            with open(config_file) as f:
                config = json.load(f)
        except (json.JSONDecodeError, IOError):
            pass

    # Update model_dir
    config["model_dir"] = str(Path(path).resolve())

    with open(config_file, "w") as f:
        json.dump(config, f, indent=2)

    logger.info(f"Model directory set to: {path}")


def get_platform_default_model_dir() -> Path:
    """
    Get the default model directory.

    Always uses ~/.parishad/models for consistency across all platforms.
    This ensures a single, predictable location for all Parishad data.
    """
    return Path.home() / ".parishad" / "models"


def get_default_model_dir() -> Path:
    """
    Get the model directory, checking user config first.

    Priority:
    1. PARISHAD_MODELS_DIR environment variable
    2. model_dir in ~/.parishad/config.json
    3. Platform-specific default

    To set a custom directory:
    - Set PARISHAD_MODELS_DIR environment variable, OR
    - Run: parishad config set-model-dir /path/to/models
    """
    user_dir = get_user_configured_model_dir()
    if user_dir:
        return user_dir
    return get_platform_default_model_dir()


# Default paths
DEFAULT_MODEL_DIR = get_default_model_dir()

# Ollama models directory (platform-specific)
if platform.system() == "Windows":
    # Ollama on Windows stores models in USERPROFILE\.ollama\models
    OLLAMA_MODELS_DIR = Path(os.environ.get("USERPROFILE", Path.home())) / ".ollama" / "models"
else:
    OLLAMA_MODELS_DIR = Path.home() / ".ollama" / "models"

LMSTUDIO_MODELS_DIR = Path.home() / ".lmstudio" / "models"

# Alternative LM Studio paths (varies by platform)
LMSTUDIO_ALT_PATHS = [
    Path.home() / ".cache" / "lm-studio" / "models",
    Path.home() / "Library" / "Application Support" / "LM Studio" / "models",  # macOS
    Path(os.environ.get("LOCALAPPDATA", "")) / "LM Studio" / "models" if platform.system() == "Windows" else Path("/nonexistent"),
]
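The resolution order documented in `get_default_model_dir` can be exercised directly. A minimal sketch, assuming the module is importable as `parishad.models.downloader` and that `~/.parishad/config.json` is writable; the directory paths are illustrative only:

```python
import os
from parishad.models.downloader import get_default_model_dir, set_model_dir

# 1. The PARISHAD_MODELS_DIR environment variable wins over everything else.
os.environ["PARISHAD_MODELS_DIR"] = "/tmp/parishad-models"
print(get_default_model_dir())          # -> /tmp/parishad-models

# 2. Without the env var, "model_dir" from ~/.parishad/config.json is used.
del os.environ["PARISHAD_MODELS_DIR"]
set_model_dir("/data/models")           # persists {"model_dir": "/data/models"}
print(get_default_model_dir())          # -> /data/models

# 3. With neither set, the fallback is ~/.parishad/models.
```

Note that the module-level `DEFAULT_MODEL_DIR` above is computed once at import time, so changes made afterwards only affect code that calls `get_default_model_dir()` again.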
class ModelSource(Enum):
    """Source of the model."""
    HUGGINGFACE = "huggingface"
    OLLAMA = "ollama"
    LMSTUDIO = "lmstudio"
    LOCAL = "local"
    UNKNOWN = "unknown"


class ModelFormat(Enum):
    """Model file format."""
    GGUF = "gguf"
    SAFETENSORS = "safetensors"
    PYTORCH = "pytorch"
    OLLAMA = "ollama"
    UNKNOWN = "unknown"


@dataclass
class ModelInfo:
    """Information about a downloaded model."""
    name: str
    source: ModelSource
    format: ModelFormat
    path: Path
    size_bytes: int = 0
    downloaded_at: Optional[datetime] = None
    quantization: Optional[str] = None
    base_model: Optional[str] = None
    metadata: dict = field(default_factory=dict)

    def to_dict(self) -> dict:
        """Convert to dictionary."""
        return {
            "name": self.name,
            "source": self.source.value,
            "format": self.format.value,
            "path": str(self.path),
            "size_bytes": self.size_bytes,
            "downloaded_at": self.downloaded_at.isoformat() if self.downloaded_at else None,
            "quantization": self.quantization,
            "base_model": self.base_model,
            "metadata": self.metadata,
        }

    @classmethod
    def from_dict(cls, data: dict) -> "ModelInfo":
        """Create from dictionary."""
        return cls(
            name=data["name"],
            source=ModelSource(data["source"]),
            format=ModelFormat(data["format"]),
            path=Path(data["path"]),
            size_bytes=data.get("size_bytes", 0),
            downloaded_at=datetime.fromisoformat(data["downloaded_at"]) if data.get("downloaded_at") else None,
            quantization=data.get("quantization"),
            base_model=data.get("base_model"),
            metadata=data.get("metadata", {}),
        )

    @property
    def size_human(self) -> str:
        """Human-readable size."""
        size = self.size_bytes
        for unit in ["B", "KB", "MB", "GB", "TB"]:
            if size < 1024:
                return f"{size:.1f} {unit}"
            size /= 1024
        return f"{size:.1f} PB"


@dataclass
class DownloadProgress:
    """Progress information for downloads."""
    total_bytes: int
    downloaded_bytes: int
    speed_bps: float = 0.0
    eta_seconds: float = 0.0
    model_name: str = ""

    @property
    def percentage(self) -> float:
        """Download percentage."""
        if self.total_bytes == 0:
            return 0.0
        return (self.downloaded_bytes / self.total_bytes) * 100

    @property
    def speed_human(self) -> str:
        """Human-readable speed."""
        speed = self.speed_bps
        for unit in ["B/s", "KB/s", "MB/s", "GB/s"]:
            if speed < 1024:
                return f"{speed:.1f} {unit}"
            speed /= 1024
        return f"{speed:.1f} TB/s"


# Progress callback type
ProgressCallback = Callable[[DownloadProgress], None]
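`ModelInfo` is what the registry persists: `to_dict` produces a JSON-safe mapping (enums become strings, `Path` becomes `str`, `datetime` becomes ISO 8601) and `from_dict` reverses it. A small round-trip sketch; the path and size are made up for illustration:

```python
from datetime import datetime
from pathlib import Path
from parishad.models.downloader import ModelFormat, ModelInfo, ModelSource

info = ModelInfo(
    name="qwen2.5:1.5b",
    source=ModelSource.HUGGINGFACE,
    format=ModelFormat.GGUF,
    path=Path("/data/models/huggingface/qwen2.5-1.5b-instruct-q4_k_m.gguf"),  # hypothetical
    size_bytes=1_200_000_000,
    downloaded_at=datetime.now(),
    quantization="Q4_K_M",
)

payload = info.to_dict()                  # ready for json.dump into config.json
restored = ModelInfo.from_dict(payload)   # round-trips back to an equivalent ModelInfo
print(restored.size_human)                # "1.1 GB"
```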
# =============================================================================
# Model Registry
# =============================================================================


class ModelRegistry:
    """
    Registry of downloaded models.

    Tracks all models downloaded via the download manager.
    Uses the unified ~/.parishad/config.json file.
    """

    def __init__(self, model_dir: Optional[Path] = None):
        """
        Initialize registry.

        Args:
            model_dir: Directory for models (default: platform-specific)
        """
        self.model_dir = Path(model_dir) if model_dir else DEFAULT_MODEL_DIR
        # Use unified config file instead of separate registry
        self.config_file = Path.home() / ".parishad" / "config.json"
        self._models: dict[str, ModelInfo] = {}

        # Ensure directory exists
        self.model_dir.mkdir(parents=True, exist_ok=True)

        # Load existing registry
        self._load()

    def _load(self) -> None:
        """Load registry from unified config file."""
        if self.config_file.exists():
            try:
                with open(self.config_file) as f:
                    data = json.load(f)
                # Models are stored under 'models' key
                self._models = {
                    name: ModelInfo.from_dict(info)
                    for name, info in data.get("models", {}).items()
                }
            except Exception as e:
                logger.warning(f"Failed to load registry from config: {e}")
                self._models = {}

    def _save(self) -> None:
        """Save registry to unified config file."""
        try:
            # Load existing config to preserve other fields
            config = {}
            if self.config_file.exists():
                with open(self.config_file) as f:
                    config = json.load(f)

            # Update models section
            config["models"] = {
                name: info.to_dict()
                for name, info in self._models.items()
            }

            # Write back
            with open(self.config_file, "w") as f:
                json.dump(config, f, indent=2)
        except Exception as e:
            logger.error(f"Failed to save registry: {e}")

    def add(self, model: ModelInfo) -> None:
        """Add or update a model in the registry."""
        self._models[model.name] = model
        self._save()

    def remove(self, name: str) -> Optional[ModelInfo]:
        """Remove a model from the registry."""
        model = self._models.pop(name, None)
        if model:
            self._save()
        return model

    def get(self, name: str) -> Optional[ModelInfo]:
        """Get model by name."""
        return self._models.get(name)

    def list(self, source: Optional[ModelSource] = None) -> list[ModelInfo]:
        """List all models, optionally filtered by source."""
        models = list(self._models.values())
        if source:
            models = [m for m in models if m.source == source]
        return sorted(models, key=lambda m: m.name)

    def find_by_path(self, path: Path) -> Optional[ModelInfo]:
        """Find model by path."""
        path = path.resolve()
        for model in self._models.values():
            if model.path.resolve() == path:
                return model
        return None

    def exists(self, name: str) -> bool:
        """Check if model exists in registry."""
        return name in self._models

    def verify_integrity(self) -> int:
        """
        Verify that all registered models physically exist.
        Removes invalid entries.

        Returns:
            Number of removed entries.
        """
        to_remove = []
        for name, model in self._models.items():
            if model.source == ModelSource.OLLAMA:
                # Ollama models are managed by ollama service, check via list
                # This check is expensive so we might skip or do a lightweight check
                # For now assume if referenced file (json) exists it's ok,
                # or trust OllamaManager.scan_for_models logic.
                if not model.path.exists():
                    to_remove.append(name)
            elif not model.path.exists():
                to_remove.append(name)

        for name in to_remove:
            self.remove(name)

        return len(to_remove)
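In practice the registry is a thin persistence layer over the `models` key of `~/.parishad/config.json`: every `add`/`remove` rewrites the file, and `verify_integrity` prunes entries whose files have disappeared. A usage sketch with a hypothetical local file:

```python
from pathlib import Path
from parishad.models.downloader import ModelFormat, ModelInfo, ModelRegistry, ModelSource

registry = ModelRegistry()   # loads any existing "models" section from config.json

registry.add(ModelInfo(      # add() saves immediately
    name="local:my-model",
    source=ModelSource.LOCAL,
    format=ModelFormat.GGUF,
    path=Path("/data/models/my-model-Q4_K_M.gguf"),   # hypothetical path
    size_bytes=2_000_000_000,
))

print([m.name for m in registry.list(ModelSource.LOCAL)])
print("pruned", registry.verify_integrity(), "stale entries")
```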
# =============================================================================
# HuggingFace Downloader
# =============================================================================


class HuggingFaceDownloader:
    """
    Download GGUF models from HuggingFace Hub.

    Supports downloading specific quantization variants.
    """

    # Popular GGUF model repositories
    POPULAR_MODELS = {
        "qwen2.5:0.5b": ("Qwen/Qwen2.5-0.5B-Instruct-GGUF", "qwen2.5-0.5b-instruct-q4_k_m.gguf"),
        "qwen2.5:1.5b": ("Qwen/Qwen2.5-1.5B-Instruct-GGUF", "qwen2.5-1.5b-instruct-q4_k_m.gguf"),
        "qwen2.5:3b": ("Qwen/Qwen2.5-3B-Instruct-GGUF", "qwen2.5-3b-instruct-q4_k_m.gguf"),
        "qwen2.5:7b": ("Qwen/Qwen2.5-7B-Instruct-GGUF", "qwen2.5-7b-instruct-q4_k_m.gguf"),
        "llama3.2:1b": ("bartowski/Llama-3.2-1B-Instruct-GGUF", "Llama-3.2-1B-Instruct-Q4_K_M.gguf"),
        "llama3.2:3b": ("bartowski/Llama-3.2-3B-Instruct-GGUF", "Llama-3.2-3B-Instruct-Q4_K_M.gguf"),
        "llama3.1:8b": ("bartowski/Meta-Llama-3.1-8B-Instruct-GGUF", "Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf"),
        "phi3:mini": ("microsoft/Phi-3-mini-4k-instruct-gguf", "Phi-3-mini-4k-instruct-q4.gguf"),
        "mistral:7b": ("mistralai/Mistral-7B-Instruct-v0.3-GGUF", "Mistral-7B-Instruct-v0.3-Q4_K_M.gguf"),
        "gemma2:2b": ("bartowski/gemma-2-2b-it-GGUF", "gemma-2-2b-it-Q4_K_M.gguf"),
        "deepseek:1.5b": ("bartowski/DeepSeek-R1-Distill-Qwen-1.5B-GGUF", "DeepSeek-R1-Distill-Qwen-1.5B-Q4_K_M.gguf"),
    }

    def __init__(self, model_dir: Path):
        """
        Initialize downloader.

        Args:
            model_dir: Directory to store downloaded models
        """
        self.model_dir = model_dir
        self.hf_dir = model_dir / "huggingface"
        self.hf_dir.mkdir(parents=True, exist_ok=True)

    def list_available(self) -> dict[str, tuple[str, str]]:
        """List available models with shortcuts."""
        return self.POPULAR_MODELS.copy()

    def resolve_model(self, model_spec: str) -> tuple[str, str]:
        """
        Resolve model specification to repo and filename.

        Args:
            model_spec: Model name (e.g., "qwen2.5:1.5b") or repo/file

        Returns:
            Tuple of (repo_id, filename)
        """
        # Check shortcuts first
        if model_spec in self.POPULAR_MODELS:
            return self.POPULAR_MODELS[model_spec]

        # Parse as repo/filename
        if "/" in model_spec:
            parts = model_spec.split("/")
            if len(parts) >= 3:
                # Format: owner/repo/filename
                repo_id = f"{parts[0]}/{parts[1]}"
                filename = "/".join(parts[2:])
                return repo_id, filename
            elif len(parts) == 2:
                # Just repo, need to find GGUF file
                return model_spec, ""

        raise ValueError(f"Unknown model: {model_spec}. Use format 'owner/repo/file.gguf' or a shortcut like 'qwen2.5:1.5b'")

    def download(
        self,
        model_spec: str,
        quantization: Optional[str] = None,
        progress_callback: Optional[ProgressCallback] = None,
    ) -> ModelInfo:
        """
        Download a model from HuggingFace.

        Args:
            model_spec: Model specification (shortcut or repo/file)
            quantization: Preferred quantization (e.g., "q4_k_m", "q8_0")
            progress_callback: Callback for progress updates

        Returns:
            ModelInfo for the downloaded model
        """
        repo_id, filename = self.resolve_model(model_spec)

        # If no filename, try to find one
        if not filename:
            filename = self._find_gguf_file(repo_id, quantization)

        # Construct download URL
        url = f"https://huggingface.co/{repo_id}/resolve/main/{filename}"

        # Determine local path
        safe_name = repo_id.replace("/", "_")
        local_path = self.hf_dir / safe_name / filename
        local_path.parent.mkdir(parents=True, exist_ok=True)

        # Download with progress
        self._download_file(url, local_path, progress_callback)

        # Determine quantization from filename
        quant = self._extract_quantization(filename)

        # Create model info
        model_name = model_spec if model_spec in self.POPULAR_MODELS else f"hf:{repo_id}/{filename}"

        return ModelInfo(
            name=model_name,
            source=ModelSource.HUGGINGFACE,
            format=ModelFormat.GGUF,
            path=local_path,
            size_bytes=local_path.stat().st_size,
            downloaded_at=datetime.now(),
            quantization=quant,
            base_model=repo_id,
            metadata={
                "repo_id": repo_id,
                "filename": filename,
                "url": url,
            },
        )

    def _find_gguf_file(self, repo_id: str, quantization: Optional[str] = None) -> str:
        """Find a GGUF file in the repository."""
        try:
            # Try using huggingface_hub if available
            from huggingface_hub import list_repo_files

            # Wrap in try-except to handle API failures (rate limits, auth, network)
            try:
                files = list_repo_files(repo_id)
                gguf_files = [f for f in files if f.endswith(".gguf")]

                if not gguf_files:
                    raise ValueError(f"No GGUF files found in {repo_id}")

                # Prefer requested quantization
                if quantization:
                    for f in gguf_files:
                        if quantization.lower() in f.lower():
                            return f

                # Prefer Q4_K_M as default
                for f in gguf_files:
                    if "q4_k_m" in f.lower():
                        return f

                # Return first GGUF file
                return gguf_files[0]

            except Exception as e:
                # If listing fails (or import works but call fails), fall back to guessing
                # This protects against API flakiness
                if isinstance(e, ValueError) and "No GGUF files found" in str(e):
                    raise  # Re-raise valid empty repo errors

                logger.warning(f"Failed to list files in {repo_id}: {e}. Falling back to filename guessing.")
                raise ImportError("Force fallback")  # Trigger fallback logic

        except (ImportError, Exception):
            # Fallback: Guess the filename based on repo name
            # Most GGUF repos (like bartowski) follow: {ModelName}-{Quant}.gguf
            repo_name = repo_id.split("/")[-1]
            quant_suffix = quantization if quantization else "Q4_K_M"

            # Construct standard guess
            # Example: Llama-3.2-3B-Instruct -> Llama-3.2-3B-Instruct-Q4_K_M.gguf
            guessed_filename = f"{repo_name}-{quant_suffix}.gguf"

            logger.info(f"Guessed filename: {guessed_filename}")
            return guessed_filename

    def _extract_quantization(self, filename: str) -> Optional[str]:
        """Extract quantization from filename."""
        filename_lower = filename.lower()

        quantizations = [
            "q2_k", "q3_k_s", "q3_k_m", "q3_k_l",
            "q4_0", "q4_1", "q4_k_s", "q4_k_m",
            "q5_0", "q5_1", "q5_k_s", "q5_k_m",
            "q6_k", "q8_0", "f16", "f32",
        ]

        for quant in quantizations:
            if quant in filename_lower:
                return quant.upper()

        return None

    def _download_file(
        self,
        url: str,
        dest: Path,
        progress_callback: Optional[ProgressCallback] = None,
    ) -> None:
        """Download file with progress tracking."""
        import urllib.request

        # Check if already exists
        if dest.exists():
            logger.info(f"Model already exists: {dest}")
            return

        logger.info(f"Downloading from {url}")

        # Get file size
        try:
            req = urllib.request.Request(url, method="HEAD")
            with urllib.request.urlopen(req) as response:
                total_size = int(response.headers.get("Content-Length", 0))
        except Exception as e:
            logger.error(f"Failed to get file size from {url}: {e}")
            raise RuntimeError(f"Cannot access model at {url}. Error: {e}")

        # Download with progress
        downloaded = 0
        start_time = datetime.now()

        temp_dest = dest.with_suffix(".download")

        try:
            with urllib.request.urlopen(url) as response:
                with open(temp_dest, "wb") as f:
                    while True:
                        chunk = response.read(8192)
                        if not chunk:
                            break

                        f.write(chunk)
                        downloaded += len(chunk)

                        if progress_callback:
                            elapsed = (datetime.now() - start_time).total_seconds()
                            speed = downloaded / elapsed if elapsed > 0 else 0
                            eta = (total_size - downloaded) / speed if speed > 0 else 0

                            progress_callback(DownloadProgress(
                                total_bytes=total_size,
                                downloaded_bytes=downloaded,
                                speed_bps=speed,
                                eta_seconds=eta,
                            ))

            # Move to final location
            temp_dest.rename(dest)
            logger.info(f"Downloaded: {dest}")

        except Exception as e:
            # Clean up partial download
            if temp_dest.exists():
                temp_dest.unlink()
            raise
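The downloader accepts either a catalog shortcut or an explicit spec, and always resolves to a `https://huggingface.co/<repo>/resolve/main/<file>` URL. A sketch, assuming network access; the `owner/repo/filename` spec in the second call is illustrative rather than a verified file:

```python
from pathlib import Path
from parishad.models.downloader import HuggingFaceDownloader, print_progress

dl = HuggingFaceDownloader(Path.home() / ".parishad" / "models")

# Shortcut: repo and filename come straight from POPULAR_MODELS.
print(dl.resolve_model("qwen2.5:1.5b"))
# ('Qwen/Qwen2.5-1.5B-Instruct-GGUF', 'qwen2.5-1.5b-instruct-q4_k_m.gguf')

# Explicit owner/repo/filename spec (filename here is hypothetical).
print(dl.resolve_model("bartowski/gemma-2-2b-it-GGUF/gemma-2-2b-it-Q8_0.gguf"))
# ('bartowski/gemma-2-2b-it-GGUF', 'gemma-2-2b-it-Q8_0.gguf')

# Fetches the file and streams progress to the callback defined later in this module.
info = dl.download("qwen2.5:1.5b", progress_callback=print_progress)
print(info.path, info.size_human)
```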
# =============================================================================
# Ollama Integration
# =============================================================================


class OllamaManager:
    """
    Manage models through Ollama.

    Uses the ollama CLI to pull and manage models.
    """

    def __init__(self, model_dir: Path):
        """
        Initialize Ollama manager.

        Args:
            model_dir: Directory for model symlinks/info
        """
        self.model_dir = model_dir
        self.ollama_dir = model_dir / "ollama"
        self.ollama_dir.mkdir(parents=True, exist_ok=True)

    @staticmethod
    def is_available() -> bool:
        """Check if Ollama is installed and running."""
        try:
            result = subprocess.run(
                ["ollama", "list"],
                capture_output=True,
                text=True,
                timeout=5,
            )
            return result.returncode == 0
        except (subprocess.TimeoutExpired, FileNotFoundError):
            return False

    def list_models(self) -> list[dict]:
        """List models available in Ollama."""
        if not self.is_available():
            return []

        try:
            result = subprocess.run(
                ["ollama", "list"],
                capture_output=True,
                text=True,
            )

            if result.returncode != 0:
                return []

            models = []
            lines = result.stdout.strip().split("\n")

            # Skip header
            for line in lines[1:]:
                if not line.strip():
                    continue

                parts = line.split()
                if len(parts) >= 2:
                    models.append({
                        "name": parts[0],
                        "size": parts[1] if len(parts) > 1 else "unknown",
                    })

            return models

        except Exception as e:
            logger.error(f"Failed to list Ollama models: {e}")
            return []

    def pull(
        self,
        model_name: str,
        progress_callback: Optional[ProgressCallback] = None,
    ) -> ModelInfo:
        """
        Pull a model using Ollama.

        Args:
            model_name: Model name (e.g., "llama3.2:1b", "qwen2.5:0.5b")
            progress_callback: Callback for progress updates

        Returns:
            ModelInfo for the pulled model
        """
        import re

        if not self.is_available():
            raise RuntimeError("Ollama is not installed or not running. Install from https://ollama.ai")

        logger.info(f"Pulling model via Ollama: {model_name}")

        # Run ollama pull with output suppressed (we'll parse stderr for progress)
        process = subprocess.Popen(
            ["ollama", "pull", model_name],
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
            bufsize=1,  # Line buffered
        )

        # Parse Ollama's progress output and update our callback
        # Ollama outputs lines like: "pulling 2bada8a74506: 2% ▕ ▏ 75 MB/4.7 GB"
        size_pattern = re.compile(r'(\d+(?:\.\d+)?)\s*(MB|GB)/(\d+(?:\.\d+)?)\s*(MB|GB)')
        percent_pattern = re.compile(r'(\d+)%')

        last_progress = None

        for line in process.stdout:
            line = line.strip()
            if not line:
                continue

            # Try to parse progress from Ollama output
            if progress_callback:
                # Extract percentage
                percent_match = percent_pattern.search(line)
                size_match = size_pattern.search(line)

                if size_match:
                    # Parse downloaded and total size
                    downloaded = float(size_match.group(1))
                    downloaded_unit = size_match.group(2)
                    total = float(size_match.group(3))
                    total_unit = size_match.group(4)

                    # Convert to bytes
                    downloaded_bytes = int(downloaded * (1024**3 if downloaded_unit == "GB" else 1024**2))
                    total_bytes = int(total * (1024**3 if total_unit == "GB" else 1024**2))

                    progress = DownloadProgress(
                        downloaded_bytes=downloaded_bytes,
                        total_bytes=total_bytes,
                        speed_bps=0.0,
                        model_name=model_name,
                    )

                    # Only update if progress changed significantly
                    if last_progress is None or downloaded_bytes - last_progress >= 1024 * 1024:  # 1MB
                        progress_callback(progress)
                        last_progress = downloaded_bytes

            # Log non-progress lines (like "pulling manifest", "verifying sha256")
            if not percent_pattern.search(line):
                logger.debug(f"Ollama: {line}")

        process.wait()

        if process.returncode != 0:
            raise RuntimeError(f"Failed to pull model: {model_name}")

        # Signal completion
        if progress_callback:
            progress_callback(DownloadProgress(
                downloaded_bytes=1,
                total_bytes=1,
                speed_bps=0.0,
                model_name=model_name,
            ))

        # Get model info
        models = self.list_models()
        model_info = next((m for m in models if m["name"].startswith(model_name)), None)

        # Create a reference file
        ref_file = self.ollama_dir / f"{model_name.replace(':', '_')}.json"
        with open(ref_file, "w") as f:
            json.dump({
                "name": model_name,
                "source": "ollama",
                "pulled_at": datetime.now().isoformat(),
            }, f)

        return ModelInfo(
            name=f"ollama:{model_name}",
            source=ModelSource.OLLAMA,
            format=ModelFormat.OLLAMA,
            path=ref_file,  # Reference file, actual model in Ollama's storage
            size_bytes=0,  # Ollama manages this
            downloaded_at=datetime.now(),
            metadata={
                "ollama_name": model_name,
                "size": model_info.get("size") if model_info else "unknown",
            },
        )

    def get_model_path(self, model_name: str) -> Optional[Path]:
        """Get the path to an Ollama model (for direct access)."""
        # Ollama stores models in a specific structure
        # This is for information only - use ollama CLI for actual inference
        blob_dir = OLLAMA_MODELS_DIR / "blobs"

        if blob_dir.exists():
            # Models are stored as blobs with SHA256 names
            # We can't easily map name to blob, so return the dir
            return blob_dir

        return None
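`pull` does not talk to an Ollama API; it shells out to `ollama pull` and scrapes the textual progress, so only the `NN%` and `X MB/Y GB` fragments are used and speed is reported as 0. A small check of that parsing, reusing the sample line quoted in the code's own comment:

```python
import re

size_pattern = re.compile(r'(\d+(?:\.\d+)?)\s*(MB|GB)/(\d+(?:\.\d+)?)\s*(MB|GB)')
percent_pattern = re.compile(r'(\d+)%')

line = "pulling 2bada8a74506: 2% ▕ ▏ 75 MB/4.7 GB"

m = size_pattern.search(line)
downloaded_bytes = int(float(m.group(1)) * (1024**3 if m.group(2) == "GB" else 1024**2))
total_bytes = int(float(m.group(3)) * (1024**3 if m.group(4) == "GB" else 1024**2))

print(percent_pattern.search(line).group(1))   # "2"
print(downloaded_bytes, total_bytes)           # 78643200 5046586572
```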
# =============================================================================
# LM Studio Integration
# =============================================================================


class LMStudioManager:
    """
    Manage models from LM Studio.

    LM Studio stores GGUF models in a standard directory.
    This manager finds and symlinks those models.
    """

    def __init__(self, model_dir: Path):
        """
        Initialize LM Studio manager.

        Args:
            model_dir: Directory for model symlinks
        """
        self.model_dir = model_dir
        self.lmstudio_dir = model_dir / "lmstudio"
        self.lmstudio_dir.mkdir(parents=True, exist_ok=True)

    @classmethod
    def find_lmstudio_dir(cls) -> Optional[Path]:
        """Find the LM Studio models directory."""
        # Check primary path
        if LMSTUDIO_MODELS_DIR.exists():
            return LMSTUDIO_MODELS_DIR

        # Check alternative paths
        for alt_path in LMSTUDIO_ALT_PATHS:
            if alt_path.exists():
                return alt_path

        return None

    @classmethod
    def is_available(cls) -> bool:
        """Check if LM Studio models are available."""
        return cls.find_lmstudio_dir() is not None

    def list_models(self) -> list[dict]:
        """List models available in LM Studio."""
        lmstudio_dir = self.find_lmstudio_dir()

        if not lmstudio_dir:
            return []

        models = []

        # Walk through the models directory
        for gguf_file in lmstudio_dir.rglob("*.gguf"):
            try:
                size = gguf_file.stat().st_size
                rel_path = gguf_file.relative_to(lmstudio_dir)

                models.append({
                    "name": str(rel_path),
                    "path": gguf_file,
                    "size_bytes": size,
                })
            except Exception as e:
                logger.warning(f"Failed to read model {gguf_file}: {e}")

        return models

    def import_model(self, model_path: str) -> ModelInfo:
        """
        Import a model from LM Studio.

        Args:
            model_path: Relative path within LM Studio models dir

        Returns:
            ModelInfo for the imported model
        """
        lmstudio_dir = self.find_lmstudio_dir()

        if not lmstudio_dir:
            raise RuntimeError("LM Studio models directory not found")

        source_path = lmstudio_dir / model_path

        if not source_path.exists():
            # Try as absolute path
            source_path = Path(model_path)
            if not source_path.exists():
                raise FileNotFoundError(f"Model not found: {model_path}")

        # Create symlink in our directory
        safe_name = Path(model_path).name
        link_path = self.lmstudio_dir / safe_name

        if link_path.exists():
            link_path.unlink()

        link_path.symlink_to(source_path)

        return ModelInfo(
            name=f"lmstudio:{safe_name}",
            source=ModelSource.LMSTUDIO,
            format=ModelFormat.GGUF,
            path=link_path,
            size_bytes=source_path.stat().st_size,
            downloaded_at=datetime.now(),
            quantization=self._extract_quantization(safe_name),
            metadata={
                "original_path": str(source_path),
                "lmstudio_path": model_path,
            },
        )

    def _extract_quantization(self, filename: str) -> Optional[str]:
        """Extract quantization from filename."""
        filename_lower = filename.lower()

        quantizations = [
            "q2_k", "q3_k_s", "q3_k_m", "q3_k_l",
            "q4_0", "q4_1", "q4_k_s", "q4_k_m",
            "q5_0", "q5_1", "q5_k_s", "q5_k_m",
            "q6_k", "q8_0", "f16", "f32",
        ]

        for quant in quantizations:
            if quant in filename_lower:
                return quant.upper()

        return None
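Importing from LM Studio never copies the weights; it creates a symlink under `<model_dir>/lmstudio/` that points back at LM Studio's copy. A sketch; the relative path passed to `import_model` is purely illustrative:

```python
from pathlib import Path
from parishad.models.downloader import LMStudioManager

lms = LMStudioManager(Path.home() / ".parishad" / "models")

if lms.is_available():
    for m in lms.list_models():
        print(m["name"], m["size_bytes"])

    info = lms.import_model("publisher/some-model/some-model-Q4_K_M.gguf")  # hypothetical
    print(info.path, "->", info.metadata["original_path"])
```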
# =============================================================================
# Unified Model Manager
# =============================================================================


class ModelManager:
    """
    Unified interface for managing LLM models.

    Provides a single entry point for:
    - Downloading from HuggingFace
    - Pulling from Ollama
    - Importing from LM Studio
    - Listing all available models

    Usage:
        manager = ModelManager()

        # Download from HuggingFace
        model = manager.download("qwen2.5:1.5b", source="huggingface")

        # Pull from Ollama
        model = manager.download("llama3.2:1b", source="ollama")

        # Import from LM Studio
        model = manager.download("author/model.gguf", source="lmstudio")

        # List all models
        models = manager.list_models()

        # Get model path for inference
        path = manager.get_model_path("qwen2.5:1.5b")
    """

    def __init__(self, model_dir: Optional[Path] = None):
        """
        Initialize model manager.

        Args:
            model_dir: Directory for models (default: platform-specific)
        """
        self.model_dir = Path(model_dir) if model_dir else DEFAULT_MODEL_DIR
        self.model_dir.mkdir(parents=True, exist_ok=True)

        # Initialize components
        self.registry = ModelRegistry(self.model_dir)
        self.huggingface = HuggingFaceDownloader(self.model_dir)
        self.ollama = OllamaManager(self.model_dir)
        self.lmstudio = LMStudioManager(self.model_dir)

        logger.info(f"Model manager initialized: {self.model_dir}")

    def download(
        self,
        model_spec: str,
        source: str = "auto",
        quantization: Optional[str] = None,
        progress_callback: Optional[ProgressCallback] = None,
    ) -> ModelInfo:
        """
        Download or import a model.

        Tries multiple sources as fallbacks if the primary source fails.

        Args:
            model_spec: Model specification (name, path, or URL)
            source: Source to use ("huggingface", "ollama", "lmstudio", or "auto")
            quantization: Preferred quantization (for HuggingFace)
            progress_callback: Progress callback function

        Returns:
            ModelInfo for the downloaded model
        """
        source = source.lower()

        # Auto-detect source
        if source == "auto":
            source = self._detect_source(model_spec)

        # Check if already downloaded
        existing = self.registry.get(model_spec)
        if existing and existing.path.exists():
            logger.info(f"Model already available: {model_spec}")
            return existing

        # Define fallback order based on primary source
        if source == "huggingface":
            sources_to_try = ["huggingface", "ollama", "lmstudio"]
        elif source == "ollama":
            sources_to_try = ["ollama", "huggingface", "lmstudio"]
        elif source == "lmstudio":
            sources_to_try = ["lmstudio", "huggingface", "ollama"]
        else:
            sources_to_try = [source]

        errors = []

        for try_source in sources_to_try:
            try:
                model = self._download_from_source(
                    model_spec, try_source, quantization, progress_callback
                )
                # Register the model
                self.registry.add(model)
                return model
            except Exception as e:
                error_msg = f"{try_source}: {e}"
                errors.append(error_msg)
                logger.warning(f"Failed to download from {try_source}: {e}")
                continue

        # All sources failed
        raise RuntimeError(
            f"Failed to download '{model_spec}' from all sources:\n" +
            "\n".join(f" - {err}" for err in errors)
        )

    def _download_from_source(
        self,
        model_spec: str,
        source: str,
        quantization: Optional[str],
        progress_callback: Optional[ProgressCallback],
    ) -> ModelInfo:
        """Download from a specific source."""
        if source == "huggingface":
            return self.huggingface.download(model_spec, quantization, progress_callback)
        elif source == "ollama":
            # Convert model spec to Ollama format if needed
            ollama_name = self._convert_to_ollama_name(model_spec)
            return self.ollama.pull(ollama_name, progress_callback)
        elif source == "lmstudio":
            return self.lmstudio.import_model(model_spec)
        else:
            raise ValueError(f"Unknown source: {source}")

    def _convert_to_ollama_name(self, model_spec: str) -> str:
        """Convert a model specification to Ollama format."""
        # Common mappings from shortcut to Ollama model name
        ollama_mappings = {
            "qwen2.5:0.5b": "qwen2.5:0.5b",
            "qwen2.5:1.5b": "qwen2.5:1.5b",
            "qwen2.5:3b": "qwen2.5:3b",
            "qwen2.5:7b": "qwen2.5:7b",
            "llama3.2:1b": "llama3.2:1b",
            "llama3.2:3b": "llama3.2:3b",
            "llama3.1:8b": "llama3.1:8b",
            "phi3:mini": "phi3:mini",
            "mistral:7b": "mistral:7b-instruct",
            "gemma2:2b": "gemma2:2b",
            "deepseek:1.5b": "deepseek-r1:1.5b",
            "deepseek:7b": "deepseek-r1:7b",
        }
        return ollama_mappings.get(model_spec, model_spec)

    def _detect_source(self, model_spec: str) -> str:
        """Auto-detect the source for a model specification."""
        # Check shortcuts first
        if model_spec in self.huggingface.POPULAR_MODELS:
            return "huggingface"

        # Check prefixes
        if model_spec.startswith("hf:") or model_spec.startswith("huggingface:"):
            return "huggingface"
        if model_spec.startswith("ollama:"):
            return "ollama"
        if model_spec.startswith("lmstudio:"):
            return "lmstudio"

        # Check if it looks like a HuggingFace repo
        if "/" in model_spec and ".gguf" in model_spec.lower():
            return "huggingface"

        # Check if Ollama has it
        if self.ollama.is_available():
            return "ollama"

        # Default to HuggingFace
        return "huggingface"

    def list_models(self, source: Optional[str] = None) -> list[ModelInfo]:
        """
        List all downloaded models.

        Args:
            source: Filter by source (optional)

        Returns:
            List of ModelInfo objects
        """
        source_enum = ModelSource(source) if source else None
        return self.registry.list(source_enum)

    def get_model_path(self, name: str) -> Optional[Path]:
        """
        Get the path to a model for inference.

        Args:
            name: Model name (e.g., "qwen2.5:7b" or "ollama:qwen2.5:7b")

        Returns:
            Path to model file, or None if not found
        """
        # Try exact name first
        model = self.registry.get(name)

        # Try with ollama: prefix
        if not model:
            model = self.registry.get(f"ollama:{name}")

        # Try with hf: prefix
        if not model:
            model = self.registry.get(f"hf:{name}")

        # Try searching by partial name match
        if not model:
            for registered_name, registered_model in self.registry._models.items():
                if name in registered_name or registered_name.endswith(name):
                    model = registered_model
                    break

        # Check if model exists and return path
        if model and model.path.exists():
            # For symlinks, return the symlink path (not resolved)
            # This allows llama.cpp to load from symlink
            return model.path

        # Try finding directly in ollama folder
        ollama_symlink = self.model_dir / "ollama" / f"{name.replace(':', '_')}.gguf"
        if ollama_symlink.exists():
            return ollama_symlink

        return None

    def remove_model(self, name: str, delete_files: bool = True) -> bool:
        """
        Remove a model from the registry.

        Args:
            name: Model name
            delete_files: Also delete the model files

        Returns:
            True if model was removed
        """
        model = self.registry.remove(name)

        if model and delete_files:
            try:
                if model.path.exists():
                    if model.path.is_dir():
                        shutil.rmtree(model.path)
                    else:
                        model.path.unlink()
                    logger.info(f"Deleted model files: {model.path}")
            except Exception as e:
                logger.error(f"Failed to delete model files: {e}")

        return model is not None

    def get_available_sources(self) -> dict[str, bool]:
        """Get availability status of each source."""
        return {
            "huggingface": True,  # Always available (uses urllib)
            "ollama": self.ollama.is_available(),
            "lmstudio": self.lmstudio.is_available(),
        }

    def scan_for_models(self) -> list[ModelInfo]:
        """
        Scan for models that aren't in the registry.

        Finds GGUF files in the model directory and LM Studio.

        Returns:
            List of newly discovered models
        """
        discovered = []

        # Scan model directory for GGUF files
        search_paths = [self.model_dir]

        # Add default HuggingFace Hub cache
        hf_cache = Path.home() / ".cache" / "huggingface" / "hub"
        if hf_cache.exists():
            search_paths.append(hf_cache)

        # Add default LM Studio cache (MacOS)
        lms_cache = Path.home() / ".cache" / "lm-studio" / "models"
        if lms_cache.exists():
            search_paths.append(lms_cache)

        # Also check standard MacOS Application Support
        # (Where LM Studio often stores them by default on Mac)
        lms_app_support = Path.home() / "Library" / "Application Support" / "LM Studio" / "models"
        if lms_app_support.exists():
            search_paths.append(lms_app_support)

        for base_path in search_paths:
            # Scan for GGUF and Transformers weights
            extensions = ["*.gguf", "*.safetensors", "pytorch_model.bin"]
            candidates = []
            for ext in extensions:
                candidates.extend(base_path.rglob(ext))

            for model_file in candidates:
                if "blobs" in str(model_file): continue

                # Check if already registered
                if self.registry.find_by_path(model_file):
                    continue

                try:
                    size = model_file.stat().st_size

                    # Determine format/backend hint
                    name_prefix = "local"
                    fmt = ModelFormat.GGUF
                    src = ModelSource.LOCAL

                    if model_file.suffix in [".safetensors", ".bin"]:
                        fmt = ModelFormat.SAFETENSORS if model_file.suffix == ".safetensors" else ModelFormat.PYTORCH

                        # Extract repo name from HF cache path structure
                        # Path looks like: .../models--Organization--Repo/snapshots/hash/model.safetensors
                        repo_name = None
                        for p in model_file.parts:
                            if p.startswith("models--"):
                                repo_name = p.replace("models--", "").replace("--", "/")
                                break

                        if repo_name:
                            name_prefix = repo_name
                        else:
                            name_prefix = "hf_local"

                    # Improve name if filename is generic
                    stem = model_file.stem
                    if stem in ["model", "pytorch_model", "adapter_model"] or stem.startswith("model-00"):
                        # Use repo name if we have it, otherwise parent dir
                        if name_prefix and name_prefix != "local" and name_prefix != "hf_local":
                            stem = name_prefix.split("/")[-1]  # Use just the model name part
                        else:
                            stem = model_file.parent.name

                    # Filter out non-generative models (BERT, encoders, etc.)
                    # Also filter out MLX-quantized models (incompatible with transformers)
                    # Skip models that are clearly not for text generation or incompatible
                    skip_patterns = ["bert", "roberta", "albert", "electra", "deberta", "muril", "xlm-roberta"]
                    model_name_lower = stem.lower()
                    path_lower = str(model_file).lower()

                    # Skip non-generative models
                    if any(pattern in model_name_lower for pattern in skip_patterns):
                        continue

                    # Skip MLX models (they're incompatible with transformers backend)
                    if "mlx" in path_lower:
                        continue

                    model = ModelInfo(
                        name=f"{name_prefix}:{stem}",
                        source=src,
                        format=fmt,
                        path=model_file,
                        size_bytes=size,
                    )
                    self.registry.add(model)
                    discovered.append(model)
                except OSError:
                    continue

        # Scan LM Studio
        for lms_model in self.lmstudio.list_models():
            name = f"lmstudio:{lms_model['name']}"
            if not self.registry.get(name):
                model = ModelInfo(
                    name=name,
                    source=ModelSource.LMSTUDIO,
                    format=ModelFormat.GGUF,
                    path=lms_model["path"],
                    size_bytes=lms_model["size_bytes"],
                )
                self.registry.add(model)
                discovered.append(model)

        # Reconcile Ollama
        try:
            live_ollama = {m['name']: m for m in self.ollama.list_models()}
            live_names = set(live_ollama.keys())

            # Prune stale
            for existing in self.registry.list(ModelSource.OLLAMA):
                # Check if existing.metadata['ollama_name'] is in live_names
                # Or try to parse from name
                o_name = existing.name.replace("ollama:", "")
                if o_name not in live_names:
                    # Stale entry, remove it
                    self.registry.remove(existing.name)
                    # Also try to remove the JSON proxy file if we created it
                    if existing.path and existing.path.suffix == ".json" and "ollama" in str(existing.path):
                        try: existing.path.unlink()
                        except OSError: pass

            # Add new/current
            for name, o_model in live_ollama.items():
                reg_name = f"ollama:{name}"
                if not self.registry.get(reg_name):
                    ref_file = self.ollama.ollama_dir / f"{name.replace(':', '_')}.json"
                    if not ref_file.exists():
                        try:
                            with open(ref_file, "w") as f:
                                json.dump({
                                    "name": name,
                                    "source": "ollama",
                                    "auto_discovered": True
                                }, f)
                        except OSError: continue

                    model = ModelInfo(
                        name=reg_name,
                        source=ModelSource.OLLAMA,
                        format=ModelFormat.OLLAMA,
                        path=ref_file,
                        size_bytes=0,  # Unknown
                        downloaded_at=datetime.now()
                    )
                    self.registry.add(model)
                    discovered.append(model)
        except Exception as e:
            logger.warning(f"Ollama reconciliation failed: {e}")

        return discovered
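`ModelManager.download` first checks the registry, then tries the requested source and falls back to the remaining ones in a fixed order, registering whatever succeeds. A sketch of typical use, assuming the relevant backends are reachable:

```python
from parishad.models.downloader import ModelManager, print_progress

manager = ModelManager()                 # components share the resolved model directory
print(manager.get_available_sources())   # e.g. {'huggingface': True, 'ollama': False, 'lmstudio': False}

# "auto" goes through _detect_source(): known shortcuts map to HuggingFace, the
# "hf:"/"ollama:"/"lmstudio:" prefixes force a source, and a running Ollama is
# preferred for anything else. Failures fall through to the remaining sources.
model = manager.download("qwen2.5:1.5b", source="auto", progress_callback=print_progress)

print(manager.get_model_path("qwen2.5:1.5b"))   # path usable by a llama.cpp-style backend
manager.scan_for_models()                       # registers GGUF/safetensors files already on disk
```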
# =============================================================================
# CLI Helper Functions
# =============================================================================


def print_progress(progress: DownloadProgress) -> None:
    """Print download progress to terminal with in-place update."""
    bar_width = 40
    filled = int(bar_width * progress.percentage / 100)
    bar = "=" * filled + "-" * (bar_width - filled)

    # Build the progress line
    line = (
        f"[{bar}] {progress.percentage:.1f}% "
        f"({progress.downloaded_bytes / 1024 / 1024:.1f}MB) "
        f"{progress.speed_human}"
    )

    # Clear line and write progress (use ANSI escape to clear to end of line)
    sys.stdout.write(f"\r\033[K{line}")
    sys.stdout.flush()

    if progress.percentage >= 100:
        sys.stdout.write("\n")  # Newline when done
        sys.stdout.flush()


def interactive_download(manager: ModelManager) -> Optional[ModelInfo]:
    """Interactive model download wizard."""
    print("\n=== Parishad Model Download ===\n")

    # Show available sources
    sources = manager.get_available_sources()
    print("Available sources:")
    print(f" 1. HuggingFace (GGUF models) - {'✓ Available' if sources['huggingface'] else '✗ Not available'}")
    print(f" 2. Ollama - {'✓ Available' if sources['ollama'] else '✗ Not installed'}")
    print(f" 3. LM Studio - {'✓ Available' if sources['lmstudio'] else '✗ Not found'}")

    print("\nSelect source (1-3): ", end="")
    choice = input().strip()

    if choice == "1":
        return _download_from_huggingface(manager)
    elif choice == "2":
        return _download_from_ollama(manager)
    elif choice == "3":
        return _download_from_lmstudio(manager)
    else:
        print("Invalid choice")
        return None


def _download_from_huggingface(manager: ModelManager) -> Optional[ModelInfo]:
    """Interactive HuggingFace download."""
    print("\nPopular models:")
    models = list(manager.huggingface.POPULAR_MODELS.keys())
    for i, name in enumerate(models, 1):
        print(f" {i}. {name}")
    print(f" {len(models) + 1}. Custom (enter repo/filename)")

    print("\nSelect model: ", end="")
    choice = input().strip()

    try:
        idx = int(choice) - 1
        if 0 <= idx < len(models):
            model_spec = models[idx]
        elif idx == len(models):
            print("Enter HuggingFace repo/filename: ", end="")
            model_spec = input().strip()
        else:
            print("Invalid choice")
            return None
    except ValueError:
        # Treat as model name directly
        model_spec = choice

    print(f"\nDownloading {model_spec}...")
    return manager.download(model_spec, source="huggingface", progress_callback=print_progress)


def _download_from_ollama(manager: ModelManager) -> Optional[ModelInfo]:
    """Interactive Ollama download."""
    if not manager.ollama.is_available():
        print("\nOllama is not installed. Install from: https://ollama.ai")
        return None

    # Show existing models
    existing = manager.ollama.list_models()
    if existing:
        print("\nModels already in Ollama:")
        for m in existing:
            print(f" - {m['name']} ({m['size']})")

    print("\nEnter model name to pull (e.g., 'llama3.2:1b', 'qwen2.5:0.5b'): ", end="")
    model_name = input().strip()

    if not model_name:
        return None

    print(f"\nPulling {model_name} via Ollama...")
    return manager.download(model_name, source="ollama")


def _download_from_lmstudio(manager: ModelManager) -> Optional[ModelInfo]:
    """Interactive LM Studio import."""
    if not manager.lmstudio.is_available():
        print("\nLM Studio models directory not found.")
        print("Expected locations:")
        print(f" - {LMSTUDIO_MODELS_DIR}")
        for path in LMSTUDIO_ALT_PATHS:
            print(f" - {path}")
        return None

    models = manager.lmstudio.list_models()

    if not models:
        print("\nNo GGUF models found in LM Studio.")
        return None

    print("\nModels in LM Studio:")
    for i, m in enumerate(models, 1):
        size_mb = m["size_bytes"] / 1024 / 1024
        print(f" {i}. {m['name']} ({size_mb:.1f} MB)")

    print("\nSelect model to import: ", end="")
    choice = input().strip()

    try:
        idx = int(choice) - 1
        if 0 <= idx < len(models):
            model = models[idx]
            print(f"\nImporting {model['name']}...")
            return manager.download(model["name"], source="lmstudio")
    except ValueError:
        pass

    print("Invalid choice")
    return None