lollms-client 1.7.13__py3-none-any.whl → 1.8.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lollms_client/__init__.py +1 -1
- lollms_client/llm_bindings/llama_cpp_server/__init__.py +191 -70
- lollms_client/lollms_discussion.py +170 -38
- lollms_client/lollms_mcp_binding.py +0 -1
- lollms_client/lollms_stt_binding.py +1 -1
- lollms_client/lollms_tti_binding.py +1 -1
- lollms_client/lollms_ttm_binding.py +1 -1
- lollms_client/lollms_tts_binding.py +1 -1
- lollms_client/lollms_ttv_binding.py +1 -1
- lollms_client/tti_bindings/open_router/__init__.py +341 -0
- {lollms_client-1.7.13.dist-info → lollms_client-1.8.3.dist-info}/METADATA +1 -1
- {lollms_client-1.7.13.dist-info → lollms_client-1.8.3.dist-info}/RECORD +15 -15
- lollms_client/llm_bindings/llamacpp/__init__.py +0 -1155
- {lollms_client-1.7.13.dist-info → lollms_client-1.8.3.dist-info}/WHEEL +0 -0
- {lollms_client-1.7.13.dist-info → lollms_client-1.8.3.dist-info}/licenses/LICENSE +0 -0
- {lollms_client-1.7.13.dist-info → lollms_client-1.8.3.dist-info}/top_level.txt +0 -0
lollms_client/__init__.py
CHANGED
@@ -8,7 +8,7 @@ from lollms_client.lollms_utilities import PromptReshaper # Keep general utiliti
 from lollms_client.lollms_mcp_binding import LollmsMCPBinding, LollmsMCPBindingManager
 from lollms_client.lollms_llm_binding import LollmsLLMBindingManager

-__version__ = "1.7.13"
+__version__ = "1.8.3" # Updated version

 # Optionally, you could define __all__ if you want to be explicit about exports
 __all__ = [
lollms_client/llm_bindings/llama_cpp_server/__init__.py
CHANGED

@@ -9,6 +9,7 @@ import platform
 import zipfile
 import tarfile
 import json
+import yaml
 import atexit
 from pathlib import Path
 from typing import Optional, List, Dict, Any, Union, Callable
@@ -20,7 +21,7 @@ from lollms_client.lollms_types import MSG_TYPE
 from lollms_client.lollms_discussion import LollmsDiscussion

 # Ensure dependencies
-pm.ensure_packages(["openai", "huggingface_hub", "filelock", "requests", "tqdm", "psutil"])
+pm.ensure_packages(["openai", "huggingface_hub", "filelock", "requests", "tqdm", "psutil", "pyyaml"])
 import openai
 from huggingface_hub import hf_hub_download
 from filelock import FileLock
@@ -67,6 +68,9 @@ class LlamaCppServerBinding(LollmsLLMBinding):
         self.bin_dir = self.binding_dir / "bin"
         self.models_dir = Path(kwargs.get("models_path", "models/llama_cpp_models")).resolve()

+        # Multimodal Registry
+        self.mm_registry_path = self.models_dir / "multimodal_bindings.yaml"
+
         # Registry directory for inter-process coordination
         self.servers_dir = self.models_dir / "servers"
         self.servers_dir.mkdir(parents=True, exist_ok=True)
@@ -265,6 +269,86 @@ class LlamaCppServerBinding(LollmsLLMBinding):
             ASCIIColors.warning(f"Max active models ({self.max_active_models}) reached. Unloading LRU model: {model_to_kill}")
             self._kill_server(model_to_kill, oldest_info)

+    def _load_mm_registry(self) -> Dict[str, str]:
+        if not self.mm_registry_path.exists():
+            return {}
+        try:
+            with open(self.mm_registry_path, 'r') as f:
+                registry = yaml.safe_load(f) or {}
+
+            # Self-healing: remove missing files
+            updated = False
+            to_remove = []
+            for m, p in registry.items():
+                if not (self.models_dir / m).exists() or not (self.models_dir / p).exists():
+                    to_remove.append(m)
+                    updated = True
+
+            for m in to_remove:
+                del registry[m]
+
+            if updated:
+                self._save_mm_registry(registry)
+            return registry
+        except Exception as e:
+            ASCIIColors.error(f"Failed to load multimodal registry: {e}")
+            return {}
+
+    def _save_mm_registry(self, registry: Dict[str, str]):
+        try:
+            with open(self.mm_registry_path, 'w') as f:
+                yaml.dump(registry, f)
+        except Exception as e:
+            ASCIIColors.error(f"Failed to save multimodal registry: {e}")
+
+    def bind_multimodal_model(self, model_name: str, mmproj_name: str) -> dict:
+        """Explicitly binds a model to an mmproj file."""
+        if not (self.models_dir / model_name).exists():
+            return {"status": False, "error": f"Model {model_name} not found."}
+        if not (self.models_dir / mmproj_name).exists():
+            return {"status": False, "error": f"Projector {mmproj_name} not found."}
+
+        registry = self._load_mm_registry()
+        registry[model_name] = mmproj_name
+        self._save_mm_registry(registry)
+
+        ASCIIColors.success(f"Bound {model_name} with {mmproj_name}")
+        return {"status": True, "message": f"Bound {model_name} with {mmproj_name}"}
+
+    def _find_mmproj(self, model_path: Path) -> Optional[Path]:
+        """Finds a corresponding mmproj file for a given model path."""
+        # 1. Check registry first
+        registry = self._load_mm_registry()
+        if model_path.name in registry:
+            proj_path = self.models_dir / registry[model_path.name]
+            if proj_path.exists():
+                return proj_path
+
+        # 2. Automatic detection patterns
+        stem = model_path.stem
+        clean_stem = re.sub(r'\.(Q\d_.*|f16|f32)$', '', stem)
+        patterns = [
+            f"{stem}.mmproj", f"{stem}-mmproj.gguf", f"{stem}.mmproj.gguf",
+            f"{clean_stem}.mmproj", f"{clean_stem}-mmproj.gguf",
+            f"mmproj-{stem}.gguf", "mmproj.gguf"
+        ]
+
+        for p in patterns:
+            pot = model_path.parent / p
+            if pot.exists():
+                return pot
+
+        # 3. Last resort: simple scan
+        try:
+            for f in model_path.parent.iterdir():
+                if f.is_file() and "mmproj" in f.name.lower() and f.name != model_path.name:
+                    if f.suffix in [".gguf", ".mmproj", ".bin"]:
+                        return f
+        except:
+            pass
+
+        return None
+
     def _spawn_server_detached(self, model_name: str):
         """Spawns the server process detached so it survives if this python script ends."""
         exe_path = self._get_server_executable()
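For orientation, a minimal usage sketch of the multimodal registry added above; the class and method names come from this diff, while the import path, constructor arguments and file names are illustrative assumptions:

    # Hypothetical usage; only bind_multimodal_model(), _find_mmproj() and the
    # multimodal_bindings.yaml location are asserted by the diff itself.
    from lollms_client.llm_bindings.llama_cpp_server import LlamaCppServerBinding  # assumed import path

    binding = LlamaCppServerBinding(models_path="models/llama_cpp_models")  # simplified constructor call

    # Persist an explicit model <-> projector pairing; both files must already sit in models_dir.
    res = binding.bind_multimodal_model(
        "Qwen_Qwen3-VL-32B-Thinking-Q4_K_M.gguf",    # main GGUF model
        "mmproj-Qwen3-VL-32B-Thinking-f16.gguf",     # hypothetical projector file name
    )
    print(res)  # {"status": True, "message": "Bound ... with ..."} when both files exist

The pairing lands in models/llama_cpp_models/multimodal_bindings.yaml as a flat name-to-name mapping; _find_mmproj() consults that registry first, then the file-name patterns, then a last-resort directory scan.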
@@ -286,6 +370,12 @@ class LlamaCppServerBinding(LollmsLLMBinding):
             "--batch-size", str(self.batch_size),
             "--embedding"
         ]
+
+        # Automatic detection or Registry-based mmproj
+        mmproj_path = self._find_mmproj(model_path)
+        if mmproj_path:
+            ASCIIColors.info(f"Detected multimodal projector: {mmproj_path}")
+            cmd.extend(["--mmproj", str(mmproj_path)])

         if self.n_threads:
             cmd.extend(["--threads", str(self.n_threads)])
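When _find_mmproj() returns a path, the spawn command simply gains a --mmproj argument. A sketch of the resulting argument list, restricted to the flags visible in this hunk (all paths and values are placeholders; the model, host and port arguments are built elsewhere in this file):

    # Placeholder values for illustration only
    cmd = [
        "/path/to/llama-server",
        "--batch-size", "512",
        "--embedding",
        "--mmproj", "models/llama_cpp_models/mmproj-Qwen3-VL-32B-Thinking-f16.gguf",
        "--threads", "8",
    ]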
@@ -306,14 +396,12 @@ class LlamaCppServerBinding(LollmsLLMBinding):
             **kwargs
         )

-        # Wait for health check
+        # Wait for health check
         url = f"http://{self.host}:{port}/v1"
         start_time = time.time()
-        # Increased timeout to 120s for larger models
         while time.time() - start_time < 120:
             try:
                 res = requests.get(f"{url}/models", timeout=1)
-                # STRICTLY check for 200, as 503 means loading
                 if res.status_code == 200:
                     return proc.pid, port, url
             except:
@@ -321,29 +409,21 @@ class LlamaCppServerBinding(LollmsLLMBinding):

             if proc.poll() is not None:
                 raise RuntimeError(f"Server process exited immediately with code {proc.returncode}")
-
             time.sleep(0.5)

-        # Timeout
         proc.terminate()
-        raise TimeoutError(f"Server for {model_name} failed to become responsive
-
+        raise TimeoutError(f"Server for {model_name} failed to become responsive.")

     def load_model(self, model_name: str) -> bool:
-        """
-        Thread-safe and Process-safe model loading.
-        """
+        """Thread-safe and Process-safe model loading."""
         if not self.global_lock_path.parent.exists():
             self.global_lock_path.parent.mkdir(parents=True)

         lock = FileLock(str(self.global_lock_path))
-
         try:
             with lock.acquire(timeout=60):
                 info = self._get_server_info(model_name)
-
                 if info:
-                    # Update heartbeat
                     try:
                         self._get_registry_file(model_name).touch()
                     except:
@@ -357,16 +437,11 @@ class LlamaCppServerBinding(LollmsLLMBinding):
                 reg_file = self._get_registry_file(model_name)
                 with open(reg_file, 'w') as f:
                     json.dump({
-                        "model_name": model_name,
-                        "pid": pid,
-                        "port": port,
-                        "url": url,
-                        "started_at": time.time()
+                        "model_name": model_name, "pid": pid, "port": port, "url": url, "started_at": time.time()
                     }, f)

                 self.model_name = model_name
                 return True
-
         except Exception as e:
             ASCIIColors.error(f"Error loading model {model_name}: {e}")
             trace_exception(e)
@@ -376,9 +451,7 @@ class LlamaCppServerBinding(LollmsLLMBinding):
         target_model = model_name or self.model_name
         if not target_model:
             raise ValueError("No model specified.")
-
         info = self._get_server_info(target_model)
-
         if not info:
             if self.load_model(target_model):
                 info = self._get_server_info(target_model)
@@ -389,58 +462,41 @@ class LlamaCppServerBinding(LollmsLLMBinding):
                 self._get_registry_file(target_model).touch()
             except:
                 pass
-
         if not info:
             raise RuntimeError(f"Model {target_model} failed to load.")
-
         return openai.OpenAI(base_url=info['url'], api_key="sk-no-key-required")

     def _execute_with_retry(self, func: Callable, *args, **kwargs):
-        """
-        Executes an API call with retries for 503 (Model Loading) errors.
-        """
-        retries = 60 # Wait up to ~2 minutes
+        retries = 60
         for i in range(retries):
             try:
                 return func(*args, **kwargs)
             except openai.InternalServerError as e:
-                # Catch 503 Loading model
                 if e.status_code == 503:
-                    if i % 10 == 0:
+                    if i % 10 == 0:
                         ASCIIColors.warning(f"Model is loading (503). Waiting... ({i+1}/{retries})")
                     time.sleep(2)
                     continue
                 raise e
             except openai.APIConnectionError:
-                # Server might be briefly unreachable during heavy load or restart
                 if i % 10 == 0:
                     ASCIIColors.warning(f"Connection error. Waiting... ({i+1}/{retries})")
                 time.sleep(2)
                 continue
-        # Final attempt
         return func(*args, **kwargs)

     def generate_text(self, prompt: str, n_predict: int = None, stream: bool = False, **kwargs) -> Union[str, Dict]:
         try:
             client = self._get_client()
-
             def do_gen():
                 return client.completions.create(
-                    model=self.model_name,
-                    prompt=prompt,
+                    model=self.model_name, prompt=prompt,
                     max_tokens=n_predict if n_predict else 1024,
                     temperature=kwargs.get("temperature", 0.7),
-                    top_p=kwargs.get("top_p", 0.9),
-
-                    extra_body={
-                        "top_k": kwargs.get("top_k", 40),
-                        "repeat_penalty": kwargs.get("repeat_penalty", 1.1),
-                        "n_predict": n_predict
-                    }
+                    top_p=kwargs.get("top_p", 0.9), stream=stream,
+                    extra_body={"top_k": kwargs.get("top_k", 40), "repeat_penalty": kwargs.get("repeat_penalty", 1.1), "n_predict": n_predict}
                 )
-
             completion = self._execute_with_retry(do_gen)
-
             if stream:
                 full_text = ""
                 for chunk in completion:
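A hedged usage sketch of the reworked completion path above; the keyword names mirror the kwargs read in this hunk, while the binding construction and model file name are assumptions:

    # Illustrative; generate_text() and load_model() are defined in this file.
    binding = LlamaCppServerBinding(models_path="models/llama_cpp_models")  # simplified constructor call
    binding.load_model("mistralai_Ministral-3-3B-Instruct-2512-Q4_K_M.gguf")

    text = binding.generate_text(
        "Summarize what a multimodal projector does.",
        n_predict=256,
        stream=False,          # the added stream=stream now forwards this flag to the server
        temperature=0.7,
        top_p=0.9,
        top_k=40,              # forwarded via extra_body
        repeat_penalty=1.1,    # forwarded via extra_body
    )
    print(text)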
@@ -460,22 +516,15 @@ class LlamaCppServerBinding(LollmsLLMBinding):
         try:
             client = self._get_client()
             messages = discussion.export("openai_chat")
-
             def do_chat():
                 return client.chat.completions.create(
-                    model=self.model_name,
-                    messages=messages,
+                    model=self.model_name, messages=messages,
                     max_tokens=kwargs.get("n_predict", 1024),
                     temperature=kwargs.get("temperature", 0.7),
                     stream=kwargs.get("stream", False),
-                    extra_body={
-                        "top_k": kwargs.get("top_k", 40),
-                        "repeat_penalty": kwargs.get("repeat_penalty", 1.1)
-                    }
+                    extra_body={"top_k": kwargs.get("top_k", 40), "repeat_penalty": kwargs.get("repeat_penalty", 1.1)}
                 )
-
             response = self._execute_with_retry(do_chat)
-
             if kwargs.get("stream", False):
                 full_text = ""
                 for chunk in response:
@@ -495,6 +544,10 @@ class LlamaCppServerBinding(LollmsLLMBinding):
         models = []
         if self.models_dir.exists():
             for f in self.models_dir.glob("*.gguf"):
+                # Hide files explicitly containing 'mmproj' as they are not standalone models
+                if "mmproj" in f.name.lower():
+                    continue
+
                 if re.search(r'-\d{5}-of-\d{5}\.gguf$', f.name):
                     if "00001-of-" not in f.name: continue
                 models.append({"model_name": f.name, "owned_by": "local", "created": time.ctime(f.stat().st_ctime), "size": f.stat().st_size})
@@ -510,20 +563,14 @@ class LlamaCppServerBinding(LollmsLLMBinding):
         try:
             client = self._get_client()
             url = client.base_url
-
             def do_tokenize():
-                # Llama-server specific endpoint
                 ep = f"{url}tokenize"
-                # Strip v1/ if present because tokenize is often at root in older llama-server,
-                # but in recent versions it might be under v1 or root. We try robustly.
                 res = requests.post(ep, json={"content": text})
                 if res.status_code == 404:
                     res = requests.post(str(url).replace("/v1/", "/tokenize"), json={"content": text})
-
                 if res.status_code == 503:
                     raise openai.InternalServerError("Loading model", response=res, body=None)
                 return res
-
             res = self._execute_with_retry(do_tokenize)
             if res.status_code == 200: return res.json().get("tokens", [])
         except: pass
@@ -533,17 +580,14 @@ class LlamaCppServerBinding(LollmsLLMBinding):
         try:
             client = self._get_client()
             url = client.base_url
-
             def do_detokenize():
                 ep = f"{url}detokenize"
                 res = requests.post(ep, json={"tokens": tokens})
                 if res.status_code == 404:
                     res = requests.post(str(url).replace("/v1/", "/detokenize"), json={"tokens": tokens})
-
                 if res.status_code == 503:
                     raise openai.InternalServerError("Loading model", response=res, body=None)
                 return res
-
             res = self._execute_with_retry(do_detokenize)
             if res.status_code == 200: return res.json().get("content", "")
         except: pass
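The tokenize/detokenize helpers above call llama-server's own REST endpoints rather than the OpenAI-compatible API. A standalone sketch of the same round trip (the base URL is a placeholder; the payload keys match the ones used in these hunks):

    import requests

    base = "http://127.0.0.1:9600"  # placeholder for the per-model server root URL

    # The binding tries the /v1/-prefixed endpoint first and falls back to the root path on 404.
    tokens = requests.post(f"{base}/tokenize", json={"content": "Hello world"}).json().get("tokens", [])
    text = requests.post(f"{base}/detokenize", json={"tokens": tokens}).json().get("content", "")
    print(tokens, text)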
@@ -557,22 +601,91 @@ class LlamaCppServerBinding(LollmsLLMBinding):
             return client.embeddings.create(input=text, model=self.model_name)
         res = self._execute_with_retry(do_embed)
         return res.data[0].embedding
-
+
     def get_zoo(self) -> List[Dict[str, Any]]:
         return [
-
-            {
-
-
+            # Ministral 3: High-performance edge model (3B)
+            {
+                "name": "Ministral-3-3B-Instruct-2512-GGUF",
+                "description": "Mistral AI Ministral 3 3B Instruct (Bartowski Quant) - Efficient Edge Model",
+                "size": "2.2 GB (Q4_K_M)",
+                "type": "gguf",
+                "link": "bartowski/mistralai_Ministral-3-3B-Instruct-2512-GGUF",
+                "filename": "mistralai_Ministral-3-3B-Instruct-2512-Q4_K_M.gguf"
+            },
+            # Devstral 2 Mini: Agentic coding specialist (24B)
+            {
+                "name": "Devstral-Small-2-24B-Instruct-GGUF",
+                "description": "Mistral AI Devstral Small 2 24B Instruct (Bartowski Quant) - Coding Specialist",
+                "size": "14.8 GB (Q4_K_M)",
+                "type": "gguf",
+                "link": "bartowski/mistralai_Devstral-Small-2-24B-Instruct-2512-GGUF",
+                "filename": "mistralai_Devstral-Small-2-24B-Instruct-2512-Q4_K_M.gguf"
+            },
+            # Llama 4 Scout: Meta's efficient MoE (17B)
+            {
+                "name": "Llama-4-Scout-17B-Instruct-GGUF",
+                "description": "Meta Llama 4 Scout 17B Instruct (Bartowski Quant) - 16-Expert MoE",
+                "size": "11.2 GB (Q4_K_M)",
+                "type": "gguf",
+                "link": "bartowski/meta-llama_Llama-4-Scout-17B-16E-Instruct-old-GGUF",
+                "filename": "meta-llama_Llama-4-Scout-17B-16E-Instruct-Q4_K_M.gguf"
+            },
+            # Qwen 3 VL: Vision-Language with "Thinking" (32B)
+            {
+                "name": "Qwen3-VL-32B-Thinking-GGUF",
+                "description": "Qwen 3 VL 32B Thinking (Bartowski Quant) - Vision CoT Reasoning",
+                "size": "19.5 GB (Q4_K_M)",
+                "type": "gguf",
+                "link": "bartowski/Qwen_Qwen3-VL-32B-Thinking-GGUF",
+                "filename": "Qwen_Qwen3-VL-32B-Thinking-Q4_K_M.gguf"
+            },
+            # Qwen 3: Dense reasoning powerhouse (72B)
+            {
+                "name": "Qwen3-72B-Embiggened-GGUF",
+                "description": "Qwen 3 72B Embiggened (Bartowski Quant) - Enhanced Reasoning Dense Model",
+                "size": "43.1 GB (Q4_K_M)",
+                "type": "gguf",
+                "link": "bartowski/cognitivecomputations_Qwen3-72B-Embiggened-GGUF",
+                "filename": "Qwen3-72B-Embiggened-Q4_K_M.gguf"
+            },
+            # Devstral 2: Massive coding architecture (123B)
+            {
+                "name": "Devstral-2-123B-Instruct-GGUF",
+                "description": "Mistral AI Devstral 2 123B Instruct (Bartowski Quant) - Heavy Duty Coding",
+                "size": "71.4 GB (Q4_K_M)",
+                "type": "gguf",
+                "link": "bartowski/mistralai_Devstral-2-123B-Instruct-2512-GGUF",
+                "filename": "Devstral-2-123B-Instruct-2512-Q4_K_M.gguf"
+            },
+            # ChatGPT OSS: Open weights rival (120B)
+            {
+                "name": "ChatGPT-OSS-120B-GGUF",
+                "description": "OpenAI GPT-OSS 120B (Bartowski Quant) - Open Weight Research Model",
+                "size": "69.8 GB (Q4_K_M)",
+                "type": "gguf",
+                "link": "bartowski/openai_gpt-oss-120b-GGUF",
+                "filename": "gpt-oss-120b-Q4_K_M.gguf"
+            },
+            # DeepSeek V3: The MoE Giant (671B Base / 37B Active)
+            {
+                "name": "DeepSeek-V3-0324-GGUF",
+                "description": "DeepSeek V3 0324 (Bartowski Quant) - 671B MoE",
+                "size": "365 GB (Q4_K_M)",
+                "type": "gguf",
+                "link": "bartowski/deepseek-ai_DeepSeek-V3-0324-GGUF",
+                "filename": "DeepSeek-V3-0324-Q4_K_M.gguf"
+            }
         ]

+
     def download_from_zoo(self, index: int, progress_callback: Callable[[dict], None] = None) -> dict:
         zoo = self.get_zoo();
         if index < 0 or index >= len(zoo): return {"status": False, "message": "Index out of bounds"}
         item = zoo[index]
-        return self.pull_model(item["link"], item.get("filename"), progress_callback)
+        return self.pull_model(item["link"], item.get("filename"), progress_callback=progress_callback)

-    def pull_model(self, repo_id: str, filename: str, progress_callback: Callable[[dict], None] = None) -> dict:
+    def pull_model(self, repo_id: str, filename: str, mmproj_repo_id: str = None, mmproj_filename: str = None, progress_callback: Callable[[dict], None] = None) -> dict:
         try:
             match = re.match(r"^(.*)-(\d{5})-of-(\d{5})\.gguf$", filename)
             files = []
@@ -582,7 +695,6 @@ class LlamaCppServerBinding(LollmsLLMBinding):
                 for i in range(1, total + 1): files.append(f"{base}-{i:05d}-of-{total:05d}.gguf")
             else:
                 files.append(filename)
-
             paths = []
             for f in files:
                 ASCIIColors.info(f"Downloading {f} from {repo_id}...")
@@ -591,7 +703,16 @@ class LlamaCppServerBinding(LollmsLLMBinding):
                 paths.append(p)
                 ASCIIColors.success(f"Downloaded {f}")

+            if mmproj_filename:
+                proj_repo = mmproj_repo_id if mmproj_repo_id else repo_id
+                ASCIIColors.info(f"Downloading mmproj {mmproj_filename} from {proj_repo}...")
+                hf_hub_download(repo_id=proj_repo, filename=mmproj_filename, local_dir=self.models_dir, local_dir_use_symlinks=False, resume_download=True)
+                ASCIIColors.success(f"Downloaded mmproj {mmproj_filename}")
+                # Automatically bind the model with its projector
+                self.bind_multimodal_model(filename, mmproj_filename)
+
             msg = f"Successfully downloaded model: {filename}"
+            if mmproj_filename: msg += f" and bound with projector: {mmproj_filename}"
             if progress_callback: progress_callback({"status": "success", "message": msg, "completed": 100, "total": 100})
             return {"status": True, "message": msg, "path": paths[0]}
         except Exception as e:
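To close, a sketch of the extended pull_model() signature introduced above, fetching a model together with its projector so the pair is bound automatically; the repo and file names are illustrative, borrowed from the naming style used in get_zoo():

    # Illustrative call; mmproj_repo_id defaults to repo_id when omitted.
    binding = LlamaCppServerBinding(models_path="models/llama_cpp_models")  # simplified constructor call
    result = binding.pull_model(
        repo_id="bartowski/Qwen_Qwen3-VL-32B-Thinking-GGUF",
        filename="Qwen_Qwen3-VL-32B-Thinking-Q4_K_M.gguf",
        mmproj_filename="mmproj-Qwen3-VL-32B-Thinking-f16.gguf",  # hypothetical projector file name
        progress_callback=lambda p: print(p.get("message", p)),
    )
    print(result)

    # Zoo downloads go through the same path; the callback is forwarded by keyword,
    # which keeps it from colliding with the new mmproj parameters.
    binding.download_from_zoo(0, progress_callback=lambda p: print(p.get("message", p)))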