lemonade-sdk 8.1.11__py3-none-any.whl → 8.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of lemonade-sdk has been flagged as potentially problematic.
- lemonade/cache.py +6 -1
- lemonade/common/status.py +4 -4
- lemonade/common/system_info.py +0 -26
- lemonade/tools/bench.py +22 -1
- lemonade/tools/flm/utils.py +70 -22
- lemonade/tools/llamacpp/bench.py +111 -23
- lemonade/tools/llamacpp/load.py +30 -2
- lemonade/tools/llamacpp/utils.py +234 -15
- lemonade/tools/oga/bench.py +0 -26
- lemonade/tools/oga/load.py +38 -142
- lemonade/tools/oga/migration.py +403 -0
- lemonade/tools/report/table.py +6 -0
- lemonade/tools/server/flm.py +2 -6
- lemonade/tools/server/llamacpp.py +20 -1
- lemonade/tools/server/serve.py +335 -17
- lemonade/tools/server/static/js/models.js +416 -18
- lemonade/tools/server/static/js/shared.js +44 -6
- lemonade/tools/server/static/logs.html +29 -19
- lemonade/tools/server/static/styles.css +204 -0
- lemonade/tools/server/static/webapp.html +32 -0
- lemonade/version.py +1 -1
- lemonade_install/install.py +33 -579
- {lemonade_sdk-8.1.11.dist-info → lemonade_sdk-8.2.0.dist-info}/METADATA +5 -3
- {lemonade_sdk-8.1.11.dist-info → lemonade_sdk-8.2.0.dist-info}/RECORD +32 -31
- lemonade_server/cli.py +10 -0
- lemonade_server/model_manager.py +172 -11
- lemonade_server/server_models.json +102 -66
- {lemonade_sdk-8.1.11.dist-info → lemonade_sdk-8.2.0.dist-info}/WHEEL +0 -0
- {lemonade_sdk-8.1.11.dist-info → lemonade_sdk-8.2.0.dist-info}/entry_points.txt +0 -0
- {lemonade_sdk-8.1.11.dist-info → lemonade_sdk-8.2.0.dist-info}/licenses/LICENSE +0 -0
- {lemonade_sdk-8.1.11.dist-info → lemonade_sdk-8.2.0.dist-info}/licenses/NOTICE.md +0 -0
- {lemonade_sdk-8.1.11.dist-info → lemonade_sdk-8.2.0.dist-info}/top_level.txt +0 -0
lemonade/tools/server/serve.py
CHANGED
@@ -7,12 +7,12 @@ import logging
 import platform
 import tempfile
 import traceback
-from typing import Optional, Union
+from typing import Optional, Union, List
 import json
 from pathlib import Path
 import os
-
-from fastapi import FastAPI, HTTPException, status, Request, WebSocket
+import shutil
+from fastapi import FastAPI, HTTPException, status, Request, WebSocket, Form, UploadFile
 from fastapi.responses import StreamingResponse
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.staticfiles import StaticFiles
@@ -83,10 +83,31 @@ if platform.system() in ["Windows", "Darwin"]:
     from lemonade.tools.server.tray import LemonadeTray, OutputDuplicator


-class
+class ServerLogFilter(logging.Filter):
+    def __init__(self, server):
+        super().__init__()
+        self.server = server
+        self.noisy_paths = {
+            "/api/v1/health",
+            "/api/v0/health",
+            "/api/v1/models",
+            "/api/v0/models",
+        }
+
     def filter(self, record: logging.LogRecord) -> bool:
-
-
+        msg = record.getMessage()
+
+        # Filter out websocket logs
+        if "> TEXT" in msg:
+            return False
+
+        # Filter out noisy HTTP routes if debug logs are OFF
+        if not self.server.debug_logging_enabled:
+            if any(path in msg for path in self.noisy_paths):
+                return False
+
+        # Otherwise, allow the log
+        return True


 async def log_streamer(websocket: WebSocket, path: str, interval: float = 1.0):
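Note: the new ServerLogFilter uses standard logging.Filter semantics: a handler drops any record whose filter() returns False. A minimal, self-contained sketch of that mechanism (the class name and the routes below are illustrative, not taken from the package):

```python
import logging

class DropHealthChecks(logging.Filter):  # illustrative stand-in for ServerLogFilter
    def filter(self, record: logging.LogRecord) -> bool:
        # Returning False drops the record before the handler emits it
        return "/api/v1/health" not in record.getMessage()

handler = logging.StreamHandler()
handler.addFilter(DropHealthChecks())
logging.basicConfig(level=logging.INFO, handlers=[handler])

logging.info("GET /api/v1/health 200 OK")    # suppressed by the filter
logging.info("Loading model user.my-model")  # emitted normally
```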
@@ -94,7 +115,7 @@ async def log_streamer(websocket: WebSocket, path: str, interval: float = 1.0):
     await websocket.accept()
     try:
         with open(path, "r", encoding="utf-8") as f:
-            f.seek(0
+            f.seek(0)  # start at the beginning of the file
             while True:
                 # Try reading a line
                 line = f.readline()
@@ -325,6 +346,7 @@ class Server:
         self.app.post(f"{prefix}/responses")(self.responses)
         self.app.post(f"{prefix}/log-level")(self.set_log_level)
         self.app.websocket(f"{prefix}/logs/ws")(self.logs_ws)
+        self.app.post(f"{prefix}/add-local-model")(self.add_local_model)

         # OpenAI-compatible routes
         self.app.post(f"{prefix}/chat/completions")(self.chat_completions)
@@ -336,6 +358,186 @@ class Server:
         self.app.post(f"{prefix}/reranking")(self.reranking)
         self.app.post(f"{prefix}/rerank")(self.reranking)

+        # Migration routes
+        self.app.get(f"{prefix}/migration/incompatible-models")(
+            self.get_incompatible_models
+        )
+        self.app.post(f"{prefix}/migration/cleanup")(
+            self.cleanup_incompatible_models
+        )
+
+    async def add_local_model(
+        self,
+        model_name: str = Form(...),
+        checkpoint: str = Form(""),
+        recipe: str = Form(...),
+        reasoning: bool = Form(False),
+        vision: bool = Form(False),
+        mmproj: str = Form(None),
+        model_files: List[UploadFile] = None,
+    ):
+        from huggingface_hub.constants import HF_HUB_CACHE
+        from lemonade.tools.llamacpp.utils import parse_checkpoint
+
+        # Upload and register a local model from files.
+        try:
+            if not model_files:
+                raise HTTPException(
+                    status_code=status.HTTP_400_BAD_REQUEST,
+                    detail="No model files provided for upload",
+                )
+
+            if not model_name.startswith("user."):
+                raise HTTPException(
+                    status_code=status.HTTP_400_BAD_REQUEST,
+                    detail="Model name must start with 'user.'",
+                )
+
+            valid_recipes = ["llamacpp", "oga-npu", "oga-hybrid", "oga-cpu"]
+            if recipe not in valid_recipes:
+                raise HTTPException(
+                    status_code=status.HTTP_400_BAD_REQUEST,
+                    detail=f"Invalid recipe. Must be one of: {', '.join(valid_recipes)}",
+                )
+
+            if recipe == "llamacpp" and not any(
+                f.filename.lower().endswith(".gguf") for f in model_files
+            ):
+                raise HTTPException(
+                    status_code=status.HTTP_400_BAD_REQUEST,
+                    detail="At least one .gguf file is required for llamacpp",
+                )
+
+            # Check if model name already exists
+            if model_name in ModelManager().supported_models:
+                raise HTTPException(
+                    status_code=status.HTTP_409_CONFLICT,
+                    detail=(
+                        f"Model name '{model_name}' already exists. "
+                        "Please use a different name."
+                    ),
+                )
+
+            model_name_clean = model_name.replace("user.", "")
+
+            # Files are saved to models--{model_name_clean}
+            # Note: This is based on the user's custom model name, NOT the checkpoint field
+            repo_cache_name = model_name_clean.replace("/", "--")
+            snapshot_path = os.path.join(HF_HUB_CACHE, f"models--{repo_cache_name}")
+            os.makedirs(snapshot_path, exist_ok=True)
+
+            # Extract variant from checkpoint field if provided
+            # checkpoint field format: "folder:variant" or just "folder"
+            variant = None
+            if checkpoint and ":" in checkpoint:
+                _, variant = parse_checkpoint(checkpoint)
+                # variant now contains just the variant[can be with or without the
+                # .gguf extension] filename (e.g., "LFM2-VL-1.6B-F16 or LFM2-VL-1.6B-F16.gguf")
+
+            # Save uploaded files, preserving folder structure
+            for file in model_files:
+                relative_path = file.filename
+                path_parts = relative_path.split("/")
+
+                if len(path_parts) > 1:
+                    internal_path = "/".join(path_parts[1:])
+                    file_path = os.path.join(snapshot_path, internal_path)
+                else:
+                    file_path = os.path.join(snapshot_path, path_parts[0])
+
+                os.makedirs(os.path.dirname(file_path), exist_ok=True)
+                with open(file_path, "wb") as f:
+                    content = await file.read()
+                    f.write(content)
+
+            # Resolve actual file paths after upload (for faster loading later)
+            resolved_checkpoint = None
+            resolved_mmproj = None
+
+            # For OGA models, find genai_config.json
+            if recipe.startswith("oga-"):
+                for root, _, files in os.walk(snapshot_path):
+                    if "genai_config.json" in files:
+                        resolved_checkpoint = root
+                        break
+                if not resolved_checkpoint:
+                    resolved_checkpoint = snapshot_path
+
+            # For llamacpp models, find the GGUF file
+            elif recipe == "llamacpp":
+                gguf_file_found = None
+
+                # If variant is specified, look for that specific file
+                if variant:
+                    search_term = (
+                        variant if variant.endswith(".gguf") else f"{variant}.gguf"
+                    )
+                    for root, _, files in os.walk(snapshot_path):
+                        if search_term in files:
+                            gguf_file_found = os.path.join(root, search_term)
+                            break
+
+                # If no variant or variant not found, search for any .gguf file (excluding mmproj)
+                if not gguf_file_found:
+                    for root, _, files in os.walk(snapshot_path):
+                        gguf_files = [
+                            f
+                            for f in files
+                            if f.endswith(".gguf") and "mmproj" not in f.lower()
+                        ]
+                        if gguf_files:
+                            gguf_file_found = os.path.join(root, gguf_files[0])
+                            break
+
+                resolved_checkpoint = (
+                    gguf_file_found if gguf_file_found else snapshot_path
+                )
+
+            # Search for mmproj file if provided
+            if mmproj:
+                for root, _, files in os.walk(snapshot_path):
+                    if mmproj in files:
+                        resolved_mmproj = os.path.join(root, mmproj)
+                        break
+
+            # Build checkpoint for registration
+            # For llamacpp with resolved path, store the full path relative to HF_HUB_CACHE
+            if resolved_checkpoint:
+                # Store as relative path from HF_HUB_CACHE for portability
+                checkpoint_to_register = os.path.relpath(
+                    resolved_checkpoint, HF_HUB_CACHE
+                )
+            elif variant:
+                checkpoint_to_register = f"models--{repo_cache_name}:{variant}"
+            else:
+                checkpoint_to_register = f"models--{repo_cache_name}"
+
+            # Register the model
+            ModelManager().register_local_model(
+                model_name=model_name,
+                checkpoint=checkpoint_to_register,
+                recipe=recipe,
+                reasoning=reasoning,
+                vision=vision,
+                mmproj=resolved_mmproj if resolved_mmproj else mmproj,
+                snapshot_path=snapshot_path,
+            )
+
+            # Refresh local models
+            self.local_models = ModelManager().downloaded_models_enabled
+
+            return {
+                "status": "success",
+                "message": f"Model {model_name} uploaded and registered successfully",
+            }
+        except Exception as e:
+            if os.path.exists(checkpoint_to_register):
+                shutil.rmtree(checkpoint_to_register)
+            raise HTTPException(
+                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+                detail=f"Failed to upload model: {str(e)}",
+            )
+
     async def set_log_level(self, config: LogLevelConfig):
         """
         Set the logging level of the server.
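The handler above validates the form fields, saves the uploaded files under the Hugging Face hub cache, and registers the model with ModelManager. A hedged client-side sketch of calling the new endpoint follows; the host, port, /api/v1 prefix, and file name are assumptions for illustration, while the form field names come from the handler signature above.

```python
# Illustrative upload of a local GGUF model to the new add-local-model route.
# Assumptions: server at http://localhost:8000, routes mounted under /api/v1,
# and a local file my-model.gguf; adjust these to your environment.
import requests

with open("my-model.gguf", "rb") as gguf:
    resp = requests.post(
        "http://localhost:8000/api/v1/add-local-model",
        data={
            "model_name": "user.my-model",  # must start with "user."
            "recipe": "llamacpp",  # llamacpp, oga-npu, oga-hybrid, or oga-cpu
        },
        files=[("model_files", ("my-model.gguf", gguf))],
    )
resp.raise_for_status()
print(resp.json())  # e.g. {"status": "success", "message": "..."}
```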
@@ -454,13 +656,13 @@ class Server:
         )
         file_handler.setLevel(logging_level)
         file_handler.setFormatter(uvicorn_formatter)
-        file_handler.addFilter(
+        file_handler.addFilter(ServerLogFilter(self))

         # Set up console handler
         console_handler = logging.StreamHandler()
         console_handler.setLevel(logging_level)
         console_handler.setFormatter(uvicorn_formatter)
-        console_handler.addFilter(
+        console_handler.addFilter(ServerLogFilter(self))

         # Configure root logger with both handlers
         logging.basicConfig(
@@ -807,6 +1009,11 @@ class Server:

         # Keep track of the full response for tool call extraction
         full_response = ""
+
+        # Track whether we're still in the thinking phase (before </think> tag)
+        in_thinking_phase = self.llm_loaded.reasoning
+        reasoning_buffer = ""  # Accumulate reasoning tokens to detect </think>
+
         try:
             async for token in self._generate_tokens(**generation_args):
                 # Handle client disconnect: stop generation and exit
@@ -845,7 +1052,53 @@ class Server:
                     )
                 )

-                # Create a ChatCompletionChunk
+                # Create a ChatCompletionChunk with reasoning_content support
+                # If we're in reasoning mode and haven't seen </think> yet,
+                # send tokens as reasoning_content instead of content
+                delta_content = None
+                delta_reasoning = None
+
+                if reasoning_first_token:
+                    # First token - include opening tag in reasoning
+                    delta_reasoning = "<think>" + token
+                    reasoning_first_token = False
+                    reasoning_buffer = token
+                elif in_thinking_phase:
+                    # Still in thinking phase - accumulate and check for </think>
+                    reasoning_buffer += token
+
+                    # Check if we've seen the closing tag
+                    if "</think>" in reasoning_buffer:
+                        # Split at the closing tag
+                        before_close, after_close = reasoning_buffer.split(
+                            "</think>", 1
+                        )
+
+                        # Send everything before + closing tag as reasoning
+                        if before_close or not reasoning_buffer.startswith(
+                            "</think>"
+                        ):
+                            delta_reasoning = before_close + "</think>"
+                        else:
+                            delta_reasoning = "</think>"
+
+                        # Everything after goes to content (will be sent in next iteration)
+                        # For now, mark that we've exited thinking phase
+                        in_thinking_phase = False
+
+                        # If there's content after </think>, we need to send it too
+                        # But we send it in the current chunk as regular content
+                        if after_close:
+                            # We have both reasoning and content in this token
+                            # Send reasoning first, content will accumulate
+                            delta_content = after_close
+                    else:
+                        # Still accumulating thinking, send as reasoning_content
+                        delta_reasoning = token
+                else:
+                    # Normal content (after thinking phase ended)
+                    delta_content = token
+
                 chunk = ChatCompletionChunk.model_construct(
                     id="0",
                     object="chat.completion.chunk",
@@ -855,11 +1108,8 @@ class Server:
                         Choice.model_construct(
                             index=0,
                             delta=ChoiceDelta(
-                                content=
-
-                                    if reasoning_first_token
-                                    else token
-                                ),
+                                content=delta_content,
+                                reasoning_content=delta_reasoning,
                                 function_call=None,
                                 role="assistant",
                                 tool_calls=openai_tool_calls,
@@ -872,7 +1122,6 @@ class Server:
                 )

                 # Format as SSE
-                reasoning_first_token = False
                 yield f"data: {chunk.model_dump_json()}\n\n".encode("utf-8")

             # Send the [DONE] marker only if still connected
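With this change, streamed chunks carry reasoning tokens in delta.reasoning_content and the visible answer in delta.content. A minimal consumer sketch follows; the base URL, /api/v1 prefix, and model name are placeholders, and the reasoning_content field is only populated when a reasoning-enabled model is loaded.

```python
# Illustrative SSE consumer that separates thinking tokens from answer tokens.
import json
import requests

with requests.post(
    "http://localhost:8000/api/v1/chat/completions",  # assumed host/port and prefix
    json={
        "model": "user.my-reasoning-model",  # placeholder model name
        "messages": [{"role": "user", "content": "Why is the sky blue?"}],
        "stream": True,
    },
    stream=True,
) as resp:
    for raw in resp.iter_lines():
        if not raw or not raw.startswith(b"data: "):
            continue
        payload = raw[len(b"data: "):]
        if payload == b"[DONE]":
            break
        delta = json.loads(payload)["choices"][0]["delta"]
        if delta.get("reasoning_content"):
            print(delta["reasoning_content"], end="")  # thinking tokens
        if delta.get("content"):
            print(delta["content"], end="")  # final answer tokens
```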
@@ -1570,9 +1819,10 @@ class Server:
         Load a registered LLM into system memory. Install the model first, if needed.
         config: the information required to load the model
         """
+        from huggingface_hub.constants import HF_HUB_CACHE
+
         try:
             await self._load_lock.acquire()
-
             # Acquire all generate locks
             for _ in range(self.max_concurrent_generations):
                 await self._generate_semaphore.acquire()
@@ -1597,6 +1847,38 @@ class Server:
             # Get additional properties from the model registry
             config_to_use = LoadConfig(**supported_models[config.model_name])

+            # For locally uploaded models, convert the relative checkpoint path to absolute path
+            model_source = supported_models.get(config.model_name, {}).get(
+                "source", None
+            )
+            if (
+                model_source == "local_upload"
+                and config_to_use.checkpoint
+                and not config_to_use.recipe.startswith("hf-")
+            ):
+                # Check if checkpoint is a relative path (stored during upload)
+                if not os.path.isabs(config_to_use.checkpoint):
+                    # Convert relative path to absolute by joining with HF_HUB_CACHE
+                    absolute_checkpoint = os.path.join(
+                        HF_HUB_CACHE, config_to_use.checkpoint
+                    )
+                    if os.path.exists(absolute_checkpoint):
+                        config_to_use.checkpoint = absolute_checkpoint
+                    else:
+                        logging.warning(
+                            f"Checkpoint path does not exist: {absolute_checkpoint}"
+                        )
+
+                # Also resolve mmproj path if present
+                if config_to_use.mmproj and not os.path.isabs(config_to_use.mmproj):
+                    absolute_mmproj = os.path.join(HF_HUB_CACHE, config_to_use.mmproj)
+                    if os.path.exists(absolute_mmproj):
+                        config_to_use.mmproj = absolute_mmproj
+                    else:
+                        logging.warning(
+                            f"MMProj path does not exist: {absolute_mmproj}"
+                        )
+
             # Caching mechanism: if the checkpoint is already loaded there is nothing else to do
             if (
                 self.llm_loaded
@@ -1782,6 +2064,42 @@ class Server:
             return
         await log_streamer(websocket, self.log_file)

+    async def get_incompatible_models(self):
+        """
+        Get information about incompatible RyzenAI models in the cache.
+        """
+        try:
+            return ModelManager().get_incompatible_ryzenai_models()
+        except Exception as e:
+            raise HTTPException(
+                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+                detail=f"Failed to scan for incompatible models: {str(e)}",
+            )
+
+    async def cleanup_incompatible_models(self, request: Request):
+        """
+        Delete selected incompatible RyzenAI models from the cache.
+        """
+        try:
+            body = await request.json()
+            model_paths = body.get("model_paths", [])
+
+            if not model_paths:
+                raise HTTPException(
+                    status_code=status.HTTP_400_BAD_REQUEST,
+                    detail="No model_paths provided",
+                )
+
+            result = ModelManager().cleanup_incompatible_models(model_paths)
+            return result
+        except HTTPException:
+            raise
+        except Exception as e:
+            raise HTTPException(
+                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+                detail=f"Failed to cleanup models: {str(e)}",
+            )
+

 # This file was originally licensed under Apache 2.0. It has been modified.
 # Modifications Copyright (c) 2025 AMD