lemonade-sdk 8.1.11__py3-none-any.whl → 8.2.2__py3-none-any.whl
This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between these versions as they appear in their public registries.
Potentially problematic release.
This version of lemonade-sdk might be problematic.
- lemonade/cache.py +6 -1
- lemonade/common/status.py +4 -4
- lemonade/common/system_info.py +0 -26
- lemonade/tools/accuracy.py +143 -48
- lemonade/tools/adapter.py +6 -1
- lemonade/tools/bench.py +26 -8
- lemonade/tools/flm/utils.py +70 -22
- lemonade/tools/huggingface/bench.py +6 -1
- lemonade/tools/llamacpp/bench.py +146 -27
- lemonade/tools/llamacpp/load.py +30 -2
- lemonade/tools/llamacpp/utils.py +317 -21
- lemonade/tools/oga/bench.py +5 -26
- lemonade/tools/oga/load.py +49 -123
- lemonade/tools/oga/migration.py +403 -0
- lemonade/tools/report/table.py +76 -8
- lemonade/tools/server/flm.py +2 -6
- lemonade/tools/server/llamacpp.py +43 -2
- lemonade/tools/server/serve.py +354 -18
- lemonade/tools/server/static/js/chat.js +15 -77
- lemonade/tools/server/static/js/model-settings.js +24 -3
- lemonade/tools/server/static/js/models.js +440 -37
- lemonade/tools/server/static/js/shared.js +61 -8
- lemonade/tools/server/static/logs.html +157 -13
- lemonade/tools/server/static/styles.css +204 -0
- lemonade/tools/server/static/webapp.html +39 -1
- lemonade/version.py +1 -1
- lemonade_install/install.py +33 -579
- {lemonade_sdk-8.1.11.dist-info → lemonade_sdk-8.2.2.dist-info}/METADATA +6 -4
- {lemonade_sdk-8.1.11.dist-info → lemonade_sdk-8.2.2.dist-info}/RECORD +38 -37
- lemonade_server/cli.py +10 -0
- lemonade_server/model_manager.py +172 -11
- lemonade_server/pydantic_models.py +3 -0
- lemonade_server/server_models.json +102 -66
- {lemonade_sdk-8.1.11.dist-info → lemonade_sdk-8.2.2.dist-info}/WHEEL +0 -0
- {lemonade_sdk-8.1.11.dist-info → lemonade_sdk-8.2.2.dist-info}/entry_points.txt +0 -0
- {lemonade_sdk-8.1.11.dist-info → lemonade_sdk-8.2.2.dist-info}/licenses/LICENSE +0 -0
- {lemonade_sdk-8.1.11.dist-info → lemonade_sdk-8.2.2.dist-info}/licenses/NOTICE.md +0 -0
- {lemonade_sdk-8.1.11.dist-info → lemonade_sdk-8.2.2.dist-info}/top_level.txt +0 -0
lemonade/tools/server/serve.py
CHANGED
@@ -7,12 +7,12 @@ import logging
 import platform
 import tempfile
 import traceback
-from typing import Optional, Union
+from typing import Optional, Union, List
 import json
 from pathlib import Path
 import os
-
-from fastapi import FastAPI, HTTPException, status, Request, WebSocket
+import shutil
+from fastapi import FastAPI, HTTPException, status, Request, WebSocket, Form, UploadFile
 from fastapi.responses import StreamingResponse
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.staticfiles import StaticFiles
@@ -83,10 +83,31 @@ if platform.system() in ["Windows", "Darwin"]:
     from lemonade.tools.server.tray import LemonadeTray, OutputDuplicator


-class
+class ServerLogFilter(logging.Filter):
+    def __init__(self, server):
+        super().__init__()
+        self.server = server
+        self.noisy_paths = {
+            "/api/v1/health",
+            "/api/v0/health",
+            "/api/v1/models",
+            "/api/v0/models",
+        }
+
     def filter(self, record: logging.LogRecord) -> bool:
-
-
+        msg = record.getMessage()
+
+        # Filter out websocket logs
+        if "> TEXT" in msg:
+            return False
+
+        # Filter out noisy HTTP routes if debug logs are OFF
+        if not self.server.debug_logging_enabled:
+            if any(path in msg for path in self.noisy_paths):
+                return False
+
+        # Otherwise, allow the log
+        return True


 async def log_streamer(websocket: WebSocket, path: str, interval: float = 1.0):
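The new ServerLogFilter drops websocket echo records and, unless debug logging is enabled, access-log records for the health and model-list routes. A minimal standalone sketch of the same idea, using only the standard library; the stub server object and log messages below are illustrative, not part of the package:

import logging


class StubServer:
    # Illustrative stand-in for the Server instance whose flag the filter checks
    debug_logging_enabled = False


class QuietHealthFilter(logging.Filter):
    """Drop access-log records for noisy routes unless debug logging is on."""

    def __init__(self, server):
        super().__init__()
        self.server = server
        self.noisy_paths = {"/api/v1/health", "/api/v1/models"}

    def filter(self, record: logging.LogRecord) -> bool:
        msg = record.getMessage()
        if not self.server.debug_logging_enabled and any(
            path in msg for path in self.noisy_paths
        ):
            return False
        return True


handler = logging.StreamHandler()
handler.addFilter(QuietHealthFilter(StubServer()))
logging.basicConfig(level=logging.INFO, handlers=[handler])

logging.info("GET /api/v1/health HTTP/1.1 200")             # suppressed
logging.info("POST /api/v1/chat/completions HTTP/1.1 200")  # printed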
@@ -94,7 +115,7 @@ async def log_streamer(websocket: WebSocket, path: str, interval: float = 1.0):
     await websocket.accept()
     try:
         with open(path, "r", encoding="utf-8") as f:
-            f.seek(0
+            f.seek(0)  # start at the beginning of the file
             while True:
                 # Try reading a line
                 line = f.readline()
@@ -325,6 +346,7 @@ class Server:
             self.app.post(f"{prefix}/responses")(self.responses)
             self.app.post(f"{prefix}/log-level")(self.set_log_level)
             self.app.websocket(f"{prefix}/logs/ws")(self.logs_ws)
+            self.app.post(f"{prefix}/add-local-model")(self.add_local_model)

             # OpenAI-compatible routes
             self.app.post(f"{prefix}/chat/completions")(self.chat_completions)
@@ -336,6 +358,186 @@ class Server:
             self.app.post(f"{prefix}/reranking")(self.reranking)
             self.app.post(f"{prefix}/rerank")(self.reranking)

+            # Migration routes
+            self.app.get(f"{prefix}/migration/incompatible-models")(
+                self.get_incompatible_models
+            )
+            self.app.post(f"{prefix}/migration/cleanup")(
+                self.cleanup_incompatible_models
+            )
+
+    async def add_local_model(
+        self,
+        model_name: str = Form(...),
+        checkpoint: str = Form(""),
+        recipe: str = Form(...),
+        reasoning: bool = Form(False),
+        vision: bool = Form(False),
+        mmproj: str = Form(None),
+        model_files: List[UploadFile] = None,
+    ):
+        from huggingface_hub.constants import HF_HUB_CACHE
+        from lemonade.tools.llamacpp.utils import parse_checkpoint
+
+        # Upload and register a local model from files.
+        try:
+            if not model_files:
+                raise HTTPException(
+                    status_code=status.HTTP_400_BAD_REQUEST,
+                    detail="No model files provided for upload",
+                )
+
+            if not model_name.startswith("user."):
+                raise HTTPException(
+                    status_code=status.HTTP_400_BAD_REQUEST,
+                    detail="Model name must start with 'user.'",
+                )
+
+            valid_recipes = ["llamacpp", "oga-npu", "oga-hybrid", "oga-cpu"]
+            if recipe not in valid_recipes:
+                raise HTTPException(
+                    status_code=status.HTTP_400_BAD_REQUEST,
+                    detail=f"Invalid recipe. Must be one of: {', '.join(valid_recipes)}",
+                )
+
+            if recipe == "llamacpp" and not any(
+                f.filename.lower().endswith(".gguf") for f in model_files
+            ):
+                raise HTTPException(
+                    status_code=status.HTTP_400_BAD_REQUEST,
+                    detail="At least one .gguf file is required for llamacpp",
+                )
+
+            # Check if model name already exists
+            if model_name in ModelManager().supported_models:
+                raise HTTPException(
+                    status_code=status.HTTP_409_CONFLICT,
+                    detail=(
+                        f"Model name '{model_name}' already exists. "
+                        "Please use a different name."
+                    ),
+                )
+
+            model_name_clean = model_name.replace("user.", "")
+
+            # Files are saved to models--{model_name_clean}
+            # Note: This is based on the user's custom model name, NOT the checkpoint field
+            repo_cache_name = model_name_clean.replace("/", "--")
+            snapshot_path = os.path.join(HF_HUB_CACHE, f"models--{repo_cache_name}")
+            os.makedirs(snapshot_path, exist_ok=True)
+
+            # Extract variant from checkpoint field if provided
+            # checkpoint field format: "folder:variant" or just "folder"
+            variant = None
+            if checkpoint and ":" in checkpoint:
+                _, variant = parse_checkpoint(checkpoint)
+                # variant now contains just the variant[can be with or without the
+                # .gguf extension] filename (e.g., "LFM2-VL-1.6B-F16 or LFM2-VL-1.6B-F16.gguf")
+
+            # Save uploaded files, preserving folder structure
+            for file in model_files:
+                relative_path = file.filename
+                path_parts = relative_path.split("/")
+
+                if len(path_parts) > 1:
+                    internal_path = "/".join(path_parts[1:])
+                    file_path = os.path.join(snapshot_path, internal_path)
+                else:
+                    file_path = os.path.join(snapshot_path, path_parts[0])
+
+                os.makedirs(os.path.dirname(file_path), exist_ok=True)
+                with open(file_path, "wb") as f:
+                    content = await file.read()
+                    f.write(content)
+
+            # Resolve actual file paths after upload (for faster loading later)
+            resolved_checkpoint = None
+            resolved_mmproj = None
+
+            # For OGA models, find genai_config.json
+            if recipe.startswith("oga-"):
+                for root, _, files in os.walk(snapshot_path):
+                    if "genai_config.json" in files:
+                        resolved_checkpoint = root
+                        break
+                if not resolved_checkpoint:
+                    resolved_checkpoint = snapshot_path
+
+            # For llamacpp models, find the GGUF file
+            elif recipe == "llamacpp":
+                gguf_file_found = None
+
+                # If variant is specified, look for that specific file
+                if variant:
+                    search_term = (
+                        variant if variant.endswith(".gguf") else f"{variant}.gguf"
+                    )
+                    for root, _, files in os.walk(snapshot_path):
+                        if search_term in files:
+                            gguf_file_found = os.path.join(root, search_term)
+                            break
+
+                # If no variant or variant not found, search for any .gguf file (excluding mmproj)
+                if not gguf_file_found:
+                    for root, _, files in os.walk(snapshot_path):
+                        gguf_files = [
+                            f
+                            for f in files
+                            if f.endswith(".gguf") and "mmproj" not in f.lower()
+                        ]
+                        if gguf_files:
+                            gguf_file_found = os.path.join(root, gguf_files[0])
+                            break
+
+                resolved_checkpoint = (
+                    gguf_file_found if gguf_file_found else snapshot_path
+                )
+
+            # Search for mmproj file if provided
+            if mmproj:
+                for root, _, files in os.walk(snapshot_path):
+                    if mmproj in files:
+                        resolved_mmproj = os.path.join(root, mmproj)
+                        break
+
+            # Build checkpoint for registration
+            # For llamacpp with resolved path, store the full path relative to HF_HUB_CACHE
+            if resolved_checkpoint:
+                # Store as relative path from HF_HUB_CACHE for portability
+                checkpoint_to_register = os.path.relpath(
+                    resolved_checkpoint, HF_HUB_CACHE
+                )
+            elif variant:
+                checkpoint_to_register = f"models--{repo_cache_name}:{variant}"
+            else:
+                checkpoint_to_register = f"models--{repo_cache_name}"
+
+            # Register the model
+            ModelManager().register_local_model(
+                model_name=model_name,
+                checkpoint=checkpoint_to_register,
+                recipe=recipe,
+                reasoning=reasoning,
+                vision=vision,
+                mmproj=resolved_mmproj if resolved_mmproj else mmproj,
+                snapshot_path=snapshot_path,
+            )
+
+            # Refresh local models
+            self.local_models = ModelManager().downloaded_models_enabled
+
+            return {
+                "status": "success",
+                "message": f"Model {model_name} uploaded and registered successfully",
+            }
+        except Exception as e:
+            if os.path.exists(checkpoint_to_register):
+                shutil.rmtree(checkpoint_to_register)
+            raise HTTPException(
+                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+                detail=f"Failed to upload model: {str(e)}",
+            )
+
     async def set_log_level(self, config: LogLevelConfig):
         """
         Set the logging level of the server.
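The new add_local_model handler accepts a multipart form upload. A hedged client-side sketch of calling it with Python's requests library; the host, port, and file name below are placeholders, and the /api/v1 prefix is assumed from the other routes in this diff:

import requests

BASE_URL = "http://localhost:8000/api/v1"  # placeholder host/port

with open("my-model-Q4_K_M.gguf", "rb") as gguf:  # placeholder GGUF file
    response = requests.post(
        f"{BASE_URL}/add-local-model",
        data={
            "model_name": "user.my-model",  # must start with "user."
            "recipe": "llamacpp",           # llamacpp, oga-npu, oga-hybrid, or oga-cpu
            "reasoning": "false",
            "vision": "false",
        },
        # Received by FastAPI as List[UploadFile] under the "model_files" field
        files=[("model_files", ("my-model-Q4_K_M.gguf", gguf, "application/octet-stream"))],
        timeout=600,
    )

response.raise_for_status()
print(response.json())  # {"status": "success", "message": "..."}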
@@ -454,13 +656,13 @@ class Server:
        )
        file_handler.setLevel(logging_level)
        file_handler.setFormatter(uvicorn_formatter)
-        file_handler.addFilter(
+        file_handler.addFilter(ServerLogFilter(self))

        # Set up console handler
        console_handler = logging.StreamHandler()
        console_handler.setLevel(logging_level)
        console_handler.setFormatter(uvicorn_formatter)
-        console_handler.addFilter(
+        console_handler.addFilter(ServerLogFilter(self))

        # Configure root logger with both handlers
        logging.basicConfig(
@@ -746,6 +948,23 @@ class Server:
            await self.load_llm(lc)

        if self.llm_loaded.recipe == "llamacpp" or self.llm_loaded.recipe == "flm":
+            if (
+                hasattr(chat_completion_request, "enable_thinking")
+                and chat_completion_request.enable_thinking is False
+                and "qwen3" in self.llm_loaded.model_name.lower()
+            ):
+
+                # Modify the last user message to include /no_think
+                if chat_completion_request.messages:
+                    for i in range(len(chat_completion_request.messages) - 1, -1, -1):
+                        if chat_completion_request.messages[i].get("role") == "user":
+                            original_content = chat_completion_request.messages[i][
+                                "content"
+                            ]
+                            chat_completion_request.messages[i][
+                                "content"
+                            ] = f"/no_think\n{original_content}"
+                            break
            return self.wrapped_server.chat_completion(chat_completion_request)

        # Convert chat messages to text using the model's chat template
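For llamacpp/FLM models whose name contains "qwen3", the block above rewrites the last user message when the request sets enable_thinking to false. A hedged sketch of a request that would exercise it; the model name, host, and port are placeholders:

import requests

payload = {
    "model": "user.my-qwen3-model",  # placeholder; any loaded model with "qwen3" in its name
    "messages": [{"role": "user", "content": "Summarize HTTP in one sentence."}],
    "enable_thinking": False,  # server prepends "/no_think\n" to the last user message
    "stream": False,
}

resp = requests.post(
    "http://localhost:8000/api/v1/chat/completions", json=payload, timeout=120
)
print(resp.json()["choices"][0]["message"]["content"])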
@@ -807,6 +1026,11 @@ class Server:

        # Keep track of the full response for tool call extraction
        full_response = ""
+
+        # Track whether we're still in the thinking phase (before </think> tag)
+        in_thinking_phase = self.llm_loaded.reasoning
+        reasoning_buffer = ""  # Accumulate reasoning tokens to detect </think>
+
        try:
            async for token in self._generate_tokens(**generation_args):
                # Handle client disconnect: stop generation and exit
@@ -845,7 +1069,53 @@ class Server:
                        )
                    )

-                # Create a ChatCompletionChunk
+                # Create a ChatCompletionChunk with reasoning_content support
+                # If we're in reasoning mode and haven't seen </think> yet,
+                # send tokens as reasoning_content instead of content
+                delta_content = None
+                delta_reasoning = None
+
+                if reasoning_first_token:
+                    # First token - include opening tag in reasoning
+                    delta_reasoning = "<think>" + token
+                    reasoning_first_token = False
+                    reasoning_buffer = token
+                elif in_thinking_phase:
+                    # Still in thinking phase - accumulate and check for </think>
+                    reasoning_buffer += token
+
+                    # Check if we've seen the closing tag
+                    if "</think>" in reasoning_buffer:
+                        # Split at the closing tag
+                        before_close, after_close = reasoning_buffer.split(
+                            "</think>", 1
+                        )
+
+                        # Send everything before + closing tag as reasoning
+                        if before_close or not reasoning_buffer.startswith(
+                            "</think>"
+                        ):
+                            delta_reasoning = before_close + "</think>"
+                        else:
+                            delta_reasoning = "</think>"
+
+                        # Everything after goes to content (will be sent in next iteration)
+                        # For now, mark that we've exited thinking phase
+                        in_thinking_phase = False
+
+                        # If there's content after </think>, we need to send it too
+                        # But we send it in the current chunk as regular content
+                        if after_close:
+                            # We have both reasoning and content in this token
+                            # Send reasoning first, content will accumulate
+                            delta_content = after_close
+                    else:
+                        # Still accumulating thinking, send as reasoning_content
+                        delta_reasoning = token
+                else:
+                    # Normal content (after thinking phase ended)
+                    delta_content = token
+
                chunk = ChatCompletionChunk.model_construct(
                    id="0",
                    object="chat.completion.chunk",
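The branch above splits the accumulated token buffer at the first "</think>" so that everything up to and including the tag is emitted as reasoning_content and the remainder as regular content. The core split, shown in isolation on a synthetic buffer:

# Synthetic buffer used only to illustrate the split performed above
reasoning_buffer = "step 1, step 2</think>The answer is 42."

before_close, after_close = reasoning_buffer.split("</think>", 1)
delta_reasoning = before_close + "</think>"  # routed to reasoning_content
delta_content = after_close                  # routed to content

print(delta_reasoning)  # step 1, step 2</think>
print(delta_content)    # The answer is 42.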
@@ -855,11 +1125,8 @@ class Server:
                        Choice.model_construct(
                            index=0,
                            delta=ChoiceDelta(
-                                content=
-
-                                if reasoning_first_token
-                                else token
-                                ),
+                                content=delta_content,
+                                reasoning_content=delta_reasoning,
                                function_call=None,
                                role="assistant",
                                tool_calls=openai_tool_calls,
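With the ChoiceDelta change above, streamed chunks can carry a reasoning_content field alongside content. A hedged sketch of a streaming client that separates the two; the URL and model name are placeholders:

import json
import requests

payload = {
    "model": "user.my-reasoning-model",  # placeholder reasoning-capable model
    "messages": [{"role": "user", "content": "Why is the sky blue?"}],
    "stream": True,
}

with requests.post(
    "http://localhost:8000/api/v1/chat/completions",
    json=payload,
    stream=True,
    timeout=300,
) as resp:
    for line in resp.iter_lines():
        if not line or not line.startswith(b"data: "):
            continue
        data = line[len(b"data: "):]
        if data == b"[DONE]":
            break
        delta = json.loads(data)["choices"][0]["delta"]
        if delta.get("reasoning_content"):
            print("[thinking]", delta["reasoning_content"], end="", flush=True)
        if delta.get("content"):
            print(delta["content"], end="", flush=True)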
@@ -872,7 +1139,6 @@ class Server:
                )

                # Format as SSE
-                reasoning_first_token = False
                yield f"data: {chunk.model_dump_json()}\n\n".encode("utf-8")

            # Send the [DONE] marker only if still connected
@@ -1125,9 +1391,10 @@ class Server:
                        "<think>" + token if reasoning_first_token else token
                    ),
                    item_id="0 ",
+                    logprobs=[],
                    output_index=0,
-                    type="response.output_text.delta",
                    sequence_number=0,
+                    type="response.output_text.delta",
                )
                full_response += token

@@ -1570,9 +1837,10 @@ class Server:
        Load a registered LLM into system memory. Install the model first, if needed.
        config: the information required to load the model
        """
+        from huggingface_hub.constants import HF_HUB_CACHE
+
        try:
            await self._load_lock.acquire()
-
            # Acquire all generate locks
            for _ in range(self.max_concurrent_generations):
                await self._generate_semaphore.acquire()
@@ -1597,6 +1865,38 @@ class Server:
                # Get additional properties from the model registry
                config_to_use = LoadConfig(**supported_models[config.model_name])

+                # For locally uploaded models, convert the relative checkpoint path to absolute path
+                model_source = supported_models.get(config.model_name, {}).get(
+                    "source", None
+                )
+                if (
+                    model_source == "local_upload"
+                    and config_to_use.checkpoint
+                    and not config_to_use.recipe.startswith("hf-")
+                ):
+                    # Check if checkpoint is a relative path (stored during upload)
+                    if not os.path.isabs(config_to_use.checkpoint):
+                        # Convert relative path to absolute by joining with HF_HUB_CACHE
+                        absolute_checkpoint = os.path.join(
+                            HF_HUB_CACHE, config_to_use.checkpoint
+                        )
+                        if os.path.exists(absolute_checkpoint):
+                            config_to_use.checkpoint = absolute_checkpoint
+                        else:
+                            logging.warning(
+                                f"Checkpoint path does not exist: {absolute_checkpoint}"
+                            )
+
+                    # Also resolve mmproj path if present
+                    if config_to_use.mmproj and not os.path.isabs(config_to_use.mmproj):
+                        absolute_mmproj = os.path.join(HF_HUB_CACHE, config_to_use.mmproj)
+                        if os.path.exists(absolute_mmproj):
+                            config_to_use.mmproj = absolute_mmproj
+                        else:
+                            logging.warning(
+                                f"MMProj path does not exist: {absolute_mmproj}"
+                            )
+
                # Caching mechanism: if the checkpoint is already loaded there is nothing else to do
                if (
                    self.llm_loaded
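Locally uploaded models store their checkpoint relative to the Hugging Face cache, and the block above joins it back to an absolute path at load time. A minimal sketch of that round trip; the model and file names are illustrative:

import os

from huggingface_hub.constants import HF_HUB_CACHE

# Path a hypothetical upload would have resolved inside the cache
resolved_checkpoint = os.path.join(
    HF_HUB_CACHE, "models--my-model", "my-model-Q4_K_M.gguf"
)

relative = os.path.relpath(resolved_checkpoint, HF_HUB_CACHE)  # stored at upload time
absolute = os.path.join(HF_HUB_CACHE, relative)                # recovered at load time

assert os.path.normpath(absolute) == os.path.normpath(resolved_checkpoint)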
@@ -1782,6 +2082,42 @@ class Server:
            return
        await log_streamer(websocket, self.log_file)

+    async def get_incompatible_models(self):
+        """
+        Get information about incompatible RyzenAI models in the cache.
+        """
+        try:
+            return ModelManager().get_incompatible_ryzenai_models()
+        except Exception as e:
+            raise HTTPException(
+                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+                detail=f"Failed to scan for incompatible models: {str(e)}",
+            )
+
+    async def cleanup_incompatible_models(self, request: Request):
+        """
+        Delete selected incompatible RyzenAI models from the cache.
+        """
+        try:
+            body = await request.json()
+            model_paths = body.get("model_paths", [])
+
+            if not model_paths:
+                raise HTTPException(
+                    status_code=status.HTTP_400_BAD_REQUEST,
+                    detail="No model_paths provided",
+                )
+
+            result = ModelManager().cleanup_incompatible_models(model_paths)
+            return result
+        except HTTPException:
+            raise
+        except Exception as e:
+            raise HTTPException(
+                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+                detail=f"Failed to cleanup models: {str(e)}",
+            )
+

 # This file was originally licensed under Apache 2.0. It has been modified.
 # Modifications Copyright (c) 2025 AMD
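The two migration endpoints registered above can be driven directly over HTTP. A hedged sketch; the base URL is a placeholder, and the exact shape of the scan report is defined by ModelManager and not shown in this diff:

import requests

BASE_URL = "http://localhost:8000/api/v1"  # placeholder host/port

# List incompatible RyzenAI models found in the cache
report = requests.get(f"{BASE_URL}/migration/incompatible-models", timeout=60).json()
print(report)

# Delete a selected subset; the endpoint expects a JSON body with "model_paths"
paths_to_remove = []  # fill in with paths taken from the report above
if paths_to_remove:
    result = requests.post(
        f"{BASE_URL}/migration/cleanup",
        json={"model_paths": paths_to_remove},
        timeout=300,
    )
    print(result.json())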
lemonade/tools/server/static/js/chat.js
CHANGED
@@ -4,9 +4,6 @@ let attachedFiles = [];
 let systemMessageElement = null;
 let abortController = null;

-// Default model configuration
-const DEFAULT_MODEL = 'Qwen2.5-0.5B-Instruct-CPU';
-
 const THINKING_ANIM_INTERVAL_MS = 550;
 // Toggle this to false if you prefer plain dots only.
 const THINKING_USE_LEMON = true;
@@ -165,17 +162,22 @@ async function handleModelSelectChange() {
                loadingOption.hidden = true;
                select.appendChild(loadingOption);
            }
+            // Gray out send button during loading
+            updateAttachmentButtonState();
        },
        onLoadingEnd: (modelId, success) => {
            // Reset the default option text
            const defaultOption = modelSelect.querySelector('option[value=""]');
            if (defaultOption) defaultOption.textContent = 'Click to select a model ▼';
+            // Update button state after loading completes
+            updateAttachmentButtonState();
        },
        onSuccess: () => {
            updateAttachmentButtonState();
        },
        onError: () => {
            updateModelSelectValue();
+            updateAttachmentButtonState();
        }
    });
 }
@@ -192,7 +194,8 @@ function updateAttachmentButtonState() {
        toggleBtn.disabled = false;
        toggleBtn.textContent = 'Stop';
    } else {
-
+        // Gray out send button if no model is loaded or if loading
+        toggleBtn.disabled = loading || !currentLoadedModel;
        toggleBtn.textContent = 'Send';
    }
 }
|
|
|
225
228
|
// Make displaySystemMessage accessible globally
|
|
226
229
|
window.displaySystemMessage = displaySystemMessage;
|
|
227
230
|
|
|
228
|
-
// Auto-load default model and send message
|
|
229
|
-
async function autoLoadDefaultModelAndSend() {
|
|
230
|
-
// Check if default model is available and installed
|
|
231
|
-
if (!window.SERVER_MODELS || !window.SERVER_MODELS[DEFAULT_MODEL]) {
|
|
232
|
-
showErrorBanner('No models available. Please install a model first.');
|
|
233
|
-
return;
|
|
234
|
-
}
|
|
235
|
-
|
|
236
|
-
if (!window.installedModels || !window.installedModels.has(DEFAULT_MODEL)) {
|
|
237
|
-
showErrorBanner('Default model is not installed. Please install it from the Model Management tab.');
|
|
238
|
-
return;
|
|
239
|
-
}
|
|
240
|
-
|
|
241
|
-
// Store the message to send after loading
|
|
242
|
-
const messageToSend = chatInput.value.trim();
|
|
243
|
-
if (!messageToSend && attachedFiles.length === 0) return;
|
|
244
|
-
|
|
245
|
-
// Use the standardized load function
|
|
246
|
-
const success = await loadModelStandardized(DEFAULT_MODEL, {
|
|
247
|
-
// Custom UI updates for auto-loading
|
|
248
|
-
onLoadingStart: () => {
|
|
249
|
-
if (toggleBtn) {
|
|
250
|
-
toggleBtn.disabled = true;
|
|
251
|
-
toggleBtn.textContent = 'Send';
|
|
252
|
-
}
|
|
253
|
-
},
|
|
254
|
-
// Reset send button state
|
|
255
|
-
onLoadingEnd: () => { updateAttachmentButtonState(); },
|
|
256
|
-
// Send the message after successful load
|
|
257
|
-
onSuccess: () => { sendMessage(messageToSend); },
|
|
258
|
-
onError: (error) => {
|
|
259
|
-
console.error('Error auto-loading default model:', error);
|
|
260
|
-
showErrorBanner('Failed to load model: ' + error.message);
|
|
261
|
-
}
|
|
262
|
-
});
|
|
263
|
-
}
|
|
264
|
-
|
|
265
231
|
// Check if model supports vision and update attachment button
|
|
266
232
|
function checkCurrentModel() {
|
|
267
233
|
if (attachedFiles.length > 0 && currentLoadedModel && !isVisionModel(currentLoadedModel)) {
|
|
@@ -320,13 +286,11 @@ function handleChatInputKeydown(e) {
        clearAttachments();
    } else if (e.key === 'Enter' && !e.shiftKey) {
        e.preventDefault();
-        //
+        // Only send if we have a loaded model
        if (currentLoadedModel && modelSelect.value !== '' && !modelSelect.disabled) {
            sendMessage();
-        } else if (!currentLoadedModel) {
-            // Auto-load default model and send
-            autoLoadDefaultModelAndSend();
        }
+        // Otherwise do nothing - button is grayed out
    }
 }

@@ -860,39 +824,13 @@ async function sendMessage(existingTextIfAny) {
        systemMessageElement = null;
    }

-    // Check if a model is loaded
+    // Check if a model is loaded
    if (!currentLoadedModel) {
-
-
-
-
-
-        const loadingBubble = appendMessage('system', 'Loading default model, please wait...');
-
-        // Load the default model
-        await httpRequest(getServerBaseUrl() + '/api/v1/load', {
-            method: 'POST',
-            headers: { 'Content-Type': 'application/json' },
-            body: JSON.stringify({ model_name: DEFAULT_MODEL })
-        });
-
-        // Update model status
-        await updateModelStatusIndicator();
-
-        // Remove loading message
-        loadingBubble.parentElement.remove();
-
-        // Show success message briefly
-        const successBubble = appendMessage('system', `Loaded ${DEFAULT_MODEL} successfully!`);
-        setTimeout(() => { successBubble.parentElement.remove(); }, 2000);
-    } catch (error) {
-        alert('Please load a model first before sending messages.');
-        return;
-    }
-    } else {
-        alert('Please load a model first before sending messages.');
-        return;
-    }
+        alert('Please load a model first before sending messages.');
+        abortController = null;
+        isStreaming = false;
+        updateAttachmentButtonState();
+        return;
    }

    // Check if trying to send images to non-vision model