lemonade-sdk 8.1.10__py3-none-any.whl → 8.1.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lemonade-sdk might be problematic. Click here for more details.
- lemonade/cache.py +6 -1
- lemonade/common/status.py +4 -4
- lemonade/tools/bench.py +22 -1
- lemonade/tools/flm/__init__.py +1 -0
- lemonade/tools/flm/utils.py +255 -0
- lemonade/tools/llamacpp/bench.py +111 -23
- lemonade/tools/llamacpp/load.py +20 -1
- lemonade/tools/llamacpp/utils.py +210 -17
- lemonade/tools/oga/bench.py +0 -26
- lemonade/tools/report/table.py +6 -0
- lemonade/tools/server/flm.py +133 -0
- lemonade/tools/server/llamacpp.py +23 -5
- lemonade/tools/server/serve.py +260 -135
- lemonade/tools/server/static/js/chat.js +165 -82
- lemonade/tools/server/static/js/models.js +87 -54
- lemonade/tools/server/static/js/shared.js +9 -6
- lemonade/tools/server/static/logs.html +57 -0
- lemonade/tools/server/static/styles.css +159 -8
- lemonade/tools/server/static/webapp.html +28 -10
- lemonade/tools/server/tray.py +94 -38
- lemonade/tools/server/utils/macos_tray.py +226 -0
- lemonade/tools/server/utils/{system_tray.py → windows_tray.py} +13 -0
- lemonade/tools/server/webapp.py +4 -1
- lemonade/tools/server/wrapped_server.py +91 -25
- lemonade/version.py +1 -1
- lemonade_install/install.py +25 -2
- {lemonade_sdk-8.1.10.dist-info → lemonade_sdk-8.1.12.dist-info}/METADATA +10 -6
- {lemonade_sdk-8.1.10.dist-info → lemonade_sdk-8.1.12.dist-info}/RECORD +37 -32
- lemonade_server/cli.py +103 -14
- lemonade_server/model_manager.py +186 -45
- lemonade_server/pydantic_models.py +25 -1
- lemonade_server/server_models.json +175 -62
- {lemonade_sdk-8.1.10.dist-info → lemonade_sdk-8.1.12.dist-info}/WHEEL +0 -0
- {lemonade_sdk-8.1.10.dist-info → lemonade_sdk-8.1.12.dist-info}/entry_points.txt +0 -0
- {lemonade_sdk-8.1.10.dist-info → lemonade_sdk-8.1.12.dist-info}/licenses/LICENSE +0 -0
- {lemonade_sdk-8.1.10.dist-info → lemonade_sdk-8.1.12.dist-info}/licenses/NOTICE.md +0 -0
- {lemonade_sdk-8.1.10.dist-info → lemonade_sdk-8.1.12.dist-info}/top_level.txt +0 -0
lemonade_server/cli.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import argparse
|
|
2
2
|
import sys
|
|
3
3
|
import os
|
|
4
|
+
import platform
|
|
4
5
|
from typing import Tuple, Optional
|
|
5
6
|
import psutil
|
|
6
7
|
from typing import List
|
|
@@ -104,12 +105,34 @@ def serve(
|
|
|
104
105
|
max_wait_time = 30
|
|
105
106
|
wait_interval = 0.5
|
|
106
107
|
waited = 0
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
108
|
+
|
|
109
|
+
if platform.system() == "Darwin":
|
|
110
|
+
# On macOS, use direct HTTP health check instead of process scanning for better
|
|
111
|
+
# performance
|
|
112
|
+
import requests
|
|
113
|
+
|
|
114
|
+
while waited < max_wait_time:
|
|
115
|
+
time.sleep(wait_interval)
|
|
116
|
+
try:
|
|
117
|
+
response = requests.get(
|
|
118
|
+
f"http://{host}:{port}/api/v1/health", timeout=1
|
|
119
|
+
)
|
|
120
|
+
if response.status_code == 200:
|
|
121
|
+
break
|
|
122
|
+
except (
|
|
123
|
+
requests.exceptions.ConnectionError,
|
|
124
|
+
requests.exceptions.Timeout,
|
|
125
|
+
):
|
|
126
|
+
pass # Server not ready yet
|
|
127
|
+
waited += wait_interval
|
|
128
|
+
else:
|
|
129
|
+
# On other platforms, use the existing approach
|
|
130
|
+
while waited < max_wait_time:
|
|
131
|
+
time.sleep(wait_interval)
|
|
132
|
+
_, running_port = get_server_info()
|
|
133
|
+
if running_port is not None:
|
|
134
|
+
break
|
|
135
|
+
waited += wait_interval
|
|
113
136
|
|
|
114
137
|
return port, server_thread
|
|
115
138
|
|
|
@@ -176,6 +199,7 @@ def pull(
|
|
|
176
199
|
checkpoint: Optional[str] = None,
|
|
177
200
|
recipe: Optional[str] = None,
|
|
178
201
|
reasoning: bool = False,
|
|
202
|
+
vision: bool = False,
|
|
179
203
|
mmproj: str = "",
|
|
180
204
|
):
|
|
181
205
|
"""
|
|
@@ -202,6 +226,7 @@ def pull(
|
|
|
202
226
|
("checkpoint", checkpoint),
|
|
203
227
|
("recipe", recipe),
|
|
204
228
|
("reasoning", reasoning),
|
|
229
|
+
("vision", vision),
|
|
205
230
|
("mmproj", mmproj),
|
|
206
231
|
]:
|
|
207
232
|
if value:
|
|
@@ -224,6 +249,7 @@ def pull(
|
|
|
224
249
|
checkpoint=checkpoint,
|
|
225
250
|
recipe=recipe,
|
|
226
251
|
reasoning=reasoning,
|
|
252
|
+
vision=vision,
|
|
227
253
|
mmproj=mmproj,
|
|
228
254
|
# The pull command will download an upgraded model if available, even
|
|
229
255
|
# if we already have a local copy of the model
|
|
@@ -282,6 +308,10 @@ def run(
|
|
|
282
308
|
import time
|
|
283
309
|
import os
|
|
284
310
|
|
|
311
|
+
# Disable tray on macOS for run command due to threading issues
|
|
312
|
+
if platform.system() == "Darwin":
|
|
313
|
+
tray = False
|
|
314
|
+
|
|
285
315
|
# Start the server if not running
|
|
286
316
|
_, running_port = get_server_info()
|
|
287
317
|
server_previously_running = running_port is not None
|
|
@@ -367,6 +397,23 @@ def is_lemonade_server(pid):
|
|
|
367
397
|
"""
|
|
368
398
|
Check whether or not a given PID corresponds to a Lemonade server
|
|
369
399
|
"""
|
|
400
|
+
# macOS only: Self-exclusion to prevent blocking server startup
|
|
401
|
+
if platform.system() == "Darwin":
|
|
402
|
+
current_pid = os.getpid()
|
|
403
|
+
if pid == current_pid:
|
|
404
|
+
return False
|
|
405
|
+
|
|
406
|
+
# Exclude children of current process to avoid detecting status commands
|
|
407
|
+
try:
|
|
408
|
+
current_process = psutil.Process(current_pid)
|
|
409
|
+
child_pids = [
|
|
410
|
+
child.pid for child in current_process.children(recursive=True)
|
|
411
|
+
]
|
|
412
|
+
if pid in child_pids:
|
|
413
|
+
return False
|
|
414
|
+
except (psutil.NoSuchProcess, psutil.AccessDenied):
|
|
415
|
+
pass
|
|
416
|
+
|
|
370
417
|
try:
|
|
371
418
|
process = psutil.Process(pid)
|
|
372
419
|
|
|
@@ -382,6 +429,22 @@ def is_lemonade_server(pid):
|
|
|
382
429
|
"lsdev",
|
|
383
430
|
]:
|
|
384
431
|
return True
|
|
432
|
+
# macOS only: Python scripts appear as "python3.x", check command line
|
|
433
|
+
elif process_name.startswith("python") and platform.system() == "Darwin":
|
|
434
|
+
try:
|
|
435
|
+
cmdline = process.cmdline()
|
|
436
|
+
if len(cmdline) >= 2:
|
|
437
|
+
script_path = cmdline[1]
|
|
438
|
+
# Check for various lemonade server command patterns (macOS only)
|
|
439
|
+
lemonade_patterns = [
|
|
440
|
+
"lemonade-server-dev",
|
|
441
|
+
"lemonade-server",
|
|
442
|
+
"lsdev", # Short alias for lemonade-server-dev
|
|
443
|
+
]
|
|
444
|
+
if any(pattern in script_path for pattern in lemonade_patterns):
|
|
445
|
+
return True
|
|
446
|
+
except (psutil.AccessDenied, psutil.NoSuchProcess):
|
|
447
|
+
pass
|
|
385
448
|
elif "llama-server" in process_name:
|
|
386
449
|
return False
|
|
387
450
|
if not process.parent():
|
|
@@ -399,18 +462,43 @@ def get_server_info() -> Tuple[int | None, int | None]:
|
|
|
399
462
|
2. The port that Lemonade Server is running on
|
|
400
463
|
"""
|
|
401
464
|
|
|
402
|
-
#
|
|
465
|
+
# Try the global approach first (works on Windows/Linux without permissions)
|
|
403
466
|
try:
|
|
404
467
|
connections = psutil.net_connections(kind="tcp4")
|
|
405
|
-
|
|
406
468
|
for conn in connections:
|
|
407
469
|
if conn.status == "LISTEN" and conn.laddr and conn.pid is not None:
|
|
408
470
|
if is_lemonade_server(conn.pid):
|
|
409
471
|
return conn.pid, conn.laddr.port
|
|
410
|
-
|
|
411
|
-
|
|
472
|
+
except (psutil.AccessDenied, PermissionError):
|
|
473
|
+
# Global approach needs elevated permissions on macOS, fall back to per-process approach
|
|
474
|
+
pass
|
|
475
|
+
except Exception: # pylint: disable=broad-exception-caught
|
|
412
476
|
pass
|
|
413
477
|
|
|
478
|
+
# Per-process approach (macOS only - needs this due to permission requirements)
|
|
479
|
+
if platform.system() == "Darwin":
|
|
480
|
+
try:
|
|
481
|
+
for proc in psutil.process_iter(["pid", "name"]):
|
|
482
|
+
try:
|
|
483
|
+
pid = proc.info["pid"]
|
|
484
|
+
if is_lemonade_server(pid):
|
|
485
|
+
# Found a lemonade server, check its listening ports
|
|
486
|
+
connections = proc.net_connections(kind="inet")
|
|
487
|
+
for conn in connections:
|
|
488
|
+
if conn.status == "LISTEN" and conn.laddr:
|
|
489
|
+
return pid, conn.laddr.port
|
|
490
|
+
# If no listening connections found, this process is not actually serving
|
|
491
|
+
# Continue looking for other processes
|
|
492
|
+
except (
|
|
493
|
+
psutil.NoSuchProcess,
|
|
494
|
+
psutil.AccessDenied,
|
|
495
|
+
psutil.ZombieProcess,
|
|
496
|
+
):
|
|
497
|
+
# Some processes may be inaccessible, continue to next
|
|
498
|
+
continue
|
|
499
|
+
except Exception: # pylint: disable=broad-exception-caught
|
|
500
|
+
pass
|
|
501
|
+
|
|
414
502
|
return None, None
|
|
415
503
|
|
|
416
504
|
|
|
@@ -425,12 +513,13 @@ def list_models():
|
|
|
425
513
|
|
|
426
514
|
# Get all supported models and downloaded models
|
|
427
515
|
supported_models = model_manager.supported_models
|
|
516
|
+
filtered_models = model_manager.filter_models_by_backend(supported_models)
|
|
428
517
|
downloaded_models = model_manager.downloaded_models
|
|
429
518
|
|
|
430
519
|
# Filter to only show recommended models
|
|
431
520
|
recommended_models = {
|
|
432
521
|
model_name: model_info
|
|
433
|
-
for model_name, model_info in supported_models.items()
|
|
522
|
+
for model_name, model_info in filtered_models.items()
|
|
434
523
|
if model_info.get("suggested", False)
|
|
435
524
|
}
|
|
436
525
|
|
|
@@ -507,7 +596,7 @@ def _add_server_arguments(parser):
|
|
|
507
596
|
"--llamacpp",
|
|
508
597
|
type=str,
|
|
509
598
|
help="LlamaCpp backend to use",
|
|
510
|
-
choices=["vulkan", "rocm"],
|
|
599
|
+
choices=["vulkan", "rocm", "metal"],
|
|
511
600
|
default=DEFAULT_LLAMACPP_BACKEND,
|
|
512
601
|
)
|
|
513
602
|
parser.add_argument(
|
|
@@ -520,7 +609,7 @@ def _add_server_arguments(parser):
|
|
|
520
609
|
default=DEFAULT_CTX_SIZE,
|
|
521
610
|
)
|
|
522
611
|
|
|
523
|
-
if os.name == "nt":
|
|
612
|
+
if os.name == "nt" or platform.system() == "Darwin":
|
|
524
613
|
parser.add_argument(
|
|
525
614
|
"--no-tray",
|
|
526
615
|
action="store_true",
|
|
@@ -620,7 +709,7 @@ def main():
|
|
|
620
709
|
|
|
621
710
|
args = parser.parse_args()
|
|
622
711
|
|
|
623
|
-
if os.name != "nt":
|
|
712
|
+
if os.name != "nt" and platform.system() != "Darwin":
|
|
624
713
|
args.no_tray = True
|
|
625
714
|
|
|
626
715
|
if args.version:
|
lemonade_server/model_manager.py
CHANGED
|
@@ -1,16 +1,25 @@
|
|
|
1
1
|
import json
|
|
2
2
|
import os
|
|
3
|
+
import subprocess
|
|
3
4
|
from typing import Optional
|
|
4
5
|
import shutil
|
|
5
6
|
import huggingface_hub
|
|
6
7
|
from importlib.metadata import distributions
|
|
7
8
|
from lemonade_server.pydantic_models import PullConfig
|
|
9
|
+
from lemonade_server.pydantic_models import PullConfig
|
|
8
10
|
from lemonade.cache import DEFAULT_CACHE_DIR
|
|
9
11
|
from lemonade.tools.llamacpp.utils import parse_checkpoint, download_gguf
|
|
10
12
|
from lemonade.common.network import custom_snapshot_download
|
|
11
13
|
|
|
12
14
|
USER_MODELS_FILE = os.path.join(DEFAULT_CACHE_DIR, "user_models.json")
|
|
13
15
|
|
|
16
|
+
from lemonade.tools.flm.utils import (
|
|
17
|
+
get_flm_installed_models,
|
|
18
|
+
is_flm_available,
|
|
19
|
+
install_flm,
|
|
20
|
+
download_flm_model,
|
|
21
|
+
)
|
|
22
|
+
|
|
14
23
|
|
|
15
24
|
class ModelManager:
|
|
16
25
|
|
|
@@ -81,46 +90,61 @@ class ModelManager:
|
|
|
81
90
|
"""
|
|
82
91
|
downloaded_models = {}
|
|
83
92
|
downloaded_checkpoints = self.downloaded_hf_checkpoints
|
|
93
|
+
|
|
94
|
+
# Get FLM installed model checkpoints
|
|
95
|
+
flm_installed_checkpoints = get_flm_installed_models()
|
|
96
|
+
|
|
84
97
|
for model in self.supported_models:
|
|
85
98
|
model_info = self.supported_models[model]
|
|
86
|
-
checkpoint = model_info["checkpoint"]
|
|
87
|
-
base_checkpoint, variant = parse_checkpoint(checkpoint)
|
|
88
|
-
|
|
89
|
-
if base_checkpoint in downloaded_checkpoints:
|
|
90
|
-
# For GGUF models with variants, verify the specific variant files exist
|
|
91
|
-
if variant and model_info.get("recipe") == "llamacpp":
|
|
92
|
-
try:
|
|
93
|
-
from lemonade.tools.llamacpp.utils import identify_gguf_models
|
|
94
|
-
from lemonade.common.network import custom_snapshot_download
|
|
95
|
-
|
|
96
|
-
# Get the local snapshot path
|
|
97
|
-
snapshot_path = custom_snapshot_download(
|
|
98
|
-
base_checkpoint, local_files_only=True
|
|
99
|
-
)
|
|
100
99
|
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
)
|
|
105
|
-
all_variant_files = list(core_files.values()) + sharded_files
|
|
106
|
-
|
|
107
|
-
# Verify all required files exist locally
|
|
108
|
-
all_files_exist = True
|
|
109
|
-
for file_path in all_variant_files:
|
|
110
|
-
full_file_path = os.path.join(snapshot_path, file_path)
|
|
111
|
-
if not os.path.exists(full_file_path):
|
|
112
|
-
all_files_exist = False
|
|
113
|
-
break
|
|
114
|
-
|
|
115
|
-
if all_files_exist:
|
|
116
|
-
downloaded_models[model] = model_info
|
|
117
|
-
|
|
118
|
-
except Exception:
|
|
119
|
-
# If we can't verify the variant, don't include it
|
|
120
|
-
pass
|
|
121
|
-
else:
|
|
122
|
-
# For non-GGUF models or GGUF without variants, use the original logic
|
|
100
|
+
# Handle FLM models
|
|
101
|
+
if model_info.get("recipe") == "flm":
|
|
102
|
+
if model_info["checkpoint"] in flm_installed_checkpoints:
|
|
123
103
|
downloaded_models[model] = model_info
|
|
104
|
+
else:
|
|
105
|
+
# Handle other models
|
|
106
|
+
checkpoint = model_info["checkpoint"]
|
|
107
|
+
base_checkpoint, variant = parse_checkpoint(checkpoint)
|
|
108
|
+
|
|
109
|
+
if base_checkpoint in downloaded_checkpoints:
|
|
110
|
+
# For GGUF models with variants, verify the specific variant files exist
|
|
111
|
+
if variant and model_info.get("recipe") == "llamacpp":
|
|
112
|
+
try:
|
|
113
|
+
from lemonade.tools.llamacpp.utils import (
|
|
114
|
+
identify_gguf_models,
|
|
115
|
+
)
|
|
116
|
+
from lemonade.common.network import custom_snapshot_download
|
|
117
|
+
|
|
118
|
+
# Get the local snapshot path
|
|
119
|
+
snapshot_path = custom_snapshot_download(
|
|
120
|
+
base_checkpoint, local_files_only=True
|
|
121
|
+
)
|
|
122
|
+
|
|
123
|
+
# Check if the specific variant files exist
|
|
124
|
+
core_files, sharded_files = identify_gguf_models(
|
|
125
|
+
base_checkpoint, variant, model_info.get("mmproj", "")
|
|
126
|
+
)
|
|
127
|
+
all_variant_files = (
|
|
128
|
+
list(core_files.values()) + sharded_files
|
|
129
|
+
)
|
|
130
|
+
|
|
131
|
+
# Verify all required files exist locally
|
|
132
|
+
all_files_exist = True
|
|
133
|
+
for file_path in all_variant_files:
|
|
134
|
+
full_file_path = os.path.join(snapshot_path, file_path)
|
|
135
|
+
if not os.path.exists(full_file_path):
|
|
136
|
+
all_files_exist = False
|
|
137
|
+
break
|
|
138
|
+
|
|
139
|
+
if all_files_exist:
|
|
140
|
+
downloaded_models[model] = model_info
|
|
141
|
+
|
|
142
|
+
except Exception:
|
|
143
|
+
# If we can't verify the variant, don't include it
|
|
144
|
+
pass
|
|
145
|
+
else:
|
|
146
|
+
# For non-GGUF models or GGUF without variants, use the original logic
|
|
147
|
+
downloaded_models[model] = model_info
|
|
124
148
|
return downloaded_models
|
|
125
149
|
|
|
126
150
|
@property
|
|
@@ -137,6 +161,7 @@ class ModelManager:
|
|
|
137
161
|
checkpoint: Optional[str] = None,
|
|
138
162
|
recipe: Optional[str] = None,
|
|
139
163
|
reasoning: bool = False,
|
|
164
|
+
vision: bool = False,
|
|
140
165
|
mmproj: str = "",
|
|
141
166
|
do_not_upgrade: bool = False,
|
|
142
167
|
):
|
|
@@ -150,6 +175,7 @@ class ModelManager:
|
|
|
150
175
|
if model not in self.supported_models:
|
|
151
176
|
# Register the model as a user model if the model name
|
|
152
177
|
# is not already registered
|
|
178
|
+
import logging
|
|
153
179
|
|
|
154
180
|
# Ensure the model name includes the `user` namespace
|
|
155
181
|
model_parsed = model.split(".", 1)
|
|
@@ -172,11 +198,17 @@ class ModelManager:
|
|
|
172
198
|
)
|
|
173
199
|
|
|
174
200
|
# JSON content that will be used for registration if the download succeeds
|
|
201
|
+
labels = ["custom"]
|
|
202
|
+
if reasoning:
|
|
203
|
+
labels.append("reasoning")
|
|
204
|
+
if vision:
|
|
205
|
+
labels.append("vision")
|
|
206
|
+
|
|
175
207
|
new_user_model = {
|
|
176
208
|
"checkpoint": checkpoint,
|
|
177
209
|
"recipe": recipe,
|
|
178
210
|
"suggested": True,
|
|
179
|
-
"labels":
|
|
211
|
+
"labels": labels,
|
|
180
212
|
}
|
|
181
213
|
|
|
182
214
|
if mmproj:
|
|
@@ -199,6 +231,7 @@ class ModelManager:
|
|
|
199
231
|
checkpoint=checkpoint,
|
|
200
232
|
recipe=recipe,
|
|
201
233
|
reasoning=reasoning,
|
|
234
|
+
vision=vision,
|
|
202
235
|
)
|
|
203
236
|
else:
|
|
204
237
|
# Model is already registered - check if trying to register with different parameters
|
|
@@ -207,18 +240,21 @@ class ModelManager:
|
|
|
207
240
|
existing_recipe = existing_model.get("recipe")
|
|
208
241
|
existing_reasoning = "reasoning" in existing_model.get("labels", [])
|
|
209
242
|
existing_mmproj = existing_model.get("mmproj", "")
|
|
243
|
+
existing_vision = "vision" in existing_model.get("labels", [])
|
|
210
244
|
|
|
211
245
|
# Compare parameters
|
|
212
246
|
checkpoint_differs = checkpoint and checkpoint != existing_checkpoint
|
|
213
247
|
recipe_differs = recipe and recipe != existing_recipe
|
|
214
248
|
reasoning_differs = reasoning and reasoning != existing_reasoning
|
|
215
249
|
mmproj_differs = mmproj and mmproj != existing_mmproj
|
|
250
|
+
vision_differs = vision and vision != existing_vision
|
|
216
251
|
|
|
217
252
|
if (
|
|
218
253
|
checkpoint_differs
|
|
219
254
|
or recipe_differs
|
|
220
255
|
or reasoning_differs
|
|
221
256
|
or mmproj_differs
|
|
257
|
+
or vision_differs
|
|
222
258
|
):
|
|
223
259
|
conflicts = []
|
|
224
260
|
if checkpoint_differs:
|
|
@@ -237,6 +273,10 @@ class ModelManager:
|
|
|
237
273
|
conflicts.append(
|
|
238
274
|
f"mmproj (existing: '{existing_mmproj}', new: '{mmproj}')"
|
|
239
275
|
)
|
|
276
|
+
if vision_differs:
|
|
277
|
+
conflicts.append(
|
|
278
|
+
f"vision (existing: {existing_vision}, new: {vision})"
|
|
279
|
+
)
|
|
240
280
|
|
|
241
281
|
conflict_details = ", ".join(conflicts)
|
|
242
282
|
|
|
@@ -260,7 +300,34 @@ class ModelManager:
|
|
|
260
300
|
gguf_model_config = PullConfig(**self.supported_models[model])
|
|
261
301
|
print(f"Downloading {model} ({checkpoint_to_download})")
|
|
262
302
|
|
|
263
|
-
|
|
303
|
+
# Handle FLM models
|
|
304
|
+
current_recipe = (
|
|
305
|
+
recipe
|
|
306
|
+
if new_registration_model_config
|
|
307
|
+
else self.supported_models[model].get("recipe")
|
|
308
|
+
)
|
|
309
|
+
if current_recipe == "flm":
|
|
310
|
+
# Check if FLM is available, and install it if not
|
|
311
|
+
if not is_flm_available():
|
|
312
|
+
print(
|
|
313
|
+
"FLM is not installed or not at the minimum required version. Installing FLM..."
|
|
314
|
+
)
|
|
315
|
+
install_flm()
|
|
316
|
+
|
|
317
|
+
try:
|
|
318
|
+
download_flm_model(checkpoint_to_download, None, do_not_upgrade)
|
|
319
|
+
print(f"Successfully downloaded FLM model: {model}")
|
|
320
|
+
except subprocess.CalledProcessError as e:
|
|
321
|
+
raise RuntimeError(
|
|
322
|
+
f"Failed to download FLM model {model}: {e}"
|
|
323
|
+
) from e
|
|
324
|
+
except FileNotFoundError as e:
|
|
325
|
+
# This shouldn't happen after install_flm(), but just in case
|
|
326
|
+
raise RuntimeError(
|
|
327
|
+
f"FLM command not found even after installation attempt. "
|
|
328
|
+
f"Please manually install FLM using 'lemonade-install --flm'."
|
|
329
|
+
) from e
|
|
330
|
+
elif "gguf" in checkpoint_to_download.lower():
|
|
264
331
|
download_gguf(
|
|
265
332
|
gguf_model_config.checkpoint,
|
|
266
333
|
gguf_model_config.mmproj,
|
|
@@ -292,21 +359,84 @@ class ModelManager:
|
|
|
292
359
|
def filter_models_by_backend(self, models: dict) -> dict:
|
|
293
360
|
"""
|
|
294
361
|
Returns a filtered dict of models that are enabled by the
|
|
295
|
-
current environment.
|
|
362
|
+
current environment and platform.
|
|
296
363
|
"""
|
|
364
|
+
import platform
|
|
365
|
+
|
|
297
366
|
installed_packages = {dist.metadata["Name"].lower() for dist in distributions()}
|
|
298
367
|
|
|
299
368
|
hybrid_installed = (
|
|
300
369
|
"onnxruntime-vitisai" in installed_packages
|
|
301
370
|
and "onnxruntime-genai-directml-ryzenai" in installed_packages
|
|
302
371
|
)
|
|
372
|
+
|
|
373
|
+
from lemonade_install.install import (
|
|
374
|
+
check_ryzen_ai_processor,
|
|
375
|
+
UnsupportedPlatformError,
|
|
376
|
+
)
|
|
377
|
+
|
|
378
|
+
try:
|
|
379
|
+
check_ryzen_ai_processor()
|
|
380
|
+
ryzenai_npu_available = True
|
|
381
|
+
except UnsupportedPlatformError:
|
|
382
|
+
ryzenai_npu_available = False
|
|
383
|
+
|
|
384
|
+
# On macOS, only llamacpp (GGUF) models are supported, and only on Apple Silicon with macOS 14+
|
|
385
|
+
is_macos = platform.system() == "Darwin"
|
|
386
|
+
if is_macos:
|
|
387
|
+
machine = platform.machine().lower()
|
|
388
|
+
if machine == "x86_64":
|
|
389
|
+
# Intel Macs are not supported - return empty model list with error info
|
|
390
|
+
return {
|
|
391
|
+
"_unsupported_platform_error": {
|
|
392
|
+
"error": "Intel Mac Not Supported",
|
|
393
|
+
"message": (
|
|
394
|
+
"Lemonade Server requires Apple Silicon processors on macOS. "
|
|
395
|
+
"Intel Macs are not currently supported. "
|
|
396
|
+
"Please use a Mac with Apple Silicon or try Lemonade on Windows/Linux."
|
|
397
|
+
),
|
|
398
|
+
"platform": f"macOS {machine}",
|
|
399
|
+
"supported": "macOS 14+ with Apple Silicon (arm64/aarch64)",
|
|
400
|
+
}
|
|
401
|
+
}
|
|
402
|
+
|
|
403
|
+
# Check macOS version requirement
|
|
404
|
+
mac_version = platform.mac_ver()[0]
|
|
405
|
+
if mac_version:
|
|
406
|
+
major_version = int(mac_version.split(".")[0])
|
|
407
|
+
if major_version < 14:
|
|
408
|
+
return {
|
|
409
|
+
"_unsupported_platform_error": {
|
|
410
|
+
"error": "macOS Version Not Supported",
|
|
411
|
+
"message": (
|
|
412
|
+
f"Lemonade Server requires macOS 14 or later. "
|
|
413
|
+
f"Your system is running macOS {mac_version}. "
|
|
414
|
+
f"Please update your macOS version to use Lemonade Server."
|
|
415
|
+
),
|
|
416
|
+
"platform": f"macOS {mac_version} {machine}",
|
|
417
|
+
"supported": "macOS 14+ with Apple Silicon (arm64/aarch64)",
|
|
418
|
+
}
|
|
419
|
+
}
|
|
420
|
+
|
|
303
421
|
filtered = {}
|
|
304
422
|
for model, value in models.items():
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
423
|
+
recipe = value.get("recipe")
|
|
424
|
+
|
|
425
|
+
# Filter OGA hybrid models based on package availability
|
|
426
|
+
if recipe == "oga-hybrid":
|
|
427
|
+
if not hybrid_installed:
|
|
428
|
+
continue
|
|
429
|
+
|
|
430
|
+
if recipe == "flm":
|
|
431
|
+
if not ryzenai_npu_available:
|
|
432
|
+
continue
|
|
433
|
+
|
|
434
|
+
# On macOS, only show llamacpp models (GGUF format)
|
|
435
|
+
if is_macos and recipe != "llamacpp":
|
|
436
|
+
continue
|
|
437
|
+
|
|
438
|
+
filtered[model] = value
|
|
439
|
+
|
|
310
440
|
return filtered
|
|
311
441
|
|
|
312
442
|
def delete_model(self, model_name: str):
|
|
@@ -320,9 +450,20 @@ class ModelManager:
|
|
|
320
450
|
f"{list(self.supported_models.keys())}"
|
|
321
451
|
)
|
|
322
452
|
|
|
323
|
-
|
|
453
|
+
model_info = self.supported_models[model_name]
|
|
454
|
+
checkpoint = model_info["checkpoint"]
|
|
324
455
|
print(f"Deleting {model_name} ({checkpoint})")
|
|
325
456
|
|
|
457
|
+
# Handle FLM models
|
|
458
|
+
if model_info.get("recipe") == "flm":
|
|
459
|
+
try:
|
|
460
|
+
command = ["flm", "remove", checkpoint]
|
|
461
|
+
subprocess.run(command, check=True, encoding="utf-8", errors="replace")
|
|
462
|
+
print(f"Successfully deleted FLM model: {model_name}")
|
|
463
|
+
return
|
|
464
|
+
except subprocess.CalledProcessError as e:
|
|
465
|
+
raise ValueError(f"Failed to delete FLM model {model_name}: {e}") from e
|
|
466
|
+
|
|
326
467
|
# Parse checkpoint to get base and variant
|
|
327
468
|
base_checkpoint, variant = parse_checkpoint(checkpoint)
|
|
328
469
|
|
lemonade_server/pydantic_models.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import os
|
|
2
|
+
import platform
|
|
2
3
|
from typing import Optional, Union, List
|
|
3
4
|
|
|
4
5
|
from pydantic import BaseModel
|
|
@@ -6,7 +7,28 @@ from pydantic import BaseModel
|
|
|
6
7
|
DEFAULT_PORT = int(os.getenv("LEMONADE_PORT", "8000"))
|
|
7
8
|
DEFAULT_HOST = os.getenv("LEMONADE_HOST", "localhost")
|
|
8
9
|
DEFAULT_LOG_LEVEL = os.getenv("LEMONADE_LOG_LEVEL", "info")
|
|
9
|
-
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
# Platform-aware default backend selection
|
|
13
|
+
def _get_default_llamacpp_backend():
|
|
14
|
+
"""
|
|
15
|
+
Get the default llamacpp backend based on the current platform.
|
|
16
|
+
"""
|
|
17
|
+
# Allow environment variable override
|
|
18
|
+
env_backend = os.getenv("LEMONADE_LLAMACPP")
|
|
19
|
+
if env_backend:
|
|
20
|
+
return env_backend
|
|
21
|
+
|
|
22
|
+
# Platform-specific defaults: use metal for Apple Silicon, vulkan for everything else
|
|
23
|
+
if platform.system() == "Darwin" and platform.machine().lower() in [
|
|
24
|
+
"arm64",
|
|
25
|
+
"aarch64",
|
|
26
|
+
]:
|
|
27
|
+
return "metal"
|
|
28
|
+
return "vulkan"
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
DEFAULT_LLAMACPP_BACKEND = _get_default_llamacpp_backend()
|
|
10
32
|
DEFAULT_CTX_SIZE = int(os.getenv("LEMONADE_CTX_SIZE", "4096"))
|
|
11
33
|
|
|
12
34
|
|
|
@@ -23,6 +45,8 @@ class LoadConfig(BaseModel):
|
|
|
23
45
|
recipe: Optional[str] = None
|
|
24
46
|
# Indicates whether the model is a reasoning model, like DeepSeek
|
|
25
47
|
reasoning: Optional[bool] = False
|
|
48
|
+
# Indicates whether the model is a vision model with image processing capabilities
|
|
49
|
+
vision: Optional[bool] = False
|
|
26
50
|
# Indicates which Multimodal Projector (mmproj) file to use
|
|
27
51
|
mmproj: Optional[str] = None
|
|
28
52
|
|