lemonade-sdk 8.1.9__py3-none-any.whl → 8.1.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of lemonade-sdk might be problematic.
- lemonade/common/inference_engines.py +13 -4
- lemonade/common/system_info.py +570 -1
- lemonade/tools/flm/__init__.py +1 -0
- lemonade/tools/flm/utils.py +255 -0
- lemonade/tools/llamacpp/utils.py +62 -13
- lemonade/tools/server/flm.py +137 -0
- lemonade/tools/server/llamacpp.py +23 -5
- lemonade/tools/server/serve.py +292 -135
- lemonade/tools/server/static/js/chat.js +165 -82
- lemonade/tools/server/static/js/models.js +87 -54
- lemonade/tools/server/static/js/shared.js +5 -3
- lemonade/tools/server/static/logs.html +47 -0
- lemonade/tools/server/static/styles.css +159 -8
- lemonade/tools/server/static/webapp.html +28 -10
- lemonade/tools/server/tray.py +158 -38
- lemonade/tools/server/utils/macos_tray.py +226 -0
- lemonade/tools/server/utils/{system_tray.py → windows_tray.py} +13 -0
- lemonade/tools/server/webapp.py +4 -1
- lemonade/tools/server/wrapped_server.py +91 -25
- lemonade/version.py +1 -1
- lemonade_install/install.py +25 -2
- {lemonade_sdk-8.1.9.dist-info → lemonade_sdk-8.1.11.dist-info}/METADATA +9 -6
- {lemonade_sdk-8.1.9.dist-info → lemonade_sdk-8.1.11.dist-info}/RECORD +33 -28
- lemonade_server/cli.py +105 -14
- lemonade_server/model_manager.py +186 -45
- lemonade_server/pydantic_models.py +25 -1
- lemonade_server/server_models.json +162 -62
- lemonade_server/settings.py +39 -39
- {lemonade_sdk-8.1.9.dist-info → lemonade_sdk-8.1.11.dist-info}/WHEEL +0 -0
- {lemonade_sdk-8.1.9.dist-info → lemonade_sdk-8.1.11.dist-info}/entry_points.txt +0 -0
- {lemonade_sdk-8.1.9.dist-info → lemonade_sdk-8.1.11.dist-info}/licenses/LICENSE +0 -0
- {lemonade_sdk-8.1.9.dist-info → lemonade_sdk-8.1.11.dist-info}/licenses/NOTICE.md +0 -0
- {lemonade_sdk-8.1.9.dist-info → lemonade_sdk-8.1.11.dist-info}/top_level.txt +0 -0
lemonade_server/model_manager.py
CHANGED
@@ -1,16 +1,25 @@
 import json
 import os
+import subprocess
 from typing import Optional
 import shutil
 import huggingface_hub
 from importlib.metadata import distributions
 from lemonade_server.pydantic_models import PullConfig
+from lemonade_server.pydantic_models import PullConfig
 from lemonade.cache import DEFAULT_CACHE_DIR
 from lemonade.tools.llamacpp.utils import parse_checkpoint, download_gguf
 from lemonade.common.network import custom_snapshot_download
 
 USER_MODELS_FILE = os.path.join(DEFAULT_CACHE_DIR, "user_models.json")
 
+from lemonade.tools.flm.utils import (
+    get_flm_installed_models,
+    is_flm_available,
+    install_flm,
+    download_flm_model,
+)
+
 
 class ModelManager:
 
@@ -81,46 +90,61 @@ class ModelManager:
         """
         downloaded_models = {}
         downloaded_checkpoints = self.downloaded_hf_checkpoints
+
+        # Get FLM installed model checkpoints
+        flm_installed_checkpoints = get_flm_installed_models()
+
         for model in self.supported_models:
             model_info = self.supported_models[model]
-            checkpoint = model_info["checkpoint"]
-            base_checkpoint, variant = parse_checkpoint(checkpoint)
-
-            if base_checkpoint in downloaded_checkpoints:
-                # For GGUF models with variants, verify the specific variant files exist
-                if variant and model_info.get("recipe") == "llamacpp":
-                    try:
-                        from lemonade.tools.llamacpp.utils import identify_gguf_models
-                        from lemonade.common.network import custom_snapshot_download
-
-                        # Get the local snapshot path
-                        snapshot_path = custom_snapshot_download(
-                            base_checkpoint, local_files_only=True
-                        )
 
-                        # Check if the specific variant files exist
-                        core_files, sharded_files = identify_gguf_models(
-                            base_checkpoint, variant, model_info.get("mmproj", "")
-                        )
-                        all_variant_files = list(core_files.values()) + sharded_files
-
-                        # Verify all required files exist locally
-                        all_files_exist = True
-                        for file_path in all_variant_files:
-                            full_file_path = os.path.join(snapshot_path, file_path)
-                            if not os.path.exists(full_file_path):
-                                all_files_exist = False
-                                break
-
-                        if all_files_exist:
-                            downloaded_models[model] = model_info
-
-                    except Exception:
-                        # If we can't verify the variant, don't include it
-                        pass
-                else:
-                    # For non-GGUF models or GGUF without variants, use the original logic
+            # Handle FLM models
+            if model_info.get("recipe") == "flm":
+                if model_info["checkpoint"] in flm_installed_checkpoints:
                     downloaded_models[model] = model_info
+            else:
+                # Handle other models
+                checkpoint = model_info["checkpoint"]
+                base_checkpoint, variant = parse_checkpoint(checkpoint)
+
+                if base_checkpoint in downloaded_checkpoints:
+                    # For GGUF models with variants, verify the specific variant files exist
+                    if variant and model_info.get("recipe") == "llamacpp":
+                        try:
+                            from lemonade.tools.llamacpp.utils import (
+                                identify_gguf_models,
+                            )
+                            from lemonade.common.network import custom_snapshot_download
+
+                            # Get the local snapshot path
+                            snapshot_path = custom_snapshot_download(
+                                base_checkpoint, local_files_only=True
+                            )
+
+                            # Check if the specific variant files exist
+                            core_files, sharded_files = identify_gguf_models(
+                                base_checkpoint, variant, model_info.get("mmproj", "")
+                            )
+                            all_variant_files = (
+                                list(core_files.values()) + sharded_files
+                            )
+
+                            # Verify all required files exist locally
+                            all_files_exist = True
+                            for file_path in all_variant_files:
+                                full_file_path = os.path.join(snapshot_path, file_path)
+                                if not os.path.exists(full_file_path):
+                                    all_files_exist = False
+                                    break
+
+                            if all_files_exist:
+                                downloaded_models[model] = model_info
+
+                        except Exception:
+                            # If we can't verify the variant, don't include it
+                            pass
+                    else:
+                        # For non-GGUF models or GGUF without variants, use the original logic
+                        downloaded_models[model] = model_info
         return downloaded_models
 
     @property
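Note on the hunk above: the downloaded-model scan now has two disjoint paths. FLM models are matched against the list reported by flm itself (via get_flm_installed_models()), while every other recipe keeps the original Hugging Face cache check, re-indented under the else branch. A minimal standalone sketch of the resulting control flow, using made-up stub data in place of the FLM list and the HF cache (the GGUF variant verification is elided):

# Sketch only: the checkpoint names and stub lists below are hypothetical.
supported_models = {
    "user.flm-model": {"recipe": "flm", "checkpoint": "example-flm-checkpoint"},
    "user.gguf-model": {"recipe": "llamacpp", "checkpoint": "org/repo:Q4_0"},
}
flm_installed_checkpoints = ["example-flm-checkpoint"]  # stand-in for get_flm_installed_models()
downloaded_checkpoints = ["org/repo"]  # stand-in for self.downloaded_hf_checkpoints

downloaded_models = {}
for model, model_info in supported_models.items():
    if model_info.get("recipe") == "flm":
        # FLM models: membership in flm's own installed list is the whole check
        if model_info["checkpoint"] in flm_installed_checkpoints:
            downloaded_models[model] = model_info
    else:
        # Everything else: the pre-existing HF cache check
        base_checkpoint = model_info["checkpoint"].split(":", 1)[0]
        if base_checkpoint in downloaded_checkpoints:
            downloaded_models[model] = model_info

print(sorted(downloaded_models))  # ['user.flm-model', 'user.gguf-model']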
@@ -137,6 +161,7 @@ class ModelManager:
         checkpoint: Optional[str] = None,
         recipe: Optional[str] = None,
         reasoning: bool = False,
+        vision: bool = False,
         mmproj: str = "",
         do_not_upgrade: bool = False,
     ):
@@ -150,6 +175,7 @@ class ModelManager:
         if model not in self.supported_models:
             # Register the model as a user model if the model name
             # is not already registered
+            import logging
 
             # Ensure the model name includes the `user` namespace
             model_parsed = model.split(".", 1)
@@ -172,11 +198,17 @@ class ModelManager:
             )
 
             # JSON content that will be used for registration if the download succeeds
+            labels = ["custom"]
+            if reasoning:
+                labels.append("reasoning")
+            if vision:
+                labels.append("vision")
+
             new_user_model = {
                 "checkpoint": checkpoint,
                 "recipe": recipe,
                 "suggested": True,
-                "labels":
+                "labels": labels,
             }
 
             if mmproj:
@@ -199,6 +231,7 @@ class ModelManager:
                 checkpoint=checkpoint,
                 recipe=recipe,
                 reasoning=reasoning,
+                vision=vision,
             )
         else:
             # Model is already registered - check if trying to register with different parameters
@@ -207,18 +240,21 @@ class ModelManager:
             existing_recipe = existing_model.get("recipe")
             existing_reasoning = "reasoning" in existing_model.get("labels", [])
             existing_mmproj = existing_model.get("mmproj", "")
+            existing_vision = "vision" in existing_model.get("labels", [])
 
             # Compare parameters
             checkpoint_differs = checkpoint and checkpoint != existing_checkpoint
             recipe_differs = recipe and recipe != existing_recipe
             reasoning_differs = reasoning and reasoning != existing_reasoning
             mmproj_differs = mmproj and mmproj != existing_mmproj
+            vision_differs = vision and vision != existing_vision
 
             if (
                 checkpoint_differs
                 or recipe_differs
                 or reasoning_differs
                 or mmproj_differs
+                or vision_differs
             ):
                 conflicts = []
                 if checkpoint_differs:
@@ -237,6 +273,10 @@ class ModelManager:
                     conflicts.append(
                         f"mmproj (existing: '{existing_mmproj}', new: '{mmproj}')"
                     )
+                if vision_differs:
+                    conflicts.append(
+                        f"vision (existing: {existing_vision}, new: {vision})"
+                    )
 
                 conflict_details = ", ".join(conflicts)
 
@@ -260,7 +300,34 @@ class ModelManager:
         gguf_model_config = PullConfig(**self.supported_models[model])
         print(f"Downloading {model} ({checkpoint_to_download})")
 
-        if "gguf" in checkpoint_to_download.lower():
+        # Handle FLM models
+        current_recipe = (
+            recipe
+            if new_registration_model_config
+            else self.supported_models[model].get("recipe")
+        )
+        if current_recipe == "flm":
+            # Check if FLM is available, and install it if not
+            if not is_flm_available():
+                print(
+                    "FLM is not installed or not at the minimum required version. Installing FLM..."
+                )
+                install_flm()
+
+            try:
+                download_flm_model(checkpoint_to_download, None, do_not_upgrade)
+                print(f"Successfully downloaded FLM model: {model}")
+            except subprocess.CalledProcessError as e:
+                raise RuntimeError(
+                    f"Failed to download FLM model {model}: {e}"
+                ) from e
+            except FileNotFoundError as e:
+                # This shouldn't happen after install_flm(), but just in case
+                raise RuntimeError(
+                    f"FLM command not found even after installation attempt. "
+                    f"Please manually install FLM using 'lemonade-install --flm'."
+                ) from e
+        elif "gguf" in checkpoint_to_download.lower():
             download_gguf(
                 gguf_model_config.checkpoint,
                 gguf_model_config.mmproj,
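Note on the hunk above: the pull path treats the flm CLI as a managed dependency. It installs FLM on demand, then collapses both failure modes (a non-zero exit and a missing binary) into RuntimeError with the original exception chained via raise ... from e, so callers need only one handler. A small sketch of that normalization pattern in isolation; run_flm and its error messages are illustrative, not part of the package:

import subprocess

def run_flm(command: list[str]) -> None:
    # Mirror of the error handling above: one exception type for callers,
    # with the root cause preserved as __cause__.
    try:
        subprocess.run(command, check=True, encoding="utf-8", errors="replace")
    except subprocess.CalledProcessError as e:
        # flm ran but exited with a non-zero status
        raise RuntimeError(f"FLM command failed: {e}") from e
    except FileNotFoundError as e:
        # the flm binary is not on PATH
        raise RuntimeError(
            "FLM command not found. Install it with 'lemonade-install --flm'."
        ) from e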
@@ -292,21 +359,84 @@ class ModelManager:
     def filter_models_by_backend(self, models: dict) -> dict:
         """
         Returns a filtered dict of models that are enabled by the
-        current environment.
+        current environment and platform.
         """
+        import platform
+
         installed_packages = {dist.metadata["Name"].lower() for dist in distributions()}
 
         hybrid_installed = (
             "onnxruntime-vitisai" in installed_packages
             and "onnxruntime-genai-directml-ryzenai" in installed_packages
         )
+
+        from lemonade_install.install import (
+            check_ryzen_ai_processor,
+            UnsupportedPlatformError,
+        )
+
+        try:
+            check_ryzen_ai_processor()
+            ryzenai_npu_available = True
+        except UnsupportedPlatformError:
+            ryzenai_npu_available = False
+
+        # On macOS, only llamacpp (GGUF) models are supported, and only on Apple Silicon with macOS 14+
+        is_macos = platform.system() == "Darwin"
+        if is_macos:
+            machine = platform.machine().lower()
+            if machine == "x86_64":
+                # Intel Macs are not supported - return empty model list with error info
+                return {
+                    "_unsupported_platform_error": {
+                        "error": "Intel Mac Not Supported",
+                        "message": (
+                            "Lemonade Server requires Apple Silicon processors on macOS. "
+                            "Intel Macs are not currently supported. "
+                            "Please use a Mac with Apple Silicon or try Lemonade on Windows/Linux."
+                        ),
+                        "platform": f"macOS {machine}",
+                        "supported": "macOS 14+ with Apple Silicon (arm64/aarch64)",
+                    }
+                }
+
+            # Check macOS version requirement
+            mac_version = platform.mac_ver()[0]
+            if mac_version:
+                major_version = int(mac_version.split(".")[0])
+                if major_version < 14:
+                    return {
+                        "_unsupported_platform_error": {
+                            "error": "macOS Version Not Supported",
+                            "message": (
+                                f"Lemonade Server requires macOS 14 or later. "
+                                f"Your system is running macOS {mac_version}. "
+                                f"Please update your macOS version to use Lemonade Server."
+                            ),
+                            "platform": f"macOS {mac_version} {machine}",
+                            "supported": "macOS 14+ with Apple Silicon (arm64/aarch64)",
+                        }
+                    }
+
         filtered = {}
         for model, value in models.items():
-            [five lines removed here; their content is not preserved in this rendering]
+            recipe = value.get("recipe")
+
+            # Filter OGA hybrid models based on package availability
+            if recipe == "oga-hybrid":
+                if not hybrid_installed:
+                    continue
+
+            if recipe == "flm":
+                if not ryzenai_npu_available:
+                    continue
+
+            # On macOS, only show llamacpp models (GGUF format)
+            if is_macos and recipe != "llamacpp":
+                continue
+
+            filtered[model] = value
+
         return filtered
 
     def delete_model(self, model_name: str):
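Note on the hunk above: filtering now happens along three independent axes: hybrid OGA models require the two onnxruntime packages, FLM models require a Ryzen AI NPU (probed via check_ryzen_ai_processor()), and on macOS everything except llamacpp is hidden. Unsupported Macs short-circuit with a sentinel _unsupported_platform_error entry instead of a model list. A minimal sketch of the per-model decision with hypothetical flag values:

# Sketch only: the three flags are hard-coded here; the real values come from
# installed packages, check_ryzen_ai_processor(), and platform.system().
hybrid_installed = False
ryzenai_npu_available = False
is_macos = True

models = {
    "hybrid-model": {"recipe": "oga-hybrid"},
    "flm-model": {"recipe": "flm"},
    "gguf-model": {"recipe": "llamacpp"},
}

filtered = {}
for model, value in models.items():
    recipe = value.get("recipe")
    if recipe == "oga-hybrid" and not hybrid_installed:
        continue
    if recipe == "flm" and not ryzenai_npu_available:
        continue
    if is_macos and recipe != "llamacpp":
        continue
    filtered[model] = value

print(list(filtered))  # ['gguf-model']: on macOS only llamacpp models survive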
@@ -320,9 +450,20 @@ class ModelManager:
                 f"{list(self.supported_models.keys())}"
             )
 
-        checkpoint = self.supported_models[model_name]["checkpoint"]
+        model_info = self.supported_models[model_name]
+        checkpoint = model_info["checkpoint"]
         print(f"Deleting {model_name} ({checkpoint})")
 
+        # Handle FLM models
+        if model_info.get("recipe") == "flm":
+            try:
+                command = ["flm", "remove", checkpoint]
+                subprocess.run(command, check=True, encoding="utf-8", errors="replace")
+                print(f"Successfully deleted FLM model: {model_name}")
+                return
+            except subprocess.CalledProcessError as e:
+                raise ValueError(f"Failed to delete FLM model {model_name}: {e}") from e
+
         # Parse checkpoint to get base and variant
         base_checkpoint, variant = parse_checkpoint(checkpoint)
 
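Note on the hunk above: FLM deletion is delegated entirely to the flm CLI (flm remove <checkpoint>) and returns early, so the GGUF/HF cache cleanup below never runs for FLM models. Passing encoding="utf-8", errors="replace" to subprocess.run makes any decoding of the CLI's output tolerant of non-UTF-8 bytes, degrading them to replacement characters rather than raising UnicodeDecodeError.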
lemonade_server/pydantic_models.py
CHANGED
@@ -1,4 +1,5 @@
 import os
+import platform
 from typing import Optional, Union, List
 
 from pydantic import BaseModel
@@ -6,7 +7,28 @@ from pydantic import BaseModel
 DEFAULT_PORT = int(os.getenv("LEMONADE_PORT", "8000"))
 DEFAULT_HOST = os.getenv("LEMONADE_HOST", "localhost")
 DEFAULT_LOG_LEVEL = os.getenv("LEMONADE_LOG_LEVEL", "info")
-DEFAULT_LLAMACPP_BACKEND = os.getenv("LEMONADE_LLAMACPP", "vulkan")
+
+
+# Platform-aware default backend selection
+def _get_default_llamacpp_backend():
+    """
+    Get the default llamacpp backend based on the current platform.
+    """
+    # Allow environment variable override
+    env_backend = os.getenv("LEMONADE_LLAMACPP")
+    if env_backend:
+        return env_backend
+
+    # Platform-specific defaults: use metal for Apple Silicon, vulkan for everything else
+    if platform.system() == "Darwin" and platform.machine().lower() in [
+        "arm64",
+        "aarch64",
+    ]:
+        return "metal"
+    return "vulkan"
+
+
+DEFAULT_LLAMACPP_BACKEND = _get_default_llamacpp_backend()
 DEFAULT_CTX_SIZE = int(os.getenv("LEMONADE_CTX_SIZE", "4096"))
 
 
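Note on the hunk above: DEFAULT_LLAMACPP_BACKEND is now computed once at import time, so a LEMONADE_LLAMACPP override must be in the environment before lemonade_server.pydantic_models is first imported. A quick check of the override path (the backend name "rocm" is just a placeholder value):

import os

# Must be set before the module is imported anywhere in the process.
os.environ["LEMONADE_LLAMACPP"] = "rocm"  # hypothetical override value

from lemonade_server.pydantic_models import DEFAULT_LLAMACPP_BACKEND

# Any non-empty override wins; otherwise the default is "metal" on
# Apple Silicon Macs and "vulkan" everywhere else.
print(DEFAULT_LLAMACPP_BACKEND)  # -> "rocm"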
@@ -23,6 +45,8 @@ class LoadConfig(BaseModel):
     recipe: Optional[str] = None
     # Indicates whether the model is a reasoning model, like DeepSeek
     reasoning: Optional[bool] = False
+    # Indicates whether the model is a vision model with image processing capabilities
+    vision: Optional[bool] = False
     # Indicates which Multimodal Projector (mmproj) file to use
     mmproj: Optional[str] = None
 