lemonade_sdk-9.1.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84)
  1. lemonade/__init__.py +5 -0
  2. lemonade/api.py +180 -0
  3. lemonade/cache.py +92 -0
  4. lemonade/cli.py +173 -0
  5. lemonade/common/__init__.py +0 -0
  6. lemonade/common/build.py +176 -0
  7. lemonade/common/cli_helpers.py +139 -0
  8. lemonade/common/exceptions.py +98 -0
  9. lemonade/common/filesystem.py +368 -0
  10. lemonade/common/inference_engines.py +408 -0
  11. lemonade/common/network.py +93 -0
  12. lemonade/common/printing.py +110 -0
  13. lemonade/common/status.py +471 -0
  14. lemonade/common/system_info.py +1411 -0
  15. lemonade/common/test_helpers.py +28 -0
  16. lemonade/profilers/__init__.py +1 -0
  17. lemonade/profilers/agt_power.py +437 -0
  18. lemonade/profilers/hwinfo_power.py +429 -0
  19. lemonade/profilers/memory_tracker.py +259 -0
  20. lemonade/profilers/profiler.py +58 -0
  21. lemonade/sequence.py +363 -0
  22. lemonade/state.py +159 -0
  23. lemonade/tools/__init__.py +1 -0
  24. lemonade/tools/accuracy.py +432 -0
  25. lemonade/tools/adapter.py +114 -0
  26. lemonade/tools/bench.py +302 -0
  27. lemonade/tools/flm/__init__.py +1 -0
  28. lemonade/tools/flm/utils.py +305 -0
  29. lemonade/tools/huggingface/bench.py +187 -0
  30. lemonade/tools/huggingface/load.py +235 -0
  31. lemonade/tools/huggingface/utils.py +359 -0
  32. lemonade/tools/humaneval.py +264 -0
  33. lemonade/tools/llamacpp/bench.py +255 -0
  34. lemonade/tools/llamacpp/load.py +222 -0
  35. lemonade/tools/llamacpp/utils.py +1260 -0
  36. lemonade/tools/management_tools.py +319 -0
  37. lemonade/tools/mmlu.py +319 -0
  38. lemonade/tools/oga/__init__.py +0 -0
  39. lemonade/tools/oga/bench.py +120 -0
  40. lemonade/tools/oga/load.py +804 -0
  41. lemonade/tools/oga/migration.py +403 -0
  42. lemonade/tools/oga/utils.py +462 -0
  43. lemonade/tools/perplexity.py +147 -0
  44. lemonade/tools/prompt.py +263 -0
  45. lemonade/tools/report/__init__.py +0 -0
  46. lemonade/tools/report/llm_report.py +203 -0
  47. lemonade/tools/report/table.py +899 -0
  48. lemonade/tools/server/__init__.py +0 -0
  49. lemonade/tools/server/flm.py +133 -0
  50. lemonade/tools/server/llamacpp.py +320 -0
  51. lemonade/tools/server/serve.py +2123 -0
  52. lemonade/tools/server/static/favicon.ico +0 -0
  53. lemonade/tools/server/static/index.html +279 -0
  54. lemonade/tools/server/static/js/chat.js +1059 -0
  55. lemonade/tools/server/static/js/model-settings.js +183 -0
  56. lemonade/tools/server/static/js/models.js +1395 -0
  57. lemonade/tools/server/static/js/shared.js +556 -0
  58. lemonade/tools/server/static/logs.html +191 -0
  59. lemonade/tools/server/static/styles.css +2654 -0
  60. lemonade/tools/server/static/webapp.html +321 -0
  61. lemonade/tools/server/tool_calls.py +153 -0
  62. lemonade/tools/server/tray.py +664 -0
  63. lemonade/tools/server/utils/macos_tray.py +226 -0
  64. lemonade/tools/server/utils/port.py +77 -0
  65. lemonade/tools/server/utils/thread.py +85 -0
  66. lemonade/tools/server/utils/windows_tray.py +408 -0
  67. lemonade/tools/server/webapp.py +34 -0
  68. lemonade/tools/server/wrapped_server.py +559 -0
  69. lemonade/tools/tool.py +374 -0
  70. lemonade/version.py +1 -0
  71. lemonade_install/__init__.py +1 -0
  72. lemonade_install/install.py +239 -0
  73. lemonade_sdk-9.1.1.dist-info/METADATA +276 -0
  74. lemonade_sdk-9.1.1.dist-info/RECORD +84 -0
  75. lemonade_sdk-9.1.1.dist-info/WHEEL +5 -0
  76. lemonade_sdk-9.1.1.dist-info/entry_points.txt +5 -0
  77. lemonade_sdk-9.1.1.dist-info/licenses/LICENSE +201 -0
  78. lemonade_sdk-9.1.1.dist-info/licenses/NOTICE.md +47 -0
  79. lemonade_sdk-9.1.1.dist-info/top_level.txt +3 -0
  80. lemonade_server/cli.py +805 -0
  81. lemonade_server/model_manager.py +758 -0
  82. lemonade_server/pydantic_models.py +159 -0
  83. lemonade_server/server_models.json +643 -0
  84. lemonade_server/settings.py +39 -0
@@ -0,0 +1,758 @@
+ import json
+ import os
+ import subprocess
+ from typing import Optional
+ import shutil
+ import huggingface_hub
+ from importlib.metadata import distributions
+ from lemonade_server.pydantic_models import PullConfig
+ from lemonade.cache import DEFAULT_CACHE_DIR
+ from lemonade.tools.llamacpp.utils import (
+     parse_checkpoint,
+     download_gguf,
+     resolve_local_gguf_model,
+ )
+ from lemonade.common.network import custom_snapshot_download
+ from lemonade.tools.oga.migration import (
+     detect_incompatible_ryzenai_models,
+     delete_incompatible_models,
+ )
+
+ USER_MODELS_FILE = os.path.join(DEFAULT_CACHE_DIR, "user_models.json")
+
+ from lemonade.tools.flm.utils import (
+     get_flm_installed_models,
+     is_flm_available,
+     install_flm,
+     download_flm_model,
+ )
+
+
+ class ModelManager:
+
+     @property
+     def supported_models(self) -> dict:
+         """
+         Returns a dictionary of supported models.
+         Note: Models must be downloaded before they are locally available.
+         """
+         # Load the models dictionary from the built-in JSON file
+         server_models_file = os.path.join(
+             os.path.dirname(__file__), "server_models.json"
+         )
+         with open(server_models_file, "r", encoding="utf-8") as file:
+             models: dict = json.load(file)
+
+         # Load the user's JSON file, if it exists, and merge into the models dict
+         if os.path.exists(USER_MODELS_FILE):
+             with open(USER_MODELS_FILE, "r", encoding="utf-8") as file:
+                 user_models: dict = json.load(file)
+             # Prepend the user namespace to the model names
+             user_models = {
+                 f"user.{model_name}": model_info
+                 for model_name, model_info in user_models.items()
+             }
+
+             # Backwards compatibility for user models that were created before version 8.0.4
+             # "reasoning" was a boolean, but as of 8.0.4 it became a label
+             for _, model_info in user_models.items():
+                 if "reasoning" in model_info:
+                     model_info["labels"] = (
+                         ["reasoning"]
+                         if not model_info.get("labels", None)
+                         else model_info["labels"] + ["reasoning"]
+                     )
+                     del model_info["reasoning"]
+
+             models.update(user_models)
+
+         # Add the model name as a key in each entry, to make it easier
+         # to access later
+         # Also convert labels to boolean fields for LoadConfig compatibility
+         for key, value in models.items():
+             value["model_name"] = key
+
+             # Convert labels to boolean fields for backwards compatibility with LoadConfig
+             labels = value.get("labels", [])
+             if "reasoning" in labels and "reasoning" not in value:
+                 value["reasoning"] = True
+             if "vision" in labels and "vision" not in value:
+                 value["vision"] = True
+
+         return models
+
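+     # A minimal usage sketch (model names are placeholders; the actual keys come
+     # from server_models.json plus the optional user_models.json merged above):
+     #
+     #     manager = ModelManager()
+     #     catalog = manager.supported_models   # built-in entries plus "user." entries
+     #     local = manager.downloaded_models    # the subset already present on disk
+     #     for name, info in catalog.items():
+     #         print(name, info["checkpoint"], info.get("labels", []))
+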
+     @property
+     def downloaded_hf_checkpoints(self) -> list[str]:
+         """
+         Returns a list of Hugging Face checkpoints that have been downloaded.
+         """
+         downloaded_hf_checkpoints = []
+         try:
+             hf_cache_info = huggingface_hub.scan_cache_dir()
+             downloaded_hf_checkpoints = [entry.repo_id for entry in hf_cache_info.repos]
+         except huggingface_hub.CacheNotFound:
+             pass
+         except Exception as e:  # pylint: disable=broad-exception-caught
+             print(f"Error scanning Hugging Face cache: {e}")
+         return downloaded_hf_checkpoints
+
+     @property
+     def downloaded_models(self) -> dict:
+         """
+         Returns a dictionary of locally available models.
+         For GGUF models with variants, checks if the specific variant files exist.
+         """
+         from huggingface_hub.constants import HF_HUB_CACHE
+
+         downloaded_models = {}
+         downloaded_checkpoints = self.downloaded_hf_checkpoints
+
+         # Get FLM installed model checkpoints
+         flm_installed_checkpoints = get_flm_installed_models()
+
+         for model in self.supported_models:
+             model_info = self.supported_models[model]
+
+             # Handle FLM models
+             if model_info.get("recipe") == "flm":
+                 if model_info["checkpoint"] in flm_installed_checkpoints:
+                     downloaded_models[model] = model_info
+             else:
+                 # Handle other models
+                 checkpoint = model_info["checkpoint"]
+                 base_checkpoint, variant = parse_checkpoint(checkpoint)
+
+                 # Special handling for locally uploaded user models (not internet-downloaded)
+                 if (
+                     model.startswith("user.")
+                     and model_info.get("source") == "local_upload"
+                 ):
+                     # Locally uploaded model: checkpoint is in cache directory format (models--xxx)
+                     local_model_path = os.path.join(HF_HUB_CACHE, base_checkpoint)
+                     if os.path.exists(local_model_path):
+                         downloaded_models[model] = model_info
+                     continue
+
+                 # For all other models (server models and internet-downloaded user models),
+                 # use the standard verification logic with variant checks
+                 if base_checkpoint in downloaded_checkpoints:
+                     # For GGUF models with variants, verify the specific variant files exist
+                     if variant and model_info.get("recipe") == "llamacpp":
+                         try:
+                             from lemonade.tools.llamacpp.utils import (
+                                 identify_gguf_models,
+                             )
+                             from lemonade.common.network import custom_snapshot_download
+
+                             # Get the local snapshot path
+                             snapshot_path = custom_snapshot_download(
+                                 base_checkpoint, local_files_only=True
+                             )
+
+                             # Check if the specific variant files exist
+                             core_files, sharded_files = identify_gguf_models(
+                                 base_checkpoint, variant, model_info.get("mmproj", "")
+                             )
+                             all_variant_files = (
+                                 list(core_files.values()) + sharded_files
+                             )
+
+                             # Verify all required files exist locally
+                             all_files_exist = True
+                             for file_path in all_variant_files:
+                                 full_file_path = os.path.join(snapshot_path, file_path)
+                                 if not os.path.exists(full_file_path):
+                                     all_files_exist = False
+                                     break
+
+                             if all_files_exist:
+                                 downloaded_models[model] = model_info
+
+                         except Exception:
+                             # If we can't verify the variant, don't include it
+                             pass
+                     else:
+                         # For non-GGUF models or GGUF without variants, use the original logic
+                         downloaded_models[model] = model_info
+         return downloaded_models
+
+     def register_local_model(
+         self,
+         model_name: str,
+         checkpoint: str,
+         recipe: str,
+         reasoning: bool = False,
+         vision: bool = False,
+         mmproj: str = "",
+         snapshot_path: str = "",
+     ):
+
+         # model_name is expected to include the "user." namespace prefix
+         model_name_clean = model_name[5:]
+
+         # Prepare model info
+         labels = ["custom"]
+         if reasoning:
+             labels.append("reasoning")
+         if vision:
+             labels.append("vision")
+
+         new_user_model = {
+             "checkpoint": checkpoint,
+             "recipe": recipe,
+             "suggested": True,
+             "labels": labels,
+             "source": "local_upload",
+         }
+         if mmproj:
+             new_user_model["mmproj"] = mmproj
+
+         # Load existing user models
+         user_models = {}
+         if os.path.exists(USER_MODELS_FILE):
+             with open(USER_MODELS_FILE, "r", encoding="utf-8") as file:
+                 user_models = json.load(file)
+
+         # Check for conflicts
+         if model_name_clean in user_models:
+             raise ValueError(
+                 f"{model_name_clean} is already registered. "
+                 "Please use a different model name or delete the existing model."
+             )
+
+         # Save to user_models.json
+         user_models[model_name_clean] = new_user_model
+         os.makedirs(os.path.dirname(USER_MODELS_FILE), exist_ok=True)
+         with open(USER_MODELS_FILE, "w", encoding="utf-8") as file:
+             json.dump(user_models, file)
+
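+     # Minimal sketch of registering a locally uploaded model (hypothetical values;
+     # the name must carry the "user." prefix and the checkpoint must already be in
+     # the "models--{org}--{repo}" cache-directory format handled above):
+     #
+     #     manager.register_local_model(
+     #         model_name="user.my-local-model",
+     #         checkpoint="models--my-org--my-model-GGUF",
+     #         recipe="llamacpp",
+     #     )
+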
+     @property
+     def downloaded_models_enabled(self) -> dict:
+         """
+         Returns a dictionary of locally available models that are enabled by
+         the current installation.
+         """
+         return self.filter_models_by_backend(self.downloaded_models)
+
+     def download_models(
+         self,
+         models: list[str],
+         checkpoint: Optional[str] = None,
+         recipe: Optional[str] = None,
+         reasoning: bool = False,
+         vision: bool = False,
+         mmproj: str = "",
+         do_not_upgrade: bool = False,
+     ):
+         """
+         Downloads the specified models from Hugging Face.
+
+         do_not_upgrade: prioritize any local copy of the model over any updated copy
+             from the Hugging Face Hub.
+         """
+         for model in models:
+             if model not in self.supported_models:
+                 # Register the model as a user model if the model name
+                 # is not already registered
+                 import logging
+
+                 # Ensure the model name includes the `user` namespace
+                 model_parsed = model.split(".", 1)
+                 if len(model_parsed) != 2 or model_parsed[0] != "user":
+                     raise ValueError(
+                         f"When registering a new model, the model name must "
+                         "include the `user` namespace, for example "
+                         f"`user.Phi-4-Mini-GGUF`. Received: {model}"
+                     )
+
+                 model_name = model_parsed[1]
+
+                 # Check that required arguments are provided
+                 if not recipe or not checkpoint:
+                     raise ValueError(
+                         f"Model {model} is not registered with Lemonade Server. "
+                         "To register and install it, provide the `checkpoint` "
+                         "and `recipe` arguments, as well as the optional "
+                         "`reasoning` and `mmproj` arguments as appropriate. "
+                     )
+
+                 # JSON content that will be used for registration if the download succeeds
+                 labels = ["custom"]
+                 if reasoning:
+                     labels.append("reasoning")
+                 if vision:
+                     labels.append("vision")
+
+                 new_user_model = {
+                     "checkpoint": checkpoint,
+                     "recipe": recipe,
+                     "suggested": True,
+                     "labels": labels,
+                 }
+
+                 if mmproj:
+                     new_user_model["mmproj"] = mmproj
+
+                 # Make sure that a variant is provided for GGUF models before registering the model
+                 if "gguf" in checkpoint.lower() and ":" not in checkpoint.lower():
+                     raise ValueError(
+                         "You are required to provide a 'variant' in the checkpoint field when "
+                         "registering a GGUF model. The variant is provided "
+                         "as CHECKPOINT:VARIANT. For example: "
+                         "Qwen/Qwen2.5-Coder-3B-Instruct-GGUF:Q4_0 or "
+                         "Qwen/Qwen2.5-Coder-3B-Instruct-GGUF:"
+                         "qwen2.5-coder-3b-instruct-q4_0.gguf"
+                     )
+
+                 # Create a PullConfig we will use to download the model
+                 new_registration_model_config = PullConfig(
+                     model_name=model_name,
+                     checkpoint=checkpoint,
+                     recipe=recipe,
+                     reasoning=reasoning,
+                     vision=vision,
+                 )
+             else:
+                 # Model is already registered - check if trying to register with different parameters
+                 existing_model = self.supported_models[model]
+                 existing_checkpoint = existing_model.get("checkpoint")
+                 existing_recipe = existing_model.get("recipe")
+                 existing_reasoning = "reasoning" in existing_model.get("labels", [])
+                 existing_mmproj = existing_model.get("mmproj", "")
+                 existing_vision = "vision" in existing_model.get("labels", [])
+
+                 # Compare parameters
+                 checkpoint_differs = checkpoint and checkpoint != existing_checkpoint
+                 recipe_differs = recipe and recipe != existing_recipe
+                 reasoning_differs = reasoning and reasoning != existing_reasoning
+                 mmproj_differs = mmproj and mmproj != existing_mmproj
+                 vision_differs = vision and vision != existing_vision
+
+                 if (
+                     checkpoint_differs
+                     or recipe_differs
+                     or reasoning_differs
+                     or mmproj_differs
+                     or vision_differs
+                 ):
+                     conflicts = []
+                     if checkpoint_differs:
+                         conflicts.append(
+                             f"checkpoint (existing: '{existing_checkpoint}', new: '{checkpoint}')"
+                         )
+                     if recipe_differs:
+                         conflicts.append(
+                             f"recipe (existing: '{existing_recipe}', new: '{recipe}')"
+                         )
+                     if reasoning_differs:
+                         conflicts.append(
+                             f"reasoning (existing: {existing_reasoning}, new: {reasoning})"
+                         )
+                     if mmproj_differs:
+                         conflicts.append(
+                             f"mmproj (existing: '{existing_mmproj}', new: '{mmproj}')"
+                         )
+                     if vision_differs:
+                         conflicts.append(
+                             f"vision (existing: {existing_vision}, new: {vision})"
+                         )
+
+                     conflict_details = ", ".join(conflicts)
+
+                     additional_suggestion = ""
+                     if model.startswith("user."):
+                         additional_suggestion = f" or delete the existing model first using `lemonade-server delete {model}`"
+
+                     raise ValueError(
+                         f"Model {model} is already registered with a different configuration. "
+                         f"Conflicting parameters: {conflict_details}. "
+                         f"Please use a different model name{additional_suggestion}."
+                     )
+                 new_registration_model_config = None
+
+             # Download the model
+             if new_registration_model_config:
+                 checkpoint_to_download = checkpoint
+                 gguf_model_config = new_registration_model_config
+             else:
+                 checkpoint_to_download = self.supported_models[model]["checkpoint"]
+                 gguf_model_config = PullConfig(**self.supported_models[model])
+             print(f"Downloading {model} ({checkpoint_to_download})")
+
+             # Handle FLM models
+             current_recipe = (
+                 recipe
+                 if new_registration_model_config
+                 else self.supported_models[model].get("recipe")
+             )
+             if current_recipe == "flm":
+                 # Check if FLM is available, and install it if not
+                 if not is_flm_available():
+                     print(
+                         "FLM is not installed or not at the latest version. Installing FLM..."
+                     )
+                     install_flm()
+
+                 try:
+                     download_flm_model(checkpoint_to_download, None, do_not_upgrade)
+                     print(f"Successfully downloaded FLM model: {model}")
+                 except subprocess.CalledProcessError as e:
+                     raise RuntimeError(
+                         f"Failed to download FLM model {model}: {e}"
+                     ) from e
+                 except FileNotFoundError as e:
+                     # This shouldn't happen after install_flm(), but just in case
+                     raise RuntimeError(
+                         f"FLM command not found even after installation attempt. "
+                         f"Please manually install FLM using 'lemonade-install --flm'."
+                     ) from e
+             elif "gguf" in checkpoint_to_download.lower():
+                 # Parse checkpoint to check local cache first
+                 base_checkpoint, variant = parse_checkpoint(
+                     gguf_model_config.checkpoint
+                 )
+                 local_result = resolve_local_gguf_model(
+                     base_checkpoint, variant, gguf_model_config.mmproj
+                 )
+
+                 # Only download if not found locally
+                 if not local_result:
+                     download_gguf(
+                         gguf_model_config.checkpoint,
+                         gguf_model_config.mmproj,
+                         do_not_upgrade=do_not_upgrade,
+                     )
+                 else:
+                     print(f"Model already exists locally, skipping download")
+             else:
+                 custom_snapshot_download(
+                     checkpoint_to_download, do_not_upgrade=do_not_upgrade
+                 )
+
+             # Register the model in user_models.json, creating that file if needed
+             # We do this registration after the download so that we don't register
+             # any incorrectly configured models where the download would fail
+             if new_registration_model_config:
+                 # For models downloaded from the internet (HuggingFace),
+                 # keep the original checkpoint format (e.g., "amd/Llama-3.2-1B-Instruct-...")
+                 # Do NOT convert to cache directory format - that's only for locally uploaded models
+                 new_user_model["checkpoint"] = checkpoint
+
+                 if os.path.exists(USER_MODELS_FILE):
+                     with open(USER_MODELS_FILE, "r", encoding="utf-8") as file:
+                         user_models: dict = json.load(file)
+                 else:
+                     user_models = {}
+
+                 user_models[model_name] = new_user_model
+
+                 # Ensure the cache directory exists before writing the file
+                 os.makedirs(os.path.dirname(USER_MODELS_FILE), exist_ok=True)
+
+                 with open(USER_MODELS_FILE, mode="w", encoding="utf-8") as file:
+                     json.dump(user_models, fp=file)
+
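+     # Minimal sketch of pulling models (hypothetical names). A catalog model is
+     # downloaded by name alone; an unknown "user." name is registered on the fly,
+     # and GGUF checkpoints must include a variant, as enforced above:
+     #
+     #     manager.download_models(["some-catalog-model"])
+     #     manager.download_models(
+     #         ["user.My-Coder-GGUF"],
+     #         checkpoint="my-org/My-Coder-GGUF:Q4_0",
+     #         recipe="llamacpp",
+     #     )
+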
+     def filter_models_by_backend(self, models: dict) -> dict:
+         """
+         Returns a filtered dict of models that are enabled by the
+         current environment and platform.
+         """
+         import platform
+
+         installed_packages = {dist.metadata["Name"].lower() for dist in distributions()}
+
+         ryzenai_installed = (
+             "onnxruntime-vitisai" in installed_packages
+             and "onnxruntime-genai-directml-ryzenai" in installed_packages
+         )
+
+         from lemonade_install.install import (
+             check_ryzen_ai_processor,
+             UnsupportedPlatformError,
+         )
+
+         try:
+             check_ryzen_ai_processor()
+             ryzenai_npu_available = True
+         except UnsupportedPlatformError:
+             ryzenai_npu_available = False
+
+         # On macOS, only llamacpp (GGUF) models are supported, and only on Apple Silicon with macOS 14+
+         is_macos = platform.system() == "Darwin"
+         if is_macos:
+             machine = platform.machine().lower()
+             if machine == "x86_64":
+                 # Intel Macs are not supported - return empty model list with error info
+                 return {
+                     "_unsupported_platform_error": {
+                         "error": "Intel Mac Not Supported",
+                         "message": (
+                             "Lemonade Server requires Apple Silicon processors on macOS. "
+                             "Intel Macs are not currently supported. "
+                             "Please use a Mac with Apple Silicon or try Lemonade on Windows/Linux."
+                         ),
+                         "platform": f"macOS {machine}",
+                         "supported": "macOS 14+ with Apple Silicon (arm64/aarch64)",
+                     }
+                 }
+
+             # Check macOS version requirement
+             mac_version = platform.mac_ver()[0]
+             if mac_version:
+                 major_version = int(mac_version.split(".")[0])
+                 if major_version < 14:
+                     return {
+                         "_unsupported_platform_error": {
+                             "error": "macOS Version Not Supported",
+                             "message": (
+                                 f"Lemonade Server requires macOS 14 or later. "
+                                 f"Your system is running macOS {mac_version}. "
+                                 f"Please update your macOS version to use Lemonade Server."
+                             ),
+                             "platform": f"macOS {mac_version} {machine}",
+                             "supported": "macOS 14+ with Apple Silicon (arm64/aarch64)",
+                         }
+                     }
+
+         filtered = {}
+         for model, value in models.items():
+             recipe = value.get("recipe")
+
+             # Filter Ryzen AI models based on package availability
+             if recipe == "oga-hybrid" or recipe == "oga-npu":
+                 if not ryzenai_installed:
+                     continue
+
+             if recipe == "flm":
+                 if not ryzenai_npu_available:
+                     continue
+
+             # On macOS, only show llamacpp models (GGUF format)
+             if is_macos and recipe != "llamacpp":
+                 continue
+
+             filtered[model] = value
+
+         return filtered
+
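+     # On unsupported macOS configurations the filter above returns a sentinel
+     # entry instead of models, which callers can check for, e.g.:
+     #
+     #     enabled = manager.downloaded_models_enabled
+     #     if "_unsupported_platform_error" in enabled:
+     #         print(enabled["_unsupported_platform_error"]["message"])
+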
+     def delete_model(self, model_name: str):
+         """
+         Deletes the specified model from local storage.
+         For GGUF models with variants, only deletes the specific variant files.
+         """
+         from huggingface_hub.constants import HF_HUB_CACHE
+
+         if model_name not in self.supported_models:
+             raise ValueError(
+                 f"Model {model_name} is not supported. Please choose from the following: "
+                 f"{list(self.supported_models.keys())}"
+             )
+
+         model_info = self.supported_models[model_name]
+         checkpoint = model_info["checkpoint"]
+         print(f"Deleting {model_name} ({checkpoint})")
+
+         # Handle FLM models
+         if model_info.get("recipe") == "flm":
+             try:
+                 command = ["flm", "remove", checkpoint]
+                 subprocess.run(command, check=True, encoding="utf-8", errors="replace")
+                 print(f"Successfully deleted FLM model: {model_name}")
+                 return
+             except subprocess.CalledProcessError as e:
+                 raise ValueError(f"Failed to delete FLM model {model_name}: {e}") from e
+
+         if checkpoint.startswith("models--"):
+             # This is already in cache directory format (local model)
+             # Extract just the base directory name (models--{name}) from checkpoint
+             # which might contain full file path like models--name\files\model.gguf
+             checkpoint_parts = checkpoint.replace("\\", "/").split("/")
+             base_checkpoint = checkpoint_parts[0]  # Just the models--{name} part
+             model_cache_dir = os.path.join(HF_HUB_CACHE, base_checkpoint)
+
+             if os.path.exists(model_cache_dir):
+                 shutil.rmtree(model_cache_dir)
+                 print(
+                     f"Successfully deleted local model {model_name} from {model_cache_dir}"
+                 )
+             else:
+                 print(
+                     f"Model {model_name} directory not found at {model_cache_dir} - may have been manually deleted"
+                 )
+
+             # Clean up user models registry
+             if model_name.startswith("user.") and os.path.exists(USER_MODELS_FILE):
+                 with open(USER_MODELS_FILE, "r", encoding="utf-8") as file:
+                     user_models = json.load(file)
+
+                 base_model_name = model_name[5:]  # Remove "user." prefix
+                 if base_model_name in user_models:
+                     del user_models[base_model_name]
+                     with open(USER_MODELS_FILE, "w", encoding="utf-8") as file:
+                         json.dump(user_models, file)
+                     print(f"Removed {model_name} from user models registry")
+
+             return
+         # Parse checkpoint to get base and variant
+         base_checkpoint, variant = parse_checkpoint(checkpoint)
+
+         # Get the repository cache directory
+         snapshot_path = None
+         model_cache_dir = None
+         try:
+             # First, try to get the local path using snapshot_download with local_files_only=True
+             snapshot_path = custom_snapshot_download(
+                 base_checkpoint, local_files_only=True
+             )
+             # Navigate up to the model directory (parent of snapshots directory)
+             model_cache_dir = os.path.dirname(os.path.dirname(snapshot_path))
+
+         except Exception as e:
+             # If snapshot_download fails, try to construct the cache path manually
+             if (
+                 "not found in cache" in str(e).lower()
+                 or "localentrynotfounderror" in str(e).lower()
+                 or "cannot find an appropriate cached snapshot" in str(e).lower()
+             ):
+                 # Construct the Hugging Face cache path manually
+                 cache_home = huggingface_hub.constants.HF_HUB_CACHE
+                 # Convert repo format (e.g., "unsloth/GLM-4.5-Air-GGUF") to cache format
+                 repo_cache_name = base_checkpoint.replace("/", "--")
+                 model_cache_dir = os.path.join(cache_home, f"models--{repo_cache_name}")
+                 # Try to find the snapshot path within the model cache directory
+                 if os.path.exists(model_cache_dir):
+                     snapshots_dir = os.path.join(model_cache_dir, "snapshots")
+                     if os.path.exists(snapshots_dir):
+                         snapshot_dirs = [
+                             d
+                             for d in os.listdir(snapshots_dir)
+                             if os.path.isdir(os.path.join(snapshots_dir, d))
+                         ]
+                         if snapshot_dirs:
+                             # Use the first (likely only) snapshot directory
+                             snapshot_path = os.path.join(
+                                 snapshots_dir, snapshot_dirs[0]
+                             )
+             else:
+                 raise ValueError(f"Failed to delete model {model_name}: {str(e)}")
+
+         # Handle deletion based on whether this is a GGUF model with variants
+         if variant and snapshot_path and os.path.exists(snapshot_path):
+             # This is a GGUF model with a specific variant - delete only variant files
+             try:
+                 from lemonade.tools.llamacpp.utils import identify_gguf_models
+
+                 # Get the specific files for this variant
+                 core_files, sharded_files = identify_gguf_models(
+                     base_checkpoint,
+                     variant,
+                     self.supported_models[model_name].get("mmproj", ""),
+                 )
+                 all_variant_files = list(core_files.values()) + sharded_files
+
+                 # Delete the specific variant files
+                 deleted_files = []
+                 for file_path in all_variant_files:
+                     full_file_path = os.path.join(snapshot_path, file_path)
+                     if os.path.exists(full_file_path):
+                         if os.path.isfile(full_file_path):
+                             os.remove(full_file_path)
+                             deleted_files.append(file_path)
+                         elif os.path.isdir(full_file_path):
+                             shutil.rmtree(full_file_path)
+                             deleted_files.append(file_path)
+
+                 if deleted_files:
+                     print(f"Successfully deleted variant files: {deleted_files}")
+                 else:
+                     print(f"No variant files found for {variant} in {snapshot_path}")
+
+                 # Check if the snapshot directory is now empty (only containing .gitattributes, README, etc.)
+                 remaining_files = [
+                     f
+                     for f in os.listdir(snapshot_path)
+                     if f.endswith(".gguf")
+                     or os.path.isdir(os.path.join(snapshot_path, f))
+                 ]
+
+                 # If no GGUF files remain, we can delete the entire repository
+                 if not remaining_files:
+                     print(f"No other variants remain, deleting entire repository cache")
+                     shutil.rmtree(model_cache_dir)
+                     print(
+                         f"Successfully deleted entire model cache at {model_cache_dir}"
+                     )
+                 else:
+                     print(
+                         f"Other variants still exist in repository, keeping cache directory"
+                     )
+
+             except Exception as variant_error:
+                 print(
+                     f"Warning: Could not perform selective variant deletion: {variant_error}"
+                 )
+                 print("This may indicate the files were already manually deleted")
+
+         elif model_cache_dir and os.path.exists(model_cache_dir):
+             # Non-GGUF model or GGUF without variant - delete entire repository as before
+             shutil.rmtree(model_cache_dir)
+             print(f"Successfully deleted model {model_name} from {model_cache_dir}")
+
+         elif model_cache_dir:
+             # Model directory doesn't exist - it was likely already manually deleted
+             print(
+                 f"Model {model_name} directory not found at {model_cache_dir} - may have been manually deleted"
+             )
+
+         else:
+             raise ValueError(f"Unable to determine cache path for model {model_name}")
+
+         # Clean up user models registry if applicable
+         if model_name.startswith("user.") and os.path.exists(USER_MODELS_FILE):
+             with open(USER_MODELS_FILE, "r", encoding="utf-8") as file:
+                 user_models = json.load(file)
+
+             # Remove the "user." prefix to get the actual model name in the file
+             base_model_name = model_name[5:]  # Remove "user." prefix
+
+             if base_model_name in user_models:
+                 del user_models[base_model_name]
+                 with open(USER_MODELS_FILE, "w", encoding="utf-8") as file:
+                     json.dump(user_models, file)
+                 print(f"Removed {model_name} from user models registry")
+
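+     # Minimal sketch of the remaining maintenance calls (hypothetical model name;
+     # cleanup_incompatible_models takes a list of model paths, per its docstring):
+     #
+     #     manager.delete_model("user.my-local-model")
+     #     report = manager.get_incompatible_ryzenai_models()
+     #     print(report["count"], report["total_size"])
+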
+     def get_incompatible_ryzenai_models(self):
+         """
+         Get information about incompatible RyzenAI models in the cache.
+
+         Returns:
+             dict with 'models' list and 'total_size' info
+         """
+         # Get HF_HOME from environment
+         hf_home = os.environ.get("HF_HOME", None)
+
+         incompatible_models, total_size = detect_incompatible_ryzenai_models(
+             DEFAULT_CACHE_DIR, hf_home
+         )
+
+         return {
+             "models": incompatible_models,
+             "total_size": total_size,
+             "count": len(incompatible_models),
+         }
+
+     def cleanup_incompatible_models(self, model_paths: list):
+         """
+         Delete incompatible RyzenAI models from the cache.
+
+         Args:
+             model_paths: List of model paths to delete
+
+         Returns:
+             dict with deletion results
+         """
+         return delete_incompatible_models(model_paths)
+
+
+ # This file was originally licensed under Apache 2.0. It has been modified.
+ # Modifications Copyright (c) 2025 AMD