lemonade-sdk 8.1.9__py3-none-any.whl → 8.1.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lemonade-sdk might be problematic; see the registry's advisory page for more details.

Files changed (33)
  1. lemonade/common/inference_engines.py +13 -4
  2. lemonade/common/system_info.py +570 -1
  3. lemonade/tools/flm/__init__.py +1 -0
  4. lemonade/tools/flm/utils.py +255 -0
  5. lemonade/tools/llamacpp/utils.py +62 -13
  6. lemonade/tools/server/flm.py +137 -0
  7. lemonade/tools/server/llamacpp.py +23 -5
  8. lemonade/tools/server/serve.py +292 -135
  9. lemonade/tools/server/static/js/chat.js +165 -82
  10. lemonade/tools/server/static/js/models.js +87 -54
  11. lemonade/tools/server/static/js/shared.js +5 -3
  12. lemonade/tools/server/static/logs.html +47 -0
  13. lemonade/tools/server/static/styles.css +159 -8
  14. lemonade/tools/server/static/webapp.html +28 -10
  15. lemonade/tools/server/tray.py +158 -38
  16. lemonade/tools/server/utils/macos_tray.py +226 -0
  17. lemonade/tools/server/utils/{system_tray.py → windows_tray.py} +13 -0
  18. lemonade/tools/server/webapp.py +4 -1
  19. lemonade/tools/server/wrapped_server.py +91 -25
  20. lemonade/version.py +1 -1
  21. lemonade_install/install.py +25 -2
  22. {lemonade_sdk-8.1.9.dist-info → lemonade_sdk-8.1.11.dist-info}/METADATA +9 -6
  23. {lemonade_sdk-8.1.9.dist-info → lemonade_sdk-8.1.11.dist-info}/RECORD +33 -28
  24. lemonade_server/cli.py +105 -14
  25. lemonade_server/model_manager.py +186 -45
  26. lemonade_server/pydantic_models.py +25 -1
  27. lemonade_server/server_models.json +162 -62
  28. lemonade_server/settings.py +39 -39
  29. {lemonade_sdk-8.1.9.dist-info → lemonade_sdk-8.1.11.dist-info}/WHEEL +0 -0
  30. {lemonade_sdk-8.1.9.dist-info → lemonade_sdk-8.1.11.dist-info}/entry_points.txt +0 -0
  31. {lemonade_sdk-8.1.9.dist-info → lemonade_sdk-8.1.11.dist-info}/licenses/LICENSE +0 -0
  32. {lemonade_sdk-8.1.9.dist-info → lemonade_sdk-8.1.11.dist-info}/licenses/NOTICE.md +0 -0
  33. {lemonade_sdk-8.1.9.dist-info → lemonade_sdk-8.1.11.dist-info}/top_level.txt +0 -0
@@ -1,16 +1,25 @@
1
1
  import json
2
2
  import os
3
+ import subprocess
3
4
  from typing import Optional
4
5
  import shutil
5
6
  import huggingface_hub
6
7
  from importlib.metadata import distributions
7
8
  from lemonade_server.pydantic_models import PullConfig
9
+ from lemonade_server.pydantic_models import PullConfig
8
10
  from lemonade.cache import DEFAULT_CACHE_DIR
9
11
  from lemonade.tools.llamacpp.utils import parse_checkpoint, download_gguf
10
12
  from lemonade.common.network import custom_snapshot_download
11
13
 
12
14
  USER_MODELS_FILE = os.path.join(DEFAULT_CACHE_DIR, "user_models.json")
13
15
 
16
+ from lemonade.tools.flm.utils import (
17
+ get_flm_installed_models,
18
+ is_flm_available,
19
+ install_flm,
20
+ download_flm_model,
21
+ )
22
+
14
23
 
15
24
  class ModelManager:
16
25
 
@@ -81,46 +90,61 @@ class ModelManager:
81
90
  """
82
91
  downloaded_models = {}
83
92
  downloaded_checkpoints = self.downloaded_hf_checkpoints
93
+
94
+ # Get FLM installed model checkpoints
95
+ flm_installed_checkpoints = get_flm_installed_models()
96
+
84
97
  for model in self.supported_models:
85
98
  model_info = self.supported_models[model]
86
- checkpoint = model_info["checkpoint"]
87
- base_checkpoint, variant = parse_checkpoint(checkpoint)
88
-
89
- if base_checkpoint in downloaded_checkpoints:
90
- # For GGUF models with variants, verify the specific variant files exist
91
- if variant and model_info.get("recipe") == "llamacpp":
92
- try:
93
- from lemonade.tools.llamacpp.utils import identify_gguf_models
94
- from lemonade.common.network import custom_snapshot_download
95
-
96
- # Get the local snapshot path
97
- snapshot_path = custom_snapshot_download(
98
- base_checkpoint, local_files_only=True
99
- )
100
99
 
101
- # Check if the specific variant files exist
102
- core_files, sharded_files = identify_gguf_models(
103
- base_checkpoint, variant, model_info.get("mmproj", "")
104
- )
105
- all_variant_files = list(core_files.values()) + sharded_files
106
-
107
- # Verify all required files exist locally
108
- all_files_exist = True
109
- for file_path in all_variant_files:
110
- full_file_path = os.path.join(snapshot_path, file_path)
111
- if not os.path.exists(full_file_path):
112
- all_files_exist = False
113
- break
114
-
115
- if all_files_exist:
116
- downloaded_models[model] = model_info
117
-
118
- except Exception:
119
- # If we can't verify the variant, don't include it
120
- pass
121
- else:
122
- # For non-GGUF models or GGUF without variants, use the original logic
100
+ # Handle FLM models
101
+ if model_info.get("recipe") == "flm":
102
+ if model_info["checkpoint"] in flm_installed_checkpoints:
123
103
  downloaded_models[model] = model_info
104
+ else:
105
+ # Handle other models
106
+ checkpoint = model_info["checkpoint"]
107
+ base_checkpoint, variant = parse_checkpoint(checkpoint)
108
+
109
+ if base_checkpoint in downloaded_checkpoints:
110
+ # For GGUF models with variants, verify the specific variant files exist
111
+ if variant and model_info.get("recipe") == "llamacpp":
112
+ try:
113
+ from lemonade.tools.llamacpp.utils import (
114
+ identify_gguf_models,
115
+ )
116
+ from lemonade.common.network import custom_snapshot_download
117
+
118
+ # Get the local snapshot path
119
+ snapshot_path = custom_snapshot_download(
120
+ base_checkpoint, local_files_only=True
121
+ )
122
+
123
+ # Check if the specific variant files exist
124
+ core_files, sharded_files = identify_gguf_models(
125
+ base_checkpoint, variant, model_info.get("mmproj", "")
126
+ )
127
+ all_variant_files = (
128
+ list(core_files.values()) + sharded_files
129
+ )
130
+
131
+ # Verify all required files exist locally
132
+ all_files_exist = True
133
+ for file_path in all_variant_files:
134
+ full_file_path = os.path.join(snapshot_path, file_path)
135
+ if not os.path.exists(full_file_path):
136
+ all_files_exist = False
137
+ break
138
+
139
+ if all_files_exist:
140
+ downloaded_models[model] = model_info
141
+
142
+ except Exception:
143
+ # If we can't verify the variant, don't include it
144
+ pass
145
+ else:
146
+ # For non-GGUF models or GGUF without variants, use the original logic
147
+ downloaded_models[model] = model_info
124
148
  return downloaded_models
125
149
 
126
150
  @property
@@ -137,6 +161,7 @@ class ModelManager:
137
161
  checkpoint: Optional[str] = None,
138
162
  recipe: Optional[str] = None,
139
163
  reasoning: bool = False,
164
+ vision: bool = False,
140
165
  mmproj: str = "",
141
166
  do_not_upgrade: bool = False,
142
167
  ):
@@ -150,6 +175,7 @@ class ModelManager:
150
175
  if model not in self.supported_models:
151
176
  # Register the model as a user model if the model name
152
177
  # is not already registered
178
+ import logging
153
179
 
154
180
  # Ensure the model name includes the `user` namespace
155
181
  model_parsed = model.split(".", 1)
@@ -172,11 +198,17 @@ class ModelManager:
172
198
  )
173
199
 
174
200
  # JSON content that will be used for registration if the download succeeds
201
+ labels = ["custom"]
202
+ if reasoning:
203
+ labels.append("reasoning")
204
+ if vision:
205
+ labels.append("vision")
206
+
175
207
  new_user_model = {
176
208
  "checkpoint": checkpoint,
177
209
  "recipe": recipe,
178
210
  "suggested": True,
179
- "labels": ["custom"] + (["reasoning"] if reasoning else []),
211
+ "labels": labels,
180
212
  }
181
213
 
182
214
  if mmproj:
@@ -199,6 +231,7 @@ class ModelManager:
199
231
  checkpoint=checkpoint,
200
232
  recipe=recipe,
201
233
  reasoning=reasoning,
234
+ vision=vision,
202
235
  )
203
236
  else:
204
237
  # Model is already registered - check if trying to register with different parameters
@@ -207,18 +240,21 @@ class ModelManager:
207
240
  existing_recipe = existing_model.get("recipe")
208
241
  existing_reasoning = "reasoning" in existing_model.get("labels", [])
209
242
  existing_mmproj = existing_model.get("mmproj", "")
243
+ existing_vision = "vision" in existing_model.get("labels", [])
210
244
 
211
245
  # Compare parameters
212
246
  checkpoint_differs = checkpoint and checkpoint != existing_checkpoint
213
247
  recipe_differs = recipe and recipe != existing_recipe
214
248
  reasoning_differs = reasoning and reasoning != existing_reasoning
215
249
  mmproj_differs = mmproj and mmproj != existing_mmproj
250
+ vision_differs = vision and vision != existing_vision
216
251
 
217
252
  if (
218
253
  checkpoint_differs
219
254
  or recipe_differs
220
255
  or reasoning_differs
221
256
  or mmproj_differs
257
+ or vision_differs
222
258
  ):
223
259
  conflicts = []
224
260
  if checkpoint_differs:
@@ -237,6 +273,10 @@ class ModelManager:
237
273
  conflicts.append(
238
274
  f"mmproj (existing: '{existing_mmproj}', new: '{mmproj}')"
239
275
  )
276
+ if vision_differs:
277
+ conflicts.append(
278
+ f"vision (existing: {existing_vision}, new: {vision})"
279
+ )
240
280
 
241
281
  conflict_details = ", ".join(conflicts)
242
282
 
@@ -260,7 +300,34 @@ class ModelManager:
260
300
  gguf_model_config = PullConfig(**self.supported_models[model])
261
301
  print(f"Downloading {model} ({checkpoint_to_download})")
262
302
 
263
- if "gguf" in checkpoint_to_download.lower():
303
+ # Handle FLM models
304
+ current_recipe = (
305
+ recipe
306
+ if new_registration_model_config
307
+ else self.supported_models[model].get("recipe")
308
+ )
309
+ if current_recipe == "flm":
310
+ # Check if FLM is available, and install it if not
311
+ if not is_flm_available():
312
+ print(
313
+ "FLM is not installed or not at the minimum required version. Installing FLM..."
314
+ )
315
+ install_flm()
316
+
317
+ try:
318
+ download_flm_model(checkpoint_to_download, None, do_not_upgrade)
319
+ print(f"Successfully downloaded FLM model: {model}")
320
+ except subprocess.CalledProcessError as e:
321
+ raise RuntimeError(
322
+ f"Failed to download FLM model {model}: {e}"
323
+ ) from e
324
+ except FileNotFoundError as e:
325
+ # This shouldn't happen after install_flm(), but just in case
326
+ raise RuntimeError(
327
+ f"FLM command not found even after installation attempt. "
328
+ f"Please manually install FLM using 'lemonade-install --flm'."
329
+ ) from e
330
+ elif "gguf" in checkpoint_to_download.lower():
264
331
  download_gguf(
265
332
  gguf_model_config.checkpoint,
266
333
  gguf_model_config.mmproj,
@@ -292,21 +359,84 @@ class ModelManager:
292
359
  def filter_models_by_backend(self, models: dict) -> dict:
293
360
  """
294
361
  Returns a filtered dict of models that are enabled by the
295
- current environment.
362
+ current environment and platform.
296
363
  """
364
+ import platform
365
+
297
366
  installed_packages = {dist.metadata["Name"].lower() for dist in distributions()}
298
367
 
299
368
  hybrid_installed = (
300
369
  "onnxruntime-vitisai" in installed_packages
301
370
  and "onnxruntime-genai-directml-ryzenai" in installed_packages
302
371
  )
372
+
373
+ from lemonade_install.install import (
374
+ check_ryzen_ai_processor,
375
+ UnsupportedPlatformError,
376
+ )
377
+
378
+ try:
379
+ check_ryzen_ai_processor()
380
+ ryzenai_npu_available = True
381
+ except UnsupportedPlatformError:
382
+ ryzenai_npu_available = False
383
+
384
+ # On macOS, only llamacpp (GGUF) models are supported, and only on Apple Silicon with macOS 14+
385
+ is_macos = platform.system() == "Darwin"
386
+ if is_macos:
387
+ machine = platform.machine().lower()
388
+ if machine == "x86_64":
389
+ # Intel Macs are not supported - return empty model list with error info
390
+ return {
391
+ "_unsupported_platform_error": {
392
+ "error": "Intel Mac Not Supported",
393
+ "message": (
394
+ "Lemonade Server requires Apple Silicon processors on macOS. "
395
+ "Intel Macs are not currently supported. "
396
+ "Please use a Mac with Apple Silicon or try Lemonade on Windows/Linux."
397
+ ),
398
+ "platform": f"macOS {machine}",
399
+ "supported": "macOS 14+ with Apple Silicon (arm64/aarch64)",
400
+ }
401
+ }
402
+
403
+ # Check macOS version requirement
404
+ mac_version = platform.mac_ver()[0]
405
+ if mac_version:
406
+ major_version = int(mac_version.split(".")[0])
407
+ if major_version < 14:
408
+ return {
409
+ "_unsupported_platform_error": {
410
+ "error": "macOS Version Not Supported",
411
+ "message": (
412
+ f"Lemonade Server requires macOS 14 or later. "
413
+ f"Your system is running macOS {mac_version}. "
414
+ f"Please update your macOS version to use Lemonade Server."
415
+ ),
416
+ "platform": f"macOS {mac_version} {machine}",
417
+ "supported": "macOS 14+ with Apple Silicon (arm64/aarch64)",
418
+ }
419
+ }
420
+
303
421
  filtered = {}
304
422
  for model, value in models.items():
305
- if value.get("recipe") == "oga-hybrid":
306
- if hybrid_installed:
307
- filtered[model] = value
308
- else:
309
- filtered[model] = value
423
+ recipe = value.get("recipe")
424
+
425
+ # Filter OGA hybrid models based on package availability
426
+ if recipe == "oga-hybrid":
427
+ if not hybrid_installed:
428
+ continue
429
+
430
+ if recipe == "flm":
431
+ if not ryzenai_npu_available:
432
+ continue
433
+
434
+ # On macOS, only show llamacpp models (GGUF format)
435
+ if is_macos and recipe != "llamacpp":
436
+ continue
437
+
438
+ filtered[model] = value
439
+
310
440
  return filtered
311
441
 
312
442
  def delete_model(self, model_name: str):
@@ -320,9 +450,20 @@ class ModelManager:
320
450
  f"{list(self.supported_models.keys())}"
321
451
  )
322
452
 
323
- checkpoint = self.supported_models[model_name]["checkpoint"]
453
+ model_info = self.supported_models[model_name]
454
+ checkpoint = model_info["checkpoint"]
324
455
  print(f"Deleting {model_name} ({checkpoint})")
325
456
 
457
+ # Handle FLM models
458
+ if model_info.get("recipe") == "flm":
459
+ try:
460
+ command = ["flm", "remove", checkpoint]
461
+ subprocess.run(command, check=True, encoding="utf-8", errors="replace")
462
+ print(f"Successfully deleted FLM model: {model_name}")
463
+ return
464
+ except subprocess.CalledProcessError as e:
465
+ raise ValueError(f"Failed to delete FLM model {model_name}: {e}") from e
466
+
326
467
  # Parse checkpoint to get base and variant
327
468
  base_checkpoint, variant = parse_checkpoint(checkpoint)
328
469
 
@@ -1,4 +1,5 @@
1
1
  import os
2
+ import platform
2
3
  from typing import Optional, Union, List
3
4
 
4
5
  from pydantic import BaseModel
@@ -6,7 +7,28 @@ from pydantic import BaseModel
6
7
  DEFAULT_PORT = int(os.getenv("LEMONADE_PORT", "8000"))
7
8
  DEFAULT_HOST = os.getenv("LEMONADE_HOST", "localhost")
8
9
  DEFAULT_LOG_LEVEL = os.getenv("LEMONADE_LOG_LEVEL", "info")
9
- DEFAULT_LLAMACPP_BACKEND = os.getenv("LEMONADE_LLAMACPP", "vulkan")
10
+
11
+
12
+ # Platform-aware default backend selection
13
+ def _get_default_llamacpp_backend():
14
+ """
15
+ Get the default llamacpp backend based on the current platform.
16
+ """
17
+ # Allow environment variable override
18
+ env_backend = os.getenv("LEMONADE_LLAMACPP")
19
+ if env_backend:
20
+ return env_backend
21
+
22
+ # Platform-specific defaults: use metal for Apple Silicon, vulkan for everything else
23
+ if platform.system() == "Darwin" and platform.machine().lower() in [
24
+ "arm64",
25
+ "aarch64",
26
+ ]:
27
+ return "metal"
28
+ return "vulkan"
29
+
30
+
31
+ DEFAULT_LLAMACPP_BACKEND = _get_default_llamacpp_backend()
10
32
  DEFAULT_CTX_SIZE = int(os.getenv("LEMONADE_CTX_SIZE", "4096"))
11
33
 
12
34
 
@@ -23,6 +45,8 @@ class LoadConfig(BaseModel):
23
45
  recipe: Optional[str] = None
24
46
  # Indicates whether the model is a reasoning model, like DeepSeek
25
47
  reasoning: Optional[bool] = False
48
+ # Indicates whether the model is a vision model with image processing capabilities
49
+ vision: Optional[bool] = False
26
50
  # Indicates which Multimodal Projector (mmproj) file to use
27
51
  mmproj: Optional[str] = None
28
52