lemonade-sdk 8.1.1__tar.gz → 8.1.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lemonade-sdk might be problematic. Click here for more details.

Files changed (78)
  1. {lemonade_sdk-8.1.1/src/lemonade_sdk.egg-info → lemonade_sdk-8.1.2}/PKG-INFO +1 -1
  2. {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/setup.py +1 -0
  3. {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/common/inference_engines.py +1 -1
  4. {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/tools/llamacpp/utils.py +114 -14
  5. {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/tools/management_tools.py +1 -1
  6. {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/tools/server/serve.py +7 -3
  7. {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/tools/server/static/webapp.html +2 -1
  8. {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/tools/server/tray.py +1 -1
  9. {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/tools/server/utils/port.py +2 -2
  10. lemonade_sdk-8.1.2/src/lemonade/version.py +1 -0
  11. {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2/src/lemonade_sdk.egg-info}/PKG-INFO +1 -1
  12. {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade_sdk.egg-info/entry_points.txt +1 -0
  13. {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade_server/cli.py +37 -2
  14. {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade_server/model_manager.py +1 -1
  15. {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade_server/server_models.json +45 -0
  16. lemonade_sdk-8.1.1/src/lemonade/version.py +0 -1
  17. {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/LICENSE +0 -0
  18. {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/NOTICE.md +0 -0
  19. {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/README.md +0 -0
  20. {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/setup.cfg +0 -0
  21. {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/__init__.py +0 -0
  22. {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/api.py +0 -0
  23. {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/cache.py +0 -0
  24. {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/cli.py +0 -0
  25. {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/common/__init__.py +0 -0
  26. {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/common/build.py +0 -0
  27. {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/common/cli_helpers.py +0 -0
  28. {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/common/exceptions.py +0 -0
  29. {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/common/filesystem.py +0 -0
  30. {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/common/network.py +0 -0
  31. {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/common/printing.py +0 -0
  32. {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/common/status.py +0 -0
  33. {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/common/system_info.py +0 -0
  34. {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/common/test_helpers.py +0 -0
  35. {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/profilers/__init__.py +0 -0
  36. {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/profilers/memory_tracker.py +0 -0
  37. {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/profilers/profiler.py +0 -0
  38. {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/sequence.py +0 -0
  39. {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/state.py +0 -0
  40. {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/tools/__init__.py +0 -0
  41. {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/tools/accuracy.py +0 -0
  42. {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/tools/adapter.py +0 -0
  43. {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/tools/bench.py +0 -0
  44. {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/tools/huggingface/bench.py +0 -0
  45. {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/tools/huggingface/load.py +0 -0
  46. {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/tools/huggingface/utils.py +0 -0
  47. {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/tools/humaneval.py +0 -0
  48. {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/tools/llamacpp/bench.py +0 -0
  49. {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/tools/llamacpp/load.py +0 -0
  50. {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/tools/mmlu.py +0 -0
  51. {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/tools/oga/__init__.py +0 -0
  52. {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/tools/oga/bench.py +0 -0
  53. {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/tools/oga/load.py +0 -0
  54. {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/tools/oga/utils.py +0 -0
  55. {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/tools/perplexity.py +0 -0
  56. {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/tools/prompt.py +0 -0
  57. {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/tools/quark/__init__.py +0 -0
  58. {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/tools/quark/quark_load.py +0 -0
  59. {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/tools/quark/quark_quantize.py +0 -0
  60. {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/tools/report/__init__.py +0 -0
  61. {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/tools/report/llm_report.py +0 -0
  62. {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/tools/report/table.py +0 -0
  63. {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/tools/server/__init__.py +0 -0
  64. {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/tools/server/llamacpp.py +0 -0
  65. {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/tools/server/static/favicon.ico +0 -0
  66. {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/tools/server/static/styles.css +0 -0
  67. {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/tools/server/tool_calls.py +0 -0
  68. {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/tools/server/utils/system_tray.py +0 -0
  69. {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/tools/server/utils/thread.py +0 -0
  70. {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/tools/server/webapp.py +0 -0
  71. {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/tools/tool.py +0 -0
  72. {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade_install/__init__.py +0 -0
  73. {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade_install/install.py +0 -0
  74. {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade_sdk.egg-info/SOURCES.txt +0 -0
  75. {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade_sdk.egg-info/dependency_links.txt +0 -0
  76. {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade_sdk.egg-info/requires.txt +0 -0
  77. {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade_sdk.egg-info/top_level.txt +0 -0
  78. {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade_server/pydantic_models.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: lemonade-sdk
3
- Version: 8.1.1
3
+ Version: 8.1.2
4
4
  Summary: Lemonade SDK: Your LLM Aide for Validation and Deployment
5
5
  Author-email: lemonade@amd.com
6
6
  Requires-Python: >=3.10, <3.13
@@ -125,6 +125,7 @@ setup(
125
125
  "lemonade=lemonade:lemonadecli",
126
126
  "lemonade-install=lemonade_install:installcli",
127
127
  "lemonade-server-dev=lemonade_server.cli:main",
128
+ "lsdev=lemonade_server.cli:developer_entrypoint",
128
129
  ]
129
130
  },
130
131
  python_requires=">=3.10, <3.13",
@@ -5,7 +5,6 @@ import importlib.metadata
5
5
  import subprocess
6
6
  from abc import ABC, abstractmethod
7
7
  from typing import Dict, Optional
8
- import transformers
9
8
 
10
9
 
11
10
  class InferenceEngineDetector:
@@ -352,6 +351,7 @@ class TransformersDetector(BaseEngineDetector):
352
351
 
353
352
  try:
354
353
  import torch
354
+ import transformers
355
355
 
356
356
  if device_type == "cpu":
357
357
  result = {
@@ -57,7 +57,7 @@ def identify_rocm_arch_from_name(device_name: str) -> str | None:
57
57
  return None
58
58
 
59
59
 
60
- def identify_rocm_arch_and_hip_id() -> tuple[str, str]:
60
+ def identify_rocm_arch() -> str:
61
61
  """
62
62
  Identify the appropriate ROCm target architecture based on the device info
63
63
  Returns tuple of (architecture, gpu_type) where gpu_type is 'igpu' or 'dgpu'
@@ -68,21 +68,54 @@ def identify_rocm_arch_and_hip_id() -> tuple[str, str]:
68
68
  amd_igpu = system_info.get_amd_igpu_device()
69
69
  amd_dgpu = system_info.get_amd_dgpu_devices()
70
70
  target_arch = None
71
- gpu_count = 0
72
71
  for gpu in [amd_igpu] + amd_dgpu:
73
72
  if gpu.get("available") and gpu.get("name"):
74
- gpu_count += 1
75
73
  target_arch = identify_rocm_arch_from_name(gpu["name"].lower())
76
74
  if target_arch:
77
75
  break
78
76
 
79
- # Get HIP ID based on the number of GPUs available
80
- # Here, we assume that the iGPU will always show up before the dGPUs (if available)
81
- # We also assume that selecting the dGPU is preferred over the iGPU
82
- # Multiple GPUs are not supported at the moment
83
- hip_id = str(gpu_count - 1)
77
+ return target_arch
84
78
 
85
- return target_arch, hip_id
79
+
80
def identify_hip_id() -> str:
    """
    Identify the HIP ID of the device that llama.cpp's ROCm backend should use.

    Enumerates the HIP devices via get_hip_devices(), keeps those whose name
    maps to a ROCm architecture via identify_rocm_arch_from_name(), and picks
    the last one. If no device name maps to a known architecture, the last
    HIP device is used instead.

    Returns:
        The selected HIP device ID as a string (the value written to
        HIP_VISIBLE_DEVICES by the installer).

    Raises:
        ValueError: If no HIP devices are reported at all.
    """
    # Get HIP devices
    hip_devices = get_hip_devices()
    logging.debug(f"HIP devices found: {hip_devices}")
    if not hip_devices:
        raise ValueError("No HIP devices found when identifying HIP ID")

    # Identify HIP devices that are compatible with our ROCm builds.
    # The name is lower-cased for the lookup to match how identify_rocm_arch()
    # calls the same helper; the original name is kept for logging.
    rocm_devices = [
        [device_id, device_name]
        for device_id, device_name in hip_devices
        if identify_rocm_arch_from_name(device_name.lower())
    ]
    logging.debug(f"ROCm devices found: {rocm_devices}")

    # If no ROCm devices are found, use the last HIP device
    # This might be needed in some scenarios where HIP reports generic device names
    # Example: "AMD Radeon Graphics" for STX Halo iGPU on Ubuntu 24.04
    if not rocm_devices:
        rocm_devices = [hip_devices[-1]]
        logging.warning(
            "No ROCm devices found when identifying HIP ID. "
            f"Falling back to the following device: {rocm_devices[0]}"
        )
    elif len(rocm_devices) > 1:
        # The two fragments are concatenated, so the separator must be explicit
        logging.warning(
            f"Multiple ROCm devices found when identifying HIP ID: {rocm_devices}. "
            "The last device will be used."
        )

    # Select the last device
    device_selected = rocm_devices[-1]
    logging.debug(f"Selected ROCm device: {device_selected}")

    # Return the device ID as a string, as promised by the return annotation
    return str(device_selected[0])
86
119
 
87
120
 
88
121
  def get_llama_version(backend: str) -> str:
@@ -277,7 +310,7 @@ def install_llamacpp(backend):
277
310
  target_arch = None
278
311
  if backend == "rocm":
279
312
  # Identify the target architecture
280
- target_arch, hip_id = identify_rocm_arch_and_hip_id()
313
+ target_arch = identify_rocm_arch()
281
314
  if not target_arch:
282
315
  system = platform.system().lower()
283
316
  if system == "linux":
@@ -293,10 +326,6 @@ def install_llamacpp(backend):
293
326
  f"for supported configurations. {hint}"
294
327
  )
295
328
 
296
- # Set HIP_VISIBLE_DEVICES=0 for igpu, =1 for dgpu
297
- env_file_path = os.path.join(llama_server_exe_dir, ".env")
298
- set_key(env_file_path, "HIP_VISIBLE_DEVICES", hip_id)
299
-
300
329
  # Direct download for Vulkan/ROCm
301
330
  llama_archive_url, filename = get_binary_url_and_filename(backend, target_arch)
302
331
  llama_archive_path = os.path.join(llama_server_exe_dir, filename)
@@ -315,6 +344,12 @@ def install_llamacpp(backend):
315
344
  else:
316
345
  raise NotImplementedError(f"Unsupported archive format: {filename}")
317
346
 
347
+ # Identify and set HIP ID
348
+ if backend == "rocm":
349
+ hip_id = identify_hip_id()
350
+ env_file_path = os.path.join(llama_server_exe_dir, ".env")
351
+ set_key(env_file_path, "HIP_VISIBLE_DEVICES", str(hip_id))
352
+
318
353
  # Make executable on Linux - need to update paths after extraction
319
354
  if platform.system().lower() == "linux":
320
355
  # Re-get the paths since extraction might have changed the directory structure
@@ -778,3 +813,68 @@ class LlamaCppAdapter(ModelAdapter):
778
813
  error_msg = f"Failed to run llama.cpp command: {str(e)}\n"
779
814
  error_msg += f"Command: {' '.join(cmd)}"
780
815
  raise Exception(error_msg)
816
+
817
+
818
def get_hip_devices():
    """Get list of HIP devices with their IDs and names.

    Loads the HIP runtime library found in the folder returned by
    get_llama_folder_path("rocm") through ctypes and queries it for every
    device it reports.

    Returns:
        A list of ``[device_id, device_name]`` pairs, one per device whose
        properties could be read. Devices whose properties query fails are
        logged and skipped. The list is empty when ``hipGetDeviceCount``
        itself fails.

    Raises:
        RuntimeError: If no HIP runtime library matches the search pattern,
            or if the library that was found cannot be loaded.
    """
    import ctypes
    import sys
    import os
    import glob
    from ctypes import c_int, POINTER

    # Get llama.cpp path
    rocm_path = get_llama_folder_path("rocm")

    # Load HIP library
    hip_library_pattern = (
        "amdhip64*.dll" if sys.platform.startswith("win") else "libamdhip64*.so"
    )
    search_pattern = os.path.join(rocm_path, hip_library_pattern)
    # glob returns matches in arbitrary order; sort so the same library is
    # picked on every run when several files match
    matching_files = sorted(glob.glob(search_pattern))
    if not matching_files:
        raise RuntimeError(
            f"Could not find HIP runtime library matching pattern: {search_pattern}"
        )
    hip_library_path = matching_files[0]
    try:
        libhip = ctypes.CDLL(hip_library_path)
    except OSError as err:
        # Report the file we actually tried to load and keep the loader's
        # own error attached as the cause
        raise RuntimeError(
            f"Could not load HIP runtime library from {hip_library_path}"
        ) from err

    # Setup function signatures
    hipError_t = c_int
    # Opaque byte buffer standing in for the C struct.
    # NOTE(review): assumes 2048 bytes is at least sizeof(hipDeviceProp_t) for
    # the bundled runtime and that the device name is its leading 256-byte
    # field - confirm against the HIP headers.
    hipDeviceProp_t = ctypes.c_char * 2048
    libhip.hipGetDeviceCount.restype = hipError_t
    libhip.hipGetDeviceCount.argtypes = [POINTER(c_int)]
    libhip.hipGetDeviceProperties.restype = hipError_t
    libhip.hipGetDeviceProperties.argtypes = [POINTER(hipDeviceProp_t), c_int]
    libhip.hipGetErrorString.restype = ctypes.c_char_p
    libhip.hipGetErrorString.argtypes = [hipError_t]

    # Get device count
    device_count = c_int()
    err = libhip.hipGetDeviceCount(ctypes.byref(device_count))
    if err != 0:
        # logging uses %-style placeholders; without one the extra argument
        # triggers a formatting error and the HIP message is lost
        logging.error(
            "hipGetDeviceCount failed: %s", libhip.hipGetErrorString(err).decode()
        )
        return []

    # Get device properties
    devices = []
    for i in range(device_count.value):
        prop = hipDeviceProp_t()
        err = libhip.hipGetDeviceProperties(ctypes.byref(prop), i)
        if err != 0:
            logging.error(
                "hipGetDeviceProperties failed for device %d: %s",
                i,
                libhip.hipGetErrorString(err).decode(),
            )
            continue

        # Extract device name from HIP device properties: take the first 256
        # bytes and cut at the first NUL so nothing after the C-string
        # terminator leaks into the name
        name_bytes = prop.raw[:256].split(b"\x00", 1)[0]
        device_name = name_bytes.decode("utf-8", errors="replace")
        devices.append([i, device_name])

    return devices
@@ -109,7 +109,7 @@ class Cache(ManagementTool):
109
109
  # pylint: disable=pointless-statement,f-string-without-interpolation
110
110
  f"""
111
111
  A set of functions for managing the lemonade build cache. The default
112
- cache location is {lemonade_cache.DEFAULT_CACHE_DIR}, and can also be
112
+ cache location is {lemonade_cache.DEFAULT_CACHE_DIR}, and can also be
113
113
  selected with
114
114
  the global --cache-dir option or the LEMONADE_CACHE_DIR environment variable.
115
115
 
@@ -72,6 +72,7 @@ if platform.system() == "Windows":
72
72
 
73
73
 
74
74
  DEFAULT_PORT = 8000
75
+ DEFAULT_HOST = "localhost"
75
76
  DEFAULT_LOG_LEVEL = "info"
76
77
  DEFAULT_LLAMACPP_BACKEND = "vulkan"
77
78
  DEFAULT_CTX_SIZE = 4096
@@ -150,6 +151,7 @@ class Server:
150
151
  def __init__(
151
152
  self,
152
153
  port: int = DEFAULT_PORT,
154
+ host: str = DEFAULT_HOST,
153
155
  log_level: str = DEFAULT_LOG_LEVEL,
154
156
  ctx_size: int = DEFAULT_CTX_SIZE,
155
157
  tray: bool = False,
@@ -160,6 +162,7 @@ class Server:
160
162
 
161
163
  # Save args as members
162
164
  self.port = port
165
+ self.host = host
163
166
  self.log_level = log_level
164
167
  self.ctx_size = ctx_size
165
168
  self.tray = tray
@@ -332,6 +335,9 @@ class Server:
332
335
  # Let the app know what port it's running on, so
333
336
  # that the lifespan can access it
334
337
  self.app.port = self.port
338
+ # FastAPI already has a `host` function and we cannot use `_host` as
339
+ # PyLint will believe its private
340
+ self.app.host_ = self.host
335
341
 
336
342
  def run(self):
337
343
  # Common setup
@@ -340,9 +346,7 @@ class Server:
340
346
  tray=self.tray,
341
347
  )
342
348
 
343
- uvicorn.run(
344
- self.app, host="localhost", port=self.port, log_level=self.log_level
345
- )
349
+ uvicorn.run(self.app, host=self.host, port=self.port, log_level=self.log_level)
346
350
 
347
351
  def run_in_thread(self, host: str = "localhost"):
348
352
  """
@@ -369,7 +369,8 @@
369
369
  // Helper to get server base URL
370
370
  function getServerBaseUrl() {
371
371
  const port = window.SERVER_PORT || 8000;
372
- return `http://localhost:${port}`;
372
+ const host = window.location.hostname || 'localhost';
373
+ return `http://${host}:${port}`;
373
374
  }
374
375
 
375
376
  // Check if current model supports vision
@@ -427,7 +427,7 @@ class LemonadeTray(SystemTray):
427
427
  Start the uvicorn server.
428
428
  """
429
429
  self.server = self.server_factory()
430
- self.server.uvicorn_server = self.server.run_in_thread()
430
+ self.server.uvicorn_server = self.server.run_in_thread(self.server.host)
431
431
  self.server.uvicorn_server.run()
432
432
 
433
433
  def run(self):
@@ -43,7 +43,7 @@ async def lifespan(app: FastAPI):
43
43
  "\n"
44
44
  "\n"
45
45
  "🍋 Lemonade Server Ready!\n"
46
- f"🍋 Open http://localhost:{app.port} in your browser for:\n"
46
+ f"🍋 Open http://{app.host_}:{app.port} in your browser for:\n"
47
47
  "🍋 💬 chat\n"
48
48
  "🍋 💻 model management\n"
49
49
  "🍋 📄 docs\n"
@@ -53,7 +53,7 @@ async def lifespan(app: FastAPI):
53
53
  "\n"
54
54
  "\n"
55
55
  "[Lemonade] Lemonade Server Ready!\n"
56
- f"[Lemonade] Open http://localhost:{app.port} in your browser for:\n"
56
+ f"[Lemonade] Open http://{app.host_}:{app.port} in your browser for:\n"
57
57
  "[Lemonade] chat\n"
58
58
  "[Lemonade] model management\n"
59
59
  "[Lemonade] docs\n"
@@ -0,0 +1 @@
1
+ __version__ = "8.1.2"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: lemonade-sdk
3
- Version: 8.1.1
3
+ Version: 8.1.2
4
4
  Summary: Lemonade SDK: Your LLM Aide for Validation and Deployment
5
5
  Author-email: lemonade@amd.com
6
6
  Requires-Python: >=3.10, <3.13
@@ -2,3 +2,4 @@
2
2
  lemonade = lemonade:lemonadecli
3
3
  lemonade-install = lemonade_install:installcli
4
4
  lemonade-server-dev = lemonade_server.cli:main
5
+ lsdev = lemonade_server.cli:developer_entrypoint
@@ -47,6 +47,7 @@ class ModelLoadError(Exception):
47
47
 
48
48
  def serve(
49
49
  port: int = None,
50
+ host: str = "localhost",
50
51
  log_level: str = None,
51
52
  tray: bool = False,
52
53
  use_thread: bool = False,
@@ -79,6 +80,7 @@ def serve(
79
80
  # Start the server
80
81
  server = Server(
81
82
  port=port,
83
+ host=host,
82
84
  log_level=log_level,
83
85
  ctx_size=ctx_size,
84
86
  tray=tray,
@@ -259,7 +261,9 @@ def delete(model_names: List[str]):
259
261
  def run(
260
262
  model_name: str,
261
263
  port: int = None,
264
+ host: str = "localhost",
262
265
  log_level: str = None,
266
+ tray: bool = False,
263
267
  llamacpp_backend: str = None,
264
268
  ctx_size: int = None,
265
269
  ):
@@ -275,8 +279,9 @@ def run(
275
279
  if not server_previously_running:
276
280
  port, server_thread = serve(
277
281
  port=port,
282
+ host=host,
278
283
  log_level=log_level,
279
- tray=True,
284
+ tray=tray,
280
285
  use_thread=True,
281
286
  llamacpp_backend=llamacpp_backend,
282
287
  ctx_size=ctx_size,
@@ -291,7 +296,7 @@ def run(
291
296
  load(model_name, port)
292
297
 
293
298
  # Open the webapp with the specified model
294
- url = f"http://localhost:{port}/?model={model_name}#llm-chat"
299
+ url = f"http://{host}:{port}/?model={model_name}#llm-chat"
295
300
  print(f"You can now chat with {model_name} at {url}")
296
301
  webbrowser.open(url)
297
302
 
@@ -440,9 +445,36 @@ def list_models():
440
445
  print(tabulate(table_data, headers=headers, tablefmt="simple"))
441
446
 
442
447
 
448
def developer_entrypoint():
    """
    Entry point for the `lsdev` command: run the server CLI with debug logging.

    Behaves like `lemonade-server-dev serve --log-level debug [additional args]`:
    the tokens "serve --log-level debug" are inserted ahead of whatever
    arguments were given to lsdev, and main() is then invoked.

    sys.argv is put back to its original contents afterwards, even if main()
    raises or exits.
    """
    # Snapshot the real command line so it can be reinstated in `finally`
    saved_argv = list(sys.argv)
    forced_prefix = ["serve", "--log-level", "debug"]

    try:
        # Program name, then the forced subcommand/flags, then the user's own
        # arguments (slicing past the end simply yields an empty list)
        sys.argv = [sys.argv[0], *forced_prefix, *sys.argv[1:]]
        main()
    finally:
        sys.argv = saved_argv
470
+
471
+
443
472
  def _add_server_arguments(parser):
444
473
  """Add common server arguments to a parser"""
445
474
  parser.add_argument("--port", type=int, help="Port number to serve on")
475
+ parser.add_argument(
476
+ "--host", type=str, help="Address to bind for connections", default="localhost"
477
+ )
446
478
  parser.add_argument(
447
479
  "--log-level",
448
480
  type=str,
@@ -578,6 +610,7 @@ def main():
578
610
  sys.exit(ExitCodes.SERVER_ALREADY_RUNNING)
579
611
  serve(
580
612
  port=args.port,
613
+ host=args.host,
581
614
  log_level=args.log_level,
582
615
  tray=not args.no_tray,
583
616
  llamacpp_backend=args.llamacpp,
@@ -603,7 +636,9 @@ def main():
603
636
  run(
604
637
  args.model,
605
638
  port=args.port,
639
+ host=args.host,
606
640
  log_level=args.log_level,
641
+ tray=not args.no_tray,
607
642
  llamacpp_backend=args.llamacpp,
608
643
  ctx_size=args.ctx_size,
609
644
  )
@@ -43,7 +43,7 @@ class ModelManager:
43
43
  if "reasoning" in model_info:
44
44
  model_info["labels"] = (
45
45
  ["reasoning"]
46
- if not model_info["labels"]
46
+ if not model_info.get("labels", None)
47
47
  else model_info["labels"] + ["reasoning"]
48
48
  )
49
49
  del model_info["reasoning"]
@@ -114,6 +114,51 @@
114
114
  "recipe": "oga-npu",
115
115
  "suggested": true
116
116
  },
117
+ "DeepSeek-R1-Distill-Llama-8B-NPU": {
118
+ "checkpoint": "amd/DeepSeek-R1-Distill-Llama-8B-awq-g128-int4-asym-bf16-onnx-ryzen-strix",
119
+ "recipe": "oga-npu",
120
+ "suggested": true
121
+ },
122
+ "DeepSeek-R1-Distill-Qwen-7B-NPU": {
123
+ "checkpoint": "amd/DeepSeek-R1-Distill-Qwen-7B-awq-g128-int4-asym-bf16-onnx-ryzen-strix",
124
+ "recipe": "oga-npu",
125
+ "suggested": false
126
+ },
127
+ "DeepSeek-R1-Distill-Qwen-1.5B-NPU": {
128
+ "checkpoint": "amd/DeepSeek-R1-Distill-Qwen-1.5B-awq-g128-int4-asym-bf16-onnx-ryzen-strix",
129
+ "recipe": "oga-npu",
130
+ "suggested": false
131
+ },
132
+ "Llama-3.2-3B-Instruct-NPU": {
133
+ "checkpoint": "amd/Llama-3.2-3B-Instruct-awq-g128-int4-asym-bf16-onnx-ryzen-strix",
134
+ "recipe": "oga-npu",
135
+ "suggested": false
136
+ },
137
+ "Llama-3.2-1B-Instruct-NPU": {
138
+ "checkpoint": "amd/Llama-3.2-1B-Instruct-awq-g128-int4-asym-bf16-onnx-ryzen-strix",
139
+ "recipe": "oga-npu",
140
+ "suggested": false
141
+ },
142
+ "Mistral-7B-v0.3-Instruct-NPU": {
143
+ "checkpoint": "amd/Mistral-7B-Instruct-v0.3-awq-g128-int4-asym-bf16-onnx-ryzen-strix",
144
+ "recipe": "oga-npu",
145
+ "suggested": true
146
+ },
147
+ "Phi-3.5-Mini-Instruct-NPU": {
148
+ "checkpoint": "amd/Phi-3.5-mini-instruct-awq-g128-int4-asym-bf16-onnx-ryzen-strix",
149
+ "recipe": "oga-npu",
150
+ "suggested": true
151
+ },
152
+ "ChatGLM-3-6b-Instruct-NPU": {
153
+ "checkpoint": "amd/chatglm3-6b-awq-g128-int4-asym-bf16-onnx-ryzen-strix",
154
+ "recipe": "oga-npu",
155
+ "suggested": false
156
+ },
157
+ "AMD-OLMo-1B-Instruct-NPU": {
158
+ "checkpoint": "amd/AMD-OLMo-1B-SFT-DPO-awq-g128-int4-asym-bf16-onnx-ryzen-strix",
159
+ "recipe": "oga-npu",
160
+ "suggested": false
161
+ },
117
162
  "Llama-3.2-1B-Instruct-DirectML": {
118
163
  "checkpoint": "amd/Llama-3.2-1B-Instruct-dml-int4-awq-block-128-directml",
119
164
  "recipe": "oga-igpu",
@@ -1 +0,0 @@
1
- __version__ = "8.1.1"
File without changes
File without changes
File without changes
File without changes