PyPI - lemonade-sdk - Versions diffs - 8.1.1__tar.gz → 8.1.2__tar.gz - Mend

lemonade-sdk 8.1.1__tar.gz → 8.1.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of lemonade-sdk might be problematic. Click here for more details.

Files changed (78) hide show

{lemonade_sdk-8.1.1/src/lemonade_sdk.egg-info → lemonade_sdk-8.1.2}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: lemonade-sdk
-Version: 8.1.1
+Version: 8.1.2
 Summary: Lemonade SDK: Your LLM Aide for Validation and Deployment
 Author-email: lemonade@amd.com
 Requires-Python: >=3.10, <3.13

{lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/setup.py RENAMED Viewed

@@ -125,6 +125,7 @@ setup(
             "lemonade=lemonade:lemonadecli",
             "lemonade-install=lemonade_install:installcli",
             "lemonade-server-dev=lemonade_server.cli:main",
+            "lsdev=lemonade_server.cli:developer_entrypoint",
         ]
     },
     python_requires=">=3.10, <3.13",

{lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/common/inference_engines.py RENAMED Viewed

@@ -5,7 +5,6 @@ import importlib.metadata
 import subprocess
 from abc import ABC, abstractmethod
 from typing import Dict, Optional
-import transformers
 class InferenceEngineDetector:
@@ -352,6 +351,7 @@ class TransformersDetector(BaseEngineDetector):
         try:
             import torch
+            import transformers
             if device_type == "cpu":
                 result = {

{lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/tools/llamacpp/utils.py RENAMED Viewed

@@ -57,7 +57,7 @@ def identify_rocm_arch_from_name(device_name: str) -> str | None:
     return None
-def identify_rocm_arch_and_hip_id() -> tuple[str, str]:
+def identify_rocm_arch() -> str:
     """
     Identify the appropriate ROCm target architecture based on the device info
     Returns the ROCm target architecture, or None if no available AMD GPU name maps to one
@@ -68,21 +68,54 @@ def identify_rocm_arch_and_hip_id() -> tuple[str, str]:
     amd_igpu = system_info.get_amd_igpu_device()
     amd_dgpu = system_info.get_amd_dgpu_devices()
     target_arch = None
-    gpu_count = 0
     for gpu in [amd_igpu] + amd_dgpu:
         if gpu.get("available") and gpu.get("name"):
-            gpu_count += 1
             target_arch = identify_rocm_arch_from_name(gpu["name"].lower())
             if target_arch:
                 break
-    # Get HIP ID based on the number of GPUs available
-    # Here, we assume that the iGPU will always show up before the dGPUs (if available)
-    # We also assume that selecting the dGPU is preferred over the iGPU
-    # Multiple GPUs are not supported at the moment
-    hip_id = str(gpu_count - 1)
+    return target_arch
-    return target_arch, hip_id
+def identify_hip_id() -> int:
+    """
+    Identify the HIP device ID to use for the ROCm backend.
+    HIP devices whose names map to a supported ROCm architecture are preferred.
+    If none match, the last HIP device is used as a fallback; if several match,
+    the last matching device is used.
+    Returns:
+        The ID of the selected device, as reported by get_hip_devices().
+    Raises:
+        ValueError: If get_hip_devices() reports no devices.
+    """
+    # Get HIP devices
+    hip_devices = get_hip_devices()
+    logging.debug(f"HIP devices found: {hip_devices}")
+    if not hip_devices:
+        raise ValueError("No HIP devices found when identifying HIP ID")
+    # Identify HIP devices that are compatible with our ROCm builds.
+    # The name is lowercased for consistency with identify_rocm_arch(), which
+    # also lowercases before calling identify_rocm_arch_from_name().
+    rocm_devices = [
+        [device_id, device_name]
+        for device_id, device_name in hip_devices
+        if identify_rocm_arch_from_name(device_name.lower())
+    ]
+    logging.debug(f"ROCm devices found: {rocm_devices}")
+    # If no ROCm devices are found, use the last HIP device
+    # This might be needed in some scenarios where HIP reports generic device names
+    # Example: "AMD Radeon Graphics" for STX Halo iGPU on Ubuntu 24.04
+    if not rocm_devices:
+        rocm_devices = [hip_devices[-1]]
+        logging.warning(
+            "No ROCm devices found when identifying HIP ID. "
+            f"Falling back to the following device: {rocm_devices[0]}"
+        )
+    elif len(rocm_devices) > 1:
+        logging.warning(
+            f"Multiple ROCm devices found when identifying HIP ID: {rocm_devices}. "
+            "The last device will be used."
+        )
+    # Select the last device
+    device_selected = rocm_devices[-1]
+    logging.debug(f"Selected ROCm device: {device_selected}")
+    # Return the device ID (first element of the [id, name] pair)
+    return device_selected[0]
 def get_llama_version(backend: str) -> str:
@@ -277,7 +310,7 @@ def install_llamacpp(backend):
         target_arch = None
         if backend == "rocm":
             # Identify the target architecture
-            target_arch, hip_id = identify_rocm_arch_and_hip_id()
+            target_arch = identify_rocm_arch()
             if not target_arch:
                 system = platform.system().lower()
                 if system == "linux":
@@ -293,10 +326,6 @@ def install_llamacpp(backend):
                     f"for supported configurations. {hint}"
                 )
-            # Set HIP_VISIBLE_DEVICES=0 for igpu, =1 for dgpu
-            env_file_path = os.path.join(llama_server_exe_dir, ".env")
-            set_key(env_file_path, "HIP_VISIBLE_DEVICES", hip_id)
         # Direct download for Vulkan/ROCm
         llama_archive_url, filename = get_binary_url_and_filename(backend, target_arch)
         llama_archive_path = os.path.join(llama_server_exe_dir, filename)
@@ -315,6 +344,12 @@ def install_llamacpp(backend):
         else:
             raise NotImplementedError(f"Unsupported archive format: {filename}")
+        # Identify and set HIP ID
+        if backend == "rocm":
+            hip_id = identify_hip_id()
+            env_file_path = os.path.join(llama_server_exe_dir, ".env")
+            set_key(env_file_path, "HIP_VISIBLE_DEVICES", str(hip_id))
         # Make executable on Linux - need to update paths after extraction
         if platform.system().lower() == "linux":
             # Re-get the paths since extraction might have changed the directory structure
@@ -778,3 +813,68 @@ class LlamaCppAdapter(ModelAdapter):
             error_msg = f"Failed to run llama.cpp command: {str(e)}\n"
             error_msg += f"Command: {' '.join(cmd)}"
             raise Exception(error_msg)
+def get_hip_devices():
+    """
+    Get the list of HIP devices with their IDs and names.
+    The HIP runtime library is looked up inside the ROCm llama.cpp folder
+    (get_llama_folder_path("rocm")) and queried through ctypes.
+    Returns:
+        A list of [device_id, device_name] pairs. The list is empty if
+        hipGetDeviceCount fails; devices whose properties cannot be read
+        are skipped.
+    Raises:
+        RuntimeError: If the HIP runtime library cannot be found or loaded.
+    """
+    import ctypes
+    import sys
+    import os
+    import glob
+    from ctypes import c_int, POINTER
+    # Get llama.cpp path
+    rocm_path = get_llama_folder_path("rocm")
+    # Load HIP library
+    hip_library_pattern = (
+        "amdhip64*.dll" if sys.platform.startswith("win") else "libamdhip64*.so"
+    )
+    search_pattern = os.path.join(rocm_path, hip_library_pattern)
+    matching_files = glob.glob(search_pattern)
+    if not matching_files:
+        raise RuntimeError(
+            f"Could not find HIP runtime library matching pattern: {search_pattern}"
+        )
+    hip_library_path = matching_files[0]
+    try:
+        libhip = ctypes.CDLL(hip_library_path)
+    except OSError as err:
+        # Report the file that failed to load and keep the original cause
+        raise RuntimeError(
+            f"Could not load HIP runtime library from {hip_library_path}"
+        ) from err
+    # Setup function signatures
+    hipError_t = c_int
+    # Opaque byte buffer standing in for the C struct.
+    # NOTE(review): assumes hipDeviceProp_t fits in 2048 bytes - confirm
+    # against the HIP headers of the bundled runtime.
+    hipDeviceProp_t = ctypes.c_char * 2048
+    libhip.hipGetDeviceCount.restype = hipError_t
+    libhip.hipGetDeviceCount.argtypes = [POINTER(c_int)]
+    libhip.hipGetDeviceProperties.restype = hipError_t
+    libhip.hipGetDeviceProperties.argtypes = [POINTER(hipDeviceProp_t), c_int]
+    libhip.hipGetErrorString.restype = ctypes.c_char_p
+    libhip.hipGetErrorString.argtypes = [hipError_t]
+    # Get device count
+    device_count = c_int()
+    err = libhip.hipGetDeviceCount(ctypes.byref(device_count))
+    if err != 0:
+        logging.error(
+            "hipGetDeviceCount failed: %s", libhip.hipGetErrorString(err).decode()
+        )
+        return []
+    # Get device properties
+    devices = []
+    for i in range(device_count.value):
+        prop = hipDeviceProp_t()
+        err = libhip.hipGetDeviceProperties(ctypes.byref(prop), i)
+        if err != 0:
+            logging.error(
+                "hipGetDeviceProperties failed for device %s: %s",
+                i,
+                libhip.hipGetErrorString(err).decode(),
+            )
+            continue
+        # Extract device name from HIP device properties.
+        # NOTE(review): assumes the name is the leading 256-byte char array of
+        # hipDeviceProp_t - confirm. The C string ends at the first NUL, so
+        # anything after it is discarded.
+        raw_name = ctypes.string_at(prop, 256).split(b"\x00", 1)[0]
+        device_name = raw_name.decode("utf-8", errors="replace")
+        devices.append([i, device_name])
+    return devices

{lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/tools/management_tools.py RENAMED Viewed

@@ -109,7 +109,7 @@ class Cache(ManagementTool):
     # pylint: disable=pointless-statement,f-string-without-interpolation
     f"""
     A set of functions for managing the lemonade build cache. The default
-    cache location is {lemonade_cache.DEFAULT_CACHE_DIR}, and can also be
+    cache location is {lemonade_cache.DEFAULT_CACHE_DIR}, and can also be
     selected with
     the global --cache-dir option or the LEMONADE_CACHE_DIR environment variable.

{lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/tools/server/serve.py RENAMED Viewed

@@ -72,6 +72,7 @@ if platform.system() == "Windows":
 DEFAULT_PORT = 8000
+DEFAULT_HOST = "localhost"
 DEFAULT_LOG_LEVEL = "info"
 DEFAULT_LLAMACPP_BACKEND = "vulkan"
 DEFAULT_CTX_SIZE = 4096
@@ -150,6 +151,7 @@ class Server:
     def __init__(
         self,
         port: int = DEFAULT_PORT,
+        host: str = DEFAULT_HOST,
         log_level: str = DEFAULT_LOG_LEVEL,
         ctx_size: int = DEFAULT_CTX_SIZE,
         tray: bool = False,
@@ -160,6 +162,7 @@ class Server:
         # Save args as members
         self.port = port
+        self.host = host
         self.log_level = log_level
         self.ctx_size = ctx_size
         self.tray = tray
@@ -332,6 +335,9 @@ class Server:
         # Let the app know what port it's running on, so
         # that the lifespan can access it
         self.app.port = self.port
+        # FastAPI already has a `host` function and we cannot use `_host` as
+        # PyLint will believe it's private
+        self.app.host_ = self.host
     def run(self):
         # Common setup
@@ -340,9 +346,7 @@ class Server:
             tray=self.tray,
         )
-        uvicorn.run(
-            self.app, host="localhost", port=self.port, log_level=self.log_level
-        )
+        uvicorn.run(self.app, host=self.host, port=self.port, log_level=self.log_level)
     def run_in_thread(self, host: str = "localhost"):
         """

{lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/tools/server/static/webapp.html RENAMED Viewed

@@ -369,7 +369,8 @@
     // Helper to get server base URL
     function getServerBaseUrl() {
         const port = window.SERVER_PORT || 8000;
-        return `http://localhost:${port}`;
+        const host = window.location.hostname || 'localhost';
+        return `http://${host}:${port}`;
     }
     // Check if current model supports vision

{lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/tools/server/tray.py RENAMED Viewed

@@ -427,7 +427,7 @@ class LemonadeTray(SystemTray):
         Start the uvicorn server.
         """
         self.server = self.server_factory()
-        self.server.uvicorn_server = self.server.run_in_thread()
+        self.server.uvicorn_server = self.server.run_in_thread(self.server.host)
         self.server.uvicorn_server.run()
     def run(self):

{lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/tools/server/utils/port.py RENAMED Viewed

@@ -43,7 +43,7 @@ async def lifespan(app: FastAPI):
             "\n"
             "\n"
             "🍋  Lemonade Server Ready!\n"
-            f"🍋    Open http://localhost:{app.port} in your browser for:\n"
+            f"🍋    Open http://{app.host_}:{app.port} in your browser for:\n"
             "🍋      💬 chat\n"
             "🍋      💻 model management\n"
             "🍋      📄 docs\n"
@@ -53,7 +53,7 @@ async def lifespan(app: FastAPI):
             "\n"
             "\n"
             "[Lemonade]  Lemonade Server Ready!\n"
-            f"[Lemonade]    Open http://localhost:{app.port} in your browser for:\n"
+            f"[Lemonade]    Open http://{app.host_}:{app.port} in your browser for:\n"
             "[Lemonade]      chat\n"
             "[Lemonade]      model management\n"
             "[Lemonade]      docs\n"

lemonade_sdk-8.1.2/src/lemonade/version.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ __version__ = "8.1.2"

{lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2/src/lemonade_sdk.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: lemonade-sdk
-Version: 8.1.1
+Version: 8.1.2
 Summary: Lemonade SDK: Your LLM Aide for Validation and Deployment
 Author-email: lemonade@amd.com
 Requires-Python: >=3.10, <3.13

{lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade_sdk.egg-info/entry_points.txt RENAMED Viewed

@@ -2,3 +2,4 @@
 lemonade = lemonade:lemonadecli
 lemonade-install = lemonade_install:installcli
 lemonade-server-dev = lemonade_server.cli:main
+lsdev = lemonade_server.cli:developer_entrypoint

{lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade_server/cli.py RENAMED Viewed

@@ -47,6 +47,7 @@ class ModelLoadError(Exception):
 def serve(
     port: int = None,
+    host: str = "localhost",
     log_level: str = None,
     tray: bool = False,
     use_thread: bool = False,
@@ -79,6 +80,7 @@ def serve(
     # Start the server
     server = Server(
         port=port,
+        host=host,
         log_level=log_level,
         ctx_size=ctx_size,
         tray=tray,
@@ -259,7 +261,9 @@ def delete(model_names: List[str]):
 def run(
     model_name: str,
     port: int = None,
+    host: str = "localhost",
     log_level: str = None,
+    tray: bool = False,
     llamacpp_backend: str = None,
     ctx_size: int = None,
 ):
@@ -275,8 +279,9 @@ def run(
     if not server_previously_running:
         port, server_thread = serve(
             port=port,
+            host=host,
             log_level=log_level,
-            tray=True,
+            tray=tray,
             use_thread=True,
             llamacpp_backend=llamacpp_backend,
             ctx_size=ctx_size,
@@ -291,7 +296,7 @@ def run(
     load(model_name, port)
     # Open the webapp with the specified model
-    url = f"http://localhost:{port}/?model={model_name}#llm-chat"
+    url = f"http://{host}:{port}/?model={model_name}#llm-chat"
     print(f"You can now chat with {model_name} at {url}")
     webbrowser.open(url)
@@ -440,9 +445,36 @@ def list_models():
     print(tabulate(table_data, headers=headers, tablefmt="simple"))
+def developer_entrypoint():
+    """
+    Entry point for the lsdev command: start the server with debug logging.
+    Behaves like `lemonade-server-dev serve --log-level debug [additional args]`:
+    "serve --log-level debug" is inserted ahead of whatever arguments were
+    passed to lsdev, and main() is then invoked. sys.argv is put back to its
+    original contents afterwards, even if main() raises.
+    """
+    # Snapshot the command line so it can be restored on the way out
+    saved_argv = sys.argv.copy()
+    program, extra_args = saved_argv[0], saved_argv[1:]
+    try:
+        # Simulate "serve --log-level debug" followed by the user's arguments
+        sys.argv = [program, "serve", "--log-level", "debug", *extra_args]
+        main()
+    finally:
+        sys.argv = saved_argv
 def _add_server_arguments(parser):
     """Add common server arguments to a parser"""
     parser.add_argument("--port", type=int, help="Port number to serve on")
+    parser.add_argument(
+        "--host", type=str, help="Address to bind for connections", default="localhost"
+    )
     parser.add_argument(
         "--log-level",
         type=str,
@@ -578,6 +610,7 @@ def main():
             sys.exit(ExitCodes.SERVER_ALREADY_RUNNING)
         serve(
             port=args.port,
+            host=args.host,
             log_level=args.log_level,
             tray=not args.no_tray,
             llamacpp_backend=args.llamacpp,
@@ -603,7 +636,9 @@ def main():
         run(
             args.model,
             port=args.port,
+            host=args.host,
             log_level=args.log_level,
+            tray=not args.no_tray,
             llamacpp_backend=args.llamacpp,
             ctx_size=args.ctx_size,
         )

{lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade_server/model_manager.py RENAMED Viewed

@@ -43,7 +43,7 @@ class ModelManager:
                 if "reasoning" in model_info:
                     model_info["labels"] = (
                         ["reasoning"]
-                        if not model_info["labels"]
+                        if not model_info.get("labels", None)
                         else model_info["labels"] + ["reasoning"]
                     )
                     del model_info["reasoning"]

{lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade_server/server_models.json RENAMED Viewed

@@ -114,6 +114,51 @@
         "recipe": "oga-npu",
         "suggested": true
     },
+    "DeepSeek-R1-Distill-Llama-8B-NPU": {
+        "checkpoint": "amd/DeepSeek-R1-Distill-Llama-8B-awq-g128-int4-asym-bf16-onnx-ryzen-strix",
+        "recipe": "oga-npu",
+        "suggested": true
+    },
+    "DeepSeek-R1-Distill-Qwen-7B-NPU": {
+        "checkpoint": "amd/DeepSeek-R1-Distill-Qwen-7B-awq-g128-int4-asym-bf16-onnx-ryzen-strix",
+        "recipe": "oga-npu",
+        "suggested": false
+    },
+    "DeepSeek-R1-Distill-Qwen-1.5B-NPU": {
+        "checkpoint": "amd/DeepSeek-R1-Distill-Qwen-1.5B-awq-g128-int4-asym-bf16-onnx-ryzen-strix",
+        "recipe": "oga-npu",
+        "suggested": false
+    },
+    "Llama-3.2-3B-Instruct-NPU": {
+        "checkpoint": "amd/Llama-3.2-3B-Instruct-awq-g128-int4-asym-bf16-onnx-ryzen-strix",
+        "recipe": "oga-npu",
+        "suggested": false
+    },
+    "Llama-3.2-1B-Instruct-NPU": {
+        "checkpoint": "amd/Llama-3.2-1B-Instruct-awq-g128-int4-asym-bf16-onnx-ryzen-strix",
+        "recipe": "oga-npu",
+        "suggested": false
+    },
+    "Mistral-7B-v0.3-Instruct-NPU": {
+        "checkpoint": "amd/Mistral-7B-Instruct-v0.3-awq-g128-int4-asym-bf16-onnx-ryzen-strix",
+        "recipe": "oga-npu",
+        "suggested": true
+    },
+    "Phi-3.5-Mini-Instruct-NPU": {
+        "checkpoint": "amd/Phi-3.5-mini-instruct-awq-g128-int4-asym-bf16-onnx-ryzen-strix",
+        "recipe": "oga-npu",
+        "suggested": true
+    },
+    "ChatGLM-3-6b-Instruct-NPU": {
+        "checkpoint": "amd/chatglm3-6b-awq-g128-int4-asym-bf16-onnx-ryzen-strix",
+        "recipe": "oga-npu",
+        "suggested": false
+    },
+    "AMD-OLMo-1B-Instruct-NPU": {
+        "checkpoint": "amd/AMD-OLMo-1B-SFT-DPO-awq-g128-int4-asym-bf16-onnx-ryzen-strix",
+        "recipe": "oga-npu",
+        "suggested": false
+    },
     "Llama-3.2-1B-Instruct-DirectML": {
         "checkpoint": "amd/Llama-3.2-1B-Instruct-dml-int4-awq-block-128-directml",
         "recipe": "oga-igpu",