lemonade-sdk 9.1.1 (py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lemonade/__init__.py +5 -0
- lemonade/api.py +180 -0
- lemonade/cache.py +92 -0
- lemonade/cli.py +173 -0
- lemonade/common/__init__.py +0 -0
- lemonade/common/build.py +176 -0
- lemonade/common/cli_helpers.py +139 -0
- lemonade/common/exceptions.py +98 -0
- lemonade/common/filesystem.py +368 -0
- lemonade/common/inference_engines.py +408 -0
- lemonade/common/network.py +93 -0
- lemonade/common/printing.py +110 -0
- lemonade/common/status.py +471 -0
- lemonade/common/system_info.py +1411 -0
- lemonade/common/test_helpers.py +28 -0
- lemonade/profilers/__init__.py +1 -0
- lemonade/profilers/agt_power.py +437 -0
- lemonade/profilers/hwinfo_power.py +429 -0
- lemonade/profilers/memory_tracker.py +259 -0
- lemonade/profilers/profiler.py +58 -0
- lemonade/sequence.py +363 -0
- lemonade/state.py +159 -0
- lemonade/tools/__init__.py +1 -0
- lemonade/tools/accuracy.py +432 -0
- lemonade/tools/adapter.py +114 -0
- lemonade/tools/bench.py +302 -0
- lemonade/tools/flm/__init__.py +1 -0
- lemonade/tools/flm/utils.py +305 -0
- lemonade/tools/huggingface/bench.py +187 -0
- lemonade/tools/huggingface/load.py +235 -0
- lemonade/tools/huggingface/utils.py +359 -0
- lemonade/tools/humaneval.py +264 -0
- lemonade/tools/llamacpp/bench.py +255 -0
- lemonade/tools/llamacpp/load.py +222 -0
- lemonade/tools/llamacpp/utils.py +1260 -0
- lemonade/tools/management_tools.py +319 -0
- lemonade/tools/mmlu.py +319 -0
- lemonade/tools/oga/__init__.py +0 -0
- lemonade/tools/oga/bench.py +120 -0
- lemonade/tools/oga/load.py +804 -0
- lemonade/tools/oga/migration.py +403 -0
- lemonade/tools/oga/utils.py +462 -0
- lemonade/tools/perplexity.py +147 -0
- lemonade/tools/prompt.py +263 -0
- lemonade/tools/report/__init__.py +0 -0
- lemonade/tools/report/llm_report.py +203 -0
- lemonade/tools/report/table.py +899 -0
- lemonade/tools/server/__init__.py +0 -0
- lemonade/tools/server/flm.py +133 -0
- lemonade/tools/server/llamacpp.py +320 -0
- lemonade/tools/server/serve.py +2123 -0
- lemonade/tools/server/static/favicon.ico +0 -0
- lemonade/tools/server/static/index.html +279 -0
- lemonade/tools/server/static/js/chat.js +1059 -0
- lemonade/tools/server/static/js/model-settings.js +183 -0
- lemonade/tools/server/static/js/models.js +1395 -0
- lemonade/tools/server/static/js/shared.js +556 -0
- lemonade/tools/server/static/logs.html +191 -0
- lemonade/tools/server/static/styles.css +2654 -0
- lemonade/tools/server/static/webapp.html +321 -0
- lemonade/tools/server/tool_calls.py +153 -0
- lemonade/tools/server/tray.py +664 -0
- lemonade/tools/server/utils/macos_tray.py +226 -0
- lemonade/tools/server/utils/port.py +77 -0
- lemonade/tools/server/utils/thread.py +85 -0
- lemonade/tools/server/utils/windows_tray.py +408 -0
- lemonade/tools/server/webapp.py +34 -0
- lemonade/tools/server/wrapped_server.py +559 -0
- lemonade/tools/tool.py +374 -0
- lemonade/version.py +1 -0
- lemonade_install/__init__.py +1 -0
- lemonade_install/install.py +239 -0
- lemonade_sdk-9.1.1.dist-info/METADATA +276 -0
- lemonade_sdk-9.1.1.dist-info/RECORD +84 -0
- lemonade_sdk-9.1.1.dist-info/WHEEL +5 -0
- lemonade_sdk-9.1.1.dist-info/entry_points.txt +5 -0
- lemonade_sdk-9.1.1.dist-info/licenses/LICENSE +201 -0
- lemonade_sdk-9.1.1.dist-info/licenses/NOTICE.md +47 -0
- lemonade_sdk-9.1.1.dist-info/top_level.txt +3 -0
- lemonade_server/cli.py +805 -0
- lemonade_server/model_manager.py +758 -0
- lemonade_server/pydantic_models.py +159 -0
- lemonade_server/server_models.json +643 -0
- lemonade_server/settings.py +39 -0
lemonade/common/inference_engines.py
@@ -0,0 +1,408 @@
+import os
+import sys
+import importlib.util
+import importlib.metadata
+import subprocess
+from abc import ABC, abstractmethod
+from typing import Dict, Optional
+
+
+class InferenceEngineDetector:
+    """
+    Main class for detecting inference engine availability.
+    """
+
+    def __init__(self):
+        self.oga_detector = OGADetector()
+        self.llamacpp_detector = LlamaCppDetector()
+        self.transformers_detector = TransformersDetector()
+
+    def detect_engines_for_device(
+        self, device_type: str, device_name: str
+    ) -> Dict[str, Dict]:
+        """
+        Detect all available inference engines for a specific device type.
+
+        Args:
+            device_type: "cpu", "amd_igpu", "amd_dgpu", "nvidia_dgpu", or "npu"
+
+        Returns:
+            dict: Engine availability information
+        """
+        engines = {}
+
+        # Detect OGA availability
+        oga_info = self.oga_detector.detect_for_device(device_type)
+        if oga_info:
+            engines["oga"] = oga_info
+
+        # Detect llama.cpp vulkan availability
+        llamacpp_info = self.llamacpp_detector.detect_for_device(
+            device_type, device_name, "vulkan"
+        )
+        if llamacpp_info:
+            engines["llamacpp-vulkan"] = llamacpp_info
+
+        # Detect llama.cpp rocm availability
+        llamacpp_info = self.llamacpp_detector.detect_for_device(
+            device_type, device_name, "rocm"
+        )
+        if llamacpp_info:
+            engines["llamacpp-rocm"] = llamacpp_info
+
+        # Detect Transformers availability
+        transformers_info = self.transformers_detector.detect_for_device(device_type)
+        if transformers_info:
+            engines["transformers"] = transformers_info
+
+        return engines
+
+
+class BaseEngineDetector(ABC):
+    """
+    Base class for engine-specific detectors.
+    """
+
+    @abstractmethod
+    def detect_for_device(self, device_type: str) -> Optional[Dict]:
+        """
+        Detect engine availability for specific device type.
+        """
+
+    @abstractmethod
+    def is_installed(self) -> bool:
+        """
+        Check if the engine package/binary is installed.
+        """
+
+
+class OGADetector(BaseEngineDetector):
+    """
+    Detector for ONNX Runtime GenAI (OGA).
+    """
+
+    def detect_for_device(self, device_type: str) -> Optional[Dict]:
+        """
+        Detect OGA availability for specific device.
+        """
+        # Check package installation based on device type
+        if device_type == "npu":
+            if not self.is_npu_package_installed():
+                return {
+                    "available": False,
+                    "error": "NPU packages not installed (need "
+                    "onnxruntime-genai-directml-ryzenai or onnxruntime-vitisai)",
+                }
+        else:
+            # For other devices, check general OGA installation
+            if not self.is_installed():
+                return None
+
+        try:
+            import onnxruntime as ort
+
+            # Map device types to ORT providers
+            device_provider_map = {
+                "cpu": "cpu",
+                "amd_igpu": "dml",
+                "amd_dgpu": "dml",
+                "npu": "vitisai",
+            }
+
+            if device_type not in device_provider_map:
+                return None
+
+            backend = device_provider_map[device_type]
+
+            # Map backends to ORT provider names
+            provider_map = {
+                "cpu": "CPUExecutionProvider",
+                "dml": "DmlExecutionProvider",
+                "vitisai": "VitisAIExecutionProvider",
+            }
+
+            required_provider = provider_map[backend]
+            available_providers = ort.get_available_providers()
+
+            if required_provider in available_providers:
+                result = {
+                    "available": True,
+                    "version": self._get_oga_version(device_type),
+                    "backend": backend,
+                }
+
+                # Add dependency versions in details
+                result["details"] = {
+                    "dependency_versions": {"onnxruntime": ort.__version__}
+                }
+
+                return result
+            else:
+                if device_type == "npu":
+                    error_msg = (
+                        "VitisAI provider not available - "
+                        "check AMD NPU driver installation"
+                    )
+                else:
+                    error_msg = f"{backend.upper()} provider not available"
+
+                return {
+                    "available": False,
+                    "error": error_msg,
+                }
+
+        except (ImportError, AttributeError) as e:
+            return {"available": False, "error": f"OGA detection failed: {str(e)}"}
+
+    def is_installed(self) -> bool:
+        """
+        Check if OGA is installed.
+        """
+        return importlib.util.find_spec("onnxruntime_genai") is not None
+
+    def is_npu_package_installed(self) -> bool:
+        """
+        Check if NPU-specific OGA packages are installed.
+        """
+        try:
+
+            installed_packages = [
+                dist.metadata["name"].lower()
+                for dist in importlib.metadata.distributions()
+            ]
+
+            # Check for NPU-specific packages
+            npu_packages = ["onnxruntime-genai-directml-ryzenai", "onnxruntime-vitisai"]
+
+            for package in npu_packages:
+                if package.lower() in installed_packages:
+                    return True
+            return False
+        except (ImportError, AttributeError):
+            return False
+
+    def _get_oga_version(self, device_type: str) -> str:
+        """
+        Get OGA version.
+        """
+        try:
+            # For NPU, try NPU-specific packages first
+            if device_type == "npu":
+                try:
+                    import onnxruntime_genai_directml_ryzenai as og
+
+                    return og.__version__
+                except ImportError:
+                    pass
+
+                try:
+                    import onnxruntime_vitisai as og
+
+                    return og.__version__
+                except ImportError:
+                    pass
+
+            # Fall back to general onnxruntime_genai
+            import onnxruntime_genai as og
+
+            return og.__version__
+        except (ImportError, AttributeError):
+            return "unknown"
+
+
+class LlamaCppDetector(BaseEngineDetector):
+    """
+    Detector for llama.cpp.
+    """
+
+    def detect_for_device(
+        self, device_type: str, device_name: str, backend: str
+    ) -> Optional[Dict]:
+        """
+        Detect llama.cpp availability for specific device.
+        """
+        try:
+
+            if device_type not in ["cpu", "amd_igpu", "amd_dgpu", "nvidia_dgpu"]:
+                return None
+
+            # Check if the device is supported by the backend
+            if device_type == "cpu":
+                device_supported = True
+            elif device_type in ["amd_igpu", "amd_dgpu"]:
+                if backend == "vulkan":
+                    device_supported = self._check_vulkan_support()
+                elif backend == "rocm":
+                    device_supported = self._check_rocm_support(device_name.lower())
+                else:
+                    device_supported = False
+            elif device_type == "nvidia_dgpu":
+                if backend == "vulkan":
+                    device_supported = self._check_vulkan_support()
+                else:
+                    device_supported = False
+            else:
+                device_supported = False
+            if not device_supported:
+                return {"available": False, "error": f"{backend} not available"}
+
+            is_installed = self.is_installed(backend)
+            if not is_installed:
+                return {
+                    "available": False,
+                    "error": f"{backend} binaries not installed",
+                }
+
+            return {
+                "available": True,
+                "version": self._get_llamacpp_version(backend),
+                "backend": backend,
+            }
+
+        except (ImportError, OSError, subprocess.SubprocessError) as e:
+            return {
+                "available": False,
+                "error": f"llama.cpp detection failed: {str(e)}",
+            }
+
+    def is_installed(self, backend: str) -> bool:
+        """
+        Check if llama.cpp binaries are available for any backend.
+        """
+        from lemonade.tools.llamacpp.utils import get_llama_server_exe_path
+
+        try:
+            server_exe_path = get_llama_server_exe_path(backend)
+            if os.path.exists(server_exe_path):
+                return True
+        except (ImportError, OSError, ValueError):
+            pass
+
+        return False
+
+    def _check_vulkan_support(self) -> bool:
+        """
+        Check if Vulkan is available for GPU acceleration.
+        """
+        try:
+            # Run vulkaninfo to check Vulkan availability
+            result = subprocess.run(
+                ["vulkaninfo", "--summary"],
+                capture_output=True,
+                text=True,
+                timeout=10,
+                check=False,
+            )
+            return result.returncode == 0
+        except (
+            subprocess.TimeoutExpired,
+            FileNotFoundError,
+            subprocess.SubprocessError,
+        ):
+            try:
+                # Check for Vulkan DLL on Windows
+                vulkan_dll_paths = [
+                    "C:\\Windows\\System32\\vulkan-1.dll",
+                    "C:\\Windows\\SysWOW64\\vulkan-1.dll",
+                ]
+                # Check for Vulkan libraries on Linux
+                vulkan_lib_paths = [
+                    "/usr/lib/x86_64-linux-gnu/libvulkan.so.1",
+                    "/usr/lib/libvulkan.so.1",
+                    "/lib/x86_64-linux-gnu/libvulkan.so.1",
+                ]
+                return any(os.path.exists(path) for path in vulkan_dll_paths) or any(
+                    os.path.exists(path) for path in vulkan_lib_paths
+                )
+            except OSError:
+                return False
+
+    def _check_rocm_support(self, device_name: str) -> bool:
+        """
+        Check if ROCM is available for GPU acceleration.
+        """
+        from lemonade.tools.llamacpp.utils import identify_rocm_arch_from_name
+
+        return identify_rocm_arch_from_name(device_name) is not None
+
+    def _get_llamacpp_version(self, backend: str) -> str:
+        """
+        Get llama.cpp version from lemonade's managed installation for specific backend.
+        """
+        try:
+            # Use backend-specific path - same logic as get_llama_folder_path in utils.py
+            # Uses sys.prefix to get the environment root (works for both venv and conda)
+            server_base_dir = os.path.join(sys.prefix, backend, "llama_server")
+            version_file = os.path.join(server_base_dir, "version.txt")
+
+            if os.path.exists(version_file):
+                with open(version_file, "r", encoding="utf-8") as f:
+                    version = f.read().strip()
+                    return version
+        except (ImportError, OSError):
+            pass
+
+        return "unknown"
+
+
+class TransformersDetector(BaseEngineDetector):
+    """
+    Detector for Transformers/PyTorch.
+    """
+
+    def detect_for_device(self, device_type: str) -> Optional[Dict]:
+        """
+        Detect Transformers availability for specific device.
+        """
+        if not self.is_installed():
+            return None
+
+        try:
+            import torch
+            import transformers
+
+            if device_type == "cpu":
+                result = {
+                    "available": True,
+                    "version": transformers.__version__,
+                    "backend": "cpu",
+                }
+
+                # Add dependency versions in details
+                result["details"] = {
+                    "dependency_versions": {"torch": torch.__version__}
+                }
+
+                return result
+            else:
+                return None
+
+        except (ImportError, AttributeError) as e:
+            return {
+                "available": False,
+                "error": f"Transformers detection failed: {str(e)}",
+            }
+
+    def is_installed(self) -> bool:
+        """
+        Check if Transformers and PyTorch are installed.
+        """
+        return (
+            importlib.util.find_spec("transformers") is not None
+            and importlib.util.find_spec("torch") is not None
+        )
+
+
+def detect_inference_engines(device_type: str, device_name: str) -> Dict[str, Dict]:
+    """
+    Helper function to detect inference engines for a device type.
+
+    Args:
+        device_type: "cpu", "amd_igpu", "amd_dgpu", "nvidia_dgpu", or "npu"
+        device_name: device name
+
+    Returns:
+        dict: Engine availability information.
+    """
+    detector = InferenceEngineDetector()
+    return detector.detect_engines_for_device(device_type, device_name)
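
For context, a minimal usage sketch of the helper this module exposes; it is not part of the package, and the device name string below is illustrative only:

# Hypothetical example: query which inference engines are usable on the local CPU.
from lemonade.common.inference_engines import detect_inference_engines

engines = detect_inference_engines("cpu", "Generic x86 CPU")  # device name is illustrative
for name, info in engines.items():
    if info.get("available"):
        print(f"{name}: available (version {info.get('version')})")
    else:
        print(f"{name}: unavailable ({info.get('error')})")
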
lemonade/common/network.py
@@ -0,0 +1,93 @@
+import os
+from typing import Optional
+import socket
+from huggingface_hub import model_info, snapshot_download
+from huggingface_hub.errors import LocalEntryNotFoundError
+
+
+def is_offline():
+    """
+    Check if the system is offline by attempting to connect to huggingface.co.
+
+    Returns:
+        bool: True if the system is offline (cannot connect to huggingface.co),
+        False otherwise.
+    """
+    if os.environ.get("LEMONADE_OFFLINE"):
+        return True
+    try:
+        socket.gethostbyname("huggingface.co")
+        return False
+    except socket.gaierror:
+        return True
+
+
+def get_base_model(checkpoint: str) -> Optional[str]:
+    """
+    Get the base model information for a given checkpoint from the Hugging Face Hub.
+    Will auto-detect if we're offline and skip the network call in that case.
+
+    Args:
+        checkpoint: The model checkpoint to query
+
+    Returns:
+        The base model name if found, or None if not found or error occurs
+    """
+    # Skip network call in offline mode
+    if is_offline():
+        return None
+
+    try:
+        info = model_info(checkpoint)
+        if info.cardData and "base_model" in info.cardData:
+            if info.cardData["base_model"] is not None:
+                # This is a derived model
+                return info.cardData["base_model"]
+            else:
+                # This is itself a base model
+                return [checkpoint]
+    except Exception:  # pylint: disable=broad-except
+        pass
+    return None
+
+
+def _symlink_safe_snapshot_download(repo_id, **kwargs):
+    """
+    Custom snapshot download with retry logic for Windows symlink privilege errors.
+    """
+
+    for attempt in range(2):
+        try:
+            return snapshot_download(repo_id=repo_id, **kwargs)
+        except OSError as e:
+            if (
+                hasattr(e, "winerror")
+                and e.winerror == 1314  # pylint: disable=no-member
+                and attempt < 1
+            ):
+                continue
+            raise
+
+
+def custom_snapshot_download(repo_id, do_not_upgrade=False, **kwargs):
+    """
+    Custom snapshot download with:
+    1) retry logic for Windows symlink privilege errors.
+    2) do_not_upgrade allows the caller to prioritize a local copy
+    of the model over an upgraded remote copy.
+    """
+
+    if do_not_upgrade:
+        try:
+            # Prioritize the local model, if available
+            return _symlink_safe_snapshot_download(
+                repo_id, local_files_only=True, **kwargs
+            )
+        except LocalEntryNotFoundError:
+            # LocalEntryNotFoundError means there was no local model, at this point
+            # we'll accept a remote model
+            return _symlink_safe_snapshot_download(
+                repo_id, local_files_only=False, **kwargs
+            )
+    else:
+        return _symlink_safe_snapshot_download(repo_id, **kwargs)
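
For context, a minimal sketch of how the download helper above might be called; it is not part of the package, and the checkpoint name is illustrative only:

# Hypothetical example: prefer an already-downloaded copy of a checkpoint and
# only reach the Hugging Face Hub when no local snapshot exists.
from lemonade.common.network import custom_snapshot_download, is_offline

checkpoint = "example-org/example-model"  # illustrative repo id
if not is_offline():
    local_path = custom_snapshot_download(checkpoint, do_not_upgrade=True)
    print(f"Snapshot available at {local_path}")
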
lemonade/common/printing.py
@@ -0,0 +1,110 @@
+import os
+import re
+import enum
+import sys
+import math
+
+
+class Colors:
+    HEADER = "\033[95m"
+    OKBLUE = "\033[94m"
+    OKCYAN = "\033[96m"
+    OKGREEN = "\033[92m"
+    WARNING = "\033[93m"
+    FAIL = "\033[91m"
+    ENDC = "\033[0m"
+    BOLD = "\033[1m"
+    UNDERLINE = "\033[4m"
+
+
+def log(txt, c=Colors.ENDC, end="", is_error=False):
+    logn(txt, c=c, end=end, is_error=is_error)
+
+
+def logn(txt, c=Colors.ENDC, end="\n", is_error=False):
+    file = sys.stderr if is_error else sys.stdout
+    print(c + txt + Colors.ENDC, end=end, flush=True, file=file)
+
+
+class LogType(enum.Enum):
+    ERROR = "Error:"
+    SUCCESS = "Woohoo!"
+    WARNING = "Warning:"
+    INFO = "Info:"
+
+
+def clean_print(type: LogType, msg):
+    # Replace path to user's home directory by a tilde symbol (~)
+    home_directory = os.path.expanduser("~")
+    home_directory_escaped = re.escape(home_directory)
+    msg = re.sub(home_directory_escaped, "~", msg)
+
+    # Split message into list, remove leading spaces and line breaks
+    msg = msg.split("\n")
+    msg = [line.lstrip() for line in msg]
+    while msg[0] == "" and len(msg) > 1:
+        msg.pop(0)
+
+    # Print message
+    indentation = len(type.value) + 1
+    if type == LogType.ERROR:
+        log(f"\n{type.value} ".rjust(indentation), c=Colors.FAIL, is_error=True)
+    elif type == LogType.SUCCESS:
+        log(f"\n{type.value} ".rjust(indentation), c=Colors.OKGREEN)
+    elif type == LogType.WARNING:
+        log(f"\n{type.value} ".rjust(indentation), c=Colors.WARNING)
+    elif type == LogType.INFO:
+        log(f"\n{type.value} ".rjust(indentation), c=Colors.OKCYAN)
+
+    is_error = type == LogType.ERROR
+    for line_idx, line in enumerate(msg):
+        if line_idx != 0:
+            log(" " * indentation)
+        s_line = line.split("**")
+        for idx, l in enumerate(s_line):
+            c = Colors.ENDC if idx % 2 == 0 else Colors.BOLD
+            if idx != len(s_line) - 1:
+                log(l, c=c, is_error=is_error)
+            else:
+                logn(l, c=c, is_error=is_error)
+
+
+def log_error(msg):
+    clean_print(LogType.ERROR, str(msg))
+    # ASCII art credit:
+    # https://textart4u.blogspot.com/2014/05/the-fail-whale-ascii-art-code.html
+    logn(
+        """\n▄██████████████▄▐█▄▄▄▄█▌
+██████▌▄▌▄▐▐▌███▌▀▀██▀▀
+████▄█▌▄▌▄▐▐▌▀███▄▄█▌
+▄▄▄▄▄██████████████\n\n""",
+        is_error=True,
+    )
+
+
+def log_success(msg):
+    clean_print(LogType.SUCCESS, msg)
+
+
+def log_warning(msg):
+    clean_print(LogType.WARNING, msg)
+
+
+def log_info(msg):
+    clean_print(LogType.INFO, msg)
+
+
+def list_table(list, padding=25, num_cols=4):
+    lines_per_column = int(math.ceil(len(list) / num_cols))
+    for i in range(lines_per_column):
+        for col in range(num_cols):
+            if i + col * lines_per_column < len(list):
+                print(
+                    list[i + col * lines_per_column].ljust(padding),
+                    end="",
+                )
+        print("\n\t", end="")
+
+
+# This file was originally licensed under Apache 2.0. It has been modified.
+# Modifications Copyright (c) 2025 AMD
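
For context, a minimal sketch of the printing helpers above in use; it is not part of the package, and the messages are illustrative only:

# Hypothetical example: text between ** markers is rendered in bold by
# clean_print, and list_table lays items out in columns.
from lemonade.common.printing import log_info, log_warning, list_table

log_info("Loading **example-model** from the local cache")
log_warning("No GPU backend detected, falling back to CPU")
list_table(["oga", "llamacpp-vulkan", "llamacpp-rocm", "transformers"], padding=20, num_cols=2)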