lemonade-sdk 8.1.12__tar.gz → 8.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of lemonade-sdk might be problematic.
Files changed (91)
  1. {lemonade_sdk-8.1.12/src/lemonade_sdk.egg-info → lemonade_sdk-8.2.0}/PKG-INFO +4 -3
  2. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/setup.py +3 -2
  3. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/common/system_info.py +0 -26
  4. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/flm/utils.py +70 -22
  5. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/llamacpp/load.py +10 -1
  6. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/llamacpp/utils.py +82 -8
  7. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/oga/load.py +38 -142
  8. lemonade_sdk-8.2.0/src/lemonade/tools/oga/migration.py +403 -0
  9. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/server/llamacpp.py +20 -1
  10. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/server/serve.py +334 -16
  11. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/server/static/js/models.js +416 -18
  12. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/server/static/js/shared.js +41 -4
  13. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/server/static/styles.css +204 -0
  14. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/server/static/webapp.html +32 -0
  15. lemonade_sdk-8.2.0/src/lemonade/version.py +1 -0
  16. lemonade_sdk-8.2.0/src/lemonade_install/install.py +239 -0
  17. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0/src/lemonade_sdk.egg-info}/PKG-INFO +4 -3
  18. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade_sdk.egg-info/SOURCES.txt +1 -0
  19. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade_sdk.egg-info/requires.txt +3 -2
  20. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade_server/cli.py +10 -0
  21. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade_server/model_manager.py +172 -11
  22. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade_server/server_models.json +94 -71
  23. lemonade_sdk-8.1.12/src/lemonade/version.py +0 -1
  24. lemonade_sdk-8.1.12/src/lemonade_install/install.py +0 -785
  25. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/LICENSE +0 -0
  26. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/NOTICE.md +0 -0
  27. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/README.md +0 -0
  28. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/pyproject.toml +0 -0
  29. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/setup.cfg +0 -0
  30. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/__init__.py +0 -0
  31. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/api.py +0 -0
  32. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/cache.py +0 -0
  33. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/cli.py +0 -0
  34. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/common/__init__.py +0 -0
  35. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/common/build.py +0 -0
  36. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/common/cli_helpers.py +0 -0
  37. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/common/exceptions.py +0 -0
  38. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/common/filesystem.py +0 -0
  39. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/common/inference_engines.py +0 -0
  40. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/common/network.py +0 -0
  41. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/common/printing.py +0 -0
  42. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/common/status.py +0 -0
  43. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/common/test_helpers.py +0 -0
  44. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/profilers/__init__.py +0 -0
  45. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/profilers/agt_power.py +0 -0
  46. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/profilers/hwinfo_power.py +0 -0
  47. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/profilers/memory_tracker.py +0 -0
  48. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/profilers/profiler.py +0 -0
  49. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/sequence.py +0 -0
  50. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/state.py +0 -0
  51. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/__init__.py +0 -0
  52. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/accuracy.py +0 -0
  53. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/adapter.py +0 -0
  54. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/bench.py +0 -0
  55. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/flm/__init__.py +0 -0
  56. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/huggingface/bench.py +0 -0
  57. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/huggingface/load.py +0 -0
  58. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/huggingface/utils.py +0 -0
  59. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/humaneval.py +0 -0
  60. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/llamacpp/bench.py +0 -0
  61. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/management_tools.py +0 -0
  62. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/mmlu.py +0 -0
  63. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/oga/__init__.py +0 -0
  64. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/oga/bench.py +0 -0
  65. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/oga/utils.py +0 -0
  66. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/perplexity.py +0 -0
  67. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/prompt.py +0 -0
  68. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/report/__init__.py +0 -0
  69. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/report/llm_report.py +0 -0
  70. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/report/table.py +0 -0
  71. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/server/__init__.py +0 -0
  72. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/server/flm.py +0 -0
  73. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/server/static/favicon.ico +0 -0
  74. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/server/static/js/chat.js +0 -0
  75. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/server/static/js/model-settings.js +0 -0
  76. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/server/static/logs.html +0 -0
  77. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/server/tool_calls.py +0 -0
  78. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/server/tray.py +0 -0
  79. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/server/utils/macos_tray.py +0 -0
  80. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/server/utils/port.py +0 -0
  81. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/server/utils/thread.py +0 -0
  82. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/server/utils/windows_tray.py +0 -0
  83. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/server/webapp.py +0 -0
  84. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/server/wrapped_server.py +0 -0
  85. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/tool.py +0 -0
  86. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade_install/__init__.py +0 -0
  87. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade_sdk.egg-info/dependency_links.txt +0 -0
  88. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade_sdk.egg-info/entry_points.txt +0 -0
  89. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade_sdk.egg-info/top_level.txt +0 -0
  90. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade_server/pydantic_models.py +0 -0
  91. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade_server/settings.py +0 -0
{lemonade_sdk-8.1.12/src/lemonade_sdk.egg-info → lemonade_sdk-8.2.0}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: lemonade-sdk
-Version: 8.1.12
+Version: 8.2.0
 Summary: Lemonade SDK: Your LLM Aide for Validation and Deployment
 Author-email: lemonade@amd.com
 Requires-Python: >=3.10, <3.14
@@ -29,12 +29,13 @@ Requires-Dist: tabulate
 Requires-Dist: sentencepiece
 Requires-Dist: huggingface-hub[hf_xet]==0.33.0
 Requires-Dist: python-dotenv
+Requires-Dist: python-multipart
 Requires-Dist: rumps>=0.4.0; sys_platform == "darwin"
 Provides-Extra: oga-ryzenai
-Requires-Dist: onnxruntime-genai-directml-ryzenai==0.7.0.2.1; extra == "oga-ryzenai"
+Requires-Dist: onnxruntime-genai-directml-ryzenai==0.9.2; extra == "oga-ryzenai"
 Requires-Dist: protobuf>=6.30.1; extra == "oga-ryzenai"
 Provides-Extra: oga-cpu
-Requires-Dist: onnxruntime-genai==0.8.2; extra == "oga-cpu"
+Requires-Dist: onnxruntime-genai==0.9.2; extra == "oga-cpu"
 Requires-Dist: onnxruntime>=1.22.0; extra == "oga-cpu"
 Provides-Extra: dev
 Requires-Dist: torch>=2.6.0; extra == "dev"
{lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/setup.py

@@ -49,6 +49,7 @@ setup(
         "sentencepiece",
         "huggingface-hub[hf_xet]==0.33.0",
         "python-dotenv",
+        "python-multipart",
         # macOS-specific dependencies
         "rumps>=0.4.0; sys_platform == 'darwin'",
     ],
@@ -57,11 +58,11 @@ setup(
         # applications, without including developer-focused tools
         # Primary NPU extra using unified PyPI package
         "oga-ryzenai": [
-            "onnxruntime-genai-directml-ryzenai==0.7.0.2.1",
+            "onnxruntime-genai-directml-ryzenai==0.9.2",
            "protobuf>=6.30.1",
         ],
         "oga-cpu": [
-            "onnxruntime-genai==0.8.2",
+            "onnxruntime-genai==0.9.2",
            "onnxruntime >=1.22.0",
         ],
         # Developer-focused tools for benchmarking, accuracy testing, and
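
One way to confirm the bumped pins after upgrading — an editor's sketch (not part of the package) using only the Python standard library, with package names taken from the hunks above:

    # Editor's sketch: report installed versions of the dependencies
    # whose pins changed in 8.2.0.
    from importlib.metadata import PackageNotFoundError, version

    for pkg in ("lemonade-sdk", "python-multipart", "onnxruntime-genai"):
        try:
            print(f"{pkg}: {version(pkg)}")
        except PackageNotFoundError:
            print(f"{pkg}: not installed")  # extras may be absent by design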
{lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/common/system_info.py

@@ -1110,32 +1110,6 @@ class LinuxSystemInfo(SystemInfo):
 
         return ""
 
-    def _get_nvidia_vram_smi_linux(self) -> float:
-        """
-        Get NVIDIA GPU VRAM on Linux using nvidia-smi command.
-
-        Returns:
-            float: VRAM in GB, or 0.0 if detection fails
-        """
-        try:
-            output = (
-                subprocess.check_output(
-                    "nvidia-smi --query-gpu=memory.total --format=csv,noheader,nounits",
-                    shell=True,
-                    stderr=subprocess.DEVNULL,
-                )
-                .decode()
-                .strip()
-            )
-
-            # nvidia-smi returns memory in MB
-            vram_mb = int(output.split("\n")[0])
-            vram_gb = round(vram_mb / 1024, 1)
-            return vram_gb
-        except (subprocess.CalledProcessError, FileNotFoundError, ValueError):
-            pass
-        return 0.0
-
     @staticmethod
     def get_processor_name() -> str:
         """
{lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/flm/utils.py

@@ -10,16 +10,46 @@ import time
 from typing import List, Optional
 
 import requests
-from packaging.version import Version
+from packaging.version import Version, InvalidVersion
 
 
-FLM_MINIMUM_VERSION = "0.9.12"
+def get_flm_latest_version() -> Optional[str]:
+    """
+    Get and return the latest FLM version from "https://github.com/FastFlowLM/FastFlowLM/tags"
+    This uses the GitHub tags API.
+    """
+    url = "https://api.github.com/repos/FastFlowLM/FastFlowLM/tags"
+    try:
+        response = requests.get(url, timeout=10)
+        response.raise_for_status()
+        tags = response.json()
+        if not tags:
+            return None
+        # Tags are sorted in reverse chronological order; find the first that looks like a version
+        for tag in tags:
+            tag_name = tag.get("name", "")
+            # Accept tags of the form v0.9.10, 0.9.10, etc.
+            if tag_name.startswith("v"):
+                version_candidate = tag_name[1:]
+            else:
+                version_candidate = tag_name
+            try:
+                # validate it's a version string
+                _ = Version(version_candidate)
+                return version_candidate
+            except InvalidVersion:
+                continue
+        return None
+    except requests.exceptions.RequestException as e:
+        logging.debug("Error retrieving latest FLM version: %s", e)
+        return None
 
 
 def check_flm_version() -> Optional[str]:
     """
     Check if FLM is installed and return version, or None if not available.
     """
+    latest_version_str = get_flm_latest_version()
     try:
         result = subprocess.run(
             ["flm", "version"],
@@ -34,11 +64,11 @@ def check_flm_version() -> Optional[str]:
         output = result.stdout.strip()
         if output.startswith("FLM v"):
             version_str = output[5:]  # Remove "FLM v" prefix
-            return version_str
-        return None
+            return version_str, latest_version_str
+        return None, latest_version_str
 
     except (subprocess.CalledProcessError, FileNotFoundError):
-        return None
+        return None, latest_version_str
 
 
 def refresh_environment():
@@ -76,31 +106,42 @@ def install_flm():
     If not, download and run the GUI installer, then wait for completion.
     """
     # Check current FLM installation
-    current_version = check_flm_version()
+    current_version, latest_version = check_flm_version()
 
-    if current_version and Version(current_version) >= Version(FLM_MINIMUM_VERSION):
+    if (
+        current_version
+        and latest_version
+        and Version(current_version) == Version(latest_version)
+    ):
         logging.info(
-            "FLM v%s is already installed and meets minimum version requirement (v%s)",
+            "FLM v%s is already installed and is up to date (latest version: v%s).",
             current_version,
-            FLM_MINIMUM_VERSION,
+            latest_version,
         )
         return
 
     if current_version:
+        if not latest_version:
+            logging.info(
+                "Unable to detect the latest FLM version; continuing with installed FLM v%s.",
+                current_version,
+            )
+            return
         logging.info(
-            "FLM v%s is installed but below minimum version v%s. Upgrading...",
+            "FLM v%s is installed but below latest version v%s. Upgrading...",
             current_version,
-            FLM_MINIMUM_VERSION,
+            latest_version,
        )
+        verysilent = True
     else:
-        logging.info(
-            "FLM not found. Installing FLM v%s or later...", FLM_MINIMUM_VERSION
-        )
+        logging.info("FLM not found. Installing FLM v%s or later...", latest_version)
+        verysilent = False
 
     # Download the installer
     # pylint: disable=line-too-long
     installer_url = "https://github.com/FastFlowLM/FastFlowLM/releases/latest/download/flm-setup.exe"
     installer_path = os.path.join(tempfile.gettempdir(), "flm-setup.exe")
+    installer_args = [installer_path, "/VERYSILENT"] if verysilent else [installer_path]
 
     try:
         # Remove existing installer if present
@@ -123,13 +164,15 @@ def install_flm():
 
     # Launch the installer GUI
     logging.warning(
         "Launching FLM installer GUI. Please complete the installation..."
+        if not verysilent
+        else "Installing FLM..."
     )
 
     # Launch installer and wait for it to complete
     if os.name == "nt":  # Windows
-        process = subprocess.Popen([installer_path], shell=True)
+        process = subprocess.Popen(installer_args, shell=True)
     else:
-        process = subprocess.Popen([installer_path])
+        process = subprocess.Popen(installer_args)
 
     # Wait for installer to complete
     process.wait()
@@ -150,8 +193,8 @@ def install_flm():
     # Verify installation
     max_retries = 10
     for attempt in range(max_retries):
-        new_version = check_flm_version()
-        if new_version and Version(new_version) >= Version(FLM_MINIMUM_VERSION):
+        new_version, latest_version = check_flm_version()
+        if new_version and Version(new_version) == Version(latest_version):
             logging.info("FLM v%s successfully installed and verified", new_version)
             return
 
@@ -240,7 +283,12 @@ def get_flm_installed_models() -> List[str]:
 
         return installed_checkpoints
 
-    except (subprocess.CalledProcessError, FileNotFoundError, AttributeError):
+    except (
+        subprocess.CalledProcessError,
+        FileNotFoundError,
+        AttributeError,
+        NotADirectoryError,
+    ):
         # FLM not installed, not available, or output parsing failed
         return []
 
@@ -249,7 +297,7 @@ def is_flm_available() -> bool:
     """
     Check if FLM is available and meets minimum version requirements.
     """
-    current_version = check_flm_version()
-    return current_version is not None and Version(current_version) >= Version(
-        FLM_MINIMUM_VERSION
+    current_version, latest_version = check_flm_version()
+    return current_version is not None and Version(current_version) == Version(
+        latest_version
     )
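
Net effect of the flm/utils.py hunks: the fixed FLM_MINIMUM_VERSION pin is gone, check_flm_version() now returns an (installed, latest) pair, and "up to date" means equal to the newest GitHub tag. A minimal editor's sketch (not from the package) of consuming the new return value, with the import path taken from the file list above:

    # Editor's sketch: consume the (installed, latest) pair that the 8.2.0
    # check_flm_version() returns; each element is a version string or None.
    from packaging.version import Version
    from lemonade.tools.flm.utils import check_flm_version

    installed, latest = check_flm_version()
    if installed is None:
        print("FLM is not installed")
    elif latest is None:
        print(f"FLM v{installed} installed; latest version could not be determined")
    elif Version(installed) == Version(latest):
        print(f"FLM v{installed} is up to date")
    else:
        print(f"FLM v{installed} installed; v{latest} is available")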
{lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/llamacpp/load.py

@@ -97,6 +97,7 @@ class LoadLlamaCpp(FirstTool):
             get_llama_installed_version,
             parse_checkpoint,
             download_gguf,
+            resolve_local_gguf_model,
             get_local_checkpoint_path,
             LlamaCppTokenizerAdapter,
             LlamaCppAdapter,
@@ -169,8 +170,16 @@ class LoadLlamaCpp(FirstTool):
             )
 
         else:
+            # First, try to resolve from local cache to avoid unnecessary downloads
+            base_checkpoint, variant = parse_checkpoint(checkpoint)
+            snapshot_files = resolve_local_gguf_model(
+                base_checkpoint, variant, None
+            )
+
+            # If not found locally, download from internet
+            if not snapshot_files:
+                snapshot_files = download_gguf(checkpoint)
 
-            snapshot_files = download_gguf(checkpoint)
             full_model_path = snapshot_files["variant"]
             model_to_use = os.path.basename(full_model_path)
 
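The hunk above makes loading cache-first: resolve the GGUF from the local Hugging Face cache and only download on a miss. A minimal editor's sketch of the same flow (the helpers are defined in llamacpp/utils.py, diffed next; the checkpoint string reuses the repo:variant example from that file's docstring):

    # Editor's sketch of the cache-first GGUF resolution introduced in 8.2.0.
    from lemonade.tools.llamacpp.utils import (
        parse_checkpoint,
        resolve_local_gguf_model,
        download_gguf,
    )

    checkpoint = "unsloth/Qwen3-0.6B-GGUF:Q4_0"
    base, variant = parse_checkpoint(checkpoint)

    files = resolve_local_gguf_model(base, variant, None)  # local HF cache lookup
    if not files:
        files = download_gguf(checkpoint)  # cache miss: fetch from Hugging Face

    print(files["variant"])  # path to the resolved .gguf file
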
{lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/llamacpp/utils.py

@@ -10,9 +10,7 @@ import requests
 import lemonade.common.build as build
 import lemonade.common.printing as printing
 from lemonade.tools.adapter import PassthroughTokenizer, ModelAdapter
-
 from lemonade.common.system_info import get_system_info
-
 from dotenv import set_key, load_dotenv
 
 LLAMA_VERSION_VULKAN = "b6510"
@@ -378,7 +376,7 @@ def install_llamacpp(backend):
     import stat
 
     # Find and make executable files executable
-    for root, dirs, files in os.walk(llama_server_exe_dir):
+    for root, _, files in os.walk(llama_server_exe_dir):
         for file in files:
             file_path = os.path.join(root, file)
             # Make files in bin/ directories executable
@@ -656,15 +654,91 @@ def identify_gguf_models(
     return core_files, sharded_files
 
 
-def download_gguf(config_checkpoint, config_mmproj=None, do_not_upgrade=False) -> dict:
+def resolve_local_gguf_model(
+    checkpoint: str, variant: str, config_mmproj: str = None
+) -> dict | None:
     """
-    Downloads the GGUF file for the given model configuration.
+    Attempts to resolve a GGUF model from the local HuggingFace cache.
+    """
+    from huggingface_hub.constants import HF_HUB_CACHE
+
+    # Convert checkpoint to cache directory format
+    if checkpoint.startswith("models--"):
+        model_cache_dir = os.path.join(HF_HUB_CACHE, checkpoint)
+    else:
+        # This is a HuggingFace repo - convert to cache directory format
+        repo_cache_name = checkpoint.replace("/", "--")
+        model_cache_dir = os.path.join(HF_HUB_CACHE, f"models--{repo_cache_name}")
+
+    # Check if the cache directory exists
+    if not os.path.exists(model_cache_dir):
+        return None
+
+    gguf_file_found = None
+
+    # If variant is specified, look for that specific file
+    if variant:
+        search_term = variant if variant.endswith(".gguf") else f"{variant}.gguf"
+
+        for root, _, files in os.walk(model_cache_dir):
+            if search_term in files:
+                gguf_file_found = os.path.join(root, search_term)
+                break
+
+    # If no variant or variant not found, find any .gguf file (excluding mmproj)
+    if not gguf_file_found:
+        for root, _, files in os.walk(model_cache_dir):
+            gguf_files = [
+                f for f in files if f.endswith(".gguf") and "mmproj" not in f.lower()
+            ]
+            if gguf_files:
+                gguf_file_found = os.path.join(root, gguf_files[0])
+                break
+
+    # If no GGUF file found, model is not in cache
+    if not gguf_file_found:
+        return None
+
+    # Build result dictionary
+    result = {"variant": gguf_file_found}
+
+    # Search for mmproj file if provided
+    if config_mmproj:
+        for root, _, files in os.walk(model_cache_dir):
+            if config_mmproj in files:
+                result["mmproj"] = os.path.join(root, config_mmproj)
+                break
+
+    logging.info(f"Resolved local GGUF model: {result}")
+    return result
 
-    For sharded models, if the variant points to a folder (e.g. Q4_0), all files in that folder
-    will be downloaded but only the first file will be returned for loading.
+
+def download_gguf(
+    config_checkpoint: str, config_mmproj=None, do_not_upgrade: bool = False
+) -> dict:
     """
+    Downloads the GGUF file for the given model configuration from HuggingFace.
+
+    This function downloads models from the internet. It does NOT check the local cache first.
+    Callers should use resolve_local_gguf_model() if they want to check for existing models first.
+
+    Args:
+        config_checkpoint: Checkpoint identifier (file path or HF repo with variant)
+        config_mmproj: Optional mmproj file to also download
+        do_not_upgrade: If True, use local cache only without attempting to download updates
 
-    # This code handles all cases by constructing the appropriate filename or pattern
+    Returns:
+        Dictionary with "variant" (and optionally "mmproj") file paths
+    """
+    # Handle direct file path case - if the checkpoint is an actual file on disk
+    if os.path.exists(config_checkpoint):
+        result = {"variant": config_checkpoint}
+        if config_mmproj:
+            result["mmproj"] = config_mmproj
+        return result
+
+    # Parse checkpoint to extract base and variant
+    # Checkpoint format: repo_name:variant (e.g., "unsloth/Qwen3-0.6B-GGUF:Q4_0")
     checkpoint, variant = parse_checkpoint(config_checkpoint)
     # Identify the GGUF model files in the repository that match the variant
  # Identify the GGUF model files in the repository that match the variant
{lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/oga/load.py

@@ -4,7 +4,6 @@
 
 import argparse
 import subprocess
-import sys
 import os
 import json
 import webbrowser
@@ -38,6 +37,17 @@ execution_providers = {
 }
 
 
+def find_onnx_files_recursively(directory):
+    """
+    Recursively search for ONNX files in a directory and its subdirectories.
+    """
+    for _, _, files in os.walk(directory):
+        for file in files:
+            if file.endswith(".onnx"):
+                return True
+    return False
+
+
 def _get_npu_driver_version():
     """
     Get the NPU driver version using PowerShell directly.
@@ -321,6 +331,7 @@ class OgaLoad(FirstTool):
 
     @staticmethod
     def _setup_model_dependencies(full_model_path, device, ryzenai_version, oga_path):
+        # pylint: disable=unused-argument
         """
         Sets up model dependencies for hybrid and NPU inference by:
         1. Configuring the custom_ops_library path in genai_config.json.
@@ -328,116 +339,35 @@ class OgaLoad(FirstTool):
         3. Check NPU driver version if required for device and ryzenai_version.
         """
 
-        env_path = sys.prefix
+        # For RyzenAI 1.6.0, check NPU driver version for NPU and hybrid devices
+        if device in ["npu", "hybrid"]:
+            required_driver_version = REQUIRED_NPU_DRIVER_VERSION
 
-        if "1.4.0" in ryzenai_version:
-            if device == "npu":
-                custom_ops_path = os.path.join(
-                    oga_path, "libs", "onnxruntime_vitis_ai_custom_ops.dll"
-                )
-            else:
-                custom_ops_path = os.path.join(oga_path, "libs", "onnx_custom_ops.dll")
-        else:
-            # For 1.5.0+, check NPU driver version for NPU and hybrid devices
-            if device in ["npu", "hybrid"]:
-                required_driver_version = REQUIRED_NPU_DRIVER_VERSION
-
-                current_driver_version = _get_npu_driver_version()
-
-                if not current_driver_version:
-                    printing.log_warning(
-                        f"NPU driver not found. {device.upper()} inference requires NPU driver "
-                        f"version {required_driver_version}.\n"
-                        "Please download and install the NPU Driver from:\n"
-                        f"{NPU_DRIVER_DOWNLOAD_URL}\n"
-                        "NPU functionality may not work properly."
-                    )
-                    _open_driver_install_page()
-
-                elif not _compare_driver_versions(
-                    current_driver_version, required_driver_version
-                ):
-                    printing.log_warning(
-                        f"Incorrect NPU driver version detected: {current_driver_version}\n"
-                        f"{device.upper()} inference with RyzenAI 1.5.0 requires driver "
-                        f"version {required_driver_version} or higher.\n"
-                        "Please download and install the correct NPU Driver from:\n"
-                        f"{NPU_DRIVER_DOWNLOAD_URL}\n"
-                        "NPU functionality may not work properly."
-                    )
-                    _open_driver_install_page()
-
-            if device == "npu":
-                # For 1.5.0, custom ops are in the conda environment's onnxruntime package
-                custom_ops_path = os.path.join(
-                    env_path,
-                    "Lib",
-                    "site-packages",
-                    "onnxruntime",
-                    "capi",
-                    "onnxruntime_vitis_ai_custom_ops.dll",
-                )
-                dll_source_path = os.path.join(
-                    env_path, "Lib", "site-packages", "onnxruntime", "capi"
-                )
-                required_dlls = ["dyn_dispatch_core.dll", "xaiengine.dll"]
-            else:
-                custom_ops_path = os.path.join(
-                    env_path,
-                    "Lib",
-                    "site-packages",
-                    "onnxruntime_genai",
-                    "onnx_custom_ops.dll",
-                )
-                dll_source_path = os.path.join(
-                    env_path, "Lib", "site-packages", "onnxruntime_genai"
-                )
-                required_dlls = ["libutf8_validity.dll", "abseil_dll.dll"]
-
-            # Validate that all required DLLs exist in the source directory
-            missing_dlls = []
-            if not os.path.exists(custom_ops_path):
-                missing_dlls.append(custom_ops_path)
-
-            for dll_name in required_dlls:
-                dll_source = os.path.join(dll_source_path, dll_name)
-                if not os.path.exists(dll_source):
-                    missing_dlls.append(dll_source)
-
-            if missing_dlls:
-                dll_list = "\n - ".join(missing_dlls)
-                raise RuntimeError(
-                    f"Required DLLs not found for {device} inference:\n - {dll_list}\n"
-                    f"Please ensure your RyzenAI installation is complete and supports {device}."
+            current_driver_version = _get_npu_driver_version()
+            rai_version, _ = _get_ryzenai_version_info(device)
+
+            if not current_driver_version:
+                printing.log_warning(
+                    f"NPU driver not found. {device.upper()} inference requires NPU driver "
+                    f"version {required_driver_version}.\n"
+                    "Please download and install the NPU Driver from:\n"
+                    f"{NPU_DRIVER_DOWNLOAD_URL}\n"
+                    "NPU functionality may not work properly."
                 )
+                _open_driver_install_page()
 
-            # Add the DLL source directory to PATH
-            current_path = os.environ.get("PATH", "")
-            if dll_source_path not in current_path:
-                os.environ["PATH"] = dll_source_path + os.pathsep + current_path
-
-            # Update the model config with custom_ops_library path
-            config_path = os.path.join(full_model_path, "genai_config.json")
-            if os.path.exists(config_path):
-                with open(config_path, "r", encoding="utf-8") as f:
-                    config = json.load(f)
-
-                if (
-                    "model" in config
-                    and "decoder" in config["model"]
-                    and "session_options" in config["model"]["decoder"]
+            elif not _compare_driver_versions(
+                current_driver_version, required_driver_version
             ):
-                    config["model"]["decoder"]["session_options"][
-                        "custom_ops_library"
-                    ] = custom_ops_path
-
-                    with open(config_path, "w", encoding="utf-8") as f:
-                        json.dump(config, f, indent=4)
-
-            else:
-                printing.log_info(
-                    f"Model's `genai_config.json` not found in {full_model_path}"
-                )
+                printing.log_warning(
+                    f"Incorrect NPU driver version detected: {current_driver_version}\n"
+                    f"{device.upper()} inference with RyzenAI {rai_version} requires driver "
+                    f"version {required_driver_version} or higher.\n"
+                    "Please download and install the correct NPU Driver from:\n"
+                    f"{NPU_DRIVER_DOWNLOAD_URL}\n"
+                    "NPU functionality may not work properly."
+                )
+                _open_driver_install_page()
 
     @staticmethod
     def _is_preoptimized_model(input_model_path):
@@ -502,34 +432,6 @@ class OgaLoad(FirstTool):
 
         return full_model_path
 
-    @staticmethod
-    def _setup_npu_environment(ryzenai_version, oga_path):
-        """
-        Sets up environment for NPU flow of ONNX model and returns saved state to be restored
-        later in cleanup.
-        """
-        if "1.5.0" in ryzenai_version:
-            # For PyPI installation (1.5.0+), no environment setup needed
-            return None
-        elif "1.4.0" in ryzenai_version:
-            # Legacy lemonade-install approach for 1.4.0
-            if not os.path.exists(os.path.join(oga_path, "libs", "onnxruntime.dll")):
-                raise RuntimeError(
-                    f"Cannot find libs/onnxruntime.dll in lib folder: {oga_path}"
-                )
-
-            # Save current state so they can be restored after inference.
-            saved_state = {"cwd": os.getcwd(), "path": os.environ["PATH"]}
-
-            # Setup NPU environment (cwd and path will be restored later)
-            os.chdir(oga_path)
-            os.environ["PATH"] = (
-                os.path.join(oga_path, "libs") + os.pathsep + os.environ["PATH"]
-            )
-            return saved_state
-        else:
-            raise ValueError(f"Unsupported RyzenAI version: {ryzenai_version}")
-
     @staticmethod
     def _load_model_and_setup_state(
         state, full_model_path, checkpoint, trust_remote_code
@@ -702,8 +604,7 @@ class OgaLoad(FirstTool):
         state.save_stat(Keys.CHECKPOINT, checkpoint)
         state.save_stat(Keys.LOCAL_MODEL_FOLDER, full_model_path)
         # See if there is a file ending in ".onnx" in this folder
-        dir = os.listdir(input)
-        has_onnx_file = any([filename.endswith(".onnx") for filename in dir])
+        has_onnx_file = find_onnx_files_recursively(input)
         if not has_onnx_file:
             raise ValueError(
                 f"The folder {input} does not contain an ONNX model file."
@@ -852,15 +753,10 @@ class OgaLoad(FirstTool):
 
         try:
             if device == "npu":
-                saved_env_state = self._setup_npu_environment(
-                    ryzenai_version, oga_path
-                )
                 # Set USE_AIE_RoPE based on model type
                 os.environ["USE_AIE_RoPE"] = (
                     "0" if "phi-" in checkpoint.lower() else "1"
                 )
-            elif device == "hybrid":
-                saved_env_state = None
 
             self._load_model_and_setup_state(
                 state, full_model_path, checkpoint, trust_remote_code