PyPI - lemonade-sdk - Versions diffs - 8.1.4__py3-none-any.whl → 8.2.2__py3-none-any.whl - Mend

lemonade-sdk 8.1.4__py3-none-any.whl → 8.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of lemonade-sdk might be problematic. Click here for more details.

Files changed (53) hide show

lemonade/cache.py +6 -1
lemonade/cli.py +47 -5
lemonade/common/inference_engines.py +13 -4
lemonade/common/status.py +4 -4
lemonade/common/system_info.py +544 -1
lemonade/profilers/agt_power.py +437 -0
lemonade/profilers/hwinfo_power.py +429 -0
lemonade/tools/accuracy.py +143 -48
lemonade/tools/adapter.py +6 -1
lemonade/tools/bench.py +26 -8
lemonade/tools/flm/__init__.py +1 -0
lemonade/tools/flm/utils.py +303 -0
lemonade/tools/huggingface/bench.py +6 -1
lemonade/tools/llamacpp/bench.py +146 -27
lemonade/tools/llamacpp/load.py +30 -2
lemonade/tools/llamacpp/utils.py +393 -33
lemonade/tools/oga/bench.py +5 -26
lemonade/tools/oga/load.py +60 -121
lemonade/tools/oga/migration.py +403 -0
lemonade/tools/report/table.py +76 -8
lemonade/tools/server/flm.py +133 -0
lemonade/tools/server/llamacpp.py +220 -553
lemonade/tools/server/serve.py +684 -168
lemonade/tools/server/static/js/chat.js +666 -342
lemonade/tools/server/static/js/model-settings.js +24 -3
lemonade/tools/server/static/js/models.js +597 -73
lemonade/tools/server/static/js/shared.js +79 -14
lemonade/tools/server/static/logs.html +191 -0
lemonade/tools/server/static/styles.css +491 -66
lemonade/tools/server/static/webapp.html +83 -31
lemonade/tools/server/tray.py +158 -38
lemonade/tools/server/utils/macos_tray.py +226 -0
lemonade/tools/server/utils/{system_tray.py → windows_tray.py} +13 -0
lemonade/tools/server/webapp.py +4 -1
lemonade/tools/server/wrapped_server.py +559 -0
lemonade/version.py +1 -1
lemonade_install/install.py +54 -611
{lemonade_sdk-8.1.4.dist-info → lemonade_sdk-8.2.2.dist-info}/METADATA +29 -72
lemonade_sdk-8.2.2.dist-info/RECORD +83 -0
lemonade_server/cli.py +145 -37
lemonade_server/model_manager.py +521 -37
lemonade_server/pydantic_models.py +28 -1
lemonade_server/server_models.json +246 -92
lemonade_server/settings.py +39 -39
lemonade/tools/quark/__init__.py +0 -0
lemonade/tools/quark/quark_load.py +0 -173
lemonade/tools/quark/quark_quantize.py +0 -439
lemonade_sdk-8.1.4.dist-info/RECORD +0 -77
{lemonade_sdk-8.1.4.dist-info → lemonade_sdk-8.2.2.dist-info}/WHEEL +0 -0
{lemonade_sdk-8.1.4.dist-info → lemonade_sdk-8.2.2.dist-info}/entry_points.txt +0 -0
{lemonade_sdk-8.1.4.dist-info → lemonade_sdk-8.2.2.dist-info}/licenses/LICENSE +0 -0
{lemonade_sdk-8.1.4.dist-info → lemonade_sdk-8.2.2.dist-info}/licenses/NOTICE.md +0 -0
{lemonade_sdk-8.1.4.dist-info → lemonade_sdk-8.2.2.dist-info}/top_level.txt +0 -0

lemonade/tools/oga/bench.py CHANGED Viewed

@@ -1,8 +1,8 @@
 import argparse
 import statistics
 from statistics import StatisticsError
+import psutil
 from lemonade.state import State
-from lemonade.cache import Keys
 from lemonade.tools.adapter import ModelAdapter, TokenizerAdapter
 from lemonade.tools.bench import Bench
@@ -20,16 +20,6 @@ class OgaBench(Bench):
     unique_name = "oga-bench"
-    def __init__(self):
-        super().__init__()
-        # Additional statistics generated by this bench tool
-        self.status_stats.insert(
-            self.status_stats.index(Keys.TOKEN_GENERATION_TOKENS_PER_SECOND) + 1,
-            Keys.STD_DEV_TOKENS_PER_SECOND,
-        )
-        self.std_dev_token_generation_tokens_per_second_list = []
     @staticmethod
     def parser(add_help: bool = True) -> argparse.ArgumentParser:
         parser = __class__.helpful_parser(
@@ -62,7 +52,7 @@ class OgaBench(Bench):
         iterations: int,
         warmup_iterations: int,
         output_tokens: int,
-    ) -> State:
+    ):
         model: ModelAdapter = state.model
         tokenizer: TokenizerAdapter = state.tokenizer
@@ -120,20 +110,9 @@ class OgaBench(Bench):
         except StatisticsError:
             # Less than 2 measurements
             self.std_dev_token_generation_tokens_per_second_list.append(None)
-    def save_stats(self, state):
-        super().save_stats(state)
-        # Save additional statistics
-        if not all(
-            element is None
-            for element in self.std_dev_token_generation_tokens_per_second_list
-        ):
-            state.save_stat(
-                Keys.STD_DEV_TOKENS_PER_SECOND,
-                self.get_item_or_list(
-                    self.std_dev_token_generation_tokens_per_second_list
-                ),
+        if self.save_max_memory_used:
+            self.max_memory_used_gb_list.append(
+                psutil.Process().memory_info().peak_wset / 1024**3
             )

lemonade/tools/oga/load.py CHANGED Viewed

@@ -38,6 +38,17 @@ execution_providers = {
 }
+def find_onnx_files_recursively(directory):
+    """
+    Recursively search for ONNX files in a directory and its subdirectories.
+    """
+    for _, _, files in os.walk(directory):
+        for file in files:
+            if file.endswith(".onnx"):
+                return True
+    return False
 def _get_npu_driver_version():
     """
     Get the NPU driver version using PowerShell directly.
@@ -74,6 +85,17 @@ def _get_npu_driver_version():
         return None
+def _compare_driver_versions(current_version, required_version):
+    """
+    Compare two driver version strings.
+    Returns True if current_version >= required_version, False otherwise.
+    Uses packaging.version for proper semantic version comparison.
+    """
+    from packaging.version import Version
+    return Version(current_version) >= Version(required_version)
 def import_error_heler(e: Exception):
     """
     Print a helpful message in the event of an import error
@@ -310,6 +332,7 @@ class OgaLoad(FirstTool):
     @staticmethod
     def _setup_model_dependencies(full_model_path, device, ryzenai_version, oga_path):
+        # pylint: disable=unused-argument
         """
         Sets up model dependencies for hybrid and NPU inference by:
         1. Configuring the custom_ops_library path in genai_config.json.
@@ -317,74 +340,45 @@ class OgaLoad(FirstTool):
         3. Check NPU driver version if required for device and ryzenai_version.
         """
-        env_path = sys.prefix
+        # For RyzenAI 1.6.0, check NPU driver version for NPU and hybrid devices
+        if device in ["npu", "hybrid"]:
+            required_driver_version = REQUIRED_NPU_DRIVER_VERSION
-        if "1.4.0" in ryzenai_version:
-            if device == "npu":
-                custom_ops_path = os.path.join(
-                    oga_path, "libs", "onnxruntime_vitis_ai_custom_ops.dll"
-                )
-            else:
-                custom_ops_path = os.path.join(oga_path, "libs", "onnx_custom_ops.dll")
-        else:
-            # For 1.5.0+, check NPU driver version for NPU and hybrid devices
-            if device in ["npu", "hybrid"]:
-                required_driver_version = REQUIRED_NPU_DRIVER_VERSION
-                current_driver_version = _get_npu_driver_version()
-                if not current_driver_version:
-                    printing.log_warning(
-                        f"NPU driver not found. {device.upper()} inference requires NPU driver "
-                        f"version {required_driver_version}.\n"
-                        "Please download and install the NPU Driver from:\n"
-                        f"{NPU_DRIVER_DOWNLOAD_URL}\n"
-                        "NPU functionality may not work properly."
-                    )
-                    _open_driver_install_page()
-                elif current_driver_version != required_driver_version:
-                    printing.log_warning(
-                        f"Incorrect NPU driver version detected: {current_driver_version}\n"
-                        f"{device.upper()} inference with RyzenAI 1.5.0 requires driver "
-                        f"version {required_driver_version}.\n"
-                        "Please download and install the correct NPU Driver from:\n"
-                        f"{NPU_DRIVER_DOWNLOAD_URL}\n"
-                        "NPU functionality may not work properly."
-                    )
-                    _open_driver_install_page()
-            if device == "npu":
-                # For 1.5.0, custom ops are in the conda environment's onnxruntime package
-                custom_ops_path = os.path.join(
-                    env_path,
-                    "Lib",
-                    "site-packages",
-                    "onnxruntime",
-                    "capi",
-                    "onnxruntime_vitis_ai_custom_ops.dll",
-                )
-                dll_source_path = os.path.join(
-                    env_path, "Lib", "site-packages", "onnxruntime", "capi"
-                )
-                required_dlls = ["dyn_dispatch_core.dll", "xaiengine.dll"]
-            else:
-                custom_ops_path = os.path.join(
-                    env_path,
-                    "Lib",
-                    "site-packages",
-                    "onnxruntime_genai",
-                    "onnx_custom_ops.dll",
+            current_driver_version = _get_npu_driver_version()
+            rai_version, _ = _get_ryzenai_version_info(device)
+            if not current_driver_version:
+                printing.log_warning(
+                    f"NPU driver not found. {device.upper()} inference requires NPU driver "
+                    f"version {required_driver_version}.\n"
+                    "Please download and install the NPU Driver from:\n"
+                    f"{NPU_DRIVER_DOWNLOAD_URL}\n"
+                    "NPU functionality may not work properly."
                 )
-                dll_source_path = os.path.join(
-                    env_path, "Lib", "site-packages", "onnxruntime_genai"
+                _open_driver_install_page()
+            elif not _compare_driver_versions(
+                current_driver_version, required_driver_version
+            ):
+                printing.log_warning(
+                    f"Incorrect NPU driver version detected: {current_driver_version}\n"
+                    f"{device.upper()} inference with RyzenAI {rai_version} requires driver "
+                    f"version {required_driver_version} or higher.\n"
+                    "Please download and install the correct NPU Driver from:\n"
+                    f"{NPU_DRIVER_DOWNLOAD_URL}\n"
+                    "NPU functionality may not work properly."
                 )
-                required_dlls = ["libutf8_validity.dll", "abseil_dll.dll"]
+                _open_driver_install_page()
+            # Setup DLL paths for NPU/hybrid inference
+            env_path = os.path.dirname(sys.executable)
+            dll_source_path = os.path.join(
+                env_path, "Lib", "site-packages", "onnxruntime_genai"
+            )
+            required_dlls = ["libutf8_validity.dll", "abseil_dll.dll"]
             # Validate that all required DLLs exist in the source directory
             missing_dlls = []
-            if not os.path.exists(custom_ops_path):
-                missing_dlls.append(custom_ops_path)
             for dll_name in required_dlls:
                 dll_source = os.path.join(dll_source_path, dll_name)
@@ -395,7 +389,9 @@ class OgaLoad(FirstTool):
                 dll_list = "\n  - ".join(missing_dlls)
                 raise RuntimeError(
                     f"Required DLLs not found for {device} inference:\n  - {dll_list}\n"
-                    f"Please ensure your RyzenAI installation is complete and supports {device}."
+                    f"Please ensure your RyzenAI installation is complete and supports {device}.\n"
+                    "Please reinstall the RyzenAI Software for your platform. Run:\n"
+                    "    pip install lemonade-sdk[oga-ryzenai]\n"
                 )
             # Add the DLL source directory to PATH
@@ -403,29 +399,6 @@ class OgaLoad(FirstTool):
             if dll_source_path not in current_path:
                 os.environ["PATH"] = dll_source_path + os.pathsep + current_path
-        # Update the model config with custom_ops_library path
-        config_path = os.path.join(full_model_path, "genai_config.json")
-        if os.path.exists(config_path):
-            with open(config_path, "r", encoding="utf-8") as f:
-                config = json.load(f)
-            if (
-                "model" in config
-                and "decoder" in config["model"]
-                and "session_options" in config["model"]["decoder"]
-            ):
-                config["model"]["decoder"]["session_options"][
-                    "custom_ops_library"
-                ] = custom_ops_path
-            with open(config_path, "w", encoding="utf-8") as f:
-                json.dump(config, f, indent=4)
-        else:
-            printing.log_info(
-                f"Model's `genai_config.json` not found in {full_model_path}"
-            )
     @staticmethod
     def _is_preoptimized_model(input_model_path):
         """
@@ -489,34 +462,6 @@ class OgaLoad(FirstTool):
         return full_model_path
-    @staticmethod
-    def _setup_npu_environment(ryzenai_version, oga_path):
-        """
-        Sets up environment for NPU flow of ONNX model and returns saved state to be restored
-        later in cleanup.
-        """
-        if "1.5.0" in ryzenai_version:
-            # For PyPI installation (1.5.0+), no environment setup needed
-            return None
-        elif "1.4.0" in ryzenai_version:
-            # Legacy lemonade-install approach for 1.4.0
-            if not os.path.exists(os.path.join(oga_path, "libs", "onnxruntime.dll")):
-                raise RuntimeError(
-                    f"Cannot find libs/onnxruntime.dll in lib folder: {oga_path}"
-                )
-            # Save current state so they can be restored after inference.
-            saved_state = {"cwd": os.getcwd(), "path": os.environ["PATH"]}
-            # Setup NPU environment (cwd and path will be restored later)
-            os.chdir(oga_path)
-            os.environ["PATH"] = (
-                os.path.join(oga_path, "libs") + os.pathsep + os.environ["PATH"]
-            )
-            return saved_state
-        else:
-            raise ValueError(f"Unsupported RyzenAI version: {ryzenai_version}")
     @staticmethod
     def _load_model_and_setup_state(
         state, full_model_path, checkpoint, trust_remote_code
@@ -689,8 +634,7 @@ class OgaLoad(FirstTool):
             state.save_stat(Keys.CHECKPOINT, checkpoint)
             state.save_stat(Keys.LOCAL_MODEL_FOLDER, full_model_path)
             # See if there is a file ending in ".onnx" in this folder
-            dir = os.listdir(input)
-            has_onnx_file = any([filename.endswith(".onnx") for filename in dir])
+            has_onnx_file = find_onnx_files_recursively(input)
             if not has_onnx_file:
                 raise ValueError(
                     f"The folder {input} does not contain an ONNX model file."
@@ -839,15 +783,10 @@ class OgaLoad(FirstTool):
             try:
                 if device == "npu":
-                    saved_env_state = self._setup_npu_environment(
-                        ryzenai_version, oga_path
-                    )
                     # Set USE_AIE_RoPE based on model type
                     os.environ["USE_AIE_RoPE"] = (
                         "0" if "phi-" in checkpoint.lower() else "1"
                     )
-                elif device == "hybrid":
-                    saved_env_state = None
                 self._load_model_and_setup_state(
                     state, full_model_path, checkpoint, trust_remote_code

lemonade-sdk 8.1.4__py3-none-any.whl → 8.2.2__py3-none-any.whl

Potentially problematic release.

lemonade-sdk 8.1.4__py3-none-any.whl → 8.2.2__py3-none-any.whl