PyPI - lemonade-sdk - Versions diffs - 8.0.5__py3-none-any.whl → 8.1.0__py3-none-any.whl - Mend

lemonade-sdk 8.0.5__py3-none-any.whl → 8.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of lemonade-sdk might be problematic. Click here for more details.

Files changed (30)

lemonade/cache.py +3 -1
lemonade/common/network.py +18 -1
lemonade/tools/adapter.py +6 -0
lemonade/tools/huggingface/utils.py +6 -5
lemonade/tools/llamacpp/bench.py +28 -46
lemonade/tools/llamacpp/load.py +104 -196
lemonade/tools/llamacpp/utils.py +612 -0
lemonade/tools/oga/bench.py +5 -6
lemonade/tools/oga/load.py +239 -112
lemonade/tools/oga/utils.py +27 -9
lemonade/tools/prompt.py +17 -25
lemonade/tools/report/table.py +12 -9
lemonade/tools/server/llamacpp.py +80 -92
lemonade/tools/server/serve.py +22 -28
lemonade/tools/server/static/styles.css +121 -26
lemonade/tools/server/static/webapp.html +14 -6
lemonade/tools/server/tray.py +7 -0
lemonade/version.py +1 -1
lemonade_install/install.py +65 -84
{lemonade_sdk-8.0.5.dist-info → lemonade_sdk-8.1.0.dist-info}/METADATA +32 -21
{lemonade_sdk-8.0.5.dist-info → lemonade_sdk-8.1.0.dist-info}/RECORD +30 -29
lemonade_server/cli.py +1 -1
lemonade_server/model_manager.py +8 -151
lemonade_server/pydantic_models.py +1 -4
lemonade_server/server_models.json +44 -9
{lemonade_sdk-8.0.5.dist-info → lemonade_sdk-8.1.0.dist-info}/WHEEL +0 -0
{lemonade_sdk-8.0.5.dist-info → lemonade_sdk-8.1.0.dist-info}/entry_points.txt +0 -0
{lemonade_sdk-8.0.5.dist-info → lemonade_sdk-8.1.0.dist-info}/licenses/LICENSE +0 -0
{lemonade_sdk-8.0.5.dist-info → lemonade_sdk-8.1.0.dist-info}/licenses/NOTICE.md +0 -0
{lemonade_sdk-8.0.5.dist-info → lemonade_sdk-8.1.0.dist-info}/top_level.txt +0 -0

lemonade_install/install.py CHANGED Viewed

@@ -24,22 +24,6 @@
 # In any python environment, only one set of artifacts can be installed at a time.
 # Python environments created by Lemonade v6.1.x or earlier will need to be recreated.
 #
-# The Ryzen AI 1.3.0 artifact files use a different directory hierarchy.
-# The Ryzen AI 1.3.0 hybrid artifacts directory hierarchy is:
-#
-#     RYZEN_AI\hybrid\hybrid-llm-artifacts_1.3.0_lounge\hybrid-llm-artifacts\
-#         onnxruntime_genai\lib
-#         onnxruntime_genai\wheel
-#         onnx_utils\bin
-#         eula\eula
-#
-# The Ryzen AI 1.3.0 npu artifacts directory hierarchy is:
-#
-#     RYZEN_AI\npu\amd_oga\
-#         bins\xclbin\stx
-#         libs
-#         wheels
-#
 import argparse
 import glob
@@ -56,6 +40,13 @@ import zipfile
 DEFAULT_RYZEN_AI_VERSION = "1.4.0"
 version_info_filename = "version_info.json"
+# NPU Driver configuration
+NPU_DRIVER_DOWNLOAD_URL = (
+    "https://account.amd.com/en/forms/downloads/"
+    "ryzenai-eula-public-xef.html?filename=NPU_RAI1.5_280_WHQL.zip"
+)
+REQUIRED_NPU_DRIVER_VERSION = "32.0.203.280"
 lemonade_install_dir = Path(__file__).parent.parent.parent
 DEFAULT_QUARK_VERSION = "quark-0.6.0"
 DEFAULT_QUARK_DIR = os.path.join(
@@ -66,14 +57,6 @@ DEFAULT_QUARK_DIR = os.path.join(
 SUPPORTED_RYZEN_AI_SERIES = ["300"]
 npu_install_data = {
-    "1.3.0": {
-        "artifacts_zipfile": "ryzen_ai_13_ga/npu-llm-artifacts_1.3.0.zip",
-        "license_file": (
-            "https://account.amd.com/content/dam/account/en/licenses/download/"
-            "amd-end-user-license-agreement.pdf"
-        ),
-        "license_tag": "Beta ",
-    },
     "1.4.0": {
         "artifacts_zipfile": (
             "https://www.xilinx.com/bin/public/openDownload?"
@@ -88,17 +71,6 @@ npu_install_data = {
 }
 hybrid_install_data = {
-    "1.3.0": {
-        "artifacts_zipfile": (
-            "https://www.xilinx.com/bin/public/openDownload?"
-            "filename=hybrid-llm-artifacts_1.3.0_012725.zip"
-        ),
-        "license_file": (
-            "https://www.xilinx.com/bin/public/openDownload?"
-            "filename=AMD%20End%20User%20License%20Agreement.pdf"
-        ),
-        "license_tag": "",
-    },
     "1.4.0": {
         "artifacts_zipfile": (
             "https://www.xilinx.com/bin/public/openDownload?"
@@ -154,10 +126,7 @@ def get_oga_npu_dir():
     version_info = get_ryzen_ai_version_info()
     version = version_info["version"]
     ryzen_ai_folder = get_ryzen_ai_path()
-    if "1.3.0" in version:
-        npu_dir = os.path.join(ryzen_ai_folder, "npu", "amd_oga")
-    else:
-        npu_dir = os.path.join(ryzen_ai_folder, "npu")
+    npu_dir = os.path.join(ryzen_ai_folder, "npu")
     if not os.path.isdir(npu_dir):
         raise RuntimeError(
             f"The npu artifacts are missing from the Ryzen AI folder {ryzen_ai_folder}. "
@@ -171,15 +140,7 @@ def get_oga_hybrid_dir():
     version_info = get_ryzen_ai_version_info()
     version = version_info["version"]
     ryzen_ai_folder = get_ryzen_ai_path()
-    if "1.3.0" in version:
-        hybrid_dir = os.path.join(
-            ryzen_ai_folder,
-            "hybrid",
-            "hybrid-llm-artifacts_1.3.0_lounge",
-            "hybrid-llm-artifacts",
-        )
-    else:
-        hybrid_dir = os.path.join(ryzen_ai_folder, "hybrid")
+    hybrid_dir = os.path.join(ryzen_ai_folder, "hybrid")
     if not os.path.isdir(hybrid_dir):
         raise RuntimeError(
             f"The hybrid artifacts are missing from the Ryzen AI folder {ryzen_ai_folder}. "
@@ -189,6 +150,37 @@ def get_oga_hybrid_dir():
     return hybrid_dir, version
+def _get_ryzenai_version_info(device=None):
+    """
+    Centralized version detection for RyzenAI installations.
+    Uses lazy imports to avoid import errors when OGA is not installed.
+    """
+    try:
+        # Lazy import to avoid errors when OGA is not installed
+        from packaging.version import Version
+        import onnxruntime_genai as og
+        if Version(og.__version__) >= Version("0.7.0"):
+            oga_path = os.path.dirname(og.__file__)
+            if og.__version__ == "0.7.0.2":
+                return "1.5.0", oga_path
+            else:
+                return "1.4.0", oga_path
+        else:
+            if device == "npu":
+                oga_path, version = get_oga_npu_dir()
+            else:
+                oga_path, version = get_oga_hybrid_dir()
+            return version, oga_path
+    except ImportError as e:
+        raise ImportError(
+            f"{e}\n Please install lemonade-sdk with "
+            "one of the oga extras, for example:\n"
+            "pip install lemonade-sdk[dev,oga-cpu]\n"
+            "See https://lemonade_server.ai/install_options.html for details"
+        ) from e
 def download_lfs_file(token, file, output_filename):
     """Downloads a file from LFS"""
     import requests
@@ -426,8 +418,6 @@ class Install:
                 "npu",
                 "hybrid",
                 "unified",
-                "npu-1.3.0",
-                "hybrid-1.3.0",
                 "npu-1.4.0",
                 "hybrid-1.4.0",
                 "unified-1.4.0",
@@ -524,25 +514,14 @@ class Install:
         # Install all whl files in the specified wheels folder
         if wheels_full_path is not None:
             print(f"\nInstalling wheels from {wheels_full_path}\n")
-            if version == "1.3.0":
-                # Install one wheel file at a time (1.3.0 npu build only works this way)
-                for file in os.listdir(wheels_full_path):
-                    if file.endswith(".whl"):
-                        install_cmd = (
-                            f"{sys.executable} -m pip install "
-                            f"{os.path.join(wheels_full_path, file)}"
-                        )
-                        print(f"\nInstalling {file} with command {install_cmd}\n")
-                        subprocess.run(install_cmd, check=True, shell=True)
-            else:
-                # Install all the wheel files together, allowing pip to work out the dependencies
-                wheel_files = glob.glob(os.path.join(wheels_full_path, "*.whl"))
-                install_cmd = [sys.executable, "-m", "pip", "install"] + wheel_files
-                subprocess.run(
-                    install_cmd,
-                    check=True,
-                    shell=True,
-                )
+            # Install all the wheel files together, allowing pip to work out the dependencies
+            wheel_files = glob.glob(os.path.join(wheels_full_path, "*.whl"))
+            install_cmd = [sys.executable, "-m", "pip", "install"] + wheel_files
+            subprocess.run(
+                install_cmd,
+                check=True,
+                shell=True,
+            )
         # Delete the zip file
         print(f"\nCleaning up, removing {archive_file_path}\n")
@@ -611,10 +590,7 @@ class Install:
         license_file = npu_install_data[version].get("license_file", None)
         license_tag = npu_install_data[version].get("license_tag", None)
         install_dir = os.path.join(ryzen_ai_folder, "npu")
-        if version == "1.3.0":
-            wheels_full_path = os.path.join(install_dir, "amd_oga/wheels")
-        else:
-            wheels_full_path = os.path.join(install_dir, "wheels")
+        wheels_full_path = os.path.join(install_dir, "wheels")
         if license_file:
             Install._get_license_acceptance(version, license_file, license_tag, yes)
@@ -641,17 +617,7 @@ class Install:
         license_file = hybrid_install_data[version].get("license_file", None)
         license_tag = hybrid_install_data[version].get("license_tag", None)
         install_dir = os.path.join(ryzen_ai_folder, "hybrid")
-        if version == "1.3.0":
-            wheels_full_path = os.path.join(
-                ryzen_ai_folder,
-                "hybrid",
-                "hybrid-llm-artifacts_1.3.0_lounge",
-                "hybrid-llm-artifacts",
-                "onnxruntime_genai",
-                "wheel",
-            )
-        else:
-            wheels_full_path = os.path.join(install_dir, "wheels")
+        wheels_full_path = os.path.join(install_dir, "wheels")
         if license_file:
             Install._get_license_acceptance(version, license_file, license_tag, yes)
@@ -669,6 +635,21 @@ class Install:
         # Check if the processor is supported before proceeding
         check_ryzen_ai_processor()
+        warning_msg = (
+            "\n" + "=" * 80 + "\n"
+            "WARNING: IMPORTANT: NEW RYZEN AI 1.5.0 INSTALLATION PROCESS\n"
+            + "=" * 80
+            + "\n"
+            "Starting with Ryzen AI 1.5.0, installation is now available through PyPI.\n"
+            "For new installations, consider using:\n\n"
+            "pip install lemonade-sdk[oga-ryzenai] --extra-index-url https://pypi.amd.com/simple\n\n"
+            "This legacy installation method (lemonade-install --ryzenai) is still\n"
+            "supported for version 1.4.0, but may be deprecated in future releases.\n"
+            + "=" * 80
+            + "\n"
+        )
+        print(warning_msg)
         # Delete any previous Ryzen AI installation in this environment
         ryzen_ai_folder = get_ryzen_ai_path(check_exists=False)
         if os.path.exists(ryzen_ai_folder):

{lemonade_sdk-8.0.5.dist-info → lemonade_sdk-8.1.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: lemonade-sdk
-Version: 8.0.5
+Version: 8.1.0
 Summary: Lemonade SDK: Your LLM Aide for Validation and Deployment
 Author-email: lemonade@amd.com
 Requires-Python: >=3.10, <3.13
@@ -22,16 +22,15 @@ Requires-Dist: pytz
 Requires-Dist: zstandard
 Requires-Dist: fastapi
 Requires-Dist: uvicorn[standard]
-Requires-Dist: openai>=1.81.0
-Requires-Dist: transformers<=4.51.3
+Requires-Dist: openai<1.97.1,>=1.81.0
+Requires-Dist: transformers<=4.53.2
 Requires-Dist: jinja2
 Requires-Dist: tabulate
 Requires-Dist: sentencepiece
 Requires-Dist: huggingface-hub==0.33.0
-Provides-Extra: oga-hybrid
-Requires-Dist: onnx==1.16.1; extra == "oga-hybrid"
-Requires-Dist: numpy==1.26.4; extra == "oga-hybrid"
-Requires-Dist: protobuf>=6.30.1; extra == "oga-hybrid"
+Provides-Extra: oga-ryzenai
+Requires-Dist: onnxruntime-genai-directml-ryzenai==0.7.0.2; extra == "oga-ryzenai"
+Requires-Dist: protobuf>=6.30.1; extra == "oga-ryzenai"
 Provides-Extra: oga-cpu
 Requires-Dist: onnxruntime-genai==0.8.2; extra == "oga-cpu"
 Requires-Dist: onnxruntime>=1.22.0; extra == "oga-cpu"
@@ -43,14 +42,32 @@ Requires-Dist: pandas>=1.5.3; extra == "dev"
 Requires-Dist: matplotlib; extra == "dev"
 Requires-Dist: human-eval-windows==1.0.4; extra == "dev"
 Requires-Dist: lm-eval[api]; extra == "dev"
+Provides-Extra: oga-hybrid
+Requires-Dist: lemonade-sdk[oga-ryzenai]; extra == "oga-hybrid"
+Provides-Extra: oga-unified
+Requires-Dist: lemonade-sdk[oga-ryzenai]; extra == "oga-unified"
 Provides-Extra: oga-hybrid-minimal
-Requires-Dist: lemonade-sdk[oga-hybrid]; extra == "oga-hybrid-minimal"
+Requires-Dist: lemonade-sdk[oga-ryzenai]; extra == "oga-hybrid-minimal"
 Provides-Extra: oga-cpu-minimal
 Requires-Dist: lemonade-sdk[oga-cpu]; extra == "oga-cpu-minimal"
+Provides-Extra: oga-npu-minimal
+Requires-Dist: lemonade-sdk[oga-ryzenai]; extra == "oga-npu-minimal"
 Provides-Extra: llm
 Requires-Dist: lemonade-sdk[dev]; extra == "llm"
 Provides-Extra: llm-oga-cpu
 Requires-Dist: lemonade-sdk[dev,oga-cpu]; extra == "llm-oga-cpu"
+Provides-Extra: llm-oga-npu
+Requires-Dist: onnx==1.16.0; extra == "llm-oga-npu"
+Requires-Dist: onnxruntime==1.18.0; extra == "llm-oga-npu"
+Requires-Dist: numpy==1.26.4; extra == "llm-oga-npu"
+Requires-Dist: protobuf>=6.30.1; extra == "llm-oga-npu"
+Requires-Dist: lemonade-sdk[dev]; extra == "llm-oga-npu"
+Provides-Extra: llm-oga-hybrid
+Requires-Dist: onnx==1.16.1; extra == "llm-oga-hybrid"
+Requires-Dist: numpy==1.26.4; extra == "llm-oga-hybrid"
+Requires-Dist: protobuf>=6.30.1; extra == "llm-oga-hybrid"
+Provides-Extra: llm-oga-unified
+Requires-Dist: lemonade-sdk[dev,llm-oga-hybrid]; extra == "llm-oga-unified"
 Provides-Extra: llm-oga-igpu
 Requires-Dist: onnxruntime-genai-directml==0.6.0; extra == "llm-oga-igpu"
 Requires-Dist: onnxruntime-directml<1.22.0,>=1.19.0; extra == "llm-oga-igpu"
@@ -61,16 +78,6 @@ Requires-Dist: onnxruntime-genai-cuda==0.8.2; extra == "llm-oga-cuda"
 Requires-Dist: onnxruntime-gpu>=1.22.0; extra == "llm-oga-cuda"
 Requires-Dist: transformers<=4.51.3; extra == "llm-oga-cuda"
 Requires-Dist: lemonade-sdk[dev]; extra == "llm-oga-cuda"
-Provides-Extra: llm-oga-npu
-Requires-Dist: onnx==1.16.0; extra == "llm-oga-npu"
-Requires-Dist: onnxruntime==1.18.0; extra == "llm-oga-npu"
-Requires-Dist: numpy==1.26.4; extra == "llm-oga-npu"
-Requires-Dist: protobuf>=6.30.1; extra == "llm-oga-npu"
-Requires-Dist: lemonade-sdk[dev]; extra == "llm-oga-npu"
-Provides-Extra: llm-oga-hybrid
-Requires-Dist: lemonade-sdk[dev,oga-hybrid]; extra == "llm-oga-hybrid"
-Provides-Extra: llm-oga-unified
-Requires-Dist: lemonade-sdk[llm-oga-hybrid]; extra == "llm-oga-unified"
 Dynamic: author-email
 Dynamic: description
 Dynamic: description-content-type
@@ -174,7 +181,7 @@ lemonade-server list
 ## Model Library
-Lemonade supports both GGUF and ONNX models as detailed in the [Supported Configuration](#supported-configurations) section. A list of all built-in models is available [here](https://lemonade-server.ai/docs/server/models/).
+Lemonade supports both GGUF and ONNX models as detailed in the [Supported Configuration](#supported-configurations) section. A list of all built-in models is available [here](https://lemonade-server.ai/docs/server/server_models/).
 You can also import custom GGUF and ONNX models from Hugging Face by using our [Model Manager](http://localhost:8000/#model-management) (requires server to be running).
 <p align="center">
@@ -263,7 +270,7 @@ completion = client.chat.completions.create(
 print(completion.choices[0].message.content)
 ```
-For more detailed integration instructions, see the [Integration Guide](./server_integration.md).
+For more detailed integration instructions, see the [Integration Guide](./docs/server/server_integration.md).
 ## Beyond an LLM Server
@@ -272,6 +279,10 @@ The [Lemonade SDK](./docs/README.md) also include the following components:
 - 🐍 **[Lemonade API](./docs/lemonade_api.md)**: High-level Python API to directly integrate Lemonade LLMs into Python applications.
 - 🖥️ **[Lemonade CLI](./docs/dev_cli/README.md)**: The `lemonade` CLI lets you mix-and-match LLMs (ONNX, GGUF, SafeTensors) with prompting templates, accuracy testing, performance benchmarking, and memory profiling to characterize your models on your hardware.
+## FAQ
+To read our frequently asked questions, see our [FAQ Guide](./docs/faq.md)
 ## Contributing
 We are actively seeking collaborators from across the industry. If you would like to contribute to this project, please check out our [contribution guide](./docs/contribute.md).
@@ -284,7 +295,7 @@ New contributors can find beginner-friendly issues tagged with "Good First Issue
 ## Maintainers
-This project is sponsored by AMD. It is maintained by @danielholanda @jeremyfowers @ramkrishna @vgodsoe in equal measure. You can reach us by filing an [issue](https://github.com/lemonade-sdk/lemonade/issues), email [lemonade@amd.com](mailto:lemonade@amd.com), or join our [Discord](https://discord.gg/5xXzkMu8Zk).
+This project is sponsored by AMD. It is maintained by @danielholanda @jeremyfowers @ramkrishna @vgodsoe in equal measure. You can reach us by filing an [issue](https://github.com/lemonade-sdk/lemonade/issues), emailing [lemonade@amd.com](mailto:lemonade@amd.com), or joining our [Discord](https://discord.gg/5xXzkMu8Zk).
 ## License

{lemonade_sdk-8.0.5.dist-info → lemonade_sdk-8.1.0.dist-info}/RECORD RENAMED Viewed

@@ -1,17 +1,17 @@
 lemonade/__init__.py,sha256=W1Qk7r0rnQqFhPNHp6BIBT_q-OH3s-8Q_POoVfAmKW0,117
 lemonade/api.py,sha256=kGz8N_9TuN3peFG8fES0odN0bWR9itLNomlR-FC2z8k,5515
-lemonade/cache.py,sha256=djr2qgyUUAWlQv8FehU9qlNtCwK0IZqo82hcBDyZ3-A,2850
+lemonade/cache.py,sha256=5iZbk273TiTMqK_vdzPOPYTo6VsWW2gNByOISA9zi1w,3002
 lemonade/cli.py,sha256=9Pcs3PcrWC2F8_pcBaz09xHUICIJTvpemBdPGyXkjIk,4395
 lemonade/sequence.py,sha256=KSH7BPsiyDKsOsg_ziQKEGsDwMmuO_YbgPRBxkZd0pw,13267
 lemonade/state.py,sha256=sdSezla7Cd7KYL90xY3p9kcNV4ndSyN6UvNLOr3vBMA,5261
-lemonade/version.py,sha256=obOXkQD52zgzH-mM2spS6LQ-gEWkuaiGpNTM_ISH0D8,22
+lemonade/version.py,sha256=c04nFsyfS0zYoDvZjLO-uEi12TFB5EWSD6fiWiI7OLQ,22
 lemonade/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 lemonade/common/build.py,sha256=zTb0m1-kuUx6zw5QHp2SNnVuN6jOTMQ2FCdj9iH374U,6140
 lemonade/common/cli_helpers.py,sha256=hjBfXrTtFl8gmCFlL-ksviXR0mOcdPtTWVNKoEp3PG4,4993
 lemonade/common/exceptions.py,sha256=w83sVKmL1QXoJlGjj_bRyjIBMhlMqdVQy_FEOTu2YQI,2050
 lemonade/common/filesystem.py,sha256=QV3cHhKNu-7W2rr8wZ4JQfD2rP_5T2Js7jiDQBYWHVQ,12142
 lemonade/common/inference_engines.py,sha256=lcmir_pATr71TfSBJoIZEi3G9xyxNwi2_xpPvPD8_xI,12932
-lemonade/common/network.py,sha256=EkWTxjJ-UM469nEJdzEl-5UNO7dPfVfFzU2SVMMPaD0,1425
+lemonade/common/network.py,sha256=p1lWJkN0H5hCpb4rKi3Zc47W_BRrrm-7ghdTALJLGqU,1944
 lemonade/common/printing.py,sha256=GFFzrXIineIOMa9yu0lo5sL4j6A5BBg_T9aUCdP-juw,3229
 lemonade/common/status.py,sha256=xSOZN508cdRtrs1HVyr9zmASYg69EsZBLSs0lroLoCM,16519
 lemonade/common/system_info.py,sha256=dOtX8WLHCz1xmURZWnqhDbyNZv_AulrpX_bbI58eHFQ,27084
@@ -21,51 +21,52 @@ lemonade/profilers/memory_tracker.py,sha256=1iuKt0FmNVYLDnOc-oZM8dX9TUksvoxO0m2E
 lemonade/profilers/profiler.py,sha256=Y5FSbc386bMlTVbqCuya9pYrso5aTthxahR1V_ZKQ9E,1902
 lemonade/tools/__init__.py,sha256=_6xRc-FHxmujoLjLjWtpYrWYEXtCSneSy-5ya01kyPk,53
 lemonade/tools/accuracy.py,sha256=9HCmczDngkBUuUrt49d2CkRo4J0qyWoFYs5cj20bGkg,11714
-lemonade/tools/adapter.py,sha256=HG54iMd6HDPZ4vnQIl7codq3HzffWbcHSIs_jVbNbhU,2958
+lemonade/tools/adapter.py,sha256=Ex63Y1SPCOSV4M_QtzEn3YVd39d3yew0lpmEFgp8aH4,3169
 lemonade/tools/bench.py,sha256=aN5LMA_EH6-ZhAH3Gf26JYL7s0eKpUd3j-bReRhzvEY,10016
 lemonade/tools/humaneval.py,sha256=JbxuoOzvR4iyxZv4R6MI7a3gUt5ef_Jj6Ie-9VP2wzY,9531
 lemonade/tools/management_tools.py,sha256=U8GaJnjdXyQ9sw8UxBQMc7glpaLciaVphASaQS4kJsA,10202
 lemonade/tools/mmlu.py,sha256=c2QaIMDzjqxCvgHlMXmy_dP1sAFkwkDxL7RO2nogI6s,11071
 lemonade/tools/perplexity.py,sha256=eiaTZ3yhqF2pfwOffVbKKJLwjSri7Im2pC-tBJr7LLU,5638
-lemonade/tools/prompt.py,sha256=cy6McZeLgk26xG1dJEY-cYnY2x8FUdyOOSG86WfBKCg,9348
+lemonade/tools/prompt.py,sha256=PyLksp1k8jsZsU7XBRK61k1DUHhbdLa20h-AP8Noh3w,9011
 lemonade/tools/tool.py,sha256=UsxVYukfm_iM3BfeGYPZxQlTK5UfDfDOl3RIyLr8A1Y,13256
 lemonade/tools/huggingface/bench.py,sha256=-mTfldCtquL4mspq8ykVwDc9Mut5Ecv_jHJnSb0CYGE,6734
 lemonade/tools/huggingface/load.py,sha256=KsSGOBBD-tNEIfYC8mCWV_jpnkjHMhN3juVmC1Ln4uQ,7745
-lemonade/tools/huggingface/utils.py,sha256=xybIWOEXHaMuw-nAEu3aITdvZSHcGKgZ9kFS5mIWcEg,13873
-lemonade/tools/llamacpp/bench.py,sha256=A1X8ULQMxPVsff-AdiUsbWQUKpx7U7nFRNHFJRPdv3Q,5946
-lemonade/tools/llamacpp/load.py,sha256=o3vVlefdxmdkHnuvFR3TOxiJkpNAuNFcs9Whfp24jpg,9236
+lemonade/tools/huggingface/utils.py,sha256=j1S-IgjDsznUIVwkHSqqChmFyqIx9f3WcEelzohWwvU,13955
+lemonade/tools/llamacpp/bench.py,sha256=1fkE02ecg-jRk92i5dTAXz6re14WH8bd-Z9l-m3lbDA,4844
+lemonade/tools/llamacpp/load.py,sha256=SKacK2n8LpC4DN4yALyEpV2c8_sgOv2G7t6Nlyu7XXg,6273
+lemonade/tools/llamacpp/utils.py,sha256=vHA5kykkdHSsMGmbEA4RyOHr8wFIh1WenfhCvY8WxZs,22445
 lemonade/tools/oga/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-lemonade/tools/oga/bench.py,sha256=T3c40NevM3NA7CT98B6vBj1nXfdITDqpfMHYSjhjwpA,5061
-lemonade/tools/oga/load.py,sha256=XSznW8lOX_KafSq5J5mIBJzj8YJEBpK0RFGcTE1wnE8,28317
-lemonade/tools/oga/utils.py,sha256=p7faMNfT-rLURC9t_s1S_STQRzzLADqbngUliTOOXeQ,16144
+lemonade/tools/oga/bench.py,sha256=PJXv4UchcS2YPwijNzef8DY4DSAKYxIYY1ycHuH3T34,5005
+lemonade/tools/oga/load.py,sha256=O82ezF7Jhgz3CJrxDWZYqLHyD_0NS1nsvfMWDaaUI4I,33728
+lemonade/tools/oga/utils.py,sha256=Xd7tmNr69u_bCut0hZqA7saUR3NFZlp4bvWo54mOZb0,16918
 lemonade/tools/quark/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 lemonade/tools/quark/quark_load.py,sha256=FJ4LJKTToZbHHWVEOBLadae1a3jCnnY4KvXySHbkJMA,5589
 lemonade/tools/quark/quark_quantize.py,sha256=hwoaXhpBIORvJ16MvewphPkaDEQn3BAgXq5o82Gc-_s,16599
 lemonade/tools/report/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 lemonade/tools/report/llm_report.py,sha256=bVHhwCINA-Ok2EdSwAsLubsc83N3KWOVuwTguw7jDcE,6676
-lemonade/tools/report/table.py,sha256=wJFzKtlmGQH0RQ5O9nevtpMe_-zQ-8zNOndINQuzsjM,27793
+lemonade/tools/report/table.py,sha256=ssqy1bZqF-wptNzKEOj6_9REtCNZyXO8R5vakAtg3R4,27973
 lemonade/tools/server/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-lemonade/tools/server/llamacpp.py,sha256=e1MYKSJBu-jlOE5GQSBsC9CUPAeqw5wXXxoxBKA5zb8,20038
-lemonade/tools/server/serve.py,sha256=Pp_w4iuRMkpJLF-XrTsBIBrSNBQIOl8PRZC_Cj4URnU,57334
+lemonade/tools/server/llamacpp.py,sha256=OP0j74QcowEu3zFEcrKIsBbGDOFemBXS5F5DC6oQHaI,18853
+lemonade/tools/server/serve.py,sha256=0-NprfsU-YrX8Qsf1atEi6wPJWemrPjHKEBHV69SwCQ,57046
 lemonade/tools/server/tool_calls.py,sha256=xrAlQwKG-nv2xLlf8f9CDSaUbyMn8ZtHkds9iZLG9K8,5230
-lemonade/tools/server/tray.py,sha256=4Kf3x8YfRaItPW7lxlEwerD7c5Q2snzcNk3ZrEoae58,17259
+lemonade/tools/server/tray.py,sha256=yoGCM8j_2KzPqo-AlYiauWd8QR56yp6jW6HZ9921Ydg,17525
 lemonade/tools/server/webapp.py,sha256=8Das5yXOaSBLZmSZ_eddJajQFxBhvl5D6GI_hHlGbE0,1040
 lemonade/tools/server/static/favicon.ico,sha256=hMmP9qGJNeZ0mFS86JIqPbZstXMZn0Z76_HfHQpREAU,126745
-lemonade/tools/server/static/styles.css,sha256=jXFPIHPrhRz_CJyRJrYusAECSDTO00sKUu7ajrQgFuA,24655
-lemonade/tools/server/static/webapp.html,sha256=tmwASvULb3d2_NfHEH9rKbEEJl3D7ygXjaCLVYkyWbg,35969
+lemonade/tools/server/static/styles.css,sha256=8wQ5Cg4rbEh03kC8t7ALE7dB20GiD0Pfu5BAxh9hECU,26429
+lemonade/tools/server/static/webapp.html,sha256=KZm1ZFIhQzLT2Y2wy3hFsQxcOxFzv-blaeLzc1ODhb8,36396
 lemonade/tools/server/utils/port.py,sha256=XnIg2qS73QRrsJn6LgHcrJPmku30Tv6vsYcBVMj82K4,2186
 lemonade/tools/server/utils/system_tray.py,sha256=b9lvNv9chJKQxvmH7qzAuUe6H9HsLu7pdHFqGlAJaL0,12654
 lemonade/tools/server/utils/thread.py,sha256=pK9K_6DNWoQ78NArkAX3Ym2WsxLnCs9sKTk6TitlYnI,2804
 lemonade_install/__init__.py,sha256=26zohKg2jgr_5y7tObduWMYQg8zCTWMZHL8lfi2zZVQ,40
-lemonade_install/install.py,sha256=DJWR36QSjZtvEwRjYPNSjhYgoxLjI_6OPrCMZjL0ChY,28263
-lemonade_sdk-8.0.5.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-lemonade_sdk-8.0.5.dist-info/licenses/NOTICE.md,sha256=B8lEqi4QE41J9ljz4Riv2JgHD1v8GCZE6nNBHO3KIA0,2135
-lemonade_server/cli.py,sha256=2Un5uLK04fIxlfcTiZ0T_EWbbaq2tYymkUHNFeuvB7g,16041
-lemonade_server/model_manager.py,sha256=0HqLR38uOu_hxRWVYQ_P6YmwaR-jkDuaAqGYo60X8C0,16702
-lemonade_server/pydantic_models.py,sha256=rp_FFhoTwg6jNmgol-kShwffnRDGbt7jTbIeELvgOIo,2876
-lemonade_server/server_models.json,sha256=Y-j9KAvHmfv77welC0rfRao4inLBce6AVySb-oy_uNE,7519
-lemonade_sdk-8.0.5.dist-info/METADATA,sha256=e2w0jPyEnyk-SeLAbYZgeGldq-2CQHm9Hly_mQgZ8uo,15224
-lemonade_sdk-8.0.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-lemonade_sdk-8.0.5.dist-info/entry_points.txt,sha256=gJppn0ETtXXR6ceKWEIRdk42kMC7ps59EmU3NCPyPUk,144
-lemonade_sdk-8.0.5.dist-info/top_level.txt,sha256=10ap5GNiPhalO4V50LRoxA1FqRT9g3Xkia6BITu880k,42
-lemonade_sdk-8.0.5.dist-info/RECORD,,
+lemonade_install/install.py,sha256=TBX-VwEHcPo4WX0K_12pKKINnIK3o4SUo3L5XjkqEtw,27669
+lemonade_sdk-8.1.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+lemonade_sdk-8.1.0.dist-info/licenses/NOTICE.md,sha256=B8lEqi4QE41J9ljz4Riv2JgHD1v8GCZE6nNBHO3KIA0,2135
+lemonade_server/cli.py,sha256=6QJ5fxNLuVUbuHauA5JHXf0H5dqJ5E4GNTo4YoMOJtg,16049
+lemonade_server/model_manager.py,sha256=O3fIX52AqU0z10WzPmNEA3lQ_KjOqNq_G-SxjwIgEio,10781
+lemonade_server/pydantic_models.py,sha256=qEvF7x7AuHCHMiByVzGGuLdQTNs233Sw9uQq5cpI6is,2721
+lemonade_server/server_models.json,sha256=gitKHj_VHANxjtcXeE5zFpukVO0HyEfKhu3ZaZsj2xo,8867
+lemonade_sdk-8.1.0.dist-info/METADATA,sha256=c3JxCUYw5ujhGSb3FX3mG6UmgG5BLqik8a5j4oe8n7o,15712
+lemonade_sdk-8.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+lemonade_sdk-8.1.0.dist-info/entry_points.txt,sha256=gJppn0ETtXXR6ceKWEIRdk42kMC7ps59EmU3NCPyPUk,144
+lemonade_sdk-8.1.0.dist-info/top_level.txt,sha256=10ap5GNiPhalO4V50LRoxA1FqRT9g3Xkia6BITu880k,42
+lemonade_sdk-8.1.0.dist-info/RECORD,,

lemonade_server/cli.py CHANGED Viewed

@@ -57,7 +57,7 @@ def serve(
     log_level = log_level if log_level is not None else DEFAULT_LOG_LEVEL
     # Hidden environment variable to enable input truncation (experimental feature)
-    truncate_inputs = "LEMONADE_TRUNCATE_INPUTS" in os.environ
+    truncate_inputs = os.environ.get("LEMONADE_TRUNCATE_INPUTS", None)
     # Start the server
     serve_kwargs = {

lemonade_server/model_manager.py CHANGED Viewed

@@ -6,31 +6,14 @@ import huggingface_hub
 from importlib.metadata import distributions
 from lemonade_server.pydantic_models import PullConfig
 from lemonade.cache import DEFAULT_CACHE_DIR
+from lemonade.tools.llamacpp.utils import parse_checkpoint, download_gguf
+from lemonade.common.network import custom_snapshot_download
 USER_MODELS_FILE = os.path.join(DEFAULT_CACHE_DIR, "user_models.json")
 class ModelManager:
-    @staticmethod
-    def parse_checkpoint(checkpoint: str) -> tuple[str, str | None]:
-        """
-        Parse a checkpoint string that may contain a variant separated by a colon.
-        For GGUF models, the format is "repository:variant" (e.g., "unsloth/Qwen3-0.6B-GGUF:Q4_0").
-        For other models, there is no variant.
-        Args:
-            checkpoint: The checkpoint string, potentially with variant
-        Returns:
-            tuple: (base_checkpoint, variant) where variant is None if no colon is present
-        """
-        if ":" in checkpoint:
-            base_checkpoint, variant = checkpoint.split(":", 1)
-            return base_checkpoint, variant
-        return checkpoint, None
     @property
     def supported_models(self) -> dict:
         """
@@ -98,7 +81,7 @@ class ModelManager:
         downloaded_models = {}
         downloaded_checkpoints = self.downloaded_hf_checkpoints
         for model in self.supported_models:
-            base_checkpoint = self.parse_checkpoint(
+            base_checkpoint = parse_checkpoint(
                 self.supported_models[model]["checkpoint"]
             )[0]
             if base_checkpoint in downloaded_checkpoints:
@@ -113,132 +96,6 @@ class ModelManager:
         """
         return self.filter_models_by_backend(self.downloaded_models)
-    def identify_gguf_models(
-        self, checkpoint: str, variant: str, mmproj: str
-    ) -> tuple[dict, list[str]]:
-        """
-        Identifies the GGUF model files in the repository that match the variant.
-        """
-        hint = """
-        The CHECKPOINT:VARIANT scheme is used to specify model files in Hugging Face repositories.
-        The VARIANT format can be one of several types:
-        1. Full filename: exact file to download
-        2. None/empty: gets the first .gguf file in the repository (excludes mmproj files)
-        3. Quantization variant: find a single file ending with the variant name (case insensitive)
-        4. Folder name: downloads all .gguf files in the folder that matches the variant name (case insensitive)
-        Examples:
-        - "unsloth/Qwen3-8B-GGUF:qwen3.gguf" -> downloads "qwen3.gguf"
-        - "unsloth/Qwen3-30B-A3B-GGUF" -> downloads "Qwen3-30B-A3B-GGUF.gguf"
-        - "unsloth/Qwen3-8B-GGUF:Q4_1" -> downloads "Qwen3-8B-GGUF-Q4_1.gguf"
-        - "unsloth/Qwen3-30B-A3B-GGUF:Q4_0" -> downloads all files in "Q4_0/" folder
-        """
-        repo_files = huggingface_hub.list_repo_files(checkpoint)
-        sharded_files = []
-        # (case 1) If variant ends in .gguf, use it directly
-        if variant and variant.endswith(".gguf"):
-            variant_name = variant
-            if variant_name not in repo_files:
-                raise ValueError(
-                    f"File {variant} not found in Hugging Face repository {checkpoint}. {hint}"
-                )
-        # (case 2) If no variant is provided, get the first .gguf file in the repository
-        elif variant is None:
-            all_variants = [
-                f for f in repo_files if f.endswith(".gguf") and "mmproj" not in f
-            ]
-            if len(all_variants) == 0:
-                raise ValueError(
-                    f"No .gguf files found in Hugging Face repository {checkpoint}. {hint}"
-                )
-            variant_name = all_variants[0]
-        else:
-            # (case 3) Find a single file ending with the variant name (case insensitive)
-            end_with_variant = [
-                f
-                for f in repo_files
-                if f.lower().endswith(f"{variant}.gguf".lower())
-                and "mmproj" not in f.lower()
-            ]
-            if len(end_with_variant) == 1:
-                variant_name = end_with_variant[0]
-            elif len(end_with_variant) > 1:
-                raise ValueError(
-                    f"Multiple .gguf files found for variant {variant}, but only one is allowed. {hint}"
-                )
-            # (case 4) Check whether the variant corresponds to a folder with sharded files (case insensitive)
-            else:
-                sharded_files = [
-                    f
-                    for f in repo_files
-                    if f.endswith(".gguf")
-                    and f.lower().startswith(f"{variant}/".lower())
-                ]
-                if not sharded_files:
-                    raise ValueError(
-                        f"No .gguf files found for variant {variant}. {hint}"
-                    )
-                # Sort to ensure consistent ordering
-                sharded_files.sort()
-                # Use first file as primary (this is how llamacpp handles it)
-                variant_name = sharded_files[0]
-        core_files = {"variant": variant_name}
-        # If there is a mmproj file, add it to the patterns
-        if mmproj:
-            if mmproj not in repo_files:
-                raise ValueError(
-                    f"The provided mmproj file {mmproj} was not found in {checkpoint}."
-                )
-            core_files["mmproj"] = mmproj
-        return core_files, sharded_files
-    def download_gguf(self, model_config: PullConfig) -> dict:
-        """
-        Downloads the GGUF file for the given model configuration.
-        For sharded models, if the variant points to a folder (e.g. Q4_0), all files in that folder
-        will be downloaded but only the first file will be returned for loading.
-        """
-        # This code handles all cases by constructing the appropriate filename or pattern
-        checkpoint, variant = self.parse_checkpoint(model_config.checkpoint)
-        # Identify the GGUF model files in the repository that match the variant
-        core_files, sharded_files = self.identify_gguf_models(
-            checkpoint, variant, model_config.mmproj
-        )
-        # Download the files
-        snapshot_folder = huggingface_hub.snapshot_download(
-            repo_id=checkpoint,
-            allow_patterns=list(core_files.values()) + sharded_files,
-        )
-        # Ensure we downloaded all expected files
-        for file in list(core_files.values()) + sharded_files:
-            expected_path = os.path.join(snapshot_folder, file)
-            if not os.path.exists(expected_path):
-                raise ValueError(
-                    f"Hugging Face snapshot download for {model_config.checkpoint} "
-                    f"expected file {file} not found at {expected_path}"
-                )
-        # Return a dict of the full path of the core GGUF files
-        return {
-            file_name: os.path.join(snapshot_folder, file_path)
-            for file_name, file_path in core_files.items()
-        }
     def download_models(
         self,
         models: list[str],
@@ -317,9 +174,9 @@ class ModelManager:
             print(f"Downloading {model} ({checkpoint_to_download})")
             if "gguf" in checkpoint_to_download.lower():
-                self.download_gguf(gguf_model_config)
+                download_gguf(gguf_model_config.checkpoint, gguf_model_config.mmproj)
             else:
-                huggingface_hub.snapshot_download(repo_id=checkpoint_to_download)
+                custom_snapshot_download(checkpoint_to_download)
             # Register the model in user_models.json, creating that file if needed
             # We do this registration after the download so that we don't register
@@ -373,12 +230,12 @@ class ModelManager:
         print(f"Deleting {model_name} ({checkpoint})")
         # Handle GGUF models that have the format "checkpoint:variant"
-        base_checkpoint = self.parse_checkpoint(checkpoint)[0]
+        base_checkpoint = parse_checkpoint(checkpoint)[0]
         try:
             # Get the local path using snapshot_download with local_files_only=True
-            snapshot_path = huggingface_hub.snapshot_download(
-                repo_id=base_checkpoint, local_files_only=True
+            snapshot_path = custom_snapshot_download(
+                base_checkpoint, local_files_only=True
             )
             # Navigate up to the model directory (parent of snapshots directory)

lemonade-sdk 8.0.5__py3-none-any.whl → 8.1.0__py3-none-any.whl

Potentially problematic release.

lemonade-sdk 8.0.5__py3-none-any.whl → 8.1.0__py3-none-any.whl