PyPI - lemonade-sdk - Versions diffs - 8.0.6__py3-none-any.whl → 8.1.0__py3-none-any.whl - Mend

lemonade-sdk 8.0.6__py3-none-any.whl → 8.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of lemonade-sdk might be problematic. Click here for more details.

Files changed (21) hide show

lemonade/common/network.py +18 -1
lemonade/tools/llamacpp/bench.py +3 -1
lemonade/tools/llamacpp/utils.py +7 -7
lemonade/tools/oga/load.py +239 -112
lemonade/tools/oga/utils.py +19 -7
lemonade/tools/server/serve.py +19 -28
lemonade/tools/server/static/styles.css +5 -6
lemonade/tools/server/static/webapp.html +3 -0
lemonade/version.py +1 -1
lemonade_install/install.py +65 -84
{lemonade_sdk-8.0.6.dist-info → lemonade_sdk-8.1.0.dist-info}/METADATA +30 -19
{lemonade_sdk-8.0.6.dist-info → lemonade_sdk-8.1.0.dist-info}/RECORD +21 -21
lemonade_server/cli.py +1 -1
lemonade_server/model_manager.py +4 -3
lemonade_server/pydantic_models.py +1 -4
lemonade_server/server_models.json +35 -11
{lemonade_sdk-8.0.6.dist-info → lemonade_sdk-8.1.0.dist-info}/WHEEL +0 -0
{lemonade_sdk-8.0.6.dist-info → lemonade_sdk-8.1.0.dist-info}/entry_points.txt +0 -0
{lemonade_sdk-8.0.6.dist-info → lemonade_sdk-8.1.0.dist-info}/licenses/LICENSE +0 -0
{lemonade_sdk-8.0.6.dist-info → lemonade_sdk-8.1.0.dist-info}/licenses/NOTICE.md +0 -0
{lemonade_sdk-8.0.6.dist-info → lemonade_sdk-8.1.0.dist-info}/top_level.txt +0 -0

lemonade/tools/server/serve.py CHANGED Viewed

@@ -284,7 +284,7 @@ class Server(ManagementTool):
     def _setup_server_common(
         self,
         port: int,
-        truncate_inputs: bool = False,
+        truncate_inputs: Optional[int] = None,
         log_level: str = DEFAULT_LOG_LEVEL,
         tray: bool = False,
         log_file: str = None,
@@ -295,7 +295,7 @@ class Server(ManagementTool):
         Args:
             port: Port number for the server
-            truncate_inputs: Whether to truncate inputs if they exceed max length
+            truncate_inputs: Truncate messages to this length
             log_level: Logging level to configure
             threaded_mode: Whether this is being set up for threaded execution
         """
@@ -372,7 +372,7 @@ class Server(ManagementTool):
         _=None,
         port: int = DEFAULT_PORT,
         log_level: str = DEFAULT_LOG_LEVEL,
-        truncate_inputs: bool = False,
+        truncate_inputs: Optional[int] = None,
         tray: bool = False,
         log_file: str = None,
     ):
@@ -393,7 +393,7 @@ class Server(ManagementTool):
         port: int = DEFAULT_PORT,
         host: str = "localhost",
         log_level: str = "warning",
-        truncate_inputs: bool = False,
+        truncate_inputs: Optional[int] = None,
     ):
         """
         Set up the server for running in a thread.
@@ -1099,29 +1099,20 @@ class Server(ManagementTool):
             )
             self.input_tokens = len(input_ids[0])
-        if (
-            self.llm_loaded.max_prompt_length
-            and self.input_tokens > self.llm_loaded.max_prompt_length
-        ):
-            if self.truncate_inputs:
-                # Truncate input ids
-                truncate_amount = self.input_tokens - self.llm_loaded.max_prompt_length
-                input_ids = input_ids[: self.llm_loaded.max_prompt_length]
-                # Update token count
-                self.input_tokens = len(input_ids)
-                # Show warning message
-                truncation_message = (
-                    f"Input exceeded {self.llm_loaded.max_prompt_length} tokens. "
-                    f"Truncated {truncate_amount} tokens."
-                )
-                logging.warning(truncation_message)
-            else:
-                raise RuntimeError(
-                    f"Prompt tokens ({self.input_tokens}) cannot be greater "
-                    f"than the model's max prompt length ({self.llm_loaded.max_prompt_length})"
-                )
+        if self.truncate_inputs and self.truncate_inputs > self.input_tokens:
+            # Truncate input ids
+            truncate_amount = self.input_tokens - self.truncate_inputs
+            input_ids = input_ids[: self.truncate_inputs]
+            # Update token count
+            self.input_tokens = len(input_ids)
+            # Show warning message
+            truncation_message = (
+                f"Input exceeded {self.truncate_inputs} tokens. "
+                f"Truncated {truncate_amount} tokens."
+            )
+            logging.warning(truncation_message)
         # Log the input tokens early to avoid this not showing due to potential crashes
         logging.debug(f"Input Tokens: {self.input_tokens}")
@@ -1317,7 +1308,7 @@ class Server(ManagementTool):
         self.tokenizer = None
         self.model = None
-        default_message = f"model {model_reference} not found"
+        default_message = "see stack trace and error message below"
         if message:
             detail = message
         else:

lemonade/tools/server/static/styles.css CHANGED Viewed

@@ -27,7 +27,6 @@ body {
   min-height: 100vh;
   display: flex;
   flex-direction: column;
-  padding-bottom: 5rem;
 }
 body::before {
@@ -102,13 +101,9 @@ body::before {
 }
 .site-footer {
-  position: fixed;
-  left: 0;
-  bottom: 0;
-  width: 100%;
   background: transparent;
   padding-top: 0.5rem;
-  z-index: 100;
+  margin-top: auto;
 }
 .dad-joke {
@@ -535,6 +530,10 @@ body::before {
   background-color: #ca4747;
 }
+.model-label.coding {
+  background-color: #ff6b35;
+}
 .model-labels-container {
   display: flex;
   align-items: center;

lemonade/tools/server/static/webapp.html CHANGED Viewed

@@ -109,6 +109,7 @@
                         </label>
                         <select id="register-recipe" name="recipe" required>
                             <option value="llamacpp">llamacpp</option>
+                            <option value="oga-npu">oga-npu</option>
                             <option value="oga-hybrid">oga-hybrid</option>
                             <option value="oga-cpu">oga-cpu</option>
                         </select>
@@ -413,6 +414,8 @@
                     labelClass = 'reasoning';
                 } else if (labelLower === 'reranking') {
                     labelClass = 'reranking';
+                } else if (labelLower === 'coding') {
+                    labelClass = 'coding';
                 }
                 labelSpan.className = `model-label ${labelClass}`;
                 labelSpan.textContent = label;

lemonade/version.py CHANGED Viewed

	@@ -1 +1 @@
1	- __version__ = "8.0.6"
1	+ __version__ = "8.1.0"

lemonade_install/install.py CHANGED Viewed

@@ -24,22 +24,6 @@
 # In any python environment, only one set of artifacts can be installed at a time.
 # Python environments created by Lemonade v6.1.x or earlier will need to be recreated.
 #
-# The Ryzen AI 1.3.0 artifact files use a different directory hierarchy.
-# The Ryzen AI 1.3.0 hybrid artifacts directory hierarchy is:
-#
-#     RYZEN_AI\hybrid\hybrid-llm-artifacts_1.3.0_lounge\hybrid-llm-artifacts\
-#         onnxruntime_genai\lib
-#         onnxruntime_genai\wheel
-#         onnx_utils\bin
-#         eula\eula
-#
-# The Ryzen AI 1.3.0 npu artifacts directory hierarchy is:
-#
-#     RYZEN_AI\npu\amd_oga\
-#         bins\xclbin\stx
-#         libs
-#         wheels
-#
 import argparse
 import glob
@@ -56,6 +40,13 @@ import zipfile
 DEFAULT_RYZEN_AI_VERSION = "1.4.0"
 version_info_filename = "version_info.json"
+# NPU Driver configuration
+NPU_DRIVER_DOWNLOAD_URL = (
+    "https://account.amd.com/en/forms/downloads/"
+    "ryzenai-eula-public-xef.html?filename=NPU_RAI1.5_280_WHQL.zip"
+)
+REQUIRED_NPU_DRIVER_VERSION = "32.0.203.280"
 lemonade_install_dir = Path(__file__).parent.parent.parent
 DEFAULT_QUARK_VERSION = "quark-0.6.0"
 DEFAULT_QUARK_DIR = os.path.join(
@@ -66,14 +57,6 @@ DEFAULT_QUARK_DIR = os.path.join(
 SUPPORTED_RYZEN_AI_SERIES = ["300"]
 npu_install_data = {
-    "1.3.0": {
-        "artifacts_zipfile": "ryzen_ai_13_ga/npu-llm-artifacts_1.3.0.zip",
-        "license_file": (
-            "https://account.amd.com/content/dam/account/en/licenses/download/"
-            "amd-end-user-license-agreement.pdf"
-        ),
-        "license_tag": "Beta ",
-    },
     "1.4.0": {
         "artifacts_zipfile": (
             "https://www.xilinx.com/bin/public/openDownload?"
@@ -88,17 +71,6 @@ npu_install_data = {
 }
 hybrid_install_data = {
-    "1.3.0": {
-        "artifacts_zipfile": (
-            "https://www.xilinx.com/bin/public/openDownload?"
-            "filename=hybrid-llm-artifacts_1.3.0_012725.zip"
-        ),
-        "license_file": (
-            "https://www.xilinx.com/bin/public/openDownload?"
-            "filename=AMD%20End%20User%20License%20Agreement.pdf"
-        ),
-        "license_tag": "",
-    },
     "1.4.0": {
         "artifacts_zipfile": (
             "https://www.xilinx.com/bin/public/openDownload?"
@@ -154,10 +126,7 @@ def get_oga_npu_dir():
     version_info = get_ryzen_ai_version_info()
     version = version_info["version"]
     ryzen_ai_folder = get_ryzen_ai_path()
-    if "1.3.0" in version:
-        npu_dir = os.path.join(ryzen_ai_folder, "npu", "amd_oga")
-    else:
-        npu_dir = os.path.join(ryzen_ai_folder, "npu")
+    npu_dir = os.path.join(ryzen_ai_folder, "npu")
     if not os.path.isdir(npu_dir):
         raise RuntimeError(
             f"The npu artifacts are missing from the Ryzen AI folder {ryzen_ai_folder}. "
@@ -171,15 +140,7 @@ def get_oga_hybrid_dir():
     version_info = get_ryzen_ai_version_info()
     version = version_info["version"]
     ryzen_ai_folder = get_ryzen_ai_path()
-    if "1.3.0" in version:
-        hybrid_dir = os.path.join(
-            ryzen_ai_folder,
-            "hybrid",
-            "hybrid-llm-artifacts_1.3.0_lounge",
-            "hybrid-llm-artifacts",
-        )
-    else:
-        hybrid_dir = os.path.join(ryzen_ai_folder, "hybrid")
+    hybrid_dir = os.path.join(ryzen_ai_folder, "hybrid")
     if not os.path.isdir(hybrid_dir):
         raise RuntimeError(
             f"The hybrid artifacts are missing from the Ryzen AI folder {ryzen_ai_folder}. "
@@ -189,6 +150,37 @@ def get_oga_hybrid_dir():
     return hybrid_dir, version
+def _get_ryzenai_version_info(device=None):
+    """
+    Centralized version detection for RyzenAI installations.
+    Uses lazy imports to avoid import errors when OGA is not installed.
+    """
+    try:
+        # Lazy import to avoid errors when OGA is not installed
+        from packaging.version import Version
+        import onnxruntime_genai as og
+        if Version(og.__version__) >= Version("0.7.0"):
+            oga_path = os.path.dirname(og.__file__)
+            if og.__version__ == "0.7.0.2":
+                return "1.5.0", oga_path
+            else:
+                return "1.4.0", oga_path
+        else:
+            if device == "npu":
+                oga_path, version = get_oga_npu_dir()
+            else:
+                oga_path, version = get_oga_hybrid_dir()
+            return version, oga_path
+    except ImportError as e:
+        raise ImportError(
+            f"{e}\n Please install lemonade-sdk with "
+            "one of the oga extras, for example:\n"
+            "pip install lemonade-sdk[dev,oga-cpu]\n"
+            "See https://lemonade_server.ai/install_options.html for details"
+        ) from e
 def download_lfs_file(token, file, output_filename):
     """Downloads a file from LFS"""
     import requests
@@ -426,8 +418,6 @@ class Install:
                 "npu",
                 "hybrid",
                 "unified",
-                "npu-1.3.0",
-                "hybrid-1.3.0",
                 "npu-1.4.0",
                 "hybrid-1.4.0",
                 "unified-1.4.0",
@@ -524,25 +514,14 @@ class Install:
         # Install all whl files in the specified wheels folder
         if wheels_full_path is not None:
             print(f"\nInstalling wheels from {wheels_full_path}\n")
-            if version == "1.3.0":
-                # Install one wheel file at a time (1.3.0 npu build only works this way)
-                for file in os.listdir(wheels_full_path):
-                    if file.endswith(".whl"):
-                        install_cmd = (
-                            f"{sys.executable} -m pip install "
-                            f"{os.path.join(wheels_full_path, file)}"
-                        )
-                        print(f"\nInstalling {file} with command {install_cmd}\n")
-                        subprocess.run(install_cmd, check=True, shell=True)
-            else:
-                # Install all the wheel files together, allowing pip to work out the dependencies
-                wheel_files = glob.glob(os.path.join(wheels_full_path, "*.whl"))
-                install_cmd = [sys.executable, "-m", "pip", "install"] + wheel_files
-                subprocess.run(
-                    install_cmd,
-                    check=True,
-                    shell=True,
-                )
+            # Install all the wheel files together, allowing pip to work out the dependencies
+            wheel_files = glob.glob(os.path.join(wheels_full_path, "*.whl"))
+            install_cmd = [sys.executable, "-m", "pip", "install"] + wheel_files
+            subprocess.run(
+                install_cmd,
+                check=True,
+                shell=True,
+            )
         # Delete the zip file
         print(f"\nCleaning up, removing {archive_file_path}\n")
@@ -611,10 +590,7 @@ class Install:
         license_file = npu_install_data[version].get("license_file", None)
         license_tag = npu_install_data[version].get("license_tag", None)
         install_dir = os.path.join(ryzen_ai_folder, "npu")
-        if version == "1.3.0":
-            wheels_full_path = os.path.join(install_dir, "amd_oga/wheels")
-        else:
-            wheels_full_path = os.path.join(install_dir, "wheels")
+        wheels_full_path = os.path.join(install_dir, "wheels")
         if license_file:
             Install._get_license_acceptance(version, license_file, license_tag, yes)
@@ -641,17 +617,7 @@ class Install:
         license_file = hybrid_install_data[version].get("license_file", None)
         license_tag = hybrid_install_data[version].get("license_tag", None)
         install_dir = os.path.join(ryzen_ai_folder, "hybrid")
-        if version == "1.3.0":
-            wheels_full_path = os.path.join(
-                ryzen_ai_folder,
-                "hybrid",
-                "hybrid-llm-artifacts_1.3.0_lounge",
-                "hybrid-llm-artifacts",
-                "onnxruntime_genai",
-                "wheel",
-            )
-        else:
-            wheels_full_path = os.path.join(install_dir, "wheels")
+        wheels_full_path = os.path.join(install_dir, "wheels")
         if license_file:
             Install._get_license_acceptance(version, license_file, license_tag, yes)
@@ -669,6 +635,21 @@ class Install:
         # Check if the processor is supported before proceeding
         check_ryzen_ai_processor()
+        warning_msg = (
+            "\n" + "=" * 80 + "\n"
+            "WARNING: IMPORTANT: NEW RYZEN AI 1.5.0 INSTALLATION PROCESS\n"
+            + "=" * 80
+            + "\n"
+            "Starting with Ryzen AI 1.5.0, installation is now available through PyPI.\n"
+            "For new installations, consider using:\n\n"
+            "pip install lemonade-sdk[oga-ryzenai] --extra-index-url https://pypi.amd.com/simple\n\n"
+            "This legacy installation method (lemonade-install --ryzenai) is still\n"
+            "supported for version 1.4.0, but may be deprecated in future releases.\n"
+            + "=" * 80
+            + "\n"
+        )
+        print(warning_msg)
         # Delete any previous Ryzen AI installation in this environment
         ryzen_ai_folder = get_ryzen_ai_path(check_exists=False)
         if os.path.exists(ryzen_ai_folder):

{lemonade_sdk-8.0.6.dist-info → lemonade_sdk-8.1.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: lemonade-sdk
-Version: 8.0.6
+Version: 8.1.0
 Summary: Lemonade SDK: Your LLM Aide for Validation and Deployment
 Author-email: lemonade@amd.com
 Requires-Python: >=3.10, <3.13
@@ -22,16 +22,15 @@ Requires-Dist: pytz
 Requires-Dist: zstandard
 Requires-Dist: fastapi
 Requires-Dist: uvicorn[standard]
-Requires-Dist: openai>=1.81.0
+Requires-Dist: openai<1.97.1,>=1.81.0
 Requires-Dist: transformers<=4.53.2
 Requires-Dist: jinja2
 Requires-Dist: tabulate
 Requires-Dist: sentencepiece
 Requires-Dist: huggingface-hub==0.33.0
-Provides-Extra: oga-hybrid
-Requires-Dist: onnx==1.16.1; extra == "oga-hybrid"
-Requires-Dist: numpy==1.26.4; extra == "oga-hybrid"
-Requires-Dist: protobuf>=6.30.1; extra == "oga-hybrid"
+Provides-Extra: oga-ryzenai
+Requires-Dist: onnxruntime-genai-directml-ryzenai==0.7.0.2; extra == "oga-ryzenai"
+Requires-Dist: protobuf>=6.30.1; extra == "oga-ryzenai"
 Provides-Extra: oga-cpu
 Requires-Dist: onnxruntime-genai==0.8.2; extra == "oga-cpu"
 Requires-Dist: onnxruntime>=1.22.0; extra == "oga-cpu"
@@ -43,14 +42,32 @@ Requires-Dist: pandas>=1.5.3; extra == "dev"
 Requires-Dist: matplotlib; extra == "dev"
 Requires-Dist: human-eval-windows==1.0.4; extra == "dev"
 Requires-Dist: lm-eval[api]; extra == "dev"
+Provides-Extra: oga-hybrid
+Requires-Dist: lemonade-sdk[oga-ryzenai]; extra == "oga-hybrid"
+Provides-Extra: oga-unified
+Requires-Dist: lemonade-sdk[oga-ryzenai]; extra == "oga-unified"
 Provides-Extra: oga-hybrid-minimal
-Requires-Dist: lemonade-sdk[oga-hybrid]; extra == "oga-hybrid-minimal"
+Requires-Dist: lemonade-sdk[oga-ryzenai]; extra == "oga-hybrid-minimal"
 Provides-Extra: oga-cpu-minimal
 Requires-Dist: lemonade-sdk[oga-cpu]; extra == "oga-cpu-minimal"
+Provides-Extra: oga-npu-minimal
+Requires-Dist: lemonade-sdk[oga-ryzenai]; extra == "oga-npu-minimal"
 Provides-Extra: llm
 Requires-Dist: lemonade-sdk[dev]; extra == "llm"
 Provides-Extra: llm-oga-cpu
 Requires-Dist: lemonade-sdk[dev,oga-cpu]; extra == "llm-oga-cpu"
+Provides-Extra: llm-oga-npu
+Requires-Dist: onnx==1.16.0; extra == "llm-oga-npu"
+Requires-Dist: onnxruntime==1.18.0; extra == "llm-oga-npu"
+Requires-Dist: numpy==1.26.4; extra == "llm-oga-npu"
+Requires-Dist: protobuf>=6.30.1; extra == "llm-oga-npu"
+Requires-Dist: lemonade-sdk[dev]; extra == "llm-oga-npu"
+Provides-Extra: llm-oga-hybrid
+Requires-Dist: onnx==1.16.1; extra == "llm-oga-hybrid"
+Requires-Dist: numpy==1.26.4; extra == "llm-oga-hybrid"
+Requires-Dist: protobuf>=6.30.1; extra == "llm-oga-hybrid"
+Provides-Extra: llm-oga-unified
+Requires-Dist: lemonade-sdk[dev,llm-oga-hybrid]; extra == "llm-oga-unified"
 Provides-Extra: llm-oga-igpu
 Requires-Dist: onnxruntime-genai-directml==0.6.0; extra == "llm-oga-igpu"
 Requires-Dist: onnxruntime-directml<1.22.0,>=1.19.0; extra == "llm-oga-igpu"
@@ -61,16 +78,6 @@ Requires-Dist: onnxruntime-genai-cuda==0.8.2; extra == "llm-oga-cuda"
 Requires-Dist: onnxruntime-gpu>=1.22.0; extra == "llm-oga-cuda"
 Requires-Dist: transformers<=4.51.3; extra == "llm-oga-cuda"
 Requires-Dist: lemonade-sdk[dev]; extra == "llm-oga-cuda"
-Provides-Extra: llm-oga-npu
-Requires-Dist: onnx==1.16.0; extra == "llm-oga-npu"
-Requires-Dist: onnxruntime==1.18.0; extra == "llm-oga-npu"
-Requires-Dist: numpy==1.26.4; extra == "llm-oga-npu"
-Requires-Dist: protobuf>=6.30.1; extra == "llm-oga-npu"
-Requires-Dist: lemonade-sdk[dev]; extra == "llm-oga-npu"
-Provides-Extra: llm-oga-hybrid
-Requires-Dist: lemonade-sdk[dev,oga-hybrid]; extra == "llm-oga-hybrid"
-Provides-Extra: llm-oga-unified
-Requires-Dist: lemonade-sdk[llm-oga-hybrid]; extra == "llm-oga-unified"
 Dynamic: author-email
 Dynamic: description
 Dynamic: description-content-type
@@ -174,7 +181,7 @@ lemonade-server list
 ## Model Library
-Lemonade supports both GGUF and ONNX models as detailed in the [Supported Configuration](#supported-configurations) section. A list of all built-in models is available [here](https://lemonade-server.ai/docs/server/models/).
+Lemonade supports both GGUF and ONNX models as detailed in the [Supported Configuration](#supported-configurations) section. A list of all built-in models is available [here](https://lemonade-server.ai/docs/server/server_models/).
 You can also import custom GGUF and ONNX models from Hugging Face by using our [Model Manager](http://localhost:8000/#model-management) (requires server to be running).
 <p align="center">
@@ -263,7 +270,7 @@ completion = client.chat.completions.create(
 print(completion.choices[0].message.content)
 ```
-For more detailed integration instructions, see the [Integration Guide](./server_integration.md).
+For more detailed integration instructions, see the [Integration Guide](./docs/server/server_integration.md).
 ## Beyond an LLM Server
@@ -272,6 +279,10 @@ The [Lemonade SDK](./docs/README.md) also include the following components:
 - 🐍 **[Lemonade API](./docs/lemonade_api.md)**: High-level Python API to directly integrate Lemonade LLMs into Python applications.
 - 🖥️ **[Lemonade CLI](./docs/dev_cli/README.md)**: The `lemonade` CLI lets you mix-and-match LLMs (ONNX, GGUF, SafeTensors) with prompting templates, accuracy testing, performance benchmarking, and memory profiling to characterize your models on your hardware.
+## FAQ
+To read our frequently asked questions, see our [FAQ Guide](./docs/faq.md)
 ## Contributing
 We are actively seeking collaborators from across the industry. If you would like to contribute to this project, please check out our [contribution guide](./docs/contribute.md).

{lemonade_sdk-8.0.6.dist-info → lemonade_sdk-8.1.0.dist-info}/RECORD RENAMED Viewed

@@ -4,14 +4,14 @@ lemonade/cache.py,sha256=5iZbk273TiTMqK_vdzPOPYTo6VsWW2gNByOISA9zi1w,3002
 lemonade/cli.py,sha256=9Pcs3PcrWC2F8_pcBaz09xHUICIJTvpemBdPGyXkjIk,4395
 lemonade/sequence.py,sha256=KSH7BPsiyDKsOsg_ziQKEGsDwMmuO_YbgPRBxkZd0pw,13267
 lemonade/state.py,sha256=sdSezla7Cd7KYL90xY3p9kcNV4ndSyN6UvNLOr3vBMA,5261
-lemonade/version.py,sha256=0UVFH05U_SSITAGCT0SSjcWSJnwORBaDn5ZSlvquMo8,22
+lemonade/version.py,sha256=c04nFsyfS0zYoDvZjLO-uEi12TFB5EWSD6fiWiI7OLQ,22
 lemonade/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 lemonade/common/build.py,sha256=zTb0m1-kuUx6zw5QHp2SNnVuN6jOTMQ2FCdj9iH374U,6140
 lemonade/common/cli_helpers.py,sha256=hjBfXrTtFl8gmCFlL-ksviXR0mOcdPtTWVNKoEp3PG4,4993
 lemonade/common/exceptions.py,sha256=w83sVKmL1QXoJlGjj_bRyjIBMhlMqdVQy_FEOTu2YQI,2050
 lemonade/common/filesystem.py,sha256=QV3cHhKNu-7W2rr8wZ4JQfD2rP_5T2Js7jiDQBYWHVQ,12142
 lemonade/common/inference_engines.py,sha256=lcmir_pATr71TfSBJoIZEi3G9xyxNwi2_xpPvPD8_xI,12932
-lemonade/common/network.py,sha256=EkWTxjJ-UM469nEJdzEl-5UNO7dPfVfFzU2SVMMPaD0,1425
+lemonade/common/network.py,sha256=p1lWJkN0H5hCpb4rKi3Zc47W_BRrrm-7ghdTALJLGqU,1944
 lemonade/common/printing.py,sha256=GFFzrXIineIOMa9yu0lo5sL4j6A5BBg_T9aUCdP-juw,3229
 lemonade/common/status.py,sha256=xSOZN508cdRtrs1HVyr9zmASYg69EsZBLSs0lroLoCM,16519
 lemonade/common/system_info.py,sha256=dOtX8WLHCz1xmURZWnqhDbyNZv_AulrpX_bbI58eHFQ,27084
@@ -32,13 +32,13 @@ lemonade/tools/tool.py,sha256=UsxVYukfm_iM3BfeGYPZxQlTK5UfDfDOl3RIyLr8A1Y,13256
 lemonade/tools/huggingface/bench.py,sha256=-mTfldCtquL4mspq8ykVwDc9Mut5Ecv_jHJnSb0CYGE,6734
 lemonade/tools/huggingface/load.py,sha256=KsSGOBBD-tNEIfYC8mCWV_jpnkjHMhN3juVmC1Ln4uQ,7745
 lemonade/tools/huggingface/utils.py,sha256=j1S-IgjDsznUIVwkHSqqChmFyqIx9f3WcEelzohWwvU,13955
-lemonade/tools/llamacpp/bench.py,sha256=GBUGOrcDUJdREAIM7GGs4VschqUe-mE_1-MbSUaDjic,4776
+lemonade/tools/llamacpp/bench.py,sha256=1fkE02ecg-jRk92i5dTAXz6re14WH8bd-Z9l-m3lbDA,4844
 lemonade/tools/llamacpp/load.py,sha256=SKacK2n8LpC4DN4yALyEpV2c8_sgOv2G7t6Nlyu7XXg,6273
-lemonade/tools/llamacpp/utils.py,sha256=35eir8PKtxVUDehgb2DJ9hUI0uSjijQgoDK6scmLl1E,22390
+lemonade/tools/llamacpp/utils.py,sha256=vHA5kykkdHSsMGmbEA4RyOHr8wFIh1WenfhCvY8WxZs,22445
 lemonade/tools/oga/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 lemonade/tools/oga/bench.py,sha256=PJXv4UchcS2YPwijNzef8DY4DSAKYxIYY1ycHuH3T34,5005
-lemonade/tools/oga/load.py,sha256=XSznW8lOX_KafSq5J5mIBJzj8YJEBpK0RFGcTE1wnE8,28317
-lemonade/tools/oga/utils.py,sha256=2N1htWM8QKp5g8nHPvk-w9ZYknSc6fWGXcACRkhsXic,16465
+lemonade/tools/oga/load.py,sha256=O82ezF7Jhgz3CJrxDWZYqLHyD_0NS1nsvfMWDaaUI4I,33728
+lemonade/tools/oga/utils.py,sha256=Xd7tmNr69u_bCut0hZqA7saUR3NFZlp4bvWo54mOZb0,16918
 lemonade/tools/quark/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 lemonade/tools/quark/quark_load.py,sha256=FJ4LJKTToZbHHWVEOBLadae1a3jCnnY4KvXySHbkJMA,5589
 lemonade/tools/quark/quark_quantize.py,sha256=hwoaXhpBIORvJ16MvewphPkaDEQn3BAgXq5o82Gc-_s,16599
@@ -47,26 +47,26 @@ lemonade/tools/report/llm_report.py,sha256=bVHhwCINA-Ok2EdSwAsLubsc83N3KWOVuwTgu
 lemonade/tools/report/table.py,sha256=ssqy1bZqF-wptNzKEOj6_9REtCNZyXO8R5vakAtg3R4,27973
 lemonade/tools/server/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 lemonade/tools/server/llamacpp.py,sha256=OP0j74QcowEu3zFEcrKIsBbGDOFemBXS5F5DC6oQHaI,18853
-lemonade/tools/server/serve.py,sha256=SXc0qSh-jKS72GlUsuksT7Lov8p3FatgbbycNmHsUfM,57465
+lemonade/tools/server/serve.py,sha256=0-NprfsU-YrX8Qsf1atEi6wPJWemrPjHKEBHV69SwCQ,57046
 lemonade/tools/server/tool_calls.py,sha256=xrAlQwKG-nv2xLlf8f9CDSaUbyMn8ZtHkds9iZLG9K8,5230
 lemonade/tools/server/tray.py,sha256=yoGCM8j_2KzPqo-AlYiauWd8QR56yp6jW6HZ9921Ydg,17525
 lemonade/tools/server/webapp.py,sha256=8Das5yXOaSBLZmSZ_eddJajQFxBhvl5D6GI_hHlGbE0,1040
 lemonade/tools/server/static/favicon.ico,sha256=hMmP9qGJNeZ0mFS86JIqPbZstXMZn0Z76_HfHQpREAU,126745
-lemonade/tools/server/static/styles.css,sha256=JmH9LRGB4HGNCxH14owBrUNNBlzx3cVvB3JJ-xJqDqc,26453
-lemonade/tools/server/static/webapp.html,sha256=8khNmsiy4UdjJDkJW3cFeJkmXUR8RQucvCuuka5SNrQ,36230
+lemonade/tools/server/static/styles.css,sha256=8wQ5Cg4rbEh03kC8t7ALE7dB20GiD0Pfu5BAxh9hECU,26429
+lemonade/tools/server/static/webapp.html,sha256=KZm1ZFIhQzLT2Y2wy3hFsQxcOxFzv-blaeLzc1ODhb8,36396
 lemonade/tools/server/utils/port.py,sha256=XnIg2qS73QRrsJn6LgHcrJPmku30Tv6vsYcBVMj82K4,2186
 lemonade/tools/server/utils/system_tray.py,sha256=b9lvNv9chJKQxvmH7qzAuUe6H9HsLu7pdHFqGlAJaL0,12654
 lemonade/tools/server/utils/thread.py,sha256=pK9K_6DNWoQ78NArkAX3Ym2WsxLnCs9sKTk6TitlYnI,2804
 lemonade_install/__init__.py,sha256=26zohKg2jgr_5y7tObduWMYQg8zCTWMZHL8lfi2zZVQ,40
-lemonade_install/install.py,sha256=DJWR36QSjZtvEwRjYPNSjhYgoxLjI_6OPrCMZjL0ChY,28263
-lemonade_sdk-8.0.6.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-lemonade_sdk-8.0.6.dist-info/licenses/NOTICE.md,sha256=B8lEqi4QE41J9ljz4Riv2JgHD1v8GCZE6nNBHO3KIA0,2135
-lemonade_server/cli.py,sha256=2Un5uLK04fIxlfcTiZ0T_EWbbaq2tYymkUHNFeuvB7g,16041
-lemonade_server/model_manager.py,sha256=FfF3z4IpMZqMk_yIo2LHiE76xg7ybROHvi6lcx-0gvE,10754
-lemonade_server/pydantic_models.py,sha256=rp_FFhoTwg6jNmgol-kShwffnRDGbt7jTbIeELvgOIo,2876
-lemonade_server/server_models.json,sha256=wVyjusS5KkOhlQIl1tCnTR4YYbVm7mLU2rHSFk_39hI,7890
-lemonade_sdk-8.0.6.dist-info/METADATA,sha256=g7dOWZPRb0PEyK4UZpVBPnm1LGfZGlkjrdcNfJ_DO_g,15230
-lemonade_sdk-8.0.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-lemonade_sdk-8.0.6.dist-info/entry_points.txt,sha256=gJppn0ETtXXR6ceKWEIRdk42kMC7ps59EmU3NCPyPUk,144
-lemonade_sdk-8.0.6.dist-info/top_level.txt,sha256=10ap5GNiPhalO4V50LRoxA1FqRT9g3Xkia6BITu880k,42
-lemonade_sdk-8.0.6.dist-info/RECORD,,
+lemonade_install/install.py,sha256=TBX-VwEHcPo4WX0K_12pKKINnIK3o4SUo3L5XjkqEtw,27669
+lemonade_sdk-8.1.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+lemonade_sdk-8.1.0.dist-info/licenses/NOTICE.md,sha256=B8lEqi4QE41J9ljz4Riv2JgHD1v8GCZE6nNBHO3KIA0,2135
+lemonade_server/cli.py,sha256=6QJ5fxNLuVUbuHauA5JHXf0H5dqJ5E4GNTo4YoMOJtg,16049
+lemonade_server/model_manager.py,sha256=O3fIX52AqU0z10WzPmNEA3lQ_KjOqNq_G-SxjwIgEio,10781
+lemonade_server/pydantic_models.py,sha256=qEvF7x7AuHCHMiByVzGGuLdQTNs233Sw9uQq5cpI6is,2721
+lemonade_server/server_models.json,sha256=gitKHj_VHANxjtcXeE5zFpukVO0HyEfKhu3ZaZsj2xo,8867
+lemonade_sdk-8.1.0.dist-info/METADATA,sha256=c3JxCUYw5ujhGSb3FX3mG6UmgG5BLqik8a5j4oe8n7o,15712
+lemonade_sdk-8.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+lemonade_sdk-8.1.0.dist-info/entry_points.txt,sha256=gJppn0ETtXXR6ceKWEIRdk42kMC7ps59EmU3NCPyPUk,144
+lemonade_sdk-8.1.0.dist-info/top_level.txt,sha256=10ap5GNiPhalO4V50LRoxA1FqRT9g3Xkia6BITu880k,42
+lemonade_sdk-8.1.0.dist-info/RECORD,,

lemonade_server/cli.py CHANGED Viewed

@@ -57,7 +57,7 @@ def serve(
     log_level = log_level if log_level is not None else DEFAULT_LOG_LEVEL
     # Hidden environment variable to enable input truncation (experimental feature)
-    truncate_inputs = "LEMONADE_TRUNCATE_INPUTS" in os.environ
+    truncate_inputs = os.environ.get("LEMONADE_TRUNCATE_INPUTS", None)
     # Start the server
     serve_kwargs = {

lemonade_server/model_manager.py CHANGED Viewed

@@ -7,6 +7,7 @@ from importlib.metadata import distributions
 from lemonade_server.pydantic_models import PullConfig
 from lemonade.cache import DEFAULT_CACHE_DIR
 from lemonade.tools.llamacpp.utils import parse_checkpoint, download_gguf
+from lemonade.common.network import custom_snapshot_download
 USER_MODELS_FILE = os.path.join(DEFAULT_CACHE_DIR, "user_models.json")
@@ -175,7 +176,7 @@ class ModelManager:
             if "gguf" in checkpoint_to_download.lower():
                 download_gguf(gguf_model_config.checkpoint, gguf_model_config.mmproj)
             else:
-                huggingface_hub.snapshot_download(repo_id=checkpoint_to_download)
+                custom_snapshot_download(checkpoint_to_download)
             # Register the model in user_models.json, creating that file if needed
             # We do this registration after the download so that we don't register
@@ -233,8 +234,8 @@ class ModelManager:
         try:
             # Get the local path using snapshot_download with local_files_only=True
-            snapshot_path = huggingface_hub.snapshot_download(
-                repo_id=base_checkpoint, local_files_only=True
+            snapshot_path = custom_snapshot_download(
+                base_checkpoint, local_files_only=True
             )
             # Navigate up to the model directory (parent of snapshots directory)

lemonade_server/pydantic_models.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from typing import Optional, Union, List, Any
+from typing import Optional, Union, List
 from pydantic import BaseModel
@@ -18,9 +18,6 @@ class LoadConfig(BaseModel):
     model_name: str
     checkpoint: Optional[str] = None
     recipe: Optional[str] = None
-    # Indicates the maximum prompt length allowed for that specific
-    # checkpoint + recipe combination
-    max_prompt_length: Optional[int] = None
     # Indicates whether the model is a reasoning model, like DeepSeek
     reasoning: Optional[bool] = False
     # Indicates which Multimodal Projector (mmproj) file to use

lemonade-sdk 8.0.6__py3-none-any.whl → 8.1.0__py3-none-any.whl

Potentially problematic release.

lemonade-sdk 8.0.6__py3-none-any.whl → 8.1.0__py3-none-any.whl