lemonade_sdk-8.0.1-py3-none-any.whl → lemonade_sdk-8.0.3-py3-none-any.whl


lemonade/cli.py CHANGED
@@ -90,9 +90,9 @@ https://github.com/lemonade-sdk/lemonade/blob/main/docs/README.md""",
     )
 
     profiler_instances = [
-        profiler(global_args[profiler.unique_name])
+        profiler(global_args[profiler.unique_name.replace("-", "_")])
        for profiler in profilers
-        if global_args.get(profiler.unique_name, None) is not None
+        if global_args.get(profiler.unique_name.replace("-", "_"), None) is not None
    ]
 
    if len(evaluation_tools) > 0:
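
The dash-to-underscore replacement mirrors argparse's attribute naming: a flag registered as `--memory-tracker` is stored on the parsed namespace as `memory_tracker`, so a profiler whose `unique_name` contains dashes could never be found in `global_args` before this fix. A minimal sketch of the mismatch (hypothetical flag name, standard-library argparse only):

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--memory-tracker", default=None)
    args = vars(parser.parse_args(["--memory-tracker", "on"]))

    unique_name = "memory-tracker"                   # as a profiler might declare it
    print(args.get(unique_name))                     # None: argparse keys use underscores
    print(args.get(unique_name.replace("-", "_")))   # "on": matches argparse's key
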
lemonade/profilers/profiler.py CHANGED
@@ -48,7 +48,10 @@ class Profiler(abc.ABC):
         This method is called so that the profiler can create its output files.
         The state is passed so that build info can be gathered and stats can be written.
         The timestamp can be used for filename in current working directory.
-        The start times contain a list of tools and start times.
+        The start times parameter is a dict with the keys being the tools names and
+        the values being the time the tool started. There is an initial "warmup" key
+        that has a start time before the first tool and a "cool down" key that contains the
+        time when the last tool ended.
         """
 
 
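Per the updated docstring, per-tool durations fall out of consecutive entries of that dict. A sketch of consuming it (hypothetical tool names and times; relies on Python dicts preserving insertion order):

    start_times = {
        "warmup": 100.0,       # before the first tool starts
        "load-model": 101.5,
        "llm-prompt": 130.2,
        "cool down": 245.9,    # when the last tool ended
    }

    entries = list(start_times.items())
    for (name, start), (_, next_start) in zip(entries, entries[1:]):
        print(f"{name}: {next_start - start:.1f}s")
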
lemonade/tools/oga/load.py CHANGED
@@ -1,12 +1,6 @@
 # onnxruntime_genai is not lint-friendly yet and PyLint can't
 # find any of the class methods
 # pylint: disable=no-member
-#
-# Model builder constraints:
-# 11/10/24 Need transformers <4.45.0 OR onnxruntime-genai 0.5.0 (which must be built from source)
-# (transformers v4.45 changes the format of the tokenizer.json file which will be supported in
-# onnxruntime-genai 0.5)
-#
 
 import argparse
 import os
@@ -51,8 +45,8 @@ def import_error_heler(e: Exception):
     """
     raise ImportError(
         f"{e}\n Please install lemonade-sdk with "
-        "one of the llm-oga extras, for example:\n"
-        "pip install lemonade-sdk[llm-oga-cpu]\n"
+        "one of the oga extras, for example:\n"
+        "pip install lemonade-sdk[dev,oga-cpu]\n"
         "See https://lemonade_server.ai/install_options.html for details"
     )
 
lemonade/tools/prompt.py CHANGED
@@ -176,12 +176,21 @@ class LLMPrompt(Tool):
 
         input_ids = tokenizer(prompt, return_tensors="pt").input_ids
         if isinstance(input_ids, (list, str)):
-            # OGA models return a list of tokens
+            # OGA models return a list of tokens (older versions)
             # Our llama.cpp adapter returns a string
             len_tokens_in = len(input_ids)
-        else:
+        elif hasattr(input_ids, "shape"):
             # HF models return a 2-D tensor
-            len_tokens_in = input_ids.shape[1]
+            # OGA models with newer versions may return numpy arrays
+            if len(input_ids.shape) == 1:
+                # 1-D array from newer OGA versions
+                len_tokens_in = len(input_ids)
+            else:
+                # 2-D tensor from HF models
+                len_tokens_in = input_ids.shape[1]
+        else:
+            # Fallback: try to get length directly
+            len_tokens_in = len(input_ids)
 
         len_tokens_out = []
         response_texts = []
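
The new branching normalizes three tokenizer return types into one token count. A standalone sketch of the same logic (hypothetical inputs; a 2-D numpy array stands in for a Hugging Face tensor):

    import numpy as np

    def count_input_tokens(input_ids):
        if isinstance(input_ids, (list, str)):
            return len(input_ids)          # token list (older OGA) or raw string (llama.cpp adapter)
        if hasattr(input_ids, "shape"):
            if len(input_ids.shape) == 1:  # 1-D array (newer OGA)
                return len(input_ids)
            return input_ids.shape[1]      # 2-D tensor (Hugging Face)
        return len(input_ids)              # fallback

    print(count_input_tokens([1, 2, 3]))             # 3
    print(count_input_tokens(np.array([1, 2, 3])))   # 3
    print(count_input_tokens(np.array([[1, 2, 3]]))) # 3 (stands in for a 2-D HF tensor)
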
@@ -202,9 +211,15 @@ class LLMPrompt(Tool):
             random_seed += 1
 
         # Flatten the input and response
-        input_ids_array = (
-            input_ids if isinstance(input_ids, (list, str)) else input_ids[0]
-        )
+        if isinstance(input_ids, (list, str)):
+            input_ids_array = input_ids
+        elif hasattr(input_ids, "shape") and len(input_ids.shape) == 1:
+            # 1-D array from newer OGA versions - already flat
+            input_ids_array = input_ids
+        else:
+            # 2-D tensor from HF models - take first row
+            input_ids_array = input_ids[0]
+
         response_array = response if isinstance(response, str) else response[0]
 
         # Separate the prompt from the response
lemonade/tools/report/table.py CHANGED
@@ -7,6 +7,7 @@ from tabulate import tabulate
 import lemonade.common.build as build
 import lemonade.common.filesystem as fs
 from lemonade.cache import Keys
+from lemonade.tools.accuracy import LMEvalHarness
 from lemonade.tools.huggingface.bench import HuggingfaceBench
 from lemonade.tools.llamacpp.bench import LlamaCppBench
 from lemonade.tools.mmlu import AccuracyMMLU
@@ -73,6 +74,7 @@ class SimpleStat(TableColumn):
         align="center",
         omit_if_lean=False,
         wrap=None,
+        stat_fn=None,
     ):
         self.column_header = column_header
         self.stat = stat
@@ -80,6 +82,7 @@
         self.align = align
         self.omit_if_lean = omit_if_lean
         self.wrap = wrap or self.default_wrap
+        self.stat_fn = stat_fn
 
     def get_str(self, build_stats, lean=False):
         if lean and self.omit_if_lean:
@@ -87,6 +90,8 @@
         data = build_stats.get(self.stat, None)
         if data is None:
             return ""
+        if self.stat_fn:
+            data = self.stat_fn(data)
         cell_str = "\n".join(
             [_wrap(f"{x:{self.format_str}}", self.wrap) for x in _to_list(data)]
         )
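
`stat_fn` lets a `SimpleStat` column reduce or transform the raw statistic before formatting; the new "Total Generated Tokens" column later in this diff uses it with `stat_fn=sum` over a per-iteration token list. A minimal sketch of the idea (simplified column class, hypothetical stats dict):

    class Column:
        def __init__(self, stat, format_str, stat_fn=None):
            self.stat, self.format_str, self.stat_fn = stat, format_str, stat_fn

        def get_str(self, build_stats):
            data = build_stats.get(self.stat)
            if data is None:
                return ""
            if self.stat_fn:
                data = self.stat_fn(data)  # e.g. collapse a per-iteration list into one total
            return f"{data:{self.format_str}}"

    stats = {"response_tokens": [128, 130, 127]}
    print(Column("response_tokens", "d", stat_fn=sum).get_str(stats))  # 385
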
@@ -232,6 +237,47 @@ class AdditionalStat(TableColumn):
         return "\n".join(cell_entry)
 
 
+class DictListStat(TableColumn):
+    """
+    A statistic that is a list of dicts and values from a given list of keys will be
+    pulled out of each dict and placed in the cell
+    """
+
+    def __init__(
+        self,
+        column_header,
+        statistic_name,
+        key_format_list,
+        align="center",
+        omit_if_lean=False,
+        wrap=None,
+    ):
+        self.column_header = column_header
+        self.statistic_name = statistic_name
+        self.key_format_list = key_format_list
+        self.align = align
+        self.omit_if_lean = omit_if_lean
+        self.wrap = wrap or self.default_wrap
+
+    def get_str(self, build_stats, lean=False):
+        if lean and self.omit_if_lean:
+            return None
+        stat = build_stats.get(self.statistic_name, None)
+        if not stat:
+            return ""
+        cell_entry = []
+        for stat_dict in stat:
+            line = [
+                format_str.format(stat_dict[key])
+                for key, format_str in self.key_format_list
+            ]
+            cell_entry.append(" ".join(line))
+        return "\n".join(cell_entry)
+
+    def get_keys(self):
+        return [self.statistic_name]
+
+
 ################################################################################
 # ABSTRACT BASE CLASS FOR DEFINING A TABLE
 ################################################################################
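
A sketch of how `DictListStat` renders a list-of-dicts statistic into one multi-line cell (hypothetical statistic name and keys; the format strings follow `str.format` conventions):

    build_stats = {
        "lm_eval_results": [
            {"task": "hellaswag", "acc": 71.3},
            {"task": "arc_easy", "acc": 82.9},
        ],
    }
    key_format_list = [("task", "{}"), ("acc", "{:.1f}")]

    cell_entry = []
    for stat_dict in build_stats["lm_eval_results"]:
        line = [fmt.format(stat_dict[key]) for key, fmt in key_format_list]
        cell_entry.append(" ".join(line))
    print("\n".join(cell_entry))
    # hellaswag 71.3
    # arc_easy 82.9
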
@@ -349,6 +395,28 @@ class Table(ABC):
             headers.append(column.column_header)
             col_align += (column.align,)
 
+        # Stat column headers
+        stat_columns = self.table_descriptor.get("stat_columns", [])
+        stat_columns_include = []
+        for column in stat_columns:
+            # Check to see that at least one build has data for the column
+            keep_column = False
+            if not (self.lean and column.omit_if_lean):
+                keys = column.get_keys()
+                for build_stats in self.all_stats:
+                    found = [(key in build_stats) for key in keys]
+                    if any(found):
+                        keep_column = True
+                        headers.append(column.column_header)
+                        col_align += (column.align,)
+                        break
+            stat_columns_include.append(keep_column)
+        stat_columns = [
+            column
+            for column, include in zip(stat_columns, stat_columns_include)
+            if include
+        ]
+
 
        # Final headers
        last_columns = self.table_descriptor.get("last_columns", [])
        for column in last_columns:
@@ -385,6 +453,12 @@ class Table(ABC):
                 if entry_str is not None:
                     row.append(entry_str)
 
+            # Per stat columns
+            for entry in stat_columns:
+                entry_str = entry.get_str(build_stats, self.lean)
+                if entry_str is not None:
+                    row.append(entry_str)
+
             # Final columns
             for entry in last_columns:
                 entry_str = entry.get_str(build_stats, self.lean)
@@ -513,6 +587,12 @@ class LemonadePerfTable(Table):
                     Keys.STD_DEV_TOKENS_PER_SECOND,
                     ".2f",
                 ),
+                SimpleStat(
+                    _wrap("Total Generated Tokens", 9),
+                    Keys.RESPONSE_TOKENS,
+                    "d",
+                    stat_fn=sum,
+                ),
                 SimpleStat(
                     _wrap("Memory Used (GB)", 8), Keys.MAX_MEMORY_USED_GBYTE, ".3f"
                 ),
@@ -527,7 +607,16 @@ class LemonadePerfTable(Table):
                     ".2f",
                 )
             ],
+            LMEvalHarness: [
+                AdditionalStat(
+                    "EleutherAI\nLM Evaluation",
+                    "^lm_eval_",
+                    "^lm_eval_",
+                    ".1f",
+                )
+            ],
         },
+        "stat_columns": [],
         "last_columns": [
             SimpleStat(
                 "System Info",
lemonade/tools/server/llamacpp.py CHANGED
@@ -210,15 +210,20 @@ def _log_subprocess_output(
     """
 
     if process.stdout:
-        for line in iter(process.stdout.readline, ""):
-            if line:
-                line_stripped = line.strip()
-                logging.debug("%s: %s", prefix, line_stripped)
+        try:
+            for line in iter(process.stdout.readline, ""):
+                if line:
+                    line_stripped = line.strip()
+                    logging.debug("%s: %s", prefix, line_stripped)
 
-                telemetry.parse_telemetry_line(line_stripped)
+                    telemetry.parse_telemetry_line(line_stripped)
 
-            if process.poll() is not None:
-                break
+                if process.poll() is not None:
+                    break
+        except UnicodeDecodeError as e:
+            logging.debug("Unicode decode error reading subprocess output: %s", str(e))
+        except Exception as e:  # pylint: disable=broad-exception-caught
+            logging.error("Unexpected error reading subprocess output: %s", str(e))
 
 
 def _wait_for_load(llama_server_process: subprocess.Popen, port: int):
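
Together with the `encoding="utf-8", errors="replace"` arguments added to the `Popen` call in the next hunk, this makes log draining robust to non-UTF-8 bytes in llama-server output: invalid sequences become replacement characters instead of raising mid-stream. A minimal sketch of the pattern (hypothetical command):

    import subprocess

    # errors="replace" substitutes U+FFFD for undecodable bytes instead of
    # raising UnicodeDecodeError while iterating over the stream
    proc = subprocess.Popen(
        ["echo", "hello"],  # hypothetical command
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
        encoding="utf-8",
        errors="replace",
        bufsize=1,
    )
    for line in iter(proc.stdout.readline, ""):
        print(line.strip())
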
@@ -287,6 +292,8 @@ def _launch_llama_subprocess(
         stdout=subprocess.PIPE,
         stderr=subprocess.STDOUT,
         text=True,
+        encoding="utf-8",
+        errors="replace",
         bufsize=1,
         env=env,
     )
@@ -383,6 +390,10 @@ def server_load(model_config: PullConfig, telemetry: LlamaTelemetry):
             f"Loading {model_config.model_name} on GPU didn't work, re-attempting on CPU"
         )
 
+        if os.environ.get("LEMONADE_LLAMACPP_NO_FALLBACK"):
+            # Used for testing, when the test should fail if GPU didn't work
+            raise Exception("llamacpp GPU loading failed")
+
         llama_server_process = _launch_llama_subprocess(
             snapshot_files, use_gpu=False, telemetry=telemetry
         )
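
Because the guard reads the raw environment string, any non-empty value disables the CPU fallback while an unset or empty variable leaves it intact:

    import os

    os.environ["LEMONADE_LLAMACPP_NO_FALLBACK"] = "1"
    assert os.environ.get("LEMONADE_LLAMACPP_NO_FALLBACK")      # truthy: fallback disabled

    os.environ["LEMONADE_LLAMACPP_NO_FALLBACK"] = ""
    assert not os.environ.get("LEMONADE_LLAMACPP_NO_FALLBACK")  # empty string: fallback kept
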
lemonade/tools/server/static/webapp.html CHANGED
@@ -110,20 +110,53 @@
     </footer>
     <script src="https://cdn.jsdelivr.net/npm/openai@4.21.0/dist/openai.min.js"></script>
     <script> // Tab switching logic
-        function showTab(tab) {
+        function showTab(tab, updateHash = true) {
             document.getElementById('tab-chat').classList.remove('active');
             document.getElementById('tab-models').classList.remove('active');
             document.getElementById('content-chat').classList.remove('active');
             document.getElementById('content-models').classList.remove('active');
             if (tab === 'chat') {
                 document.getElementById('tab-chat').classList.add('active');
-                document.getElementById('content-chat').classList.add('active');
+                document.getElementById('content-chat').classList.add('active');
+                if (updateHash) {
+                    window.location.hash = 'llm-chat';
+                }
             } else {
                 document.getElementById('tab-models').classList.add('active');
-                document.getElementById('content-models').classList.add('active');
+                document.getElementById('content-models').classList.add('active');
+                if (updateHash) {
+                    window.location.hash = 'model-management';
+                }
             }
         }
 
+        // Handle hash changes for anchor navigation
+        function handleHashChange() {
+            const hash = window.location.hash.slice(1); // Remove the # symbol
+            if (hash === 'llm-chat') {
+                showTab('chat', false);
+            } else if (hash === 'model-management') {
+                showTab('models', false);
+            }
+        }
+
+        // Initialize tab based on URL hash on page load
+        function initializeTabFromHash() {
+            const hash = window.location.hash.slice(1);
+            if (hash === 'llm-chat') {
+                showTab('chat', false);
+            } else if (hash === 'model-management') {
+                showTab('models', false);
+            }
+            // If no hash or unrecognized hash, keep default (chat tab is already active)
+        }
+
+        // Listen for hash changes
+        window.addEventListener('hashchange', handleHashChange);
+
+        // Initialize on page load
+        document.addEventListener('DOMContentLoaded', initializeTabFromHash);
+
         // Toggle Add Model form
         function toggleAddModelForm() {
             const form = document.querySelector('.model-mgmt-register-form');
lemonade/tools/server/tray.py CHANGED
@@ -197,11 +197,17 @@ class LemonadeTray(SystemTray):
         """
         webbrowser.open("https://lemonade-server.ai/docs/")
 
+    def open_llm_chat(self, _, __):
+        """
+        Open the LLM chat in the default web browser.
+        """
+        webbrowser.open(f"http://localhost:{self.port}/#llm-chat")
+
     def open_model_manager(self, _, __):
         """
         Open the model manager in the default web browser.
         """
-        webbrowser.open(f"http://localhost:{self.port}/")
+        webbrowser.open(f"http://localhost:{self.port}/#model-management")
 
     def check_server_state(self):
         """
@@ -266,7 +272,7 @@ class LemonadeTray(SystemTray):
             self.logger.error(f"Error changing port: {str(e)}")
             self.show_balloon_notification("Error", f"Failed to change port: {str(e)}")
 
-    def upgrade_to_latest(self, icon, item):
+    def upgrade_to_latest(self, _, __):
         """
         Download and launch the Lemonade Server installer
         """
@@ -281,21 +287,34 @@ class LemonadeTray(SystemTray):
             installer_path = os.path.join(
                 tempfile.gettempdir(), "Lemonade_Server_Installer.exe"
             )
+            if os.path.exists(installer_path):
+                os.remove(installer_path)
 
             # Download the installer
             response = requests.get(self.latest_version_url, stream=True)
             response.raise_for_status()
 
-            # Save the installer to disk
+            # Save the installer to disk and force write to disk
             with open(installer_path, "wb") as f:
                 for chunk in response.iter_content(chunk_size=8192):
                     f.write(chunk)
+                f.flush()
+                os.fsync(f.fileno())
 
-            # Launch the installer
-            subprocess.Popen([installer_path], shell=True)
+            # Launch the installer as a completely detached process
+            # subprocess.DETACHED_PROCESS - Creates a process that's not attached to the console
+            # subprocess.CREATE_NEW_PROCESS_GROUP - Creates a new process group
+            # close_fds=True - Closes file descriptors to prevent inheritance
+            subprocess.Popen(
+                [installer_path],
+                creationflags=subprocess.DETACHED_PROCESS
+                | subprocess.CREATE_NEW_PROCESS_GROUP,
+                close_fds=True,
+                shell=True,
+                cwd=tempfile.gettempdir(),
+            )
 
-            # Quit the application
-            self.exit_app(icon, item)
+            # No need to quit the application, the installer will handle it
 
     def create_menu(self):
         """
@@ -326,16 +345,25 @@ class LemonadeTray(SystemTray):
 
         # Create menu items for all downloaded models
         model_menu_items = []
-        for model_name, _ in self.downloaded_models.items():
-            # Create a function that returns the lambda to properly capture the variables
-            def create_handler(mod):
-                return lambda icon, item: self.load_llm(icon, item, mod)
+        if not self.downloaded_models:
+            model_menu_items.append(
+                MenuItem(
+                    "No models available: Use the Model Manager to pull models",
+                    None,
+                    enabled=False,
+                )
+            )
+        else:
+            for model_name, _ in self.downloaded_models.items():
+                # Create a function that returns the lambda to properly capture the variables
+                def create_handler(mod):
+                    return lambda icon, item: self.load_llm(icon, item, mod)
 
-            model_item = MenuItem(model_name, create_handler(model_name))
+                model_item = MenuItem(model_name, create_handler(model_name))
 
-            # Set checked property instead of modifying the text
-            model_item.checked = model_name == self.loaded_llm
-            model_menu_items.append(model_item)
+                # Set checked property instead of modifying the text
+                model_item.checked = model_name == self.loaded_llm
+                model_menu_items.append(model_item)
 
         load_submenu = Menu(*model_menu_items)
 
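The `create_handler` indirection that this hunk preserves exists because Python closures capture variables late: lambdas created directly in the loop would all see the final `model_name`. A minimal demonstration of the pitfall and the factory fix:

    handlers_bad = [lambda: name for name in ["a", "b", "c"]]
    print([h() for h in handlers_bad])   # ['c', 'c', 'c'] - all share the last binding

    def make_handler(captured):
        return lambda: captured          # factory freezes the value per iteration

    handlers_good = [make_handler(name) for name in ["a", "b", "c"]]
    print([h() for h in handlers_good])  # ['a', 'b', 'c']
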
@@ -378,6 +406,7 @@ class LemonadeTray(SystemTray):
         )
 
         items.append(MenuItem("Documentation", self.open_documentation))
+        items.append(MenuItem("LLM Chat", self.open_llm_chat))
         items.append(MenuItem("Model Manager", self.open_model_manager))
         items.append(MenuItem("Show Logs", self.show_logs))
         items.append(Menu.SEPARATOR)
lemonade/version.py CHANGED
@@ -1 +1 @@
-__version__ = "8.0.1"
+__version__ = "8.0.3"
lemonade_sdk-8.0.1.dist-info/METADATA → lemonade_sdk-8.0.3.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: lemonade-sdk
-Version: 8.0.1
+Version: 8.0.3
 Summary: Lemonade SDK: Your LLM Aide for Validation and Deployment
 Author-email: lemonade@amd.com
 Requires-Python: >=3.10, <3.12
@@ -26,45 +26,49 @@ Requires-Dist: openai>=1.81.0
 Requires-Dist: transformers<=4.51.3
 Requires-Dist: jinja2
 Requires-Dist: tabulate
-Requires-Dist: huggingface-hub==0.30.2
+Requires-Dist: sentencepiece
+Requires-Dist: huggingface-hub==0.33.0
+Provides-Extra: oga-hybrid
+Requires-Dist: onnx==1.16.1; extra == "oga-hybrid"
+Requires-Dist: numpy==1.26.4; extra == "oga-hybrid"
+Requires-Dist: protobuf>=6.30.1; extra == "oga-hybrid"
+Provides-Extra: oga-cpu
+Requires-Dist: onnxruntime-genai==0.8.2; extra == "oga-cpu"
+Requires-Dist: onnxruntime>=1.22.0; extra == "oga-cpu"
+Provides-Extra: dev
+Requires-Dist: torch>=2.6.0; extra == "dev"
+Requires-Dist: accelerate; extra == "dev"
+Requires-Dist: datasets; extra == "dev"
+Requires-Dist: pandas>=1.5.3; extra == "dev"
+Requires-Dist: matplotlib; extra == "dev"
+Requires-Dist: human-eval-windows==1.0.4; extra == "dev"
+Requires-Dist: lm-eval[api]; extra == "dev"
 Provides-Extra: oga-hybrid-minimal
-Requires-Dist: onnx==1.16.1; extra == "oga-hybrid-minimal"
-Requires-Dist: numpy==1.26.4; extra == "oga-hybrid-minimal"
-Requires-Dist: protobuf>=6.30.1; extra == "oga-hybrid-minimal"
+Requires-Dist: lemonade-sdk[oga-hybrid]; extra == "oga-hybrid-minimal"
 Provides-Extra: oga-cpu-minimal
-Requires-Dist: onnxruntime-genai==0.6.0; extra == "oga-cpu-minimal"
-Requires-Dist: onnxruntime<1.22.0,>=1.10.1; extra == "oga-cpu-minimal"
+Requires-Dist: lemonade-sdk[oga-cpu]; extra == "oga-cpu-minimal"
 Provides-Extra: llm
-Requires-Dist: torch>=2.6.0; extra == "llm"
-Requires-Dist: accelerate; extra == "llm"
-Requires-Dist: sentencepiece; extra == "llm"
-Requires-Dist: datasets; extra == "llm"
-Requires-Dist: pandas>=1.5.3; extra == "llm"
-Requires-Dist: matplotlib; extra == "llm"
-Requires-Dist: human-eval-windows==1.0.4; extra == "llm"
-Requires-Dist: lm-eval[api]; extra == "llm"
+Requires-Dist: lemonade-sdk[dev]; extra == "llm"
 Provides-Extra: llm-oga-cpu
-Requires-Dist: lemonade-sdk[oga-cpu-minimal]; extra == "llm-oga-cpu"
-Requires-Dist: lemonade-sdk[llm]; extra == "llm-oga-cpu"
+Requires-Dist: lemonade-sdk[dev,oga-cpu]; extra == "llm-oga-cpu"
 Provides-Extra: llm-oga-igpu
 Requires-Dist: onnxruntime-genai-directml==0.6.0; extra == "llm-oga-igpu"
 Requires-Dist: onnxruntime-directml<1.22.0,>=1.19.0; extra == "llm-oga-igpu"
 Requires-Dist: transformers<4.45.0; extra == "llm-oga-igpu"
-Requires-Dist: lemonade-sdk[llm]; extra == "llm-oga-igpu"
+Requires-Dist: lemonade-sdk[dev]; extra == "llm-oga-igpu"
 Provides-Extra: llm-oga-cuda
-Requires-Dist: onnxruntime-genai-cuda==0.6.0; extra == "llm-oga-cuda"
-Requires-Dist: onnxruntime-gpu<1.22.0,>=1.19.1; extra == "llm-oga-cuda"
-Requires-Dist: transformers<4.45.0; extra == "llm-oga-cuda"
-Requires-Dist: lemonade-sdk[llm]; extra == "llm-oga-cuda"
+Requires-Dist: onnxruntime-genai-cuda==0.8.2; extra == "llm-oga-cuda"
+Requires-Dist: onnxruntime-gpu>=1.22.0; extra == "llm-oga-cuda"
+Requires-Dist: transformers<=4.51.3; extra == "llm-oga-cuda"
+Requires-Dist: lemonade-sdk[dev]; extra == "llm-oga-cuda"
 Provides-Extra: llm-oga-npu
 Requires-Dist: onnx==1.16.0; extra == "llm-oga-npu"
 Requires-Dist: onnxruntime==1.18.0; extra == "llm-oga-npu"
 Requires-Dist: numpy==1.26.4; extra == "llm-oga-npu"
 Requires-Dist: protobuf>=6.30.1; extra == "llm-oga-npu"
-Requires-Dist: lemonade-sdk[llm]; extra == "llm-oga-npu"
+Requires-Dist: lemonade-sdk[dev]; extra == "llm-oga-npu"
 Provides-Extra: llm-oga-hybrid
-Requires-Dist: lemonade-sdk[oga-hybrid-minimal]; extra == "llm-oga-hybrid"
-Requires-Dist: lemonade-sdk[llm]; extra == "llm-oga-hybrid"
+Requires-Dist: lemonade-sdk[dev,oga-hybrid]; extra == "llm-oga-hybrid"
 Provides-Extra: llm-oga-unified
 Requires-Dist: lemonade-sdk[llm-oga-hybrid]; extra == "llm-oga-unified"
 Dynamic: author-email
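
Functionally, the extras restructuring keeps every legacy extra as a thin alias onto the new names (`llm-oga-cpu` now just pulls in `lemonade-sdk[dev,oga-cpu]`), so an existing `pip install lemonade-sdk[llm-oga-cpu]` keeps resolving while the shorter `pip install lemonade-sdk[dev,oga-cpu]` spelling from the updated import-error hint becomes the preferred form.
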
lemonade_sdk-8.0.1.dist-info/RECORD → lemonade_sdk-8.0.3.dist-info/RECORD RENAMED
@@ -1,10 +1,10 @@
 lemonade/__init__.py,sha256=W1Qk7r0rnQqFhPNHp6BIBT_q-OH3s-8Q_POoVfAmKW0,117
 lemonade/api.py,sha256=X7DxBgsOl5L_z6uTkwoJWf8x0rjXWS2JoeEqmo9bMfc,3873
 lemonade/cache.py,sha256=djr2qgyUUAWlQv8FehU9qlNtCwK0IZqo82hcBDyZ3-A,2850
-lemonade/cli.py,sha256=XzptHh6LTl5OdGRnxiLykQ8QBl2rQmhWH5w0KPJVyY4,4359
+lemonade/cli.py,sha256=9Pcs3PcrWC2F8_pcBaz09xHUICIJTvpemBdPGyXkjIk,4395
 lemonade/sequence.py,sha256=KSH7BPsiyDKsOsg_ziQKEGsDwMmuO_YbgPRBxkZd0pw,13267
 lemonade/state.py,sha256=sdSezla7Cd7KYL90xY3p9kcNV4ndSyN6UvNLOr3vBMA,5261
-lemonade/version.py,sha256=qR-61NMOca8p2Rty8s6xwXQSXLDufw2os6i4zdyqfak,22
+lemonade/version.py,sha256=GImAlzwPDxsACkYFf5rTrX8QMH23tcqdm6vgjfFYD10,22
 lemonade/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 lemonade/common/build.py,sha256=zTb0m1-kuUx6zw5QHp2SNnVuN6jOTMQ2FCdj9iH374U,6140
 lemonade/common/cli_helpers.py,sha256=hjBfXrTtFl8gmCFlL-ksviXR0mOcdPtTWVNKoEp3PG4,4993
@@ -17,7 +17,7 @@ lemonade/common/system_info.py,sha256=qOwteG_mBo-ImilbiK7Gq37sWIE9ugF0dbWcj9zLD4
 lemonade/common/test_helpers.py,sha256=Gwk-pa_6xYAo2oro-2EJNfuouAfw8k_brCbcMC-E-r0,758
 lemonade/profilers/__init__.py,sha256=JKVonvJ4XZ9_6sKXPWsiMLQCNyzQOxhQw5BEHR1qOfU,31
 lemonade/profilers/memory_tracker.py,sha256=1iuKt0FmNVYLDnOc-oZM8dX9TUksvoxO0m2EoYWjhYQ,9367
-lemonade/profilers/profiler.py,sha256=y_iMGr1ToQ6rcwcIcXck4ajapisLXCfHggiV-IpPF98,1666
+lemonade/profilers/profiler.py,sha256=Y5FSbc386bMlTVbqCuya9pYrso5aTthxahR1V_ZKQ9E,1902
 lemonade/tools/__init__.py,sha256=_6xRc-FHxmujoLjLjWtpYrWYEXtCSneSy-5ya01kyPk,53
 lemonade/tools/accuracy.py,sha256=9HCmczDngkBUuUrt49d2CkRo4J0qyWoFYs5cj20bGkg,11714
 lemonade/tools/adapter.py,sha256=HG54iMd6HDPZ4vnQIl7codq3HzffWbcHSIs_jVbNbhU,2958
@@ -26,7 +26,7 @@ lemonade/tools/humaneval.py,sha256=9lzsOaCSECf8LzqkQLFNwy1doAiZtK5gRN-RbZH7GLI,9
 lemonade/tools/management_tools.py,sha256=RO-lU-hjZhrP9KD9qcLI7MrLu-Rxnkrxzn45qqwKInE,8554
 lemonade/tools/mmlu.py,sha256=aEp9nMKTX5yaSaVZ15YmXbWE0YugjeAacnqjMZ13hHM,11072
 lemonade/tools/perplexity.py,sha256=xHl4cTBpJOCNcVxXhMv6eMp8fgUQmFM0G8DeRnx_rUk,5631
-lemonade/tools/prompt.py,sha256=AT3p5rCGHEs9ozeGxwWl07iKF-mgLxFOkYLjU2btFHs,8638
+lemonade/tools/prompt.py,sha256=cy6McZeLgk26xG1dJEY-cYnY2x8FUdyOOSG86WfBKCg,9348
 lemonade/tools/tool.py,sha256=UsxVYukfm_iM3BfeGYPZxQlTK5UfDfDOl3RIyLr8A1Y,13256
 lemonade/tools/huggingface/bench.py,sha256=-mTfldCtquL4mspq8ykVwDc9Mut5Ecv_jHJnSb0CYGE,6734
 lemonade/tools/huggingface/load.py,sha256=KsSGOBBD-tNEIfYC8mCWV_jpnkjHMhN3juVmC1Ln4uQ,7745
@@ -35,36 +35,36 @@ lemonade/tools/llamacpp/bench.py,sha256=A1X8ULQMxPVsff-AdiUsbWQUKpx7U7nFRNHFJRPd
 lemonade/tools/llamacpp/load.py,sha256=o3vVlefdxmdkHnuvFR3TOxiJkpNAuNFcs9Whfp24jpg,9236
 lemonade/tools/oga/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 lemonade/tools/oga/bench.py,sha256=T3c40NevM3NA7CT98B6vBj1nXfdITDqpfMHYSjhjwpA,5061
-lemonade/tools/oga/load.py,sha256=7Sdf6PFPrqbadPabyJb_uPRUIP09qj21ZYdXz47MqsE,28570
+lemonade/tools/oga/load.py,sha256=xSP0DWoGd5zBRozSafj1MMyIQyHJuIRj_vNlCTx8mfs,28309
 lemonade/tools/oga/utils.py,sha256=p7faMNfT-rLURC9t_s1S_STQRzzLADqbngUliTOOXeQ,16144
 lemonade/tools/quark/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 lemonade/tools/quark/quark_load.py,sha256=tNy-G9yEJ5cTsxw9LmGUYmmdlEzMo_iy-KSIc2YVz6U,5581
 lemonade/tools/quark/quark_quantize.py,sha256=LZrcbLf9oIw7FW2ccP_qkCP32jxmz5YnNEaoY6rsAuY,16583
 lemonade/tools/report/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 lemonade/tools/report/llm_report.py,sha256=bVHhwCINA-Ok2EdSwAsLubsc83N3KWOVuwTguw7jDcE,6676
-lemonade/tools/report/table.py,sha256=di8IZkolt_kaZfWri6GQkhPE1zCELqcrBcG1x1fzWqg,24843
+lemonade/tools/report/table.py,sha256=wJFzKtlmGQH0RQ5O9nevtpMe_-zQ-8zNOndINQuzsjM,27793
 lemonade/tools/server/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-lemonade/tools/server/llamacpp.py,sha256=aDVjjkU2Z2PN25Uuy-lk6ByKPR8kg5r2X-YsVSs4vi8,15624
+lemonade/tools/server/llamacpp.py,sha256=vjFNelm_VyKBBgWmltsAwLI7ncQ9AwVFQD7krZnF42w,16199
 lemonade/tools/server/serve.py,sha256=3_jBpi6THnnAmtKOxvPlOkIhSTTmrlZE3fr2Dpto-Q4,52794
 lemonade/tools/server/tool_calls.py,sha256=xrAlQwKG-nv2xLlf8f9CDSaUbyMn8ZtHkds9iZLG9K8,5230
-lemonade/tools/server/tray.py,sha256=SakwhZKPgo7VtWP4q10SaCcZdxKG95dnNsXdTu9Eei0,16030
+lemonade/tools/server/tray.py,sha256=4Kf3x8YfRaItPW7lxlEwerD7c5Q2snzcNk3ZrEoae58,17259
 lemonade/tools/server/webapp.py,sha256=8Das5yXOaSBLZmSZ_eddJajQFxBhvl5D6GI_hHlGbE0,1040
 lemonade/tools/server/static/favicon.ico,sha256=hMmP9qGJNeZ0mFS86JIqPbZstXMZn0Z76_HfHQpREAU,126745
 lemonade/tools/server/static/styles.css,sha256=u-SzZ-vh5qEFMDSKLHJ7MsQwvwpJLB_DdJxocf06Sro,16880
-lemonade/tools/server/static/webapp.html,sha256=im7YQkwvbuqrbO-sLhStVqtA6B7HKAn2azZka1KoeJQ,21260
+lemonade/tools/server/static/webapp.html,sha256=kPzORaogVRdFQewXyNI_JaH2ZZCTaq5zfMSyzuoFTuA,22414
 lemonade/tools/server/utils/port.py,sha256=XnIg2qS73QRrsJn6LgHcrJPmku30Tv6vsYcBVMj82K4,2186
 lemonade/tools/server/utils/system_tray.py,sha256=b9lvNv9chJKQxvmH7qzAuUe6H9HsLu7pdHFqGlAJaL0,12654
 lemonade/tools/server/utils/thread.py,sha256=pK9K_6DNWoQ78NArkAX3Ym2WsxLnCs9sKTk6TitlYnI,2804
 lemonade_install/__init__.py,sha256=26zohKg2jgr_5y7tObduWMYQg8zCTWMZHL8lfi2zZVQ,40
 lemonade_install/install.py,sha256=DJWR36QSjZtvEwRjYPNSjhYgoxLjI_6OPrCMZjL0ChY,28263
-lemonade_sdk-8.0.1.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-lemonade_sdk-8.0.1.dist-info/licenses/NOTICE.md,sha256=B8lEqi4QE41J9ljz4Riv2JgHD1v8GCZE6nNBHO3KIA0,2135
-lemonade_server/cli.py,sha256=fm1eORLKElHfzqO5VVicDmn9EbmqIffi1bynqacJeyw,11744
-lemonade_server/model_manager.py,sha256=HqbahDMRv1x8jyQj4pa1rXanlPmcCykt8tlI6WfaxjE,13023
-lemonade_server/pydantic_models.py,sha256=2ALw47C1VWGe2nKWjlEAzP1ggKYsky4xlahUFxQJCMs,2298
-lemonade_server/server_models.json,sha256=wTK_H9XDHLxqMWQJqbBsJwm50PhOR4gURyVj9Jm35PQ,6992
-lemonade_sdk-8.0.1.dist-info/METADATA,sha256=s5q-KKS3Drrxxm1-wGLUP9c0HymN2RgC7PjMqr0biog,8225
-lemonade_sdk-8.0.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-lemonade_sdk-8.0.1.dist-info/entry_points.txt,sha256=gJppn0ETtXXR6ceKWEIRdk42kMC7ps59EmU3NCPyPUk,144
-lemonade_sdk-8.0.1.dist-info/top_level.txt,sha256=10ap5GNiPhalO4V50LRoxA1FqRT9g3Xkia6BITu880k,42
-lemonade_sdk-8.0.1.dist-info/RECORD,,
+lemonade_sdk-8.0.3.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+lemonade_sdk-8.0.3.dist-info/licenses/NOTICE.md,sha256=B8lEqi4QE41J9ljz4Riv2JgHD1v8GCZE6nNBHO3KIA0,2135
+lemonade_server/cli.py,sha256=z6ojwFaOIz0hbUbVtZWMLP4YDpkcVOmqwmdm55dhKA4,11980
+lemonade_server/model_manager.py,sha256=Yvlsl0wipKfryKULH5ASQ9INhLQXPq9dTGQVBXf2_h0,16167
+lemonade_server/pydantic_models.py,sha256=nsbpHqAkd6nkz5QT16u9xMZbCXqccGiy5O0fWecOM88,2338
+lemonade_server/server_models.json,sha256=O5zk94gH_zRq6GSwbqvi2SNwx51eY9uqgAl_kxTi0iM,7271
+lemonade_sdk-8.0.3.dist-info/METADATA,sha256=WesWziLri9jQjZILRENliiJbggTVF8LmXKVIERInVbE,8285
+lemonade_sdk-8.0.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+lemonade_sdk-8.0.3.dist-info/entry_points.txt,sha256=gJppn0ETtXXR6ceKWEIRdk42kMC7ps59EmU3NCPyPUk,144
+lemonade_sdk-8.0.3.dist-info/top_level.txt,sha256=10ap5GNiPhalO4V50LRoxA1FqRT9g3Xkia6BITu880k,42
+lemonade_sdk-8.0.3.dist-info/RECORD,,
lemonade_server/cli.py CHANGED
@@ -4,7 +4,6 @@ import os
 from typing import Tuple, Optional
 import psutil
 from typing import List
-import subprocess
 
 
 # Error codes for different CLI scenarios
@@ -88,23 +87,26 @@ def stop():
         # Terminate the main process first
         process.terminate()
 
-        # Then terminate all children
+        # Then terminate llama-server child process (known to be stubborn)
+        # We avoid killing other child processes, such as the installer
         for child in children:
-            try:
-                child.terminate()
-            except psutil.NoSuchProcess:
-                pass  # Child already terminated
+            if "llama-server" in child.name():
+                try:
+                    child.terminate()
+                except psutil.NoSuchProcess:
+                    pass  # Child already terminated
 
         # Wait for main process
         process.wait(timeout=10)
 
-        # Kill any children that didn't terminate gracefully
+        # Kill llama-server child process if it didn't terminate gracefully
         for child in children:
-            try:
-                if child.is_running():
-                    child.kill()
-            except psutil.NoSuchProcess:
-                pass  # Child already terminated
+            if "llama-server" in child.name():
+                try:
+                    if child.is_running():
+                        child.kill()
+                except psutil.NoSuchProcess:
+                    pass  # Child already terminated
     except psutil.NoSuchProcess:
         # Process already terminated
         pass
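
The name-based filtering narrows the blast radius of `stop` to llama-server workers. A standalone sketch of the same pattern (run against the current process; `psutil` calls as documented):

    import psutil

    parent = psutil.Process()  # stand-in for the lemonade-server process
    for child in parent.children(recursive=True):
        if "llama-server" in child.name():
            try:
                child.terminate()  # leave other children (e.g. an installer) alone
            except psutil.NoSuchProcess:
                pass  # already gone
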
lemonade_server/model_manager.py CHANGED
@@ -102,57 +102,131 @@ class ModelManager:
         """
         return self.filter_models_by_backend(self.downloaded_models)
 
+    def identify_gguf_models(
+        self, checkpoint: str, variant: str, mmproj: str
+    ) -> tuple[dict, list[str]]:
+        """
+        Identifies the GGUF model files in the repository that match the variant.
+        """
+
+        hint = """
+        The CHECKPOINT:VARIANT scheme is used to specify model files in Hugging Face repositories.
+
+        The VARIANT format can be one of several types:
+        1. Full filename: exact file to download
+        2. None/empty: gets the first .gguf file in the repository (excludes mmproj files)
+        3. Quantization variant: find a single file ending with the variant name (case insensitive)
+        4. Folder name: downloads all .gguf files in the folder that matches the variant name (case insensitive)
+
+        Examples:
+        - "unsloth/Qwen3-8B-GGUF:qwen3.gguf" -> downloads "qwen3.gguf"
+        - "unsloth/Qwen3-30B-A3B-GGUF" -> downloads "Qwen3-30B-A3B-GGUF.gguf"
+        - "unsloth/Qwen3-8B-GGUF:Q4_1" -> downloads "Qwen3-8B-GGUF-Q4_1.gguf"
+        - "unsloth/Qwen3-30B-A3B-GGUF:Q4_0" -> downloads all files in "Q4_0/" folder
+        """
+
+        repo_files = huggingface_hub.list_repo_files(checkpoint)
+        sharded_files = []
+
+        # (case 1) If variant ends in .gguf, use it directly
+        if variant and variant.endswith(".gguf"):
+            variant_name = variant
+            if variant_name not in repo_files:
+                raise ValueError(
+                    f"File {variant} not found in Hugging Face repository {checkpoint}. {hint}"
+                )
+        # (case 2) If no variant is provided, get the first .gguf file in the repository
+        elif variant is None:
+            all_variants = [
+                f for f in repo_files if f.endswith(".gguf") and "mmproj" not in f
+            ]
+            if len(all_variants) == 0:
+                raise ValueError(
+                    f"No .gguf files found in Hugging Face repository {checkpoint}. {hint}"
+                )
+            variant_name = all_variants[0]
+        else:
+            # (case 3) Find a single file ending with the variant name (case insensitive)
+            end_with_variant = [
+                f
+                for f in repo_files
+                if f.lower().endswith(f"{variant}.gguf".lower())
+                and "mmproj" not in f.lower()
+            ]
+            if len(end_with_variant) == 1:
+                variant_name = end_with_variant[0]
+            elif len(end_with_variant) > 1:
+                raise ValueError(
+                    f"Multiple .gguf files found for variant {variant}, but only one is allowed. {hint}"
+                )
+            # (case 4) Check whether the variant corresponds to a folder with sharded files (case insensitive)
+            else:
+                sharded_files = [
+                    f
+                    for f in repo_files
+                    if f.endswith(".gguf")
+                    and f.lower().startswith(f"{variant}/".lower())
+                ]
+
+                if not sharded_files:
+                    raise ValueError(
+                        f"No .gguf files found for variant {variant}. {hint}"
+                    )
+
+                # Sort to ensure consistent ordering
+                sharded_files.sort()
+
+                # Use first file as primary (this is how llamacpp handles it)
+                variant_name = sharded_files[0]
+
+        core_files = {"variant": variant_name}
+
+        # If there is a mmproj file, add it to the patterns
+        if mmproj:
+            if mmproj not in repo_files:
+                raise ValueError(
+                    f"The provided mmproj file {mmproj} was not found in {checkpoint}."
+                )
+            core_files["mmproj"] = mmproj
+
+        return core_files, sharded_files
+
     def download_gguf(self, model_config: PullConfig) -> dict:
         """
         Downloads the GGUF file for the given model configuration.
+
+        For sharded models, if the variant points to a folder (e.g. Q4_0), all files in that folder
+        will be downloaded but only the first file will be returned for loading.
         """
 
-        # The variant parameter can be either:
-        # 1. A full GGUF filename (e.g. "model-Q4_0.gguf")
-        # 2. A quantization variant (e.g. "Q4_0")
-        # This code handles both cases by constructing the appropriate filename
+        # This code handles all cases by constructing the appropriate filename or pattern
         checkpoint, variant = self.parse_checkpoint(model_config.checkpoint)
-        hf_base_name = checkpoint.split("/")[-1].replace("-GGUF", "")
-        variant_name = (
-            variant if variant.endswith(".gguf") else f"{hf_base_name}-{variant}.gguf"
-        )
 
-        # If there is a mmproj file, add it to the patterns
-        expected_files = {"variant": variant_name}
-        if model_config.mmproj:
-            expected_files["mmproj"] = model_config.mmproj
+        # Identify the GGUF model files in the repository that match the variant
+        core_files, sharded_files = self.identify_gguf_models(
+            checkpoint, variant, model_config.mmproj
+        )
 
         # Download the files
         snapshot_folder = huggingface_hub.snapshot_download(
             repo_id=checkpoint,
-            allow_patterns=list(expected_files.values()),
+            allow_patterns=list(core_files.values()) + sharded_files,
         )
 
-        # Make sure we downloaded something
-        # If we didn't that can indicate that no patterns from allow_patterns match
-        # any files in the HF repo
-        if not os.path.exists(snapshot_folder):
-            raise ValueError(
-                "No patterns matched the variant parameter (CHECKPOINT:VARIANT). "
-                "Try again, providing the full filename of your target .gguf file as the variant."
-                " For example: Qwen/Qwen2.5-Coder-3B-Instruct-GGUF:"
-                "qwen2.5-coder-3b-instruct-q4_0.gguf"
-            )
-
-        # Ensure we downloaded all expected files while creating a dict of the downloaded files
-        snapshot_files = {}
-        for file in expected_files:
-            snapshot_files[file] = os.path.join(snapshot_folder, expected_files[file])
-            if expected_files[file].lower() not in [
-                name.lower() for name in os.listdir(snapshot_folder)
-            ]:
+        # Ensure we downloaded all expected files
+        for file in list(core_files.values()) + sharded_files:
+            expected_path = os.path.join(snapshot_folder, file)
+            if not os.path.exists(expected_path):
                 raise ValueError(
                     f"Hugging Face snapshot download for {model_config.checkpoint} "
-                    f"expected file {expected_files[file]} not found in {snapshot_folder}"
+                    f"expected file {file} not found at {expected_path}"
                 )
 
-        # Return a dict that points to the snapshot path of the downloaded GGUF files
-        return snapshot_files
+        # Return a dict of the full path of the core GGUF files
+        return {
+            file_name: os.path.join(snapshot_folder, file_path)
+            for file_name, file_path in core_files.items()
+        }
 
     def download_models(
         self,
  self,
@@ -249,6 +323,9 @@ class ModelManager:
 
         user_models[model_name] = new_user_model
 
+        # Ensure the cache directory exists before writing the file
+        os.makedirs(os.path.dirname(USER_MODELS_FILE), exist_ok=True)
+
         with open(USER_MODELS_FILE, mode="w", encoding="utf-8") as file:
             json.dump(user_models, fp=file)
 
lemonade_server/pydantic_models.py CHANGED
@@ -62,6 +62,7 @@ class ChatCompletionRequest(BaseModel):
     tools: list[dict] | None = None
     max_tokens: int | None = None
     max_completion_tokens: int | None = None
+    response_format: dict | None = None
 
 
 class ResponsesRequest(BaseModel):
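
With `response_format` on the request model, an OpenAI-style structured-output request can round-trip through validation instead of being rejected. A sketch of a client call (assumes OpenAI-compatible `json_object` semantics on the server side; endpoint, port, and model name are illustrative):

    from openai import OpenAI

    client = OpenAI(base_url="http://localhost:8000/api/v1", api_key="-")
    completion = client.chat.completions.create(
        model="Qwen3-8B-GGUF",  # illustrative model name
        messages=[{"role": "user", "content": "Reply with a JSON object."}],
        response_format={"type": "json_object"},
    )
    print(completion.choices[0].message.content)
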
lemonade_server/server_models.json CHANGED
@@ -203,5 +203,13 @@
         "reasoning": false,
         "suggested": true,
         "labels": ["vision"]
+    },
+    "Llama-4-Scout-17B-16E-Instruct-GGUF": {
+        "checkpoint": "unsloth/Llama-4-Scout-17B-16E-Instruct-GGUF:Q4_K_S",
+        "mmproj": "mmproj-F16.gguf",
+        "recipe": "llamacpp",
+        "reasoning": false,
+        "suggested": true,
+        "labels": ["vision"]
     }
 }