lemonade-sdk 7.0.1__py3-none-any.whl → 7.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of lemonade-sdk has been flagged as a potentially problematic release.

lemonade/cli.py CHANGED
@@ -19,6 +19,7 @@ import lemonade.cache as cache
  from lemonade.tools.mmlu import AccuracyMMLU
  from lemonade.tools.humaneval import AccuracyHumaneval
  from lemonade.tools.perplexity import AccuracyPerplexity
+ from lemonade.tools.accuracy import LMEvalHarness
  from lemonade.tools.prompt import LLMPrompt
  from lemonade.tools.quark.quark_load import QuarkLoad
  from lemonade.tools.quark.quark_quantize import QuarkQuantize
@@ -36,6 +37,7 @@ def main():
          AccuracyMMLU,
          AccuracyHumaneval,
          AccuracyPerplexity,
+         LMEvalHarness,
          LLMPrompt,
          HuggingfaceBench,
          OgaBench,
lemonade/tools/accuracy.py ADDED
@@ -0,0 +1,335 @@
+ import argparse
+ import json
+ import os
+ import socket
+ import subprocess
+ import sys
+ import time
+ from typing import Optional
+
+ import requests
+
+ from lemonade.state import State
+ from lemonade.tools import Tool
+ import lemonade.common.printing as printing
+ import lemonade.common.build as build
+
+ from lemonade.tools.server.thread_utils import ServerRunner
+
+
+ def is_port_in_use(port, host="localhost"):
+     """
+     Check if a port is in use
+     """
+     with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
+         return s.connect_ex((host, port)) == 0
+
+
+ class LMEvalHarness(Tool):
+     """
+     Tool for evaluating LLMs using lm-eval-harness on industry standard benchmarks
+     like MMLU, GSM8k, and more. See docs/lemonade/lm_eval.md for more details.
+     """
+
+     unique_name = "lm-eval-harness"
+
+     def __init__(self):
+         super().__init__(
+             monitor_message="Evaluate model accuracy using ElutherAI's lm-eval-harness"
+         )
+         self.status_stats = []
+         self.server_runner = None
+
+     @staticmethod
+     def parser(add_help: bool = True) -> argparse.ArgumentParser:
+         parser = __class__.helpful_parser(
+             short_description="Evaluate model using lm-eval-harness",
+             add_help=add_help,
+         )
+
+         parser.add_argument(
+             "--task",
+             type=str,
+             required=True,
+             help="Task(s) to evaluate on (e.g., gsm8k, mmlu)",
+         )
+
+         parser.add_argument(
+             "--server-port", type=int, default=8000, help="Port to use for the server"
+         )
+
+         parser.add_argument(
+             "--num-fewshot",
+             type=int,
+             default=0,
+             help="Number of examples in few-shot prompts",
+         )
+
+         parser.add_argument(
+             "--limit",
+             type=int,
+             default=None,
+             help="Limit the number of examples per task",
+         )
+
+         parser.add_argument(
+             "--log-samples",
+             action="store_true",
+             help="Log samples for each task to log file",
+         )
+
+         parser.add_argument(
+             "--output-path",
+             type=str,
+             default=None,
+             help="Path to save evaluation results",
+         )
+
+         return parser
+
+     def _process_results(self, results_dir, state):
+         """Process evaluation results and save to state stats"""
+         if not os.path.exists(results_dir) or not os.path.isdir(results_dir):
+             printing.log_warning(f"Results directory not found at {results_dir}")
+             return
+
+         model_dirs = [
+             d
+             for d in os.listdir(results_dir)
+             if os.path.isdir(os.path.join(results_dir, d))
+         ]
+
+         if not model_dirs:
+             printing.log_warning(f"No model directories found in {results_dir}")
+             return
+
+         model_dir = os.path.join(results_dir, model_dirs[0])
+         printing.log_info(f"Found model directory: {model_dir}")
+
+         # Find the results JSON file with timestamp
+         results_files = [
+             f
+             for f in os.listdir(model_dir)
+             if f.startswith("results_") and f.endswith(".json")
+         ]
+
+         if not results_files:
+             printing.log_warning(f"No results files found in {model_dir}")
+             return
+
+         # Sort by timestamp
+         results_files.sort(reverse=True)
+         results_file_path = os.path.join(model_dir, results_files[0])
+         printing.log_info(f"Processing results from {results_file_path}")
+
+         # Read and process results
+         try:
+             with open(results_file_path, "r", encoding="utf-8") as f:
+                 results = json.load(f)
+
+             # Extract and display metrics
+             if "results" in results:
+                 for task_name, metrics in results["results"].items():
+                     printing.log_info(f"Results for {task_name}:")
+
+                     for metric, value in metrics.items():
+                         if isinstance(value, (int, float)) and not metric.startswith(
+                             "alias"
+                         ):
+                             # Format metric name for stats
+                             clean_metric = metric.replace(",", "_")
+                             stat_name = f"lm_eval_{task_name}_{clean_metric}"
+
+                             # Save to state stats as percentage
+                             state.save_stat(stat_name, float(value) * 100)
+                             state.save_stat(f"{stat_name}_units", "%")
+                             self.status_stats.append(stat_name)
+
+                             printing.log_info(
+                                 f" {metric}: {value:.4f} ({value*100:.2f}%)"
+                             )
+
+             # Save summary metrics if available
+             avg_metrics = {}
+             if "higher_is_better" in results:
+                 for metric_type in results["higher_is_better"].values():
+                     for metric in metric_type.keys():
+                         if metric not in avg_metrics:
+                             avg_metrics[metric] = []
+
+                 for task_metrics in results["results"].values():
+                     for metric, value in task_metrics.items():
+                         if isinstance(value, (int, float)) and not metric.startswith(
+                             "alias"
+                         ):
+                             base_metric = metric.split(",")[0]
+                             if base_metric in avg_metrics:
+                                 avg_metrics[base_metric].append(value)
+
+                 # Calculate and save averages
+                 for metric, values in avg_metrics.items():
+                     if values:
+                         avg_value = sum(values) / len(values)
+                         stat_name = f"lm_eval_average_{metric}"
+                         state.save_stat(stat_name, float(avg_value) * 100)
+                         state.save_stat(f"{stat_name}_units", "%")
+                         self.status_stats.append(stat_name)
+                         printing.log_info(
+                             f"Average {metric}: {avg_value:.4f} ({avg_value*100:.2f}%)"
+                         )
+
+         except (IOError, json.JSONDecodeError) as e:
+             printing.log_error(f"Error processing results: {e}")
+
+     def run(
+         self,
+         state: State,
+         task: str,
+         server_port: int = 8000,
+         server_host: str = "localhost",
+         num_fewshot: int = 0,
+         limit: Optional[int] = None,
+         log_samples: bool = False,
+         output_path: Optional[str] = None,
+     ) -> State:
+
+         model = state.model
+         tokenizer = state.tokenizer
+
+         if model is None or tokenizer is None:
+             raise ValueError(
+                 "Model and tokenizer must be loaded in state before running lm-eval-harness"
+             )
+
+         # Set up output path
+         if output_path is None:
+             output_path = os.path.join(
+                 build.output_dir(state.cache_dir, state.build_name), "lm_eval_results"
+             )
+
+         os.makedirs(output_path, exist_ok=True)
+
+         # Check if port is already in use
+         if is_port_in_use(server_port, server_host):
+             error_msg = (
+                 f"Port {server_port} is already in use. "
+                 "Please close all applications using this port and try again."
+             )
+             printing.log_error(error_msg)
+             raise RuntimeError(error_msg)
+
+         # Retroactively determine recipe based on model type to select correct iterator
+         # The model is already loaded in server, so we only need recipe for iterator selection
+         checkpoint = getattr(state, "checkpoint", "unknown")
+         if "OrtGenaiModel" in str(type(model)):
+             recipe = "oga-"
+         else:
+             recipe = "unknown"
+
+         # Start the server thread
+         self.server_runner = ServerRunner(
+             model=model,
+             tokenizer=tokenizer,
+             checkpoint=checkpoint,
+             recipe=recipe,
+             host=server_host,
+             port=server_port,
+         )
+         self.server_runner.start()
+
+         # Wait for server initialization
+         printing.log_info("Waiting for server initialization...")
+
+         # Wait for server to start and be responsive
+         server_url = f"http://{server_host}:{server_port}"
+         max_retries = 30
+         retry_delay = 1
+
+         printing.log_info(f"Checking if server is available at {server_url}...")
+         for i in range(max_retries):
+             try:
+                 response = requests.get(f"{server_url}/api/v0/health", timeout=2)
+                 if response.status_code == 200:
+                     printing.log_info(f"Server is ready after {i+1} attempts")
+                     break
+             except requests.exceptions.RequestException:
+                 if i < max_retries - 1:
+                     time.sleep(retry_delay)
+                 else:
+                     printing.log_error(
+                         f"Server did not start after {max_retries} attempts"
+                     )
+                     raise RuntimeError("Failed to start the server")
+
+         # Build API URL
+         results_file = os.path.join(output_path, f"{task}_results")
+
+         printing.log_info(f"Running lm-eval-harness on {task}...")
+
+         # Build lm-eval-harness command
+         cmd = [
+             "lm_eval",
+             "--model",
+             "local-completions",
+             "--tasks",
+             task,
+             "--model_args",
+             (
+                 f"model={checkpoint},"
+                 f"base_url={server_url}/api/v0/completions,"
+                 f"num_concurrent=1,"
+                 f"max_retries=5,"
+                 f"retry_timeout=10,"
+                 f"tokenized_requests=False"
+             ),
+             "--num_fewshot",
+             str(num_fewshot),
+             "--output_path",
+             results_file,
+         ]
+
+         if limit is not None:
+             cmd.extend(["--limit", str(limit)])
+
+         if log_samples:
+             cmd.extend(["--log_samples"])
+
+         try:
+             # On Windows, set UTF-8 mode to handle Unicode output
+             env = os.environ.copy()
+             if sys.platform == "win32":
+                 env["PYTHONIOENCODING"] = "utf-8"
+
+             # Execute lm-eval-harness command
+             result = subprocess.run(
+                 cmd, check=True, text=True, capture_output=True, env=env
+             )
+
+             # Log relevant output and skip any parts that might cause encoding issues
+             try:
+                 printing.log_info(result.stdout)
+             except UnicodeEncodeError:
+                 printing.log_info(
+                     "Results obtained successfully but couldn't display due to encoding issues"
+                 )
+
+             # Process results from the correct location
+             results_dir = os.path.join(output_path, f"{task}_results")
+             self._process_results(results_dir, state)
+
+         except subprocess.CalledProcessError as e:
+             printing.log_error(f"Error running lm-eval-harness: {e}")
+             printing.log_error(f"stderr: {e.stderr}")
+         except (IOError, ValueError, requests.RequestException) as e:
+             printing.log_error(f"Error: {e}")
+         finally:
+             # Shut down server
+             if self.server_runner and self.server_runner.is_alive():
+                 printing.log_info("Shutting down server runner...")
+                 self.server_runner.shutdown()
+
+             # Make sure we don't have any lingering references to state's model/tokenizer
+             # that could prevent garbage collection
+             self.server_runner = None
+
+         return state
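
Note: the tool above registers under the unique_name "lm-eval-harness", so it can be chained after a model-loading tool on the lemonade command line. A hypothetical invocation is sketched below; the oga-load stage and its flags are illustrative placeholders, while --task, --num-fewshot, and --limit are taken from the parser shown above:

    lemonade -i amd/Llama-3.2-1B-Instruct-awq-uint4-float16-cpu-onnx \
        oga-load --device cpu \
        lm-eval-harness --task gsm8k --num-fewshot 5 --limit 50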
lemonade/tools/server/llamacpp.py CHANGED
@@ -16,9 +16,9 @@ from openai import OpenAI

  from lemonade_server.model_manager import ModelManager
  from lemonade.tools.server.pydantic_models import ChatCompletionRequest
+ from lemonade.tools.server.port_utils import find_free_port

  LLAMA_VERSION = "b5543"
- LLAMA_SERVER_PORT = "8081"

  LLAMA_SERVER_EXE_DIR = os.path.join(
      os.path.dirname(sys.executable),
@@ -43,6 +43,23 @@ class LlamaTelemetry:
          self.tokens_per_second = None
          self.prompt_eval_time = None
          self.eval_time = None
+         self.port = None
+
+     def choose_port(self):
+         """
+         Users probably don't care what port we start llama-server on, so let's
+         search for an empty port
+         """
+
+         self.port = find_free_port()
+
+         if self.port is None:
+             msg = "Failed to find an empty port to start llama-server on"
+             logging.error(msg)
+             raise HTTPException(
+                 status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+                 detail=msg,
+             )

      def parse_telemetry_line(self, line: str):
          """
@@ -128,10 +145,12 @@ def _log_subprocess_output(
              break


- def _wait_for_load(llama_server_process: subprocess.Popen, fail_message: str):
+ def _wait_for_load(
+     llama_server_process: subprocess.Popen, port: int, fail_message: str
+ ):
      status_code = None
      while not llama_server_process.poll() and status_code != 200:
-         health_url = f"http://localhost:{LLAMA_SERVER_PORT}/health"
+         health_url = f"http://localhost:{port}/health"
          try:
              health_response = requests.get(health_url)
          except requests.exceptions.ConnectionError:
@@ -152,12 +171,17 @@ def _launch_llama_subprocess(
      Launch llama server subprocess with GPU or CPU configuration
      """

+     # Find a port, and save it in the telemetry object for future reference
+     # by other functions
+     telemetry.choose_port()
+
      base_command = [
          LLAMA_SERVER_EXE_PATH,
          "-m",
          model_path,
          "--port",
-         LLAMA_SERVER_PORT,
+         str(telemetry.port),
+         "--jinja",
      ]

      # Configure GPU layers: 99 for GPU, 0 for CPU-only
@@ -227,6 +251,7 @@ def server_load(checkpoint: str, model_reference: str, telemetry: LlamaTelemetry
      # Check the /health endpoint until GPU server is ready
      _wait_for_load(
          llama_server_process,
+         telemetry.port,
          f"Loading {model_reference} on GPU didn't work, re-attempting on CPU",
      )

@@ -239,6 +264,7 @@ def server_load(checkpoint: str, model_reference: str, telemetry: LlamaTelemetry
      # Check the /health endpoint until CPU server is ready
      _wait_for_load(
          llama_server_process,
+         telemetry.port,
          f"Loading {model_reference} on CPU didn't work",
      )

@@ -254,7 +280,7 @@ def server_load(checkpoint: str, model_reference: str, telemetry: LlamaTelemetry
  def chat_completion(
      chat_completion_request: ChatCompletionRequest, telemetry: LlamaTelemetry
  ):
-     base_url = f"http://127.0.0.1:{LLAMA_SERVER_PORT}/v1"
+     base_url = f"http://127.0.0.1:{telemetry.port}/v1"
      client = OpenAI(
          base_url=base_url,
          api_key="lemonade",
lemonade/tools/server/port_utils.py ADDED
@@ -0,0 +1,57 @@
+ import socketserver
+ import sys
+ import logging
+ from contextlib import asynccontextmanager
+ from fastapi import FastAPI
+
+
+ def find_free_port():
+     """
+     Scans for an unoccupied TCP port
+
+     Returns the port number as an int on success
+     Returns None if no port can be found
+     """
+
+     try:
+         with socketserver.TCPServer(("localhost", 0), None) as s:
+             return s.server_address[1]
+     # pylint: disable=broad-exception-caught
+     except Exception:
+         return None
+
+
+ @asynccontextmanager
+ async def lifespan(app: FastAPI):
+     # Code here will run when the application starts up
+     # Check if console can handle Unicode by testing emoji encoding
+
+     try:
+         if sys.stdout.encoding:
+             "🍋".encode(sys.stdout.encoding)
+         use_emojis = True
+     except (UnicodeEncodeError, AttributeError):
+         use_emojis = False
+
+     if use_emojis:
+         logging.info(
+             "\n"
+             "\n"
+             "🍋 Lemonade Server Ready!\n"
+             f"🍋 Open http://localhost:{app.port} in your browser for:\n"
+             "🍋 💬 chat\n"
+             "🍋 💻 model management\n"
+             "🍋 📄 docs\n"
+         )
+     else:
+         logging.info(
+             "\n"
+             "\n"
+             "[Lemonade] Lemonade Server Ready!\n"
+             f"[Lemonade] Open http://localhost:{app.port} in your browser for:\n"
+             "[Lemonade] chat\n"
+             "[Lemonade] model management\n"
+             "[Lemonade] docs\n"
+         )
+
+     yield
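
Note: the lifespan hook above reads app.port when it logs the startup banner. Based on the serve.py changes that follow (which import lifespan from port_utils and set self.app.port before starting uvicorn), the wiring looks roughly like this minimal sketch; the literal port value is illustrative:

    from fastapi import FastAPI
    from lemonade.tools.server.port_utils import lifespan

    app = FastAPI(lifespan=lifespan)
    app.port = 8000  # read by lifespan() when it prints the startup banner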
lemonade/tools/server/serve.py CHANGED
@@ -8,7 +8,6 @@ import traceback
  from typing import Optional, Union
  import json
  import subprocess
- from contextlib import asynccontextmanager
  from pathlib import Path

  from fastapi import FastAPI, HTTPException, status, Request
@@ -16,6 +15,8 @@ from fastapi.responses import StreamingResponse
  from fastapi.middleware.cors import CORSMiddleware
  from fastapi.staticfiles import StaticFiles
  import uvicorn
+ from uvicorn.config import Config
+ from uvicorn.server import Server as UvicornServer
  from transformers import TextIteratorStreamer, StoppingCriteria, StoppingCriteriaList
  from tabulate import tabulate

@@ -57,7 +58,7 @@ from lemonade.tools.server.pydantic_models import (
  )
  from lemonade.tools.server.tool_calls import extract_tool_calls, get_tool_call_pattern
  from lemonade.tools.server.instructions import get_instructions_html
-
+ from lemonade.tools.server.port_utils import lifespan

  DEFAULT_PORT = 8000
  DEFAULT_LOG_LEVEL = "info"
@@ -243,15 +244,22 @@ class Server(ManagementTool):

          return parser

-     def run(
+     def _setup_server_common(
          self,
-         # ManagementTool has a required cache_dir arg, but
-         # we always use the default cache directory
-         _=None,
-         port: int = DEFAULT_PORT,
-         log_level: str = DEFAULT_LOG_LEVEL,
+         port: int,
          truncate_inputs: bool = False,
+         log_level: str = DEFAULT_LOG_LEVEL,
+         threaded_mode: bool = False,
      ):
+         """
+         Common setup logic shared between run() and run_in_thread().
+
+         Args:
+             port: Port number for the server
+             truncate_inputs: Whether to truncate inputs if they exceed max length
+             log_level: Logging level to configure
+             threaded_mode: Whether this is being set up for threaded execution
+         """
          # Store truncation settings
          self.truncate_inputs = truncate_inputs

@@ -265,22 +273,27 @@

          logging.trace = trace

-         # Configure logging to match uvicorn's format
-         logging_level = getattr(logging, log_level.upper())
-         logging.basicConfig(
-             level=logging_level,
-             format="%(levelprefix)s %(message)s",
-             datefmt="%Y-%m-%d %H:%M:%S",
-         )
+         # Configure logging based on mode
+         if threaded_mode:
+             # Configure logging for warning level (to reduce noise in threaded execution)
+             logging.getLogger("uvicorn.error").setLevel(logging.WARNING)
+         else:
+             # Configure logging to match uvicorn's format
+             logging_level = getattr(logging, log_level.upper())
+             logging.basicConfig(
+                 level=logging_level,
+                 format="%(levelprefix)s %(message)s",
+                 datefmt="%Y-%m-%d %H:%M:%S",
+             )

-         # Add uvicorn's log formatter
-         logging.root.handlers[0].formatter = uvicorn.logging.DefaultFormatter(
-             fmt="%(levelprefix)s %(message)s",
-             use_colors=True,
-         )
+             # Add uvicorn's log formatter
+             logging.root.handlers[0].formatter = uvicorn.logging.DefaultFormatter(
+                 fmt="%(levelprefix)s %(message)s",
+                 use_colors=True,
+             )

-         # Ensure the log level is properly set
-         logging.getLogger().setLevel(logging_level)
+             # Ensure the log level is properly set
+             logging.getLogger().setLevel(logging_level)

          # Update debug logging state after setting log level
          self.debug_logging_enabled = logging.getLogger().isEnabledFor(logging.DEBUG)
@@ -293,8 +306,62 @@
          # that the lifespan can access it
          self.app.port = port

+     def run(
+         self,
+         # ManagementTool has a required cache_dir arg, but
+         # we always use the default cache directory
+         _=None,
+         port: int = DEFAULT_PORT,
+         log_level: str = DEFAULT_LOG_LEVEL,
+         truncate_inputs: bool = False,
+     ):
+         # Common setup
+         self._setup_server_common(
+             port=port,
+             truncate_inputs=truncate_inputs,
+             log_level=log_level,
+             threaded_mode=False,
+         )
+
          uvicorn.run(self.app, host="localhost", port=port, log_level=log_level)

+     def run_in_thread(
+         self,
+         port: int = DEFAULT_PORT,
+         host: str = "localhost",
+         log_level: str = "warning",
+         truncate_inputs: bool = False,
+     ):
+         """
+         Set up the server for running in a thread.
+         Returns a uvicorn server instance that can be controlled externally.
+         """
+         # Common setup
+         self._setup_server_common(
+             port=port,
+             truncate_inputs=truncate_inputs,
+             log_level=log_level,
+             threaded_mode=True,
+         )
+
+         class CustomServer(UvicornServer):
+             """Custom Uvicorn server that can be properly shutdown from another thread"""
+
+             def install_signal_handlers(self):
+                 pass
+
+         # Configure the server
+         config = Config(
+             app=self.app,
+             host=host,
+             port=port,
+             log_level=log_level,
+             log_config=None,
+         )
+
+         # Create and return the uvicorn server
+         return CustomServer(config=config)
+
      async def _show_telemetry(self):
          """
          Show telemetry data in debug mode.
@@ -1241,6 +1308,8 @@
                  "status": "success",
                  "message": f"Loaded model: {model_reference}",
              }
+         except HTTPException:
+             raise
          except Exception:  # pylint: disable=broad-exception-caught
              self.model_load_failure(model_reference)

@@ -1339,22 +1408,5 @@
          return response


- @asynccontextmanager
- async def lifespan(app: FastAPI):
-     # Code here will run when the application starts up
-
-     logging.info(
-         "\n"
-         "\n"
-         "🍋 Lemonade Server Ready!\n"
-         f"🍋 Open http://localhost:{app.port} in your browser for:\n"
-         "🍋 💬 chat\n"
-         "🍋 💻 model management\n"
-         "🍋 📄 docs\n"
-     )
-
-     yield
-
-
  # This file was originally licensed under Apache 2.0. It has been modified.
  # Modifications Copyright (c) 2025 AMD
lemonade/tools/server/thread_utils.py ADDED
@@ -0,0 +1,87 @@
+ import threading
+ import logging
+ from lemonade.tools.server.serve import Server
+
+
+ class ServerRunner(threading.Thread):
+     """
+     Thread class for running the Lemonade Server with a loaded model.
+     """
+
+     def __init__(
+         self, model, tokenizer, checkpoint, recipe, host="localhost", port=8000
+     ):
+         threading.Thread.__init__(self)
+         self.model = model
+         self.tokenizer = tokenizer
+         self.checkpoint = checkpoint
+         self.recipe = recipe
+         self.host = host
+         self.port = port
+         self.server = None
+         self.ready_event = threading.Event()
+         self.shutdown_event = threading.Event()
+         self.uvicorn_server = None
+
+     def run(self):
+         try:
+             # Create the server instance
+             self.server = Server()
+
+             # Configure the server with model/tokenizer
+             self.server.model = self.model
+             self.server.tokenizer = self.tokenizer
+             self.server.llm_loaded = type(
+                 "obj",
+                 (object,),
+                 {
+                     "checkpoint": self.checkpoint,
+                     "recipe": self.recipe,
+                     "max_prompt_length": None,
+                     "reasoning": False,
+                     "model_name": "custom",
+                 },
+             )
+
+             # Set up the server for threaded execution
+             self.uvicorn_server = self.server.run_in_thread(
+                 port=self.port, host=self.host, log_level="warning"
+             )
+
+             # Set the ready event
+             self.ready_event.set()
+
+             # Run the server until shutdown is requested
+             logging.info(f"Starting server on http://{self.host}:{self.port}")
+             self.uvicorn_server.run()
+
+         except Exception as e:
+             logging.error(f"Error starting server: {e}")
+             self.ready_event.set()
+             raise
+
+     def shutdown(self):
+         """Shutdown the server"""
+         if hasattr(self, "uvicorn_server") and self.uvicorn_server:
+             logging.info("Shutting down server...")
+             self.uvicorn_server.should_exit = True
+             self.shutdown_event.set()
+
+         # Clean up resources properly to avoid memory leaks
+         if hasattr(self, "server") and self.server:
+             logging.info("Cleaning up model and tokenizer resources...")
+
+             if hasattr(self.server, "model"):
+                 self.server.model = None
+
+             if hasattr(self.server, "tokenizer"):
+                 self.server.tokenizer = None
+
+             if hasattr(self.server, "llm_loaded"):
+                 self.server.llm_loaded = None
+
+         # Clean up local references
+         if hasattr(self, "model"):
+             del self.model
+         if hasattr(self, "tokenizer"):
+             del self.tokenizer
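
Note: ServerRunner is consumed by the new lm-eval-harness tool above, which starts the thread, polls the /health endpoint, and calls shutdown() when evaluation finishes. A minimal standalone usage sketch, assuming a model and tokenizer are already loaded (the checkpoint and recipe strings are illustrative):

    runner = ServerRunner(
        model=model,
        tokenizer=tokenizer,
        checkpoint="example-checkpoint",  # illustrative
        recipe="oga-",                    # illustrative
        port=8000,
    )
    runner.start()
    runner.ready_event.wait()  # set once run_in_thread() has configured uvicorn
    # ... issue OpenAI-compatible requests against http://localhost:8000 ...
    runner.shutdown()
    runner.join()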
lemonade/version.py CHANGED
@@ -1 +1 @@
- __version__ = "7.0.1"
+ __version__ = "7.0.2"
lemonade_sdk-7.0.2.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: lemonade-sdk
- Version: 7.0.1
+ Version: 7.0.2
  Summary: Lemonade SDK: Your LLM Aide for Validation and Deployment
  Author-email: lemonade@amd.com
  Requires-Python: >=3.10, <3.12
lemonade_sdk-7.0.2.dist-info/RECORD CHANGED
@@ -1,10 +1,10 @@
  lemonade/__init__.py,sha256=W1Qk7r0rnQqFhPNHp6BIBT_q-OH3s-8Q_POoVfAmKW0,117
  lemonade/api.py,sha256=9apNWSMS4bYpYl7iqDA4CsHHOOMdjOIuJhNYSqj_jIA,3878
  lemonade/cache.py,sha256=djr2qgyUUAWlQv8FehU9qlNtCwK0IZqo82hcBDyZ3-A,2850
- lemonade/cli.py,sha256=_s-LWpaVIhOmaP0Q1qirXxNiBhdumAZ-5ub5-lRNccs,4351
+ lemonade/cli.py,sha256=ddN2QqsGMsVwydfcR7MSZu1z8_-bUgUP7dhw9lzbHa8,4424
  lemonade/sequence.py,sha256=KSH7BPsiyDKsOsg_ziQKEGsDwMmuO_YbgPRBxkZd0pw,13267
  lemonade/state.py,sha256=sdSezla7Cd7KYL90xY3p9kcNV4ndSyN6UvNLOr3vBMA,5261
- lemonade/version.py,sha256=co6LyaBArt-ahHXYZSdSER8TFZ2vVTb86CNG6X8Pxwc,22
+ lemonade/version.py,sha256=iVyoEZ1fyZz5oicAj7ERV3Eld5fVjLM_p365GVSKBpk,22
  lemonade/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  lemonade/common/analyze_model.py,sha256=sYWDznEUEWjx_Qekg7f1hHY4Pfe87IQ77lmsWqePgE0,803
  lemonade/common/build.py,sha256=Pk86mCr6fyBIx2zXDpq0BkdahlCmWRnwSTpShA_gwZw,7849
@@ -23,6 +23,7 @@ lemonade/profilers/__init__.py,sha256=JKVonvJ4XZ9_6sKXPWsiMLQCNyzQOxhQw5BEHR1qOf
  lemonade/profilers/memory_tracker.py,sha256=-SSBmNlrweiX59wyNtLMWiwaMOskBzNO1_cufVwteqs,9357
  lemonade/profilers/profiler.py,sha256=y_iMGr1ToQ6rcwcIcXck4ajapisLXCfHggiV-IpPF98,1666
  lemonade/tools/__init__.py,sha256=_6xRc-FHxmujoLjLjWtpYrWYEXtCSneSy-5ya01kyPk,53
+ lemonade/tools/accuracy.py,sha256=QndammQ1bmlTaF_6YDaaiJp6fpkKZDYGySdQpAgZIp8,11699
  lemonade/tools/adapter.py,sha256=4H6gfbjvqyU6qm1_-b2FE-c3a7N9OzEBeDVnIwqRDvg,3014
  lemonade/tools/bench.py,sha256=aN5LMA_EH6-ZhAH3Gf26JYL7s0eKpUd3j-bReRhzvEY,10016
  lemonade/tools/huggingface_bench.py,sha256=POE5JYzArK2FBktazOkluLNFzlLctM39B19fK5sMx-0,10017
@@ -46,20 +47,22 @@ lemonade/tools/report/llm_report.py,sha256=bVHhwCINA-Ok2EdSwAsLubsc83N3KWOVuwTgu
  lemonade/tools/report/table.py,sha256=a0TXo1X84RxCSu0un_XM3ANOlhLtPDuqtGwR7eomf2s,24853
  lemonade/tools/server/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  lemonade/tools/server/instructions.py,sha256=Lvm-tRZaYgHkyt3zQkmMChkXO6rUiLoIAunudmMr_D8,13388
- lemonade/tools/server/llamacpp.py,sha256=PeHg1DbMGcf68txFgC1CJJN5HRHEnIJ4_4EDhvqAFUI,9255
+ lemonade/tools/server/llamacpp.py,sha256=R86Q2btI9_EPpPj27vvELnF9KmKxpu3sPIIS1xW3PIA,9997
+ lemonade/tools/server/port_utils.py,sha256=24Ryz5cNU0R9L1kuVSapZoyXTZHzhF4y0Yje9MVOrE0,1535
  lemonade/tools/server/pydantic_models.py,sha256=z1RAs9hkAFkOfMiTPtmUiC3CD2P6OMI2N0J2ztNs0d4,2179
- lemonade/tools/server/serve.py,sha256=7meKOKVHaODHBYD_3dDJyaiwoC_m4z_FWniZfsZ9cCI,50655
+ lemonade/tools/server/serve.py,sha256=3JQa42WZdllKAf_DY-cal0Pc8vdBZd4vwsfhZmpheS8,52500
+ lemonade/tools/server/thread_utils.py,sha256=pK9K_6DNWoQ78NArkAX3Ym2WsxLnCs9sKTk6TitlYnI,2804
  lemonade/tools/server/tool_calls.py,sha256=xrAlQwKG-nv2xLlf8f9CDSaUbyMn8ZtHkds9iZLG9K8,5230
  lemonade/tools/server/static/styles.css,sha256=8U1EejQaqRLQ6QTCF5UG_dLPtLjRwT1menUHMDhaq2M,5045
  lemonade_install/__init__.py,sha256=26zohKg2jgr_5y7tObduWMYQg8zCTWMZHL8lfi2zZVQ,40
  lemonade_install/install.py,sha256=61qUO7kWCLcdjK0_IQZ46-rKP_AWkyznh4YpDclPKyM,28036
- lemonade_sdk-7.0.1.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
- lemonade_sdk-7.0.1.dist-info/licenses/NOTICE.md,sha256=B8lEqi4QE41J9ljz4Riv2JgHD1v8GCZE6nNBHO3KIA0,2135
+ lemonade_sdk-7.0.2.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+ lemonade_sdk-7.0.2.dist-info/licenses/NOTICE.md,sha256=B8lEqi4QE41J9ljz4Riv2JgHD1v8GCZE6nNBHO3KIA0,2135
  lemonade_server/cli.py,sha256=DR6sIt66K1sZZG3ascEw_6HUgz3UhU9KGUyzxf4nO_A,7351
  lemonade_server/model_manager.py,sha256=WDGDxrKjq-u2GkGWLNUsRk0d74J-RG2yCYEnH8WMnDw,4010
- lemonade_server/server_models.json,sha256=ZSg1R555bLVW4U7BPaYX5ZgwaJVNAP3z1C62dzMRqAM,6198
- lemonade_sdk-7.0.1.dist-info/METADATA,sha256=bvg9-Tzg_v8sTKjkAJtLahpDq_GmLDMDKA9PTisaNGw,5443
- lemonade_sdk-7.0.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- lemonade_sdk-7.0.1.dist-info/entry_points.txt,sha256=gJppn0ETtXXR6ceKWEIRdk42kMC7ps59EmU3NCPyPUk,144
- lemonade_sdk-7.0.1.dist-info/top_level.txt,sha256=10ap5GNiPhalO4V50LRoxA1FqRT9g3Xkia6BITu880k,42
- lemonade_sdk-7.0.1.dist-info/RECORD,,
+ lemonade_server/server_models.json,sha256=S_wVpybtBT5xTuM2BLxT83bOsJnPR_yWIl35jy30aJ8,6453
+ lemonade_sdk-7.0.2.dist-info/METADATA,sha256=Pf_-kdMDlXVYw_6CHQJDlO3ac4GbHzxENx0Rg8p4QBo,5443
+ lemonade_sdk-7.0.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ lemonade_sdk-7.0.2.dist-info/entry_points.txt,sha256=gJppn0ETtXXR6ceKWEIRdk42kMC7ps59EmU3NCPyPUk,144
+ lemonade_sdk-7.0.2.dist-info/top_level.txt,sha256=10ap5GNiPhalO4V50LRoxA1FqRT9g3Xkia6BITu880k,42
+ lemonade_sdk-7.0.2.dist-info/RECORD,,
lemonade_server/server_models.json CHANGED
@@ -9,13 +9,13 @@
      "checkpoint": "amd/Llama-3.2-1B-Instruct-awq-uint4-float16-cpu-onnx",
      "recipe": "oga-cpu",
      "reasoning": false,
-     "suggested": true
+     "suggested": false
    },
    "Llama-3.2-3B-Instruct-CPU": {
      "checkpoint": "amd/Llama-3.2-3B-Instruct-awq-uint4-float16-cpu-onnx",
      "recipe": "oga-cpu",
      "reasoning": false,
-     "suggested": true
+     "suggested": false
    },
    "Phi-3-Mini-Instruct-CPU": {
      "checkpoint": "amd/Phi-3-mini-4k-instruct_int4_float16_onnx_cpu",
@@ -103,6 +103,13 @@
      "max_prompt_length": 2000,
      "suggested": true
    },
+   "Llama-xLAM-2-8b-fc-r-Hybrid": {
+     "checkpoint": "amd/Llama-xLAM-2-8b-fc-r-awq-g128-int4-asym-bfp16-onnx-hybrid",
+     "recipe": "oga-hybrid",
+     "reasoning": false,
+     "max_prompt_length": 2000,
+     "suggested": true
+   },
    "Llama-3.2-1B-Instruct-DirectML": {
      "checkpoint": "amd/Llama-3.2-1B-Instruct-dml-int4-awq-block-128-directml",
      "recipe": "oga-igpu",
@@ -158,7 +165,7 @@
      "suggested": true
    },
    "Qwen3-8B-GGUF": {
-     "checkpoint": "unsloth/Qwen3-8B-GGUF:Q4_0",
+     "checkpoint": "unsloth/Qwen3-8B-GGUF:Q4_1",
      "recipe": "llamacpp",
      "reasoning": true,
      "suggested": true
@@ -181,4 +188,4 @@
      "reasoning": true,
      "suggested": true
    }
- }
+ }