PyPI - lemonade-sdk - Versions diffs - 7.0.0__py3-none-any.whl - Mend

lemonade-sdk 7.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of lemonade-sdk might be problematic. Click here for more details.

Files changed (61) hide show

lemonade/__init__.py +5 -0
lemonade/api.py +125 -0
lemonade/cache.py +85 -0
lemonade/cli.py +135 -0
lemonade/common/__init__.py +0 -0
lemonade/common/analyze_model.py +26 -0
lemonade/common/build.py +223 -0
lemonade/common/cli_helpers.py +139 -0
lemonade/common/exceptions.py +98 -0
lemonade/common/filesystem.py +368 -0
lemonade/common/labels.py +61 -0
lemonade/common/onnx_helpers.py +176 -0
lemonade/common/plugins.py +10 -0
lemonade/common/printing.py +110 -0
lemonade/common/status.py +490 -0
lemonade/common/system_info.py +390 -0
lemonade/common/tensor_helpers.py +83 -0
lemonade/common/test_helpers.py +28 -0
lemonade/profilers/__init__.py +1 -0
lemonade/profilers/memory_tracker.py +257 -0
lemonade/profilers/profiler.py +55 -0
lemonade/sequence.py +363 -0
lemonade/state.py +159 -0
lemonade/tools/__init__.py +1 -0
lemonade/tools/adapter.py +104 -0
lemonade/tools/bench.py +284 -0
lemonade/tools/huggingface_bench.py +267 -0
lemonade/tools/huggingface_load.py +520 -0
lemonade/tools/humaneval.py +258 -0
lemonade/tools/llamacpp.py +261 -0
lemonade/tools/llamacpp_bench.py +154 -0
lemonade/tools/management_tools.py +273 -0
lemonade/tools/mmlu.py +327 -0
lemonade/tools/ort_genai/__init__.py +0 -0
lemonade/tools/ort_genai/oga.py +1129 -0
lemonade/tools/ort_genai/oga_bench.py +142 -0
lemonade/tools/perplexity.py +146 -0
lemonade/tools/prompt.py +228 -0
lemonade/tools/quark/__init__.py +0 -0
lemonade/tools/quark/quark_load.py +172 -0
lemonade/tools/quark/quark_quantize.py +439 -0
lemonade/tools/report/__init__.py +0 -0
lemonade/tools/report/llm_report.py +203 -0
lemonade/tools/report/table.py +739 -0
lemonade/tools/server/__init__.py +0 -0
lemonade/tools/server/serve.py +1354 -0
lemonade/tools/server/tool_calls.py +146 -0
lemonade/tools/tool.py +374 -0
lemonade/version.py +1 -0
lemonade_install/__init__.py +1 -0
lemonade_install/install.py +774 -0
lemonade_sdk-7.0.0.dist-info/METADATA +116 -0
lemonade_sdk-7.0.0.dist-info/RECORD +61 -0
lemonade_sdk-7.0.0.dist-info/WHEEL +5 -0
lemonade_sdk-7.0.0.dist-info/entry_points.txt +4 -0
lemonade_sdk-7.0.0.dist-info/licenses/LICENSE +201 -0
lemonade_sdk-7.0.0.dist-info/licenses/NOTICE.md +21 -0
lemonade_sdk-7.0.0.dist-info/top_level.txt +3 -0
lemonade_server/cli.py +260 -0
lemonade_server/model_manager.py +98 -0
lemonade_server/server_models.json +142 -0

lemonade/__init__.py ADDED Viewed

@@ -0,0 +1,5 @@
+from lemonade.version import __version__
+from .state import load_state, State
+from .cli import main as lemonadecli

lemonade/api.py ADDED Viewed

@@ -0,0 +1,125 @@
+# pylint: disable=no-member
+from typing import Tuple, Dict
+from lemonade.state import State
+import lemonade.common.printing as printing
+import lemonade.cache as cache
+from lemonade.tools.adapter import ModelAdapter, TokenizerAdapter
+class NotSupported(Exception):
+    """
+    Indicates that a checkpoint/recipe pair are not supported
+    together at this time.
+    The message is also passed to printing.log_error() at construction time,
+    i.e., before the exception is actually raised by the caller.
+    """
+    def __init__(self, msg):
+        # Initialize the base Exception with the message first
+        super().__init__(msg)
+        # Then report the same message through the project's error printer
+        printing.log_error(msg)
+def _raise_not_supported(recipe, checkpoint):
+    """
+    Raise NotSupported with a message naming the recipe and the checkpoint
+    that cannot be used together.
+    """
+    message = f"Recipe {recipe} does not have support for checkpoint {checkpoint}"
+    raise NotSupported(message)
+def _make_state(recipe, checkpoint) -> State:
+    """
+    Create a fresh State in the default cache directory.
+    The build is named "<checkpoint>_<recipe>".
+    """
+    return State(cache_dir=cache.DEFAULT_CACHE_DIR, build_name=f"{checkpoint}_{recipe}")
+def from_pretrained(
+    checkpoint: str,
+    recipe: str = "hf-cpu",
+) -> Tuple[ModelAdapter, TokenizerAdapter]:
+    """
+    Load an LLM and the corresponding tokenizer using a lemonade recipe.
+    Args:
+        - checkpoint: huggingface checkpoint that defines the LLM
+        - recipe: defines the implementation and hardware used for the LLM
+    Recipe choices:
+        - hf-cpu: Huggingface Transformers implementation for CPU with max-perf settings
+        - hf-dgpu: Huggingface Transformers implementation on dGPU (via device="cuda")
+        - oga-cpu: CPU implementation based on onnxruntime-genai
+        - oga-igpu: DirectML implementation for iGPU based on onnxruntime-genai-directml
+        - oga-npu: onnxruntime-genai implementation loaded with device="npu"
+        - oga-hybrid: AMD Ryzen AI Hybrid implementation based on onnxruntime-genai
+    Returns:
+        - model: LLM instance with a generate() method that invokes the recipe
+        - tokenizer: tokenizer instance compatible with the model, which supports
+            the encode (call) and decode() methods.
+    Raises:
+        - NotSupported: if the recipe is not one of the choices listed above
+    """
+    if recipe in ("hf-cpu", "hf-dgpu"):
+        # Huggingface Transformers recipes
+        # Huggingface supports all checkpoints, so there is nothing to check for
+        import torch
+        from lemonade.tools.huggingface_load import HuggingfaceLoad
+        load_args = {"input": checkpoint, "dtype": torch.bfloat16}
+        if recipe == "hf-dgpu":
+            # Discrete GPU (Nvidia, Instinct, Radeon)
+            load_args["device"] = "cuda"
+        state = _make_state(recipe, checkpoint)
+        state = HuggingfaceLoad().run(state, **load_args)
+        return state.model, state.tokenizer
+    if recipe.startswith("oga-"):
+        import lemonade.tools.ort_genai.oga as oga
+        # Single table of supported OGA backends and the dtype loaded for each.
+        # The key order also defines the order shown in the error message.
+        backend_to_dtype = {
+            "cpu": "int4",
+            "igpu": "int4",
+            "npu": "int4",
+            "hybrid": "int4",
+        }
+        # Make sure the user chose a supported runtime, e.g., oga-cpu
+        user_backend = recipe[len("oga-") :]
+        if user_backend not in backend_to_dtype:
+            supported_recipes = [f"oga-{backend}" for backend in backend_to_dtype]
+            raise NotSupported(
+                "Selected OGA recipe is not supported. "
+                f"The supported OGA recipes are: {supported_recipes}"
+            )
+        state = _make_state(recipe, checkpoint)
+        state = oga.OgaLoad().run(
+            state,
+            input=checkpoint,
+            device=user_backend,
+            dtype=backend_to_dtype[user_backend],
+        )
+        return state.model, state.tokenizer
+    # Neither a Huggingface nor an OGA recipe
+    _raise_not_supported(recipe, checkpoint)
+# This file was originally licensed under Apache 2.0. It has been modified.
+# Modifications Copyright (c) 2025 AMD

lemonade/cache.py ADDED Viewed

@@ -0,0 +1,85 @@
+import os
+from datetime import datetime, timezone
+# Default location of the build cache: ~/.cache/lemonade, unless the
+# LEMONADE_CACHE_DIR environment variable is set to a non-empty value.
+# A leading "~" is expanded in either case.
+DEFAULT_CACHE_DIR = os.path.expanduser(
+    os.environ.get("LEMONADE_CACHE_DIR") or os.path.join("~", ".cache", "lemonade")
+)
+def checkpoint_to_model_name(checkpoint_name: str) -> str:
+    """
+    Get the model's name by stripping the author's name from the checkpoint name.
+    For "<author>/<model>" this returns "<model>". A checkpoint name without
+    any "/" has no author prefix to strip, so it is returned unchanged.
+    """
+    parts = checkpoint_name.split("/")
+    # Index 1 is the segment right after the author; fall back to the whole
+    # name instead of raising IndexError when there is no "/" at all
+    return parts[1] if len(parts) > 1 else checkpoint_name
+def get_timestamp() -> str:
+    """
+    Return the current UTC time formatted as:
+        <year>y_<month>m_<day>d_<hour>h_<minute>m_<second>s
+    """
+    return datetime.now(timezone.utc).strftime("%Yy_%mm_%dd_%Hh_%Mm_%Ss")
+def build_name(input_name):
+    """
+    Build a name for a lemonade build: "<sanitized input>_<timestamp>".
+        - The input name (typically a model checkpoint name) is sanitized by
+            replacing every `/` with `_`.
+        - If input_name is an existing local folder, the checkpoint name is
+            unknown, so "local_model" is used instead.
+        - The timestamp keeps builds in the same cache from colliding in the
+            same build directory.
+    """
+    prefix = "local_model" if os.path.isdir(input_name) else input_name.replace("/", "_")
+    return f"{prefix}_{get_timestamp()}"
+class Keys:
+    """
+    Namespace of string constants.
+    NOTE(review): these look like the keys under which build results and
+    settings are recorded -- confirm against the callers.
+    """
+    MODEL = "model"
+    PER_ITERATION_LATENCY = "per_iteration_latency"
+    MEAN_LATENCY = "mean_latency"
+    STD_DEV_LATENCY = "std_dev_latency"
+    TOKEN_GENERATION_TOKENS_PER_SECOND = "token_generation_tokens_per_second"
+    STD_DEV_TOKENS_PER_SECOND = "std_dev_tokens_per_second"
+    SECONDS_TO_FIRST_TOKEN = "seconds_to_first_token"
+    PREFILL_TOKENS_PER_SECOND = "prefill_tokens_per_second"
+    STD_DEV_SECONDS_TO_FIRST_TOKEN = "std_dev_seconds_to_first_token"
+    CHECKPOINT = "checkpoint"
+    DTYPE = "dtype"
+    PROMPT = "prompt"
+    PROMPT_TOKENS = "prompt_tokens"
+    PROMPT_TEMPLATE = "prompt_template"
+    RESPONSE = "response"
+    RESPONSE_TOKENS = "response_tokens"
+    RESPONSE_LENGTHS_HISTOGRAM = "response_lengths_histogram"
+    CACHE_DIR = "cache_dir"
+    DEVICE = "device"
+    LOCAL_MODEL_FOLDER = "local_model_folder"
+    MEMORY_USAGE_PLOT = "memory_usage_plot"
+    # The next two are distinct keys: the values differ ("..._GB" vs "..._gbyte")
+    MAX_MEMORY_USED_GB = "max_memory_used_GB"
+    MAX_MEMORY_USED_GBYTE = "max_memory_used_gbyte"
+    RYZEN_AI_VERSION_INFO = "ryzen_ai_version_info"
+# This file was originally licensed under Apache 2.0. It has been modified.
+# Modifications Copyright (c) 2025 AMD

lemonade/cli.py ADDED Viewed

@@ -0,0 +1,135 @@
+import os
+from lemonade.version import __version__ as version_number
+from lemonade.tools import FirstTool, NiceHelpFormatter
+from lemonade.profilers.memory_tracker import MemoryTracker
+import lemonade.common.filesystem as fs
+import lemonade.common.cli_helpers as cli
+from lemonade.sequence import Sequence
+from lemonade.tools.management_tools import Cache, Version, SystemInfo
+from lemonade.state import State
+from lemonade.tools.huggingface_load import HuggingfaceLoad
+from lemonade.tools.huggingface_bench import HuggingfaceBench
+from lemonade.tools.ort_genai.oga_bench import OgaBench
+from lemonade.tools.llamacpp_bench import LlamaCppBench
+from lemonade.tools.llamacpp import LoadLlamaCpp
+import lemonade.cache as cache
+from lemonade.tools.mmlu import AccuracyMMLU
+from lemonade.tools.humaneval import AccuracyHumaneval
+from lemonade.tools.perplexity import AccuracyPerplexity
+from lemonade.tools.prompt import LLMPrompt
+from lemonade.tools.quark.quark_load import QuarkLoad
+from lemonade.tools.quark.quark_quantize import QuarkQuantize
+from lemonade.tools.report.llm_report import LemonadeReport
+from lemonade.tools.server.serve import Server
+def main():
+    """
+    Entry point of the `lemonade` command line interface.
+    Parses the global arguments and the requested tools, then either
+        - launches the evaluation tools as a Sequence (the first tool must be
+            a FirstTool and receives the global --input), or
+        - runs each requested management tool against the cache directory.
+    A leading "~" in --cache-dir is expanded in both cases.
+    """
+    # List the available tools
+    tools = [
+        HuggingfaceLoad,
+        LoadLlamaCpp,
+        LlamaCppBench,
+        AccuracyMMLU,
+        AccuracyHumaneval,
+        AccuracyPerplexity,
+        LLMPrompt,
+        HuggingfaceBench,
+        OgaBench,
+        QuarkQuantize,
+        QuarkLoad,
+        LemonadeReport,
+        Server,
+        # Inherited from lemonade
+        Cache,
+        Version,
+        SystemInfo,
+    ]
+    # Import onnxruntime-genai recipes
+    try:
+        from lemonade.tools.ort_genai.oga import OgaLoad
+    except ModuleNotFoundError:
+        # A module needed by OgaLoad is not installed, so the tool is not offered
+        pass
+    else:
+        tools.append(OgaLoad)
+    # List the available profilers
+    profilers = [MemoryTracker]
+    # Define the argument parser
+    parser = cli.CustomArgumentParser(
+        description=f"""Tools for evaluating and deploying LLMs (v{version_number}).
+Read this to learn the command syntax:
+https://github.com/lemonade-sdk/lemonade/blob/main/docs/README.md""",
+        formatter_class=NiceHelpFormatter,
+    )
+    parser.add_argument(
+        "-i",
+        "--input",
+        help="The input that will be evaluated by the starting tool "
+        "(e.g., huggingface checkpoint)",
+    )
+    parser.add_argument(
+        "-d",
+        "--cache-dir",
+        help="Cache directory where tool results are "
+        f"stored (default: {cache.DEFAULT_CACHE_DIR})",
+        required=False,
+        default=cache.DEFAULT_CACHE_DIR,
+    )
+    for profiler in profilers:
+        profiler.add_arguments_to_parser(parser)
+    global_args, tool_instances, evaluation_tools = cli.parse_tools(
+        parser, tools, cli_name="lemonade"
+    )
+    # Only instantiate the profilers that were requested on the command line
+    profiler_instances = [
+        profiler(global_args[profiler.unique_name])
+        for profiler in profilers
+        if global_args.get(profiler.unique_name, None) is not None
+    ]
+    if len(evaluation_tools) > 0:
+        if not issubclass(evaluation_tools[0], FirstTool):
+            parser.error(
+                "The first tool in the sequence needs to be one "
+                "of the 'tools that can start a sequence.' Use "
+                "`lemonade -h` to see that list of tools."
+            )
+        # The build name is derived from the input, so fail with a clear
+        # message instead of a TypeError deep inside cache.build_name()
+        if global_args["input"] is None:
+            parser.error(
+                "An input is required to run a sequence of tools. "
+                "Use `-i`/`--input` to provide one."
+            )
+        # Run the evaluation tools as a build
+        sequence = Sequence(tools=tool_instances, profilers=profiler_instances)
+        # Forward the selected input to the first tool in the sequence
+        first_tool_args = next(iter(sequence.tools.values()))
+        first_tool_args.append("--input")
+        first_tool_args.append(global_args["input"])
+        state = State(
+            # Support "~" in the cache_dir argument, same as the management tools
+            cache_dir=os.path.abspath(os.path.expanduser(global_args["cache_dir"])),
+            build_name=cache.build_name(global_args["input"]),
+            sequence_info=sequence.info,
+        )
+        sequence.launch(state)
+    else:
+        # Run the management tools
+        # Support "~" in the cache_dir argument (same value for every tool)
+        parsed_cache_dir = os.path.expanduser(global_args[fs.Keys.CACHE_DIR])
+        for management_tool, argv in tool_instances.items():
+            management_tool.parse_and_run(parsed_cache_dir, argv)
+if __name__ == "__main__":
+    main()
+# This file was originally licensed under Apache 2.0. It has been modified.
+# Modifications Copyright (c) 2025 AMD

lemonade/common/__init__.py ADDED Viewed

File without changes

lemonade/common/analyze_model.py ADDED Viewed

@@ -0,0 +1,26 @@
+import numpy as np
+import torch
+import onnx
+def count_parameters(model: torch.nn.Module) -> int:
+    """
+    Returns the number of parameters of a given model.
+    Accepted inputs:
+        - a torch module: the elements of all named_parameters() are counted
+        - a path string ending in ".onnx": the file is loaded and the
+            elements of the graph initializers are counted
+    Any other input yields None.
+    """
+    if isinstance(model, (torch.nn.Module, torch.jit.ScriptModule)):
+        return sum(parameter.numel() for _, parameter in model.named_parameters())
+    if not (isinstance(model, str) and model.endswith(".onnx")):
+        return None
+    onnx_model = onnx.load(model)
+    # NOTE(review): tensor.name is a string while graph.input appears to hold
+    # input descriptors rather than names, so this filter may never exclude
+    # anything -- confirm the intent before changing it
+    element_counts = (
+        np.prod(tensor.dims, dtype=np.int64)
+        for tensor in onnx_model.graph.initializer
+        if tensor.name not in onnx_model.graph.input
+    )
+    return int(sum(element_counts))
+# This file was originally licensed under Apache 2.0. It has been modified.
+# Modifications Copyright (c) 2025 AMD

lemonade/common/build.py ADDED Viewed

@@ -0,0 +1,223 @@
+import os
+import logging
+import sys
+import traceback
+from typing import Dict
+import hashlib
+import psutil
+import yaml
+import torch
+import numpy as np
+import lemonade.common.exceptions as exp
+state_file_name = "state.yaml"
+def load_yaml(file_path) -> Dict:
+    """
+    Parse the YAML file at file_path and return its content.
+    Raises:
+        exp.IOError: if the file content cannot be parsed as YAML. The
+            original yaml.YAMLError is chained as the cause.
+    """
+    with open(file_path, "r", encoding="utf8") as stream:
+        try:
+            # NOTE(review): FullLoader accepts more than plain data types;
+            # prefer yaml.safe_load if these files can come from untrusted sources
+            return yaml.load(stream, Loader=yaml.FullLoader)
+        except yaml.YAMLError as e:
+            raise exp.IOError(
+                f"Failed while trying to open {file_path}. "
+                f"The exception that triggered this was:\n{e}"
+            ) from e
+def builds_dir(cache_dir):
+    """
+    Return the directory that contains all build directories:
+        <cache_dir>/builds
+    Each build stores its stats, logs, and other files in its own
+    build directory below this location.
+    """
+    builds_folder = "builds"
+    return os.path.join(cache_dir, builds_folder)
+def output_dir(cache_dir, build_name):
+    """
+    Return the output directory of a single build, where that build stores
+    its stats, logs, and other files:
+        <builds_dir>/<build_name>
+    """
+    return os.path.join(builds_dir(cache_dir), build_name)
+def state_file(cache_dir, build_name):
+    """
+    Return the path of a build's state file:
+        <output_dir>/<state_file_name>
+    """
+    build_output_dir = output_dir(cache_dir, build_name)
+    return os.path.join(build_output_dir, state_file_name)
+class FunctionStatus:
+    """
+    Status values that are assigned to tools, builds, benchmarks, and other
+    functionality to help the user understand whether that function completed
+    successfully or not.
+    Every status is a plain lowercase string.
+    """
+    # SUCCESSFUL means the tool/build/benchmark completed successfully.
+    SUCCESSFUL = "successful"
+    # ERROR means the tool/build/benchmark failed and threw some error that
+    # was caught by lemonade. You should proceed by looking at the build
+    # logs to see what happened.
+    ERROR = "error"
+    # TIMEOUT means the tool/build/benchmark failed because it exceeded the timeout
+    # set for the lemonade command.
+    TIMEOUT = "timeout"
+    # KILLED means the build/benchmark failed because the system killed it. This can
+    # happen because of an out-of-memory (OOM), system shutdown, etc.
+    # You should proceed by re-running the build and keeping an eye on it to observe
+    # why it is being killed (e.g., watch the RAM utilization to diagnose an OOM).
+    KILLED = "killed"
+    # The NOT_STARTED status is applied to all tools/builds/benchmarks at startup.
+    # It will be replaced by one of the other status values if the tool/build/benchmark
+    # has a chance to start running.
+    # A value of NOT_STARTED in the report CSV indicates that the tool/build/benchmark
+    # never had a chance to start because lemonade exited before that functionality had
+    # a chance to start running.
+    NOT_STARTED = "not_started"
+    # INCOMPLETE indicates that a tool/build/benchmark started running and did not complete.
+    # Each tool, build, and benchmark are marked as INCOMPLETE when they start running.
+    # If you open the lemonade_stats.yaml file while the tool/build/benchmark
+    # is still running, the status will show as INCOMPLETE. If the tool/build/benchmark
+    # is killed without the chance to do any stats cleanup, the status will continue to
+    # show as INCOMPLETE in lemonade_stats.yaml.
+    # When the report CSV is created, any instance of an INCOMPLETE tool/build/benchmark
+    # status will be replaced by KILLED.
+    INCOMPLETE = "incomplete"
+def unique_id():
+    """
+    Create a unique ID for this run: the SHA-256 hex digest of the process ID
+    concatenated with the process start time.
+    """
+    pid = os.getpid()
+    started_at = psutil.Process(pid).create_time()
+    digest = hashlib.sha256(f"{pid}{started_at}".encode())
+    return digest.hexdigest()
+def _shape_and_dtype(value):
+    """Convert value to a numpy array once and return (shape, dtype name)."""
+    array = np.array(value)
+    return array.shape, array.dtype.name
+def get_shapes_and_dtypes(inputs: dict):
+    """
+    Return the shape and data type of each value in the inputs dict.
+    Keys are visited in sorted order. Per value type:
+        - list/tuple: one entry per element, keyed "<key>[i]"; elements that
+            are themselves lists/tuples (for example past_key_values in an
+            LLM that has KV-caching enabled) get one entry per inner element,
+            keyed "<key>[i][j]"
+        - torch tensor / numpy array: the array's own shape and dtype name
+        - bool/int/float: shape (1,) and the Python type name
+        - None: skipped
+    Returns:
+        (shapes, dtypes): two dicts that share the same keys
+    Raises:
+        exp.Error: if a value has any other type
+    """
+    shapes = {}
+    dtypes = {}
+    for key in sorted(inputs):
+        value = inputs[key]
+        if isinstance(value, (list, tuple)):
+            for i, v in enumerate(value):
+                if isinstance(v, (list, tuple)):
+                    # Handle nested lists/tuples
+                    for i2, v2 in enumerate(v):
+                        subsubkey = f"{key}[{i}][{i2}]"
+                        shapes[subsubkey], dtypes[subsubkey] = _shape_and_dtype(v2)
+                else:
+                    # Handle single list/tuple
+                    subkey = f"{key}[{i}]"
+                    shapes[subkey], dtypes[subkey] = _shape_and_dtype(v)
+        elif torch.is_tensor(value):
+            # detach() so tensors that require grad can be converted to numpy
+            shapes[key], dtypes[key] = _shape_and_dtype(value.detach())
+        elif isinstance(value, np.ndarray):
+            shapes[key] = value.shape
+            dtypes[key] = value.dtype.name
+        elif isinstance(value, (bool, int, float)):
+            shapes[key] = (1,)
+            dtypes[key] = type(value).__name__
+        elif value is None:
+            pass
+        else:
+            raise exp.Error(
+                "One of the provided inputs contains the unsupported "
+                f' type {type(value)} at key "{key}".'
+            )
+    return shapes, dtypes
+class Logger:
+    """
+    Redirects stdout to file (and console if needed)
+    Use as a context manager: while active, sys.stdout and sys.stderr are
+    replaced by this object, and the root logger's handlers are swapped for a
+    handler that writes to the logfile. On exit the original streams and
+    root logger handlers are restored.
+    Output goes to the console instead of only to the logfile when the
+    LEMONADE_BUILD_DEBUG environment variable is "True" or when no log_path
+    is given.
+    """
+    def __init__(
+        self,
+        initial_message: str,
+        log_path: str = None,
+    ):
+        """
+        Args:
+            - initial_message: first line written to the newly created logfile
+            - log_path: path of the logfile; None means console output only
+        """
+        self.debug = os.environ.get("LEMONADE_BUILD_DEBUG") == "True"
+        self.terminal = sys.stdout
+        self.terminal_err = sys.stderr
+        self.log_path = log_path
+        # Only set when a logfile handler is attached to the root logger
+        self.file_handler = None
+        # Create the logfile, replacing any previous content
+        if log_path is not None:
+            with open(log_path, "w", encoding="utf-8") as f:
+                f.write(f"{initial_message}\n")
+        # Disable any existing loggers so that we can capture all
+        # outputs to a logfile
+        self.root_logger = logging.getLogger()
+        self.handlers = list(self.root_logger.handlers)
+        for handler in self.handlers:
+            self.root_logger.removeHandler(handler)
+        # Send any logger outputs to the logfile
+        if not self.debug and log_path is not None:
+            self.file_handler = logging.FileHandler(filename=log_path)
+            self.file_handler.setLevel(logging.INFO)
+            formatter = logging.Formatter(
+                "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+            )
+            self.file_handler.setFormatter(formatter)
+            self.root_logger.addHandler(self.file_handler)
+    def __enter__(self):
+        sys.stdout = self
+        sys.stderr = self
+        # Allow `with Logger(...) as logger:`
+        return self
+    def __exit__(self, _exc_type, _exc_value, _exc_tb):
+        # Ensure we also capture the traceback as part of the logger when exceptions happen
+        if _exc_type:
+            traceback.print_exception(_exc_type, _exc_value, _exc_tb)
+        # Stop redirecting stdout/stderr
+        sys.stdout = self.terminal
+        sys.stderr = self.terminal_err
+        # Remove the logfile logging handler
+        if self.file_handler is not None:
+            self.file_handler.close()
+            self.root_logger.removeHandler(self.file_handler)
+        # Restore any pre-existing loggers. They were removed unconditionally
+        # in __init__, so they are restored unconditionally as well.
+        for handler in self.handlers:
+            self.root_logger.addHandler(handler)
+    def write(self, message):
+        if self.log_path is not None:
+            # Append and close on every write so the logfile is always up to date
+            with open(self.log_path, "a", encoding="utf-8") as f:
+                f.write(message)
+        if self.debug or self.log_path is None:
+            # NOTE(review): the message is echoed to both the original stdout
+            # and the original stderr -- confirm this duplication is intended
+            self.terminal.write(message)
+            self.terminal.flush()
+            self.terminal_err.write(message)
+            self.terminal_err.flush()
+    def flush(self):
+        # needed for python 3 compatibility.
+        pass
+# This file was originally licensed under Apache 2.0. It has been modified.
+# Modifications Copyright (c) 2025 AMD