PyPI - lemonade-sdk - Versions diffs - 7.0.0__py3-none-any.whl - Mend

lemonade-sdk 7.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of lemonade-sdk might be problematic. Click here for more details.

Files changed (61) hide show

lemonade/__init__.py +5 -0
lemonade/api.py +125 -0
lemonade/cache.py +85 -0
lemonade/cli.py +135 -0
lemonade/common/__init__.py +0 -0
lemonade/common/analyze_model.py +26 -0
lemonade/common/build.py +223 -0
lemonade/common/cli_helpers.py +139 -0
lemonade/common/exceptions.py +98 -0
lemonade/common/filesystem.py +368 -0
lemonade/common/labels.py +61 -0
lemonade/common/onnx_helpers.py +176 -0
lemonade/common/plugins.py +10 -0
lemonade/common/printing.py +110 -0
lemonade/common/status.py +490 -0
lemonade/common/system_info.py +390 -0
lemonade/common/tensor_helpers.py +83 -0
lemonade/common/test_helpers.py +28 -0
lemonade/profilers/__init__.py +1 -0
lemonade/profilers/memory_tracker.py +257 -0
lemonade/profilers/profiler.py +55 -0
lemonade/sequence.py +363 -0
lemonade/state.py +159 -0
lemonade/tools/__init__.py +1 -0
lemonade/tools/adapter.py +104 -0
lemonade/tools/bench.py +284 -0
lemonade/tools/huggingface_bench.py +267 -0
lemonade/tools/huggingface_load.py +520 -0
lemonade/tools/humaneval.py +258 -0
lemonade/tools/llamacpp.py +261 -0
lemonade/tools/llamacpp_bench.py +154 -0
lemonade/tools/management_tools.py +273 -0
lemonade/tools/mmlu.py +327 -0
lemonade/tools/ort_genai/__init__.py +0 -0
lemonade/tools/ort_genai/oga.py +1129 -0
lemonade/tools/ort_genai/oga_bench.py +142 -0
lemonade/tools/perplexity.py +146 -0
lemonade/tools/prompt.py +228 -0
lemonade/tools/quark/__init__.py +0 -0
lemonade/tools/quark/quark_load.py +172 -0
lemonade/tools/quark/quark_quantize.py +439 -0
lemonade/tools/report/__init__.py +0 -0
lemonade/tools/report/llm_report.py +203 -0
lemonade/tools/report/table.py +739 -0
lemonade/tools/server/__init__.py +0 -0
lemonade/tools/server/serve.py +1354 -0
lemonade/tools/server/tool_calls.py +146 -0
lemonade/tools/tool.py +374 -0
lemonade/version.py +1 -0
lemonade_install/__init__.py +1 -0
lemonade_install/install.py +774 -0
lemonade_sdk-7.0.0.dist-info/METADATA +116 -0
lemonade_sdk-7.0.0.dist-info/RECORD +61 -0
lemonade_sdk-7.0.0.dist-info/WHEEL +5 -0
lemonade_sdk-7.0.0.dist-info/entry_points.txt +4 -0
lemonade_sdk-7.0.0.dist-info/licenses/LICENSE +201 -0
lemonade_sdk-7.0.0.dist-info/licenses/NOTICE.md +21 -0
lemonade_sdk-7.0.0.dist-info/top_level.txt +3 -0
lemonade_server/cli.py +260 -0
lemonade_server/model_manager.py +98 -0
lemonade_server/server_models.json +142 -0

lemonade/profilers/memory_tracker.py ADDED Viewed

@@ -0,0 +1,257 @@
+import os
+import time
+import textwrap
+from multiprocessing import Process, Queue
+import matplotlib.pyplot as plt
+import psutil
+import yaml
+import lemonade.common.filesystem as fs
+import lemonade.common.printing as printing
+from lemonade.profilers import Profiler
+DEFAULT_TRACK_MEMORY_INTERVAL = 0.25
+MEMORY_USAGE_YAML_FILENAME = "memory_usage.yaml"
+MEMORY_USAGE_PNG_FILENAME = "memory_usage.png"
+class MemoryTracker(Profiler):
+    unique_name = "memory"
+    @staticmethod
+    def add_arguments_to_parser(parser):
+        parser.add_argument(
+            "-m",
+            f"--{MemoryTracker.unique_name}",
+            nargs="?",
+            metavar="TRACK_INTERVAL",
+            type=float,
+            default=None,
+            const=DEFAULT_TRACK_MEMORY_INTERVAL,
+            help="Track memory usage and plot the results. "
+            "Optionally, set the tracking interval in seconds "
+            f"(default: {DEFAULT_TRACK_MEMORY_INTERVAL})",
+        )
+    @staticmethod
+    def get_time_mem_list(process):
+        return [time.time(), process.memory_info().rss]
+    def __init__(self, parser_arg_value):
+        super().__init__()
+        self.status_stats += [fs.Keys.MEMORY_USAGE_PLOT]
+        self.track_memory_interval = parser_arg_value
+        self.process_being_tracked = None
+        self.build_dir = None
+        self.queue = None
+        self.tracker_process = None
+        self.tracking_active = False
+        self.yaml_path = None
+    def start(self, build_dir):
+        if self.tracking_active:
+            raise RuntimeError("Cannot start tracking while already tracking")
+        # Save the folder where data and plot will be stored
+        self.build_dir = build_dir
+        # Get the process being tracked
+        track_pid = os.getpid()
+        self.process_being_tracked = psutil.Process(track_pid)
+        # Create queue for passing messages to the tracker
+        self.queue = Queue()
+        # The yaml file where the memory usage data will be saved
+        self.yaml_path = os.path.join(self.build_dir, MEMORY_USAGE_YAML_FILENAME)
+        # Create process to continuously sample memory usage
+        self.tracker_process = Process(
+            target=self._memory_tracker_,
+            args=(
+                track_pid,
+                self.queue,
+                self.yaml_path,
+                self.track_memory_interval,
+            ),
+        )
+        self.tracker_process.start()
+        self.tracking_active = True
+        self.set_label("start")
+        self.sample()
+    def tool_starting(self, tool_name):
+        self.set_label(tool_name)
+    def tool_stopping(self):
+        self.sample()
+    def set_label(self, label):
+        if self.tracking_active:
+            self.queue.put(label)
+    def sample(self):
+        if self.tracking_active:
+            self.queue.put(MemoryTracker.get_time_mem_list(self.process_being_tracked))
+    def stop(self):
+        if self.tracking_active:
+            self.queue.put(None)
+            self.tracking_active = False
+    def generate_results(self, state, timestamp, _):
+        if self.tracker_process is None:
+            return
+        if self.tracking_active:
+            self.stop()
+        # Wait for memory tracker to finish writing yaml data file
+        while self.tracker_process.is_alive():
+            self.tracker_process.join(timeout=1.0)
+        try:
+            with open(self.yaml_path, "r", encoding="utf-8") as f:
+                memory_tracks = yaml.safe_load(f)
+        except FileNotFoundError as e:
+            printing.log_info(
+                f"Memory tracker file not found: {e.filename}.  No memory usage plot generated"
+            )
+            state.save_stat(fs.Keys.MEMORY_USAGE_PLOT, None)
+            return
+        # Add check to ensure that memory_tracks is not empty or improperly formatted
+        if not memory_tracks or not isinstance(memory_tracks, list):
+            printing.log_info(
+                f"Memory tracker file contains no data or is improperly formatted: {self.yaml_path}"
+            )
+            state.save_stat(fs.Keys.MEMORY_USAGE_PLOT, None)
+            return
+        # Find final time in the start track (first track) to subtract from all other times
+        _, track = memory_tracks[0]
+        t0 = track[-1][0]
+        # last_t and last_y are used to draw a line between the last point of the prior
+        # track and the first point of the current track
+        last_t = None
+        last_y = None
+        plt.figure()
+        for k, v in memory_tracks[1:]:
+            if len(v) > 0:
+                t = [x[0] - t0 for x in v]
+                y = [float(x[1]) / 1024**3 for x in v]
+                # draw new memory usage track
+                if last_t is not None:
+                    plt.plot([last_t] + t, [last_y] + y, label=k, marker=".")
+                else:
+                    plt.plot(t, y, label=k, marker=".")
+                last_t = t[-1]
+                last_y = y[-1]
+        plt.xlabel("Time (sec)")
+        plt.ylabel("GB")
+        title_str = "Physical Memory Usage\n" + "\n".join(
+            textwrap.wrap(state.build_name, 60)
+        )
+        plt.title(title_str)
+        plt.legend()
+        plt.grid()
+        plt.tight_layout()
+        # Save plot to cache and to current folder
+        plot_path = os.path.join(
+            self.build_dir, f"{timestamp}_{MEMORY_USAGE_PNG_FILENAME}"
+        )
+        plt.savefig(plot_path)
+        plot_path = os.path.join(
+            os.getcwd(), f"{timestamp}_{MEMORY_USAGE_PNG_FILENAME}"
+        )
+        plt.savefig(plot_path)
+        state.save_stat(fs.Keys.MEMORY_USAGE_PLOT, plot_path)
+    @staticmethod
+    def _memory_tracker_(
+        tracked_pid,
+        input_queue: Queue,
+        yaml_path: str,
+        track_memory_interval: float,
+    ):
+        """
+        Tracks memory usage during build and saves to yaml file
+        The build communicates with the tracker though the input_queue.  It may pass:
+          1) string - This is to indicate that a new track is starting and the string is the label
+                    for the next segment.  The tracker will automatically track memory usage at
+                    the track_memory_interval once a first track_name is given to it.
+          2) list - A time and a current memory usage value that is added to the current track
+                    (typically used at the end of a segment to make sure that each segment is
+                    sampled at least once
+          3) None - This indicates that the tracker should stop tracking, save its data to a file
+                    and end
+        """
+        memory_tracks = []
+        current_track = []
+        track_name = None
+        tracker_exit = False
+        try:
+            tracked_process = psutil.Process(tracked_pid)
+            while (
+                not tracker_exit and tracked_process.status() == psutil.STATUS_RUNNING
+            ):
+                time.sleep(track_memory_interval)
+                # Read any messages from the parent process
+                while not input_queue.empty():
+                    try:
+                        message = input_queue.get(timeout=0.001)
+                        if message is None or isinstance(message, str):
+                            # Save current track.
+                            if track_name is not None:
+                                memory_tracks.append([track_name, current_track])
+                            track_name = message
+                            current_track = []
+                            if message is None:
+                                # Wrap up
+                                tracker_exit = True
+                                break
+                        elif isinstance(message, list):
+                            # Add time and memory data to current track
+                            if track_name is not None:
+                                current_track.append(message)
+                            else:
+                                raise TypeError(
+                                    "Track name must be passed to memory tracker prior to "
+                                    "sending data"
+                                )
+                        else:
+                            raise TypeError(
+                                "Unrecognized message type in memory_tracker input queue: "
+                                f"{message}"
+                            )
+                    except input_queue.Empty:
+                        # input_queue.empty had not been updated
+                        pass
+                if not tracker_exit and track_name is not None:
+                    # Save current time and memory usage
+                    current_track.append(
+                        MemoryTracker.get_time_mem_list(tracked_process)
+                    )
+            # Save the collected memory tracks
+            with open(yaml_path, "w", encoding="utf-8") as f:
+                yaml.dump(memory_tracks, f)
+        except psutil.NoSuchProcess:
+            # If the parent process stopped existing, we can
+            # safely assume that tracking is no longer needed
+            # NOTE: this only seems to be needed on Windows
+            pass
+# This file was originally licensed under Apache 2.0. It has been modified.
+# Modifications Copyright (c) 2025 AMD

lemonade/profilers/profiler.py ADDED Viewed

@@ -0,0 +1,55 @@
+import abc
+class Profiler(abc.ABC):
+    """
+    Abstract base class for profilers that gather data while a sequence of
+    tools runs.  Subclasses must implement add_arguments_to_parser(), start()
+    and generate_results(); tool_starting(), tool_stopping() and stop() are
+    optional hooks that do nothing by default.
+    """
+    # Name that identifies the profiler; subclasses are expected to set it
+    unique_name: str
+    def __init__(self, parser_arg_value=None):
+        """
+        Stores the value parsed from the profiler's command line argument.
+        """
+        self.parser_arg_value = parser_arg_value
+        # Statistics that will be displayed to the CLI user
+        self.status_stats = []
+    @staticmethod
+    @abc.abstractmethod
+    def add_arguments_to_parser(parser):
+        """
+        Adds the argument parsing for this profiler to the parser.
+        Uses f"--{unique_name}" as the argument, where unique_name is the
+        class attribute (this is a static method, so there is no self).
+        """
+    @abc.abstractmethod
+    def start(self, build_dir):
+        """
+        This method is called prior to the tool sequence starting.
+        This informs the profiler to start gathering data.
+        The build directory can be used to store profiling data.
+        """
+    def tool_starting(self, tool_name):
+        """
+        This method is called to inform the profiler of the name of the tool that is about to start.
+        """
+    def tool_stopping(self):
+        """
+        This method is called to inform the profiler that the tool has finished.
+        """
+    def stop(self):
+        """
+        This method is called when the tool sequence has finished.
+        This informs the profiler to stop gathering data.
+        """
+    @abc.abstractmethod
+    def generate_results(self, state, timestamp, start_times):
+        """
+        This method is called so that the profiler can create its output files.
+        The state is passed so that build info can be gathered and stats can be written.
+        The timestamp can be used for filename in current working directory.
+        The start times are a dictionary that maps a name (such as a tool name)
+        to the time at which it started.
+        """
+# Copyright (c) 2025 AMD

lemonade/sequence.py ADDED Viewed

@@ -0,0 +1,363 @@
+import sys
+import time
+import os
+import platform
+import copy
+from datetime import datetime
+from typing import List, Dict, Optional
+import pytz
+import psutil
+import lemonade.common.printing as printing
+import lemonade.common.exceptions as exp
+import lemonade.common.build as build
+from lemonade.common.system_info import get_system_info_dict
+import lemonade.common.filesystem as fs
+import lemonade.common.status as status
+from lemonade.tools.tool import Tool
+from lemonade.profilers.profiler import Profiler
+from lemonade.state import State
+def _rewind_stdout(lines: int = 1):
+    """
+    Helper function for the command line monitor. Moves the cursor up a
+    certain number of lines in the terminal, corresponding to the
+    status line for a Tool, so that we can update the status of
+    that Tool.
+    """
+    rewind_stdout_one_line = "\033[1A"
+    rewind_multiple_lines = rewind_stdout_one_line * lines
+    print(rewind_multiple_lines, end="")
+    sys.stdout.flush()
+class Sequence:
+    """
+    Helper class to launch and manage build tools.
+    """
+    def __init__(
+        self,
+        tools: Dict[Tool, List[str]],
+        profilers: List[Profiler] = None,
+    ):
+        self.tools = tools
+        self.profilers = [] if profilers is None else profilers
+        # Make sure all the tool names are unique
+        self.tool_names = [tool.__class__.unique_name for tool in self.tools.keys()]
+        if len(self.tool_names) != len(set(self.tool_names)):
+            msg = f"""
+            All tools in a Sequence must have unique unique_names, however Sequence
+            received duplicates in the list of names: {self.tool_names}
+            """
+            raise ValueError(msg)
+        # Save the process (used to get memory usage)
+        self.process = psutil.Process()
+    def show_monitor(self, state: State, verbosity: bool):
+        """
+        Displays the monitor on the terminal. The purpose of the monitor
+        is to show the status of each tool (success, failure, not started yet,
+        or in-progress).
+        """
+        if verbosity:
+            print()
+            printing.logn(
+                f'Building "{state.build_name}"',
+                c=printing.Colors.BOLD,
+            )
+            for tool in self.tools:
+                tool.status_line(successful=None, verbosity=True)
+            _rewind_stdout(len(self.tools))
+    def _advance_cursor(self, current_tool_name: str):
+        # Advance the cursor below the monitor so
+        # we can print a message
+        tool_depth_in_sequence = len(self.tool_names) - self.tool_names.index(
+            current_tool_name
+        )
+        stdout_lines_to_advance = tool_depth_in_sequence - 2
+        cursor_down = "\n" * stdout_lines_to_advance
+        print(cursor_down)
+    def _get_mem_usage_str(self) -> str:
+        """
+        Returns a string with memory usage for the current process
+        (non-swapped physical memory).  In Windows OS, the peak memory used in the
+        process is also included.
+        Example: '1.100 GB (1.638 GB peak)'
+        """
+        mem_info = self.process.memory_info()
+        mem_info_str = f"{mem_info.rss / 1024 ** 3:,.3f} GB"
+        if platform.system() == "Windows":
+            mem_info_str += f" ({mem_info.peak_wset / 1024 ** 3:,.3f} GB peak)"
+        return mem_info_str
+    def launch(
+        self,
+        state: State,
+        lean_cache: bool = False,
+        monitor: Optional[bool] = None,
+        stats_to_save: Optional[Dict] = None,
+    ) -> State:
+        """
+        Executes the sequence of tools.
+        """
+        current_time = datetime.now()
+        timestamp = current_time.strftime("%Y-%m-%d-%H%M%S")
+        start_times = {"warmup": time.time()}
+        # Allow monitor to be globally disabled by an environment variable
+        if monitor is None:
+            if os.environ.get("LEMONADE_BUILD_MONITOR") == "False":
+                monitor_setting = False
+            else:
+                monitor_setting = True
+        else:
+            monitor_setting = monitor
+        # Create a build directory in the cache
+        fs.make_build_dir(state.cache_dir, state.build_name)
+        # Start profilers
+        build_dir = build.output_dir(state.cache_dir, state.build_name)
+        for profiler in self.profilers:
+            profiler.start(build_dir)
+        self.show_monitor(state, monitor_setting)
+        if state.build_status == build.FunctionStatus.SUCCESSFUL:
+            msg = """
+            build_model() is running a build on a model that already built successfully, which
+            should not happen because the build should have loaded from cache or rebuilt from scratch.
+            If you are using custom tools and Sequences then you have some debugging to do. Otherwise,
+            please file an issue at https://github.com/lemonade-sdk/lemonade/issues
+            """
+            raise exp.Error(msg)
+        # Keep a copy of any stats we loaded from disk, in case we need to
+        # restore them later
+        saved_stats = copy.deepcopy(fs.Stats(state.cache_dir, state.build_name).stats)
+        # Save build name to stats so it shows up on reports
+        state.save_stat(fs.Keys.BUILD_NAME, state.build_name)
+        # Indicate that the build is running. If the build fails for any reason,
+        # we will try to catch the exception and note it in the stats.
+        # If a concluded build still has a status of "running", this means
+        # there was an uncaught exception.
+        state.save_stat(fs.Keys.BUILD_STATUS, build.FunctionStatus.INCOMPLETE)
+        # Save a timestamp so that we know the order of builds within a cache
+        pacific_tz = pytz.timezone("America/Los_Angeles")
+        state.save_stat(
+            fs.Keys.TIMESTAMP,
+            datetime.now(pacific_tz),
+        )
+        # Save the system information used for this build
+        system_info = get_system_info_dict()
+        state.save_stat(
+            fs.Keys.SYSTEM_INFO,
+            system_info,
+        )
+        # Collect telemetry for the build
+        state.save_stat(
+            fs.Keys.SELECTED_SEQUENCE_OF_TOOLS,
+            self.tool_names,
+        )
+        # At the beginning of a sequence no tool has started
+        for tool in self.tools:
+            state.save_stat(tool.status_key, build.FunctionStatus.NOT_STARTED)
+            state.save_stat(tool.duration_key, "-")
+            state.save_stat(tool.memory_key, "-")
+        # Save any additional stats passed in via arguments
+        if stats_to_save:
+            for stat_key, stat_value in stats_to_save.items():
+                state.save_stat(stat_key, stat_value)
+        # Save initial memory as a build statistic
+        state.save_stat(f"{fs.Keys.TOOL_MEMORY}:__init__", self._get_mem_usage_str())
+        # Run the build
+        saved_exception = None
+        for tool, argv in self.tools.items():
+            start_time = time.time()
+            start_times[tool.unique_name] = start_time
+            # Inform profiler of name of tool about to start
+            for profiler in self.profilers:
+                profiler.tool_starting(tool.unique_name)
+            try:
+                # Set status as incomplete, since tool just started
+                state.save_stat(tool.status_key, build.FunctionStatus.INCOMPLETE)
+                # Collect telemetry about the tool
+                state.current_build_tool = tool.unique_name
+                # Run the tool
+                state = tool.parse_and_run(state, argv, monitor_setting)
+                # Save the state so that it can be assessed for a cache hit
+                state.save()
+            except exp.SkipBuild as e:
+                # SkipBuild is a special exception, which means that a build
+                # was loaded from disk, then we realized we want to skip it.
+                # In order to preserve the original stats and state of the build,
+                # we need to restore the stats file to what it was at the beginning
+                # of this function call. We also need to avoid calling state.save().
+                # Restore the prior stats
+                fs.save_yaml(
+                    saved_stats, fs.Stats(state.cache_dir, state.build_name).file
+                )
+                # Advance the cursor below the monitor so
+                # we can print a message
+                self._advance_cursor(tool.unique_name)
+                printing.log_warning(str(e))
+                return
+            # Broad exception is desirable as we want to capture
+            # all exceptions (including those we can't anticipate)
+            except Exception as e:  # pylint: disable=broad-except
+                if os.environ.get("LEMONADE_DEBUG", "").lower() == "true":
+                    # It may be useful to raise the exception here, since
+                    # if any of the subsequent lines of code raise another
+                    # exception it will be very hard to root cause e.
+                    raise e
+                # Update tool and build status
+                state.save_stat(tool.status_key, build.FunctionStatus.ERROR)
+                state.save_stat(fs.Keys.BUILD_STATUS, build.FunctionStatus.ERROR)
+                # Save the log file for the failed tool to stats for easy reference
+                stats = fs.Stats(state.cache_dir, state.build_name)
+                stats.save_eval_error_log(tool.logfile_path)
+                # Advance the cursor below the monitor so
+                # we can print a message
+                self._advance_cursor(tool.unique_name)
+                if vars(state).get("invocation_info"):
+                    state.invocation_info.status_message = f"Error: {e}"
+                    state.invocation_info.status_message_color = printing.Colors.WARNING
+                else:
+                    printing.log_error(e)
+                # We will raise this exception after we capture as many statistics
+                # about the build as possible
+                saved_exception = e
+                # Don't run any more tools
+                break
+            else:
+                # Update tool Status
+                state.save_stat(tool.status_key, build.FunctionStatus.SUCCESSFUL)
+                state.current_build_tool = None
+            finally:
+                # Store tool duration
+                execution_time = time.time() - start_time
+                state.save_stat(tool.duration_key, execution_time)
+                # Store current memory and peak working memory
+                state.save_stat(tool.memory_key, self._get_mem_usage_str())
+                # Inform profilers that tool has finished
+                for profiler in self.profilers:
+                    profiler.tool_stopping()
+        start_times["cool down"] = time.time()
+        # Tell the profilers to stop gathering data
+        for profiler in self.profilers:
+            profiler.stop()
+        if not saved_exception:
+            state.build_status = build.FunctionStatus.SUCCESSFUL
+            state.save_stat(fs.Keys.BUILD_STATUS, build.FunctionStatus.SUCCESSFUL)
+            if vars(state).get("invocation_info"):
+                state.invocation_info.status_message = (
+                    f"Successful build! {state.invocation_info.extra_status}"
+                )
+                state.invocation_info.status_message_color = printing.Colors.OKGREEN
+        # Generate profiler output
+        for profiler in self.profilers:
+            profiler.generate_results(state, timestamp, start_times)
+        if vars(state).get("models_found") and vars(state).get("invocation_info"):
+            # Present status statistics from the tools
+            for tool in self.tools:
+                state.invocation_info.stats_keys += tool.status_stats
+            # Present status statistics from the profilers
+            for profiler in self.profilers:
+                state.invocation_info.stats_keys += profiler.status_stats
+            print()
+            status.recursive_print(
+                models_found=state.models_found,
+                build_name=state.build_name,
+                cache_dir=state.cache_dir,
+                parent_model_hash=None,
+                parent_invocation_hash=None,
+                script_names_visited=[],
+            )
+        if lean_cache:
+            printing.log_info("Removing build artifacts...")
+            fs.clean_output_dir(state.cache_dir, state.build_name)
+        state.save()
+        if saved_exception:
+            raise saved_exception
+        printing.log_success(
+            f"\n    Saved to **{build.output_dir(state.cache_dir, state.build_name)}**"
+        )
+        return state
+    def status_line(self, verbosity):
+        """
+        Print a status line in the monitor for every tool in the sequence
+        """
+        for tool in self.tools:
+            tool.status_line(successful=None, verbosity=verbosity)
+    @property
+    def info(self) -> Dict[str, Dict]:
+        """
+        Return a dictionary of tool_name:argv for the sequence
+        """
+        return {tool.__class__.unique_name: argv for tool, argv in self.tools.items()}
+# This file was originally licensed under Apache 2.0. It has been modified.
+# Modifications Copyright (c) 2025 AMD