lemonade_sdk-9.1.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84)
  1. lemonade/__init__.py +5 -0
  2. lemonade/api.py +180 -0
  3. lemonade/cache.py +92 -0
  4. lemonade/cli.py +173 -0
  5. lemonade/common/__init__.py +0 -0
  6. lemonade/common/build.py +176 -0
  7. lemonade/common/cli_helpers.py +139 -0
  8. lemonade/common/exceptions.py +98 -0
  9. lemonade/common/filesystem.py +368 -0
  10. lemonade/common/inference_engines.py +408 -0
  11. lemonade/common/network.py +93 -0
  12. lemonade/common/printing.py +110 -0
  13. lemonade/common/status.py +471 -0
  14. lemonade/common/system_info.py +1411 -0
  15. lemonade/common/test_helpers.py +28 -0
  16. lemonade/profilers/__init__.py +1 -0
  17. lemonade/profilers/agt_power.py +437 -0
  18. lemonade/profilers/hwinfo_power.py +429 -0
  19. lemonade/profilers/memory_tracker.py +259 -0
  20. lemonade/profilers/profiler.py +58 -0
  21. lemonade/sequence.py +363 -0
  22. lemonade/state.py +159 -0
  23. lemonade/tools/__init__.py +1 -0
  24. lemonade/tools/accuracy.py +432 -0
  25. lemonade/tools/adapter.py +114 -0
  26. lemonade/tools/bench.py +302 -0
  27. lemonade/tools/flm/__init__.py +1 -0
  28. lemonade/tools/flm/utils.py +305 -0
  29. lemonade/tools/huggingface/bench.py +187 -0
  30. lemonade/tools/huggingface/load.py +235 -0
  31. lemonade/tools/huggingface/utils.py +359 -0
  32. lemonade/tools/humaneval.py +264 -0
  33. lemonade/tools/llamacpp/bench.py +255 -0
  34. lemonade/tools/llamacpp/load.py +222 -0
  35. lemonade/tools/llamacpp/utils.py +1260 -0
  36. lemonade/tools/management_tools.py +319 -0
  37. lemonade/tools/mmlu.py +319 -0
  38. lemonade/tools/oga/__init__.py +0 -0
  39. lemonade/tools/oga/bench.py +120 -0
  40. lemonade/tools/oga/load.py +804 -0
  41. lemonade/tools/oga/migration.py +403 -0
  42. lemonade/tools/oga/utils.py +462 -0
  43. lemonade/tools/perplexity.py +147 -0
  44. lemonade/tools/prompt.py +263 -0
  45. lemonade/tools/report/__init__.py +0 -0
  46. lemonade/tools/report/llm_report.py +203 -0
  47. lemonade/tools/report/table.py +899 -0
  48. lemonade/tools/server/__init__.py +0 -0
  49. lemonade/tools/server/flm.py +133 -0
  50. lemonade/tools/server/llamacpp.py +320 -0
  51. lemonade/tools/server/serve.py +2123 -0
  52. lemonade/tools/server/static/favicon.ico +0 -0
  53. lemonade/tools/server/static/index.html +279 -0
  54. lemonade/tools/server/static/js/chat.js +1059 -0
  55. lemonade/tools/server/static/js/model-settings.js +183 -0
  56. lemonade/tools/server/static/js/models.js +1395 -0
  57. lemonade/tools/server/static/js/shared.js +556 -0
  58. lemonade/tools/server/static/logs.html +191 -0
  59. lemonade/tools/server/static/styles.css +2654 -0
  60. lemonade/tools/server/static/webapp.html +321 -0
  61. lemonade/tools/server/tool_calls.py +153 -0
  62. lemonade/tools/server/tray.py +664 -0
  63. lemonade/tools/server/utils/macos_tray.py +226 -0
  64. lemonade/tools/server/utils/port.py +77 -0
  65. lemonade/tools/server/utils/thread.py +85 -0
  66. lemonade/tools/server/utils/windows_tray.py +408 -0
  67. lemonade/tools/server/webapp.py +34 -0
  68. lemonade/tools/server/wrapped_server.py +559 -0
  69. lemonade/tools/tool.py +374 -0
  70. lemonade/version.py +1 -0
  71. lemonade_install/__init__.py +1 -0
  72. lemonade_install/install.py +239 -0
  73. lemonade_sdk-9.1.1.dist-info/METADATA +276 -0
  74. lemonade_sdk-9.1.1.dist-info/RECORD +84 -0
  75. lemonade_sdk-9.1.1.dist-info/WHEEL +5 -0
  76. lemonade_sdk-9.1.1.dist-info/entry_points.txt +5 -0
  77. lemonade_sdk-9.1.1.dist-info/licenses/LICENSE +201 -0
  78. lemonade_sdk-9.1.1.dist-info/licenses/NOTICE.md +47 -0
  79. lemonade_sdk-9.1.1.dist-info/top_level.txt +3 -0
  80. lemonade_server/cli.py +805 -0
  81. lemonade_server/model_manager.py +758 -0
  82. lemonade_server/pydantic_models.py +159 -0
  83. lemonade_server/server_models.json +643 -0
  84. lemonade_server/settings.py +39 -0
lemonade/tools/bench.py
@@ -0,0 +1,302 @@
+ from abc import ABC, abstractmethod
+ import argparse
+ import os
+ import platform
+ from lemonade.state import State
+ from lemonade.tools import Tool
+ from lemonade.cache import Keys
+
+ default_iterations = 10
+ default_warmup_runs = 5
+ default_prompt_length = 64
+ default_output_tokens = 32
+ default_prompt = "Hello, I am conscious and"
+
+
+ class Bench(Tool, ABC):
+     """
+     Abstract parent class for tools that benchmark the performance of the generate()
+     method of an LLM.
+     """
+
+     def __init__(self, monitor_message="Benchmarking LLM"):
+         super().__init__(monitor_message)
+
+         # The minimum set of statistics that a benchmark tool will produce
+         # Inherited tools should append any additional statistics they generate to this list
+         self.status_stats = [
+             Keys.SECONDS_TO_FIRST_TOKEN,
+             Keys.STD_DEV_SECONDS_TO_FIRST_TOKEN,
+             Keys.TOKEN_GENERATION_TOKENS_PER_SECOND,
+             Keys.STD_DEV_TOKENS_PER_SECOND,
+             Keys.PREFILL_TOKENS_PER_SECOND,
+             Keys.STD_DEV_PREFILL_TOKENS_PER_SECOND,
+             Keys.PROMPT_TOKENS,
+             Keys.RESPONSE_TOKENS,
+             Keys.MAX_MEMORY_USED_GBYTE,
+         ]
+
+         # Minimum per measurement statistics
+         # Inherited tools should add additional lists for other per prompt statistics
+         self.input_ids_len_list = []
+         self.tokens_out_len_list = []
+         self.mean_time_to_first_token_list = []
+         self.std_dev_time_to_first_token_list = []
+         self.prefill_tokens_per_second_list = []
+         self.std_dev_prefill_tokens_per_second_list = []
+         self.token_generation_tokens_per_second_list = []
+         self.std_dev_token_generation_tokens_per_second_list = []
+         self.max_memory_used_gb_list = []
+
+         # Max memory used can only be measured on Windows systems
+         self.save_max_memory_used = platform.system() == "Windows"
+
+         # This is set to True only for the duration of the first call to run_prompt
+         self.first_run_prompt = None
+
+     @staticmethod
+     def parser(parser: argparse.ArgumentParser = None, add_help: bool = True):
+         # Allow inherited classes to initialize and pass in a parser, add parameters to it if so
+         if parser is None:
+             parser = __class__.helpful_parser(
+                 short_description="Benchmark an LLM", add_help=add_help
+             )
+
+         parser.add_argument(
+             "--iterations",
+             "-i",
+             required=False,
+             type=int,
+             default=default_iterations,
+             help="Number of benchmarking iterations to run (default: "
+             f"{default_iterations})",
+         )
+
+         parser.add_argument(
+             "--warmup-iterations",
+             "-w",
+             required=False,
+             type=int,
+             default=default_warmup_runs,
+             help="Number of benchmarking iterations to use for cache warmup "
+             "(the results of these iterations "
+             f"are not included in the results; default: {default_warmup_runs})",
+         )
+
+         parser.add_argument(
+             "--prompts",
+             "-p",
+             nargs="+",
+             required=False,
+             default=[str(default_prompt_length)],
+             metavar="PROMPT",
+             help="Input one or more prompts to the LLM. Three formats are supported. "
+             "1) integer: use a synthetic prompt with the specified token length "
+             "2) str: use a user-provided prompt string "
+             "3) path/to/prompt.txt: load the prompt from a text file. "
+             f"(default: {default_prompt_length}) ",
+         )
+
+         parser.add_argument(
+             "--output-tokens",
+             required=False,
+             type=int,
+             default=default_output_tokens,
+             help="Number of new tokens the LLM should make (default: "
+             f"{default_output_tokens})",
+         )
+
+         return parser
+
+     def get_prompt_str(self, _state, token_length):
+         """
+         Returns a string with approximately the prescribed token length.
+         Note: Actual token length is dependent on the tokenizer.
+         """
+         return "word " * (token_length - 1)
+
+     def parse(self, state: State, args, known_only=True) -> argparse.Namespace:
+         """
+         Helper function to parse CLI arguments into the args expected by run()
+         """
+
+         parsed_args = super().parse(state, args, known_only)
+
+         if parsed_args.prompts is None:
+             parsed_args.prompts = [str(default_prompt_length)]
+
+         # Decode prompt arg into a list of prompt strings
+         prompt_strings = []
+         for prompt_item in parsed_args.prompts:
+             if prompt_item.isdigit():
+                 # Generate a prompt with the requested length
+                 token_length = int(prompt_item)
+                 prompt_strings.append(self.get_prompt_str(state, token_length))
+
+             elif os.path.exists(prompt_item):
+                 with open(prompt_item, "r", encoding="utf-8") as f:
+                     prompt_strings.append(f.read())
+
+             else:
+                 # No change to the prompt
+                 prompt_strings.append(prompt_item)
+         parsed_args.prompts = prompt_strings
+
+         return parsed_args
+
+     def run(
+         self,
+         state: State,
+         prompts: list[str] = None,
+         iterations: int = default_iterations,
+         warmup_iterations: int = default_warmup_runs,
+         output_tokens: int = default_output_tokens,
+         **kwargs,
+     ) -> State:
+         """
+         Args:
+             - prompts: List of input prompts used as starting points for LLM text generation
+             - iterations: number of benchmarking samples to take; results are
+                 reported as the median and mean of the samples.
+             - warmup_iterations: subset of the iterations to treat as warmup,
+                 and not included in the results.
+             - output_tokens: Number of new tokens for the LLM to create.
+             - kwargs: Additional parameters used by bench tools
+         """
+
+         if prompts is None:
+             prompts = ["word " * (default_prompt_length - 2)]
+         elif isinstance(prompts, str):
+             prompts = [prompts]
+
+         state.save_stat("prompts", prompts)
+         state.save_stat("iterations", iterations)
+         state.save_stat("warmup_iterations", warmup_iterations)
+         state.save_stat("output_tokens", output_tokens)
+
+         counter = 0
+         report_progress_fn = lambda x: self.set_percent_progress(
+             100 * (counter + x) / len(prompts)
+         )
+         self.first_run_prompt = True
+         for counter, prompt in enumerate(prompts):
+             report_progress_fn(0)
+
+             self.run_prompt(
+                 state,
+                 report_progress_fn,
+                 prompt,
+                 iterations,
+                 warmup_iterations,
+                 output_tokens,
+                 **kwargs,
+             )
+             self.first_run_prompt = False
+
+         self.set_percent_progress(None)
+         self.save_stats(state)
+
+         return state
+
+     @abstractmethod
+     def run_prompt(
+         self,
+         state,
+         report_progress_fn,
+         prompt,
+         iterations,
+         warmup_iterations,
+         output_tokens,
+         **kwargs,
+     ):
+         """
+         The run_prompt method should append the appropriate value to each of the per prompt
+         measurement statistics lists that are members of the Bench class.
+         """
+
+     @staticmethod
+     def get_item_or_list(lst):
+         """
+         If the list is just a single item then return the item, else return the list
+         """
+         if len(lst) == 1:
+             return lst[0]
+         else:
+             return lst
+
+     def save_stats(self, state):
+         # Save performance data to stats
+         state.save_stat(
+             Keys.PROMPT_TOKENS, self.get_item_or_list(self.input_ids_len_list)
+         )
+         state.save_stat(
+             Keys.RESPONSE_TOKENS, self.get_item_or_list(self.tokens_out_len_list)
+         )
+         state.save_stat(
+             Keys.SECONDS_TO_FIRST_TOKEN,
+             self.get_item_or_list(self.mean_time_to_first_token_list),
+         )
+         if not all(
+             element is None for element in self.std_dev_time_to_first_token_list
+         ):
+             state.save_stat(
+                 Keys.STD_DEV_SECONDS_TO_FIRST_TOKEN,
+                 self.get_item_or_list(self.std_dev_time_to_first_token_list),
+             )
+         state.save_stat(
+             Keys.PREFILL_TOKENS_PER_SECOND,
+             self.get_item_or_list(self.prefill_tokens_per_second_list),
+         )
+         if not all(
+             element is None for element in self.std_dev_prefill_tokens_per_second_list
+         ):
+             state.save_stat(
+                 Keys.STD_DEV_PREFILL_TOKENS_PER_SECOND,
+                 self.get_item_or_list(self.std_dev_prefill_tokens_per_second_list),
+             )
+         state.save_stat(
+             Keys.TOKEN_GENERATION_TOKENS_PER_SECOND,
+             self.get_item_or_list(self.token_generation_tokens_per_second_list),
+         )
+         if not all(
+             element is None
+             for element in self.std_dev_token_generation_tokens_per_second_list
+         ):
+             state.save_stat(
+                 Keys.STD_DEV_TOKENS_PER_SECOND,
+                 self.get_item_or_list(
+                     self.std_dev_token_generation_tokens_per_second_list
+                 ),
+             )
+         if self.save_max_memory_used:
+             state.save_stat(
+                 Keys.MAX_MEMORY_USED_GBYTE,
+                 self.get_item_or_list(self.max_memory_used_gb_list),
+             )
+
+     @staticmethod
+     def not_enough_tokens(output_tokens: int):
+         """
+         Raise an exception that explains why a benchmark did not produce any results
+         """
+
+         raise ValueError(
+             "Your model was benchmarked, however none of the benchmarking "
+             "iterations produced the requested amount of output tokens "
+             f"(currently {output_tokens}), so "
+             "the results have been discarded. You have the following options "
+             "to solve this: \n"
+             "1. Use the -p option to change the prompt to something that will "
+             "produce more output tokens. For example, 'The extremely long "
+             "story of my life, told in excruciating details is:' "
+             "is an example of a prompt that will result in a lot of output. \n"
+             "2. Set a lower value for --output-tokens to make it more likely "
+             "that the model will produce enough. \n"
+             "3. Set more verbose hyperparameters. \n"
+             "4. Run more benchmarking iterations, to improve the chance of "
+             "getting at least one with enough output tokens. \n"
+         )
+
+
+ # This file was originally licensed under Apache 2.0. It has been modified.
+ # Modifications Copyright (c) 2025 AMD
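For orientation, the sketch below shows how a concrete tool might subclass the Bench class from the hunk above. It is illustrative only: the EchoBench name, the placeholder measurements, and the assumption that run_prompt is the only method a subclass must implement are not taken from the package.

# Hypothetical example (not from the package): a minimal Bench subclass that
# records placeholder measurements for each prompt.
import time

from lemonade.state import State
from lemonade.tools.bench import Bench


class EchoBench(Bench):
    def __init__(self):
        super().__init__(monitor_message="Benchmarking echo model")

    def run_prompt(
        self,
        state: State,
        report_progress_fn,
        prompt,
        iterations,
        warmup_iterations,
        output_tokens,
        **kwargs,
    ):
        start = time.perf_counter()
        # A real tool would run warmup_iterations + iterations generate() calls here.
        elapsed = time.perf_counter() - start
        prompt_tokens = len(prompt.split())

        # Append one entry per prompt to the per-prompt statistics lists that
        # Bench.save_stats() reads at the end of Bench.run().
        self.input_ids_len_list.append(prompt_tokens)
        self.tokens_out_len_list.append(output_tokens)
        self.mean_time_to_first_token_list.append(elapsed)
        self.std_dev_time_to_first_token_list.append(None)
        self.prefill_tokens_per_second_list.append(prompt_tokens / max(elapsed, 1e-9))
        self.std_dev_prefill_tokens_per_second_list.append(None)
        self.token_generation_tokens_per_second_list.append(float(output_tokens))
        self.std_dev_token_generation_tokens_per_second_list.append(None)
        if self.save_max_memory_used:
            self.max_memory_used_gb_list.append(0.0)
        report_progress_fn(1.0)

Under these assumptions, Bench.run() drives the subclass: it iterates over the prompts, calls run_prompt() once per prompt, and finishes by writing the collected lists to the state via save_stats().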
lemonade/tools/flm/__init__.py
@@ -0,0 +1 @@
+ # FLM (FastFlowLM) utilities for Lemonade SDK
lemonade/tools/flm/utils.py
@@ -0,0 +1,305 @@
+ """
+ FLM (FastFlowLM) utilities for installation, version checking, and model management.
+ """
+
+ import os
+ import logging
+ import subprocess
+ import tempfile
+ import time
+ from typing import List, Optional
+
+ import requests
+ from packaging.version import Version, InvalidVersion
+
+
+ def get_flm_latest_version() -> Optional[str]:
+     """
+     Get and return the latest FLM version from "https://github.com/FastFlowLM/FastFlowLM/tags".
+     This uses the GitHub tags API.
+     """
+     url = "https://api.github.com/repos/FastFlowLM/FastFlowLM/tags"
+     try:
+         response = requests.get(url, timeout=10)
+         response.raise_for_status()
+         tags = response.json()
+         if not tags:
+             return None
+         # Tags are sorted in reverse chronological order; find the first that looks like a version
+         for tag in tags:
+             tag_name = tag.get("name", "")
+             # Accept tags of the form v0.9.10, 0.9.10, etc.
+             if tag_name.startswith("v"):
+                 version_candidate = tag_name[1:]
+             else:
+                 version_candidate = tag_name
+             try:
+                 # validate it's a version string
+                 _ = Version(version_candidate)
+                 return version_candidate
+             except InvalidVersion:
+                 continue
+         return None
+     except requests.exceptions.RequestException as e:
+         logging.debug("Error retrieving latest FLM version: %s", e)
+         return None
+
+
+ def check_flm_version() -> Optional[str]:
+     """
+     Return a (installed_version, latest_version) tuple; installed_version is None if FLM is not installed.
+     """
+     latest_version_str = get_flm_latest_version()
+     try:
+         result = subprocess.run(
+             ["flm", "version"],
+             capture_output=True,
+             text=True,
+             check=True,
+             encoding="utf-8",
+             errors="replace",
+         )
+
+         # Parse version from output like "FLM v0.9.4"
+         output = result.stdout.strip()
+         if output.startswith("FLM v"):
+             version_str = output[5:]  # Remove "FLM v" prefix
+             return version_str, latest_version_str
+         return None, latest_version_str
+
+     except (subprocess.CalledProcessError, FileNotFoundError):
+         return None, latest_version_str
+
+
+ def refresh_environment():
+     """
+     Refresh PATH to pick up newly installed executables.
+     """
+     if os.name == "nt":  # Windows
+         # On Windows, we need to refresh the PATH from registry
+         import winreg
+
+         try:
+             with winreg.OpenKey(
+                 winreg.HKEY_LOCAL_MACHINE,
+                 r"SYSTEM\CurrentControlSet\Control\Session Manager\Environment",
+             ) as key:
+                 path_value, _ = winreg.QueryValueEx(key, "PATH")
+                 os.environ["PATH"] = path_value + ";" + os.environ.get("PATH", "")
+         except Exception as e:  # pylint: disable=broad-except
+             logging.debug("Could not refresh PATH from registry: %s", e)
+
+         # Also try to add common installation paths
+         common_paths = [
+             r"C:\Program Files\FLM",
+             r"C:\Program Files (x86)\FLM",
+             os.path.expanduser(r"~\AppData\Local\FLM"),
+         ]
+         for path in common_paths:
+             if os.path.exists(path) and path not in os.environ.get("PATH", ""):
+                 os.environ["PATH"] = path + ";" + os.environ.get("PATH", "")
+
+
+ def install_flm():
+     """
+     Check if FLM is installed and at minimum version.
+     If not, download and run the GUI installer, then wait for completion.
+     """
+     # Check current FLM installation
+     current_version, latest_version = check_flm_version()
+
+     if (
+         current_version
+         and latest_version
+         and Version(current_version) == Version(latest_version)
+     ):
+         logging.info(
+             "FLM v%s is already installed and is up to date (latest version: v%s).",
+             current_version,
+             latest_version,
+         )
+         return
+
+     if current_version:
+         if not latest_version:
+             logging.info(
+                 "Unable to detect the latest FLM version; continuing with installed FLM v%s.",
+                 current_version,
+             )
+             return
+         logging.info(
+             "FLM v%s is installed but below latest version v%s. Upgrading...",
+             current_version,
+             latest_version,
+         )
+         verysilent = True
+     else:
+         logging.info("FLM not found. Installing FLM v%s or later...", latest_version)
+         verysilent = False
+
+     # Download the installer
+     # pylint: disable=line-too-long
+     installer_url = "https://github.com/FastFlowLM/FastFlowLM/releases/latest/download/flm-setup.exe"
+     installer_path = os.path.join(tempfile.gettempdir(), "flm-setup.exe")
+     installer_args = [installer_path, "/VERYSILENT"] if verysilent else [installer_path]
+
+     try:
+         # Remove existing installer if present
+         if os.path.exists(installer_path):
+             os.remove(installer_path)
+
+         logging.info("Downloading FLM installer...")
+         response = requests.get(installer_url, stream=True, timeout=30)
+         response.raise_for_status()
+
+         # Save installer to disk
+         with open(installer_path, "wb") as f:
+             for chunk in response.iter_content(chunk_size=8192):
+                 f.write(chunk)
+             f.flush()
+             os.fsync(f.fileno())
+
+         logging.info("Downloaded FLM installer to %s", installer_path)
+
+         # Launch the installer GUI
+         logging.warning(
+             "Launching FLM installer GUI. Please complete the installation..."
+             if not verysilent
+             else "Installing FLM..."
+         )
+
+         # Launch installer and wait for it to complete
+         if os.name == "nt":  # Windows
+             process = subprocess.Popen(installer_args, shell=True)
+         else:
+             process = subprocess.Popen(installer_args)
+
+         # Wait for installer to complete
+         process.wait()
+
+         if process.returncode != 0:
+             raise RuntimeError(
+                 f"FLM installer failed with exit code {process.returncode}"
+             )
+
+         logging.info("FLM installer completed successfully")
+
+         # Refresh environment to pick up new PATH entries
+         refresh_environment()
+
+         # Wait a moment for system to update
+         time.sleep(2)
+
+         # Verify installation
+         max_retries = 10
+         for attempt in range(max_retries):
+             new_version, latest_version = check_flm_version()
+             if new_version and Version(new_version) == Version(latest_version):
+                 logging.info("FLM v%s successfully installed and verified", new_version)
+                 return
+
+             if attempt < max_retries - 1:
+                 logging.debug(
+                     "FLM not yet available in PATH, retrying... (attempt %d/%d)",
+                     attempt + 1,
+                     max_retries,
+                 )
+                 time.sleep(3)
+                 refresh_environment()
+
+         # Final check failed
+         raise RuntimeError(
+             "FLM installation completed but 'flm' command is not available in PATH. "
+             "Please ensure FLM is properly installed and available in your system PATH."
+         )
+
+     except requests.RequestException as e:
+         raise RuntimeError(f"Failed to download FLM installer: {e}") from e
+     except Exception as e:
+         raise RuntimeError(f"FLM installation failed: {e}") from e
+     finally:
+         # Clean up installer file
+         if os.path.exists(installer_path):
+             try:
+                 os.remove(installer_path)
+             except OSError:
+                 pass  # Ignore cleanup errors
+
+
+ def download_flm_model(config_checkpoint, _=None, do_not_upgrade=False) -> dict:
+     """
+     Downloads the FLM model for the given configuration.
+
+     Args:
+         config_checkpoint: name of the FLM model to install.
+         _: placeholder for `config_mmproj`, which is standard
+             for WrappedServer (see llamacpp/utils.py).
+         do_not_upgrade: if True, skip re-downloading the model when it is
+             already available.
+     """
+
+     if do_not_upgrade:
+         command = ["flm", "pull", f"{config_checkpoint}"]
+     else:
+         command = ["flm", "pull", f"{config_checkpoint}", "--force"]
+
+     subprocess.run(command, check=True)
+
+
+ def get_flm_installed_models() -> List[str]:
+     """
+     Parse FLM model list and return installed model checkpoints.
+
+     Uses the improved FLM CLI methodology with --filter and --quiet flags
+     for cleaner, more reliable output parsing.
+
+     Returns:
+         List of installed FLM model checkpoints (e.g., ["llama3.2:1b", "gemma3:4b"])
+     """
+     try:
+         result = subprocess.run(
+             ["flm", "list", "--filter", "installed", "--quiet"],
+             capture_output=True,
+             text=True,
+             check=True,
+             encoding="utf-8",
+             errors="replace",
+         )
+
+         # Check if we got valid output
+         if not result.stdout:
+             return []
+
+         installed_checkpoints = []
+
+         lines = result.stdout.strip().split("\n")
+         for line in lines:
+             line = line.strip()
+             # Skip the "Models:" header line
+             if line == "Models:" or not line:
+                 continue
+             # Parse model checkpoint (format: " - modelname:tag")
+             if line.startswith("- "):
+                 checkpoint = line[2:].strip()
+                 installed_checkpoints.append(checkpoint)
+
+         return installed_checkpoints
+
+     except (
+         subprocess.CalledProcessError,
+         FileNotFoundError,
+         AttributeError,
+         NotADirectoryError,
+     ):
+         # FLM not installed, not available, or output parsing failed
+         return []
+
+
+ def is_flm_available() -> bool:
+     """
+     Check if FLM is available and matches the latest released version.
+     """
+     current_version, latest_version = check_flm_version()
+     return current_version is not None and Version(current_version) == Version(
+         latest_version
+     )
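As a usage illustration only (not code from the package), the FLM helpers in the hunk above could be chained into a simple "make sure this model is ready" routine. The ensure_flm_model function and its llama3.2:1b default are hypothetical; the model name is borrowed from the get_flm_installed_models docstring.

# Hypothetical workflow sketch built from the helpers shown above.
from lemonade.tools.flm.utils import (
    download_flm_model,
    get_flm_installed_models,
    install_flm,
    is_flm_available,
)


def ensure_flm_model(checkpoint: str = "llama3.2:1b") -> None:
    """Install FLM if needed, then make sure `checkpoint` is pulled locally."""
    if not is_flm_available():
        # Downloads and runs the FLM installer, then verifies `flm` is on PATH;
        # raises RuntimeError if installation cannot be verified.
        install_flm()

    if checkpoint not in get_flm_installed_models():
        # do_not_upgrade=True pulls the model only if it is not already present.
        download_flm_model(checkpoint, do_not_upgrade=True)

Note that install_flm() downloads a Windows installer (flm-setup.exe) and may refresh PATH from the Windows registry, so this sketch assumes a Windows host.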