guidellm 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of guidellm might be problematic. Click here for more details.

@@ -0,0 +1,10 @@
1
+ from .base import Executor, ExecutorResult
2
+ from .profile_generator import Profile, ProfileGenerationMode, ProfileGenerator
3
+
4
+ __all__ = [
5
+ "Executor",
6
+ "ExecutorResult",
7
+ "Profile",
8
+ "ProfileGenerationMode",
9
+ "ProfileGenerator",
10
+ ]
@@ -0,0 +1,213 @@
1
+ from dataclasses import dataclass
2
+ from typing import AsyncGenerator, Optional, Sequence, Union
3
+
4
+ from loguru import logger
5
+
6
+ from guidellm.backend import Backend
7
+ from guidellm.core import TextGenerationBenchmarkReport
8
+ from guidellm.executor.profile_generator import (
9
+ Profile,
10
+ ProfileGenerationMode,
11
+ ProfileGenerator,
12
+ )
13
+ from guidellm.request import RequestGenerator
14
+ from guidellm.scheduler import Scheduler, SchedulerResult
15
+
16
+ __all__ = ["Executor", "ExecutorResult"]
17
+
18
+
19
@dataclass
class ExecutorResult:
    """
    Data class representing the result of executing tasks in the Executor.

    :param completed: Indicates whether all tasks have completed.
    :type completed: bool
    :param count_total: Total number of profiles.
    :type count_total: int
    :param count_completed: Number of completed profiles.
    :type count_completed: int
    :param generation_modes: The generation mode of each profile in the run.
    :type generation_modes: Sequence[ProfileGenerationMode]
    :param report: The benchmark report for text generation.
    :type report: TextGenerationBenchmarkReport
    :param scheduler_result: Optional scheduler result for the last task.
    :type scheduler_result: Optional[SchedulerResult]
    :param current_index: Index of the profile currently being run, or None.
    :type current_index: Optional[int]
    :param current_profile: The profile currently being run, or None.
    :type current_profile: Optional[Profile]
    """

    completed: bool
    count_total: int
    count_completed: int
    generation_modes: Sequence[ProfileGenerationMode]
    report: TextGenerationBenchmarkReport
    scheduler_result: Optional[SchedulerResult] = None
    current_index: Optional[int] = None
    current_profile: Optional[Profile] = None
44
+
45
+
46
class Executor:
    """
    The Executor class manages the execution of tasks based on a given profile
    generation mode and rate. It orchestrates the interaction between the backend,
    request generator, and profile generator, and runs benchmarks accordingly.

    :param backend: The backend to run tasks against.
    :type backend: Backend
    :param request_generator: The generator that creates requests for execution.
    :type request_generator: RequestGenerator
    :param mode: The mode for profile generation (e.g., sweep, synchronous).
    :type mode: ProfileGenerationMode
    :param rate: A single rate or a sequence of rates for load generation, or None.
    :type rate: Optional[Union[float, Sequence[float]]]
    :param max_number: Maximum number of requests to generate for the scheduler
        (a single benchmark run), or None.
    :type max_number: Optional[int]
    :param max_duration: Maximum duration for generating requests for the scheduler
        (a single benchmark run), or None.
    :type max_duration: Optional[float]
    """

    def __init__(
        self,
        backend: Backend,
        request_generator: RequestGenerator,
        mode: ProfileGenerationMode = "sweep",
        rate: Optional[Union[float, Sequence[float]]] = None,
        max_number: Optional[int] = None,
        max_duration: Optional[float] = None,
    ):
        self._backend = backend
        self._generator = request_generator
        self._max_number = max_number
        self._max_duration = max_duration
        # ProfileGenerator validates the mode/rate combination and raises on misuse
        self._profile_generator = ProfileGenerator(mode=mode, rate=rate)
        logger.info("Executor initialized with mode: {}, rate: {}", mode, rate)

    @property
    def backend(self) -> Backend:
        """
        Returns the backend being used by the Executor.

        :return: The backend instance.
        :rtype: Backend
        """
        return self._backend

    @property
    def request_generator(self) -> RequestGenerator:
        """
        Returns the request generator used by the Executor.

        :return: The request generator instance.
        :rtype: RequestGenerator
        """
        return self._generator

    @property
    def profile_generator(self) -> ProfileGenerator:
        """
        Returns the profile generator for generating profiles during execution.

        :return: The profile generator instance.
        :rtype: ProfileGenerator
        """
        return self._profile_generator

    @property
    def max_number(self) -> Optional[int]:
        """
        Returns the maximum number of requests to generate.

        :return: Maximum number of requests or None.
        :rtype: Optional[int]
        """
        return self._max_number

    @property
    def max_duration(self) -> Optional[float]:
        """
        Returns the maximum duration for generating requests.

        :return: Maximum duration in seconds or None.
        :rtype: Optional[float]
        """
        return self._max_duration

    async def run(self) -> AsyncGenerator[ExecutorResult, None]:
        """
        Runs the Executor, generating and scheduling tasks based on the profile
        generation mode. Yields results incrementally: one result before any
        profile runs, one result per scheduler update while profiles run, and a
        final result with ``completed=True`` once all profiles are exhausted.

        :return: An async generator of incremental executor results.
        :rtype: AsyncGenerator[ExecutorResult, None]
        """
        report = TextGenerationBenchmarkReport()
        # record the full run configuration on the report for later inspection
        report.args = {
            # backend args
            "backend_type": self.backend.type_,
            "target": self.backend.target,
            "model": self.backend.model,
            # data args
            "data_type": self.request_generator.type_,
            "data": self.request_generator.source,
            "tokenizer": self.request_generator.tokenizer.name_or_path,
            # rate args
            "mode": self.profile_generator.mode,
            "rate": self.profile_generator.rates,
            # limits args
            "max_number": self.max_number,
            "max_duration": self.max_duration,
        }
        profile_index = -1
        logger.info("Starting Executor run")

        # initial result so consumers can render progress before any work starts
        yield ExecutorResult(
            completed=False,
            count_total=len(self.profile_generator),
            count_completed=0,
            generation_modes=self.profile_generator.profile_generation_modes,
            report=report,
        )

        # the profile generator returns None once all profiles are exhausted,
        # which terminates this loop
        while profile := self.profile_generator.next(report):
            logger.debug("Generated profile: {}", profile)
            scheduler = Scheduler(
                generator=self.request_generator,
                worker=self.backend,
                mode=profile.load_gen_mode,
                rate=profile.load_gen_rate,
                max_number=self.max_number or profile.args.get("max_number", None),
                max_duration=self.max_duration,
            )
            profile_index += 1

            logger.info(
                "Scheduling tasks with mode: {}, rate: {}",
                profile.load_gen_mode,
                profile.load_gen_rate,
            )

            async for scheduler_result in scheduler.run():
                if scheduler_result.completed:
                    # a completed scheduler result carries the finished benchmark
                    report.add_benchmark(scheduler_result.benchmark)
                    logger.debug(
                        "Benchmark added for scheduler result: {}",
                        scheduler_result.benchmark,
                    )

                yield ExecutorResult(
                    completed=False,
                    count_total=len(self.profile_generator),
                    count_completed=len(report.benchmarks),
                    generation_modes=self.profile_generator.profile_generation_modes,
                    report=report,
                    scheduler_result=scheduler_result,
                    current_index=profile_index,
                    current_profile=profile,
                )

        logger.info("Executor run completed")
        yield ExecutorResult(
            completed=True,
            count_total=len(self.profile_generator),
            count_completed=len(report.benchmarks),
            generation_modes=self.profile_generator.profile_generation_modes,
            report=report,
        )
@@ -0,0 +1,343 @@
1
+ from typing import Any, Dict, Literal, Optional, Sequence, Union, get_args
2
+
3
+ import numpy as np
4
+ from loguru import logger
5
+ from pydantic import Field
6
+
7
+ from guidellm.config import settings
8
+ from guidellm.core import TextGenerationBenchmark, TextGenerationBenchmarkReport
9
+ from guidellm.core.serializable import Serializable
10
+ from guidellm.scheduler import LoadGenerationMode
11
+
12
+ __all__ = [
13
+ "Profile",
14
+ "ProfileGenerationMode",
15
+ "ProfileGenerator",
16
+ ]
17
+
18
# Closed set of supported profile generation modes.
ProfileGenerationMode = Literal[
    "sweep", "synchronous", "throughput", "constant", "poisson"
]


class Profile(Serializable):
    """
    A serializable data class representing a profile for load generation.

    :param load_gen_mode: The mode of load generation (e.g., constant, poisson).
    :type load_gen_mode: LoadGenerationMode
    :param load_gen_rate: The rate of load generation, if applicable.
    :type load_gen_rate: Optional[float]
    :param args: Additional arguments for the profile (defaults to an empty dict).
    :type args: Dict[str, Any]
    """

    load_gen_mode: LoadGenerationMode
    load_gen_rate: Optional[float] = None
    args: Dict[str, Any] = Field(default_factory=dict)
38
+
39
+
40
class ProfileGenerator:
    """
    Generates profiles based on different load generation modes.

    :param mode: The mode for profile generation (e.g., sweep, synchronous).
    :type mode: ProfileGenerationMode
    :param rate: The rate(s) for load generation; could be a float or list of floats.
    :type rate: Optional[Union[float, Sequence[float]]]
    :raises ValueError: If the mode is unknown or the rate(s) do not match the mode.
    """

    def __init__(
        self,
        mode: ProfileGenerationMode,
        rate: Optional[Union[float, Sequence[float]]] = None,
    ):
        if mode not in get_args(ProfileGenerationMode):
            err = ValueError(
                f"{mode} is not a valid Profile Generation Mode. "
                f"Valid options are {get_args(ProfileGenerationMode)}"
            )
            logger.error(err)
            raise err

        self._mode = mode

        if self._mode in ("sweep", "throughput", "synchronous"):
            # these modes determine their own load; user-supplied rates are invalid
            if rate is not None:
                err = ValueError(f"Rates are not applicable for {self._mode} mode")
                logger.error(err)
                raise err
            self._rates = None
        else:
            # "constant" and "poisson" require at least one positive rate
            if not rate:
                err = ValueError(f"Rates are required for {self._mode} mode")
                logger.error(err)
                raise err
            self._rates = rate if isinstance(rate, Sequence) else [rate]

            for rt in self._rates:
                if rt <= 0:
                    err = ValueError(
                        f"Rate must be > 0 for mode: {self._mode}. Given: {rt}"
                    )
                    logger.error(err)
                    raise err

        self._generated_count = 0

    def __len__(self) -> int:
        """
        Returns the number of profiles to generate based on the mode and rates.

        :return: The number of profiles.
        :rtype: int
        """
        if self._mode == "sweep":
            # synchronous + throughput baselines plus the configured sweep points
            return settings.num_sweep_profiles + 2

        if self._mode in ("throughput", "synchronous"):
            return 1

        if not self._rates:
            raise ValueError(f"Rates are required for {self._mode} mode")

        return len(self._rates)

    @property
    def mode(self) -> ProfileGenerationMode:
        """
        Returns the current mode of profile generation.

        :return: The profile generation mode.
        :rtype: ProfileGenerationMode
        """
        return self._mode

    @property
    def rates(self) -> Optional[Sequence[float]]:
        """
        Returns the list of rates for load generation, if any.

        :return: Sequence of rates or None if not applicable.
        :rtype: Optional[Sequence[float]]
        """
        return self._rates

    @property
    def generated_count(self) -> int:
        """
        Returns the current count of generated profiles.

        :return: The current count of generated profiles.
        :rtype: int
        """
        return self._generated_count

    @property
    def profile_generation_modes(self) -> Sequence[ProfileGenerationMode]:
        """
        Return the list of profile modes to be run in the report.

        :return: Sequence of profile modes to be run in the report.
        :rtype: Sequence[ProfileGenerationMode]
        :raises ValueError: If rates are missing for a fixed-rate mode or the
            mode is invalid.
        """
        if self._mode == "sweep":
            return ["synchronous", "throughput"] + ["constant"] * (  # type: ignore # noqa: PGH003
                settings.num_sweep_profiles
            )

        if self._mode in ["throughput", "synchronous"]:
            return [self._mode]

        if self._rates is None:
            raise ValueError(f"Rates are required for {self._mode} mode")

        if self._mode in ["constant", "poisson"]:
            return [self._mode] * len(self._rates)

        raise ValueError(f"Invalid mode: {self._mode}")

    def next(self, current_report: TextGenerationBenchmarkReport) -> Optional[Profile]:
        """
        Generates the next profile based on the current mode and report.

        :param current_report: The benchmark report accumulated so far; sweep mode
            reads its completed benchmarks to derive intermediate rates.
        :type current_report: TextGenerationBenchmarkReport
        :return: The generated profile or None if no more profiles.
        :rtype: Optional[Profile]
        """
        logger.debug(
            "Generating the next profile with mode: {}, current report: {}",
            self.mode,
            current_report,
        )

        if self.mode in ["constant", "poisson"]:
            if not self.rates:
                err = ValueError(f"Rates are required for {self.mode} mode")
                logger.error(err)
                raise err

            profile = self.create_fixed_rate_profile(
                self.generated_count,
                self.mode,
                self.rates,
            )
        elif self.mode == "synchronous":
            profile = self.create_synchronous_profile(self.generated_count)
        elif self.mode == "throughput":
            profile = self.create_throughput_profile(self.generated_count)
        elif self.mode == "sweep":
            # benchmarks[0] is the synchronous baseline and benchmarks[1] the
            # throughput baseline, matching the order of profile_generation_modes
            profile = self.create_sweep_profile(
                self.generated_count,
                sync_benchmark=current_report.benchmarks[0]
                if current_report.benchmarks
                else None,
                throughput_benchmark=current_report.benchmarks[1]
                if len(current_report.benchmarks) > 1
                else None,
            )
        else:
            err = ValueError(f"Invalid mode: {self.mode}")
            logger.error(err)
            raise err

        # count is advanced even when the final call yields None (exhausted)
        self._generated_count += 1
        logger.info(
            "Generated profile: {}, total generated count: {}",
            profile,
            self._generated_count,
        )
        return profile

    @staticmethod
    def create_fixed_rate_profile(
        index: int, mode: ProfileGenerationMode, rates: Sequence[float]
    ) -> Optional[Profile]:
        """
        Creates a profile with a fixed rate.

        :param index: The index of the rate in the list.
        :type index: int
        :param mode: The mode for profile generation (constant or poisson).
        :type mode: ProfileGenerationMode
        :param rates: The list of rates for load generation.
        :type rates: Sequence[float]
        :return: The generated profile or None if index is out of range.
        :rtype: Optional[Profile]
        :raises ValueError: If the mode is not a fixed-rate mode.
        """
        # maps the profile generation mode onto the scheduler's load mode
        modes_map: Dict[str, LoadGenerationMode] = {
            "constant": "constant",
            "poisson": "poisson",
        }

        if mode not in modes_map:
            err = ValueError(f"Invalid mode: {mode}")
            logger.error(err)
            raise err

        profile = (
            Profile(
                load_gen_mode=modes_map[mode],
                load_gen_rate=rates[index],
            )
            if index < len(rates)
            else None
        )
        logger.debug("Created fixed rate profile: {}", profile)
        return profile

    @staticmethod
    def create_synchronous_profile(index: int) -> Optional[Profile]:
        """
        Creates a profile with synchronous mode.

        :param index: The index of the profile to create.
        :type index: int
        :return: The generated profile or None if index is out of range.
        :rtype: Optional[Profile]
        """
        profile = (
            Profile(
                load_gen_mode="synchronous",
                load_gen_rate=None,
            )
            if index < 1
            else None
        )
        logger.debug("Created synchronous profile: {}", profile)
        return profile

    @staticmethod
    def create_throughput_profile(index: int) -> Optional[Profile]:
        """
        Creates a profile with throughput mode.

        :param index: The index of the profile to create.
        :type index: int
        :return: The generated profile or None if index is out of range.
        :rtype: Optional[Profile]
        """
        profile = (
            Profile(
                load_gen_mode="throughput",
                load_gen_rate=None,
            )
            if index < 1
            else None
        )
        logger.debug("Created throughput profile: {}", profile)
        return profile

    @staticmethod
    def create_sweep_profile(
        index: int,
        sync_benchmark: Optional[TextGenerationBenchmark],
        throughput_benchmark: Optional[TextGenerationBenchmark],
    ) -> Optional[Profile]:
        """
        Creates a profile with sweep mode, generating profiles between
        synchronous and throughput benchmarks.

        :param index: The index of the profile to create.
        :type index: int
        :param sync_benchmark: The completed synchronous benchmark, if any.
        :type sync_benchmark: Optional[TextGenerationBenchmark]
        :param throughput_benchmark: The completed throughput benchmark, if any.
        :type throughput_benchmark: Optional[TextGenerationBenchmark]
        :return: The generated profile or None if index is out of range.
        :rtype: Optional[Profile]
        :raises ValueError: If a required earlier benchmark is missing.
        """
        if index < 0 or index >= settings.num_sweep_profiles + 2:
            return None

        if index == 0:
            return ProfileGenerator.create_synchronous_profile(0)

        if not sync_benchmark:
            err = ValueError("Synchronous report is required for sweep mode")
            logger.error(err)
            raise err

        if index == 1:
            throughput_profile: Profile = ProfileGenerator.create_throughput_profile(0)  # type: ignore # noqa: PGH003
            # set the max number of requests to 5 times the number of requests
            # in case it is not set for the sweep to limit the number of requests
            throughput_profile.args = {"max_number": sync_benchmark.request_count * 5}
            return throughput_profile

        if not throughput_benchmark:
            err = ValueError("Throughput report is required for sweep mode")
            logger.error(err)
            raise err

        # interpolate evenly between the measured synchronous and throughput
        # request rates; drop the first point (equal to the synchronous rate)
        min_rate = sync_benchmark.completed_request_rate
        max_rate = throughput_benchmark.completed_request_rate
        intermediate_rates = list(
            np.linspace(min_rate, max_rate, settings.num_sweep_profiles + 1)
        )[1:]

        return Profile(
            load_gen_mode="constant",
            load_gen_rate=intermediate_rates[index - 2],
        )
guidellm/logger.py ADDED
@@ -0,0 +1,83 @@
1
+ """
2
+ Logger configuration for GuideLLM.
3
+
4
+ This module provides a flexible logging configuration using the loguru library.
5
+ It supports console and file logging with options to configure via environment
6
+ variables or direct function calls.
7
+
8
+ Environment Variables:
9
+ - GUIDELLM__LOGGING__DISABLED: Disable logging (default: false).
10
+ - GUIDELLM__LOGGING__CLEAR_LOGGERS: Clear existing loggers
11
+ from loguru (default: true).
12
+ - GUIDELLM__LOGGING__LOG_LEVEL: Log level for console logging
13
+ (default: none, options: DEBUG, INFO, WARNING, ERROR, CRITICAL).
14
+ - GUIDELLM__LOGGING__FILE: Path to the log file for file logging
15
+ (default: guidellm.log if log file level set else none)
16
+ - GUIDELLM__LOGGING__FILE_LEVEL: Log level for file logging
17
+ (default: INFO if log file set else none).
18
+
19
+ Usage:
20
+ from guidellm import logger, configure_logger, LoggingSettings
21
+
22
+ # Configure the logger with explicit settings
23
+ configure_logger(
24
+ config=LoggingSettings(
25
+ disabled=False,
26
+ clear_loggers=True,
27
+ console_log_level="DEBUG",
28
+ log_file=None,
29
+ log_file_level=None,
30
+ )
31
+ )
32
+
33
+ logger.debug("This is a debug message")
34
+ logger.info("This is an info message")
35
+ """
36
+
37
+ import sys
38
+
39
+ from loguru import logger
40
+
41
+ from guidellm.config import LoggingSettings, settings
42
+
43
+ __all__ = ["configure_logger", "logger"]
44
+
45
+
46
def configure_logger(config: LoggingSettings = settings.logging):
    """
    Configure the logger for GuideLLM.
    This function sets up the console and file logging
    as per the specified or default parameters.

    Note: Environment variables take precedence over the function parameters.

    :param config: The configuration for the logger to use.
    :type config: LoggingSettings
    """

    if config.disabled:
        logger.disable("guidellm")
        return

    logger.enable("guidellm")

    if config.clear_loggers:
        # drop loguru's default sink (and any previously added sinks)
        logger.remove()

    # log as a human readable string with the time, function, level, and message
    logger.add(
        sys.stdout,
        level=config.console_log_level.upper(),
        format="{time} | {function} | {level} - {message}",
    )

    if config.log_file or config.log_file_level:
        # either setting implies file logging; fill in the other one's default
        log_file = config.log_file or "guidellm.log"
        log_file_level = config.log_file_level or "INFO"
        # log as json to the file for easier parsing
        logger.add(log_file, level=log_file_level.upper(), serialize=True)


# invoke logger setup on import with default values
# enabling console logging with INFO and disabling file logging
configure_logger()