PyPI - guidellm - Versions diffs - 0.1.0__py3-none-any.whl → 0.2.0rc20250418__py3-none-any.whl - Mend

guidellm 0.1.0__py3-none-any.whl → 0.2.0rc20250418__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of guidellm might be problematic. Click here for more details.

Files changed (69) hide show

guidellm/__init__.py +38 -6
guidellm/__main__.py +294 -0
guidellm/backend/__init__.py +19 -6
guidellm/backend/backend.py +238 -0
guidellm/backend/openai.py +532 -122
guidellm/backend/response.py +132 -0
guidellm/benchmark/__init__.py +73 -0
guidellm/benchmark/aggregator.py +760 -0
guidellm/benchmark/benchmark.py +838 -0
guidellm/benchmark/benchmarker.py +334 -0
guidellm/benchmark/entrypoints.py +141 -0
guidellm/benchmark/output.py +946 -0
guidellm/benchmark/profile.py +409 -0
guidellm/benchmark/progress.py +720 -0
guidellm/config.py +34 -56
guidellm/data/__init__.py +4 -0
guidellm/data/prideandprejudice.txt.gz +0 -0
guidellm/dataset/__init__.py +22 -0
guidellm/dataset/creator.py +213 -0
guidellm/dataset/entrypoints.py +42 -0
guidellm/dataset/file.py +90 -0
guidellm/dataset/hf_datasets.py +62 -0
guidellm/dataset/in_memory.py +132 -0
guidellm/dataset/synthetic.py +262 -0
guidellm/objects/__init__.py +18 -0
guidellm/objects/pydantic.py +60 -0
guidellm/objects/statistics.py +947 -0
guidellm/request/__init__.py +12 -10
guidellm/request/loader.py +281 -0
guidellm/request/request.py +79 -0
guidellm/scheduler/__init__.py +51 -3
guidellm/scheduler/result.py +137 -0
guidellm/scheduler/scheduler.py +382 -0
guidellm/scheduler/strategy.py +493 -0
guidellm/scheduler/types.py +7 -0
guidellm/scheduler/worker.py +511 -0
guidellm/utils/__init__.py +16 -29
guidellm/utils/colors.py +8 -0
guidellm/utils/hf_transformers.py +35 -0
guidellm/utils/random.py +43 -0
guidellm/utils/text.py +118 -357
{guidellm-0.1.0.dist-info → guidellm-0.2.0rc20250418.dist-info}/METADATA +96 -79
guidellm-0.2.0rc20250418.dist-info/RECORD +48 -0
{guidellm-0.1.0.dist-info → guidellm-0.2.0rc20250418.dist-info}/WHEEL +1 -1
guidellm-0.2.0rc20250418.dist-info/entry_points.txt +2 -0
guidellm/backend/base.py +0 -320
guidellm/core/__init__.py +0 -24
guidellm/core/distribution.py +0 -190
guidellm/core/report.py +0 -321
guidellm/core/request.py +0 -44
guidellm/core/result.py +0 -545
guidellm/core/serializable.py +0 -169
guidellm/executor/__init__.py +0 -10
guidellm/executor/base.py +0 -213
guidellm/executor/profile_generator.py +0 -343
guidellm/main.py +0 -336
guidellm/request/base.py +0 -194
guidellm/request/emulated.py +0 -391
guidellm/request/file.py +0 -76
guidellm/request/transformers.py +0 -100
guidellm/scheduler/base.py +0 -374
guidellm/scheduler/load_generator.py +0 -196
guidellm/utils/injector.py +0 -70
guidellm/utils/progress.py +0 -196
guidellm/utils/transformers.py +0 -151
guidellm-0.1.0.dist-info/RECORD +0 -35
guidellm-0.1.0.dist-info/entry_points.txt +0 -3
{guidellm-0.1.0.dist-info → guidellm-0.2.0rc20250418.dist-info/licenses}/LICENSE +0 -0
{guidellm-0.1.0.dist-info → guidellm-0.2.0rc20250418.dist-info}/top_level.txt +0 -0

guidellm/scheduler/base.py DELETED Viewed

@@ -1,374 +0,0 @@
-import asyncio
-import math
-import time
-from dataclasses import dataclass
-from typing import AsyncGenerator, Literal, Optional, Union, get_args
-from loguru import logger
-from guidellm.backend import Backend
-from guidellm.config import settings
-from guidellm.core import (
-    TextGenerationBenchmark,
-    TextGenerationError,
-    TextGenerationRequest,
-    TextGenerationResult,
-)
-from guidellm.request import RequestGenerator
-from guidellm.scheduler.load_generator import LoadGenerationMode, LoadGenerator
-__all__ = ["Scheduler", "SchedulerResult"]
-@dataclass
-class SchedulerResult:
-    """
-    Represents one progress update yielded by Scheduler.run.
-    :param completed: True only on the final result yielded once the run has
-        finished; False on the initial and per-request results.
-    :type completed: bool
-    :param count_total: Progress target for the run: max_number when it is set,
-        otherwise max_duration rounded to whole seconds.
-    :type count_total: int
-    :param count_completed: Progress made so far, in the same unit as count_total
-        (requests when max_number is set, otherwise elapsed whole seconds).
-    :type count_completed: int
-    :param benchmark: Benchmark data for the task execution.
-    :type benchmark: TextGenerationBenchmark
-    :param current_result: The result of the current request, if any.
-    :type current_result: Optional[Union[TextGenerationResult, TextGenerationError]]
-    """
-    completed: bool
-    count_total: int
-    count_completed: int
-    benchmark: TextGenerationBenchmark
-    # None on the initial and final progress results, which carry no request
-    current_result: Optional[Union[TextGenerationResult, TextGenerationError]] = None
-class Scheduler:
-    """
-    Schedules and manages the execution of tasks for text generation requests.
-    Requests from the generator are paired with submission timestamps from a
-    LoadGenerator and submitted to the backend worker until max_number requests
-    have been issued or max_duration seconds have elapsed.
-    :param generator: The request generator that produces text generation requests.
-    :type generator: RequestGenerator
-    :param worker: The backend worker that processes the requests.
-    :type worker: Backend
-    :param mode: The mode of load generation (e.g., synchronous, constant, poisson).
-    :type mode: LoadGenerationMode
-    :param rate: The rate at which requests are generated; required for the
-        "constant" and "poisson" modes.
-    :type rate: Optional[float]
-    :param max_number: Maximum number of requests to be processed.
-    :type max_number: Optional[int]
-    :param max_duration: Maximum duration in seconds for which requests
-        should be processed.
-    :type max_duration: Optional[float]
-    :raises ValueError: If the mode is invalid, if neither max_number nor
-        max_duration is specified, if either is not positive, or if a rate is
-        missing for a mode that requires one.
-    """
-    def __init__(
-        self,
-        generator: RequestGenerator,
-        worker: Backend,
-        mode: LoadGenerationMode = "synchronous",
-        rate: Optional[float] = None,
-        max_number: Optional[int] = None,
-        max_duration: Optional[float] = None,
-    ):
-        logger.info(
-            "Scheduler initialized with params: generator={}, worker={}, mode={}, "
-            "rate={}, max_number={}, max_duration={}",
-            generator,
-            worker,
-            mode,
-            rate,
-            max_number,
-            max_duration,
-        )
-        if mode not in get_args(LoadGenerationMode):
-            err = ValueError(
-                f"{mode} is not a valid Load Generation Mode. "
-                f"Valid options are {get_args(LoadGenerationMode)}"
-            )
-            logger.error(err)
-            raise err
-        # a limit of 0 is falsy and is therefore reported as "not specified"
-        if not max_number and not max_duration:
-            err = ValueError("Either max_number or max_duration must be specified")
-            logger.error(err)
-            raise err
-        if max_number and max_number <= 0:
-            err = ValueError(f"max_number must be > 0, given: {max_number}")
-            logger.error(err)
-            raise err
-        if max_duration and max_duration <= 0:
-            err = ValueError(f"max_duration must be > 0, given: {max_duration}")
-            logger.error(err)
-            raise err
-        if mode in ["constant", "poisson"] and not rate:
-            err = ValueError(f"Rate must be > 0 for mode: {mode}. Given: {rate}")
-            logger.error(err)
-            raise err
-        self._generator = generator
-        self._worker = worker
-        self._mode = mode
-        self._rate = rate
-        self._max_number = max_number
-        self._max_duration = max_duration
-        self._load_generator = LoadGenerator(mode, rate)
-    @property
-    def generator(self) -> RequestGenerator:
-        """
-        The request generator that produces text generation requests.
-        :return: The request generator instance.
-        :rtype: RequestGenerator
-        """
-        return self._generator
-    @property
-    def worker(self) -> Backend:
-        """
-        The backend worker that processes the requests.
-        :return: The backend worker instance.
-        :rtype: Backend
-        """
-        return self._worker
-    @property
-    def mode(self) -> LoadGenerationMode:
-        """
-        The mode of load generation (e.g., synchronous, constant, poisson).
-        :return: The load generation mode.
-        :rtype: LoadGenerationMode
-        """
-        return self._mode
-    @property
-    def rate(self) -> Optional[float]:
-        """
-        The rate at which requests are generated, if applicable.
-        :return: The rate of request generation.
-        :rtype: Optional[float]
-        """
-        return self._rate
-    @property
-    def max_number(self) -> Optional[int]:
-        """
-        Maximum number of requests to be processed.
-        :return: The maximum number of requests.
-        :rtype: Optional[int]
-        """
-        return self._max_number
-    @property
-    def max_duration(self) -> Optional[float]:
-        """
-        Maximum duration in seconds for which requests should be processed.
-        :return: The maximum duration in seconds.
-        :rtype: Optional[float]
-        """
-        return self._max_duration
-    @property
-    def load_generator(self) -> LoadGenerator:
-        """
-        The load generator responsible for generating load based on mode and rate.
-        :return: The load generator instance.
-        :rtype: LoadGenerator
-        """
-        return self._load_generator
-    @property
-    def benchmark_mode(self) -> Literal["asynchronous", "synchronous", "throughput"]:
-        """
-        The report mode for the scheduler. Every load generation mode other than
-        "synchronous" and "throughput" is reported as "asynchronous".
-        :return: The report mode.
-        :rtype: Literal["asynchronous", "synchronous", "throughput"]
-        """
-        if self._mode == "synchronous":
-            return "synchronous"
-        if self._mode == "throughput":
-            return "throughput"
-        return "asynchronous"
-    async def run(self) -> AsyncGenerator[SchedulerResult, None]:
-        """
-        Run the scheduler to process requests based on the configured mode, rate,
-        maximum number, and maximum duration.
-        Yields an initial result for progress tracking, then one result per
-        finished request, then a final result with completed=True. Progress is
-        counted in requests when max_number is set, otherwise in whole seconds
-        elapsed out of max_duration.
-        :yield: The result of each task executed by the scheduler.
-        :rtype: AsyncGenerator[SchedulerResult, None]
-        """
-        logger.info("Starting Scheduler run")
-        benchmark = TextGenerationBenchmark(mode=self.benchmark_mode, rate=self.rate)
-        start_time = time.time()
-        end_time = start_time + self.max_duration if self.max_duration else math.inf
-        max_number = float(self.max_number) if self.max_number else math.inf
-        runner = self._run_sync if self._mode == "synchronous" else self._run_async
-        count_total = (
-            self.max_number
-            if self.max_number
-            else round(self.max_duration)
-            if self.max_duration
-            else 0
-        )
-        # yield initial result for progress tracking
-        yield SchedulerResult(
-            completed=False,
-            count_total=count_total,
-            count_completed=0,
-            benchmark=benchmark,
-        )
-        run_count = 0
-        async for res in runner(benchmark, end_time, max_number):
-            run_count += 1
-            count_completed = (
-                min(run_count, self.max_number)
-                if self.max_number
-                else round(time.time() - start_time)
-                if self.max_duration
-                else 0
-            )
-            yield SchedulerResult(
-                completed=False,
-                count_total=count_total,
-                count_completed=count_completed,
-                benchmark=benchmark,
-                current_result=res,
-            )
-        logger.info("Scheduler run completed")
-        yield SchedulerResult(
-            completed=True,
-            count_total=count_total,
-            count_completed=(
-                benchmark.request_count + benchmark.error_count
-                if self.max_number
-                else round(time.time() - start_time)
-                if self.max_duration
-                else 0
-            ),
-            benchmark=benchmark,
-        )
-    async def _run_sync(
-        self, benchmark: TextGenerationBenchmark, end_time: float, max_number: float
-    ) -> AsyncGenerator[Union[TextGenerationResult, TextGenerationError], None]:
-        """
-        Submit requests one at a time, awaiting each before starting the next.
-        :param benchmark: The benchmark that request starts/completions are
-            recorded on.
-        :param end_time: Timestamp after which no further requests are started.
-        :param max_number: Maximum number of requests to start (may be math.inf).
-        :yield: Each result or error; requests that timed out yield nothing.
-        """
-        for index, (request, submit_at) in enumerate(
-            zip(self.generator, self.load_generator.times())
-        ):
-            if index >= max_number or time.time() >= end_time:
-                break
-            logger.debug(
-                "Running synchronous request={} at submit_at={}",
-                request,
-                submit_at,
-            )
-            benchmark.request_started()
-            result = await self._submit_task_coroutine(request, submit_at, end_time)
-            if result is not None:
-                benchmark.request_completed(result)
-                logger.debug("Request completed with output: {}", result)
-                yield result
-    async def _run_async(
-        self, benchmark: TextGenerationBenchmark, end_time: float, max_number: float
-    ) -> AsyncGenerator[Union[TextGenerationResult, TextGenerationError], None]:
-        """
-        Submit requests concurrently as asyncio tasks, keeping at most
-        settings.max_concurrency of them in flight, then yield their results in
-        completion order once every request has been scheduled.
-        :param benchmark: The benchmark that request starts/completions are
-            recorded on.
-        :param end_time: Timestamp after which no further requests are started.
-        :param max_number: Maximum number of requests to start (may be math.inf).
-        :yield: Each result or error; requests that timed out yield nothing.
-        """
-        tasks = []
-        completed = 0
-        def _completed(_task: asyncio.Task) -> None:
-            nonlocal completed
-            completed += 1
-            # result() raises CancelledError for a cancelled task, and a cancelled
-            # task has nothing to record on the benchmark
-            if _task.cancelled():
-                return
-            _res = _task.result()
-            if _res:
-                benchmark.request_completed(_res)
-                logger.debug("Request completed: {}", _res)
-        for index, (request, submit_at) in enumerate(
-            zip(self.generator, self.load_generator.times())
-        ):
-            # `index` tasks have been created so far, so `index - completed` are
-            # still in flight; wait for a free slot before creating another.
-            # (Counting the not-yet-created task here would cap concurrency one
-            # below the limit and never proceed when the limit is 1.)
-            while (index - completed) >= settings.max_concurrency:
-                await asyncio.sleep(0.1)
-            if index >= max_number or time.time() >= end_time or submit_at >= end_time:
-                break
-            logger.debug(
-                "Running asynchronous request={} at submit_at={}",
-                request,
-                submit_at,
-            )
-            benchmark.request_started()
-            task = asyncio.create_task(
-                self._submit_task_coroutine(request, submit_at, end_time)
-            )
-            task.add_done_callback(_completed)
-            tasks.append(task)
-            # release control to the event loop for other tasks
-            await asyncio.sleep(0.001)
-        for compl_task in asyncio.as_completed(tasks):
-            task_res = await compl_task
-            if task_res is not None:
-                yield task_res
-    async def _submit_task_coroutine(
-        self, request: TextGenerationRequest, submit_at: float, end_time: float
-    ) -> Optional[Union[TextGenerationResult, TextGenerationError]]:
-        """
-        Wait until submit_at, then submit the request to the worker, bounded by
-        the time remaining until end_time.
-        :param request: The request to submit.
-        :param submit_at: Timestamp at which to submit; values in the past
-            submit immediately.
-        :param end_time: Timestamp at which the request is abandoned; math.inf
-            disables the timeout.
-        :return: The worker's result, a TextGenerationError if the submission
-            raised, or None if the request timed out or was scheduled after
-            end_time.
-        """
-        try:
-            if submit_at > end_time:
-                logger.info(
-                    "Request {} submission time {} is greater than end time {}",
-                    request,
-                    submit_at,
-                    end_time,
-                )
-                raise asyncio.TimeoutError(
-                    f"Request submission time {submit_at} "
-                    f"is greater than end time {end_time}"
-                )
-            if submit_at > time.time():
-                await asyncio.sleep(submit_at - time.time())
-            timeout = (
-                end_time - time.time() if end_time and end_time < math.inf else None
-            )
-            return await asyncio.wait_for(self._worker.submit(request), timeout=timeout)
-        except asyncio.TimeoutError as exc:
-            logger.info("Request {} timed out: {}", request, exc)
-            return None
-        except Exception as exc:  # noqa: BLE001
-            # any other failure is reported as an error result rather than raised
-            logger.warning("Request {} failed: {}", request, exc)
-            return TextGenerationError(request=request, message=str(exc))

guidellm/scheduler/load_generator.py DELETED Viewed

@@ -1,196 +0,0 @@
-import time
-from typing import Generator, Literal, Optional, get_args
-import numpy as np
-from loguru import logger
-__all__ = ["LoadGenerationMode", "LoadGenerator"]
-LoadGenerationMode = Literal["synchronous", "constant", "poisson", "throughput"]
-class LoadGenerator:
-    """
-    Load Generator class that generates timestamps for load generation.
-    This class supports multiple load generation modes: "constant", "poisson",
-    "throughput", and "synchronous". Each mode has its own method for generating
-    timestamps based on the rate provided during initialization.
-    :param mode: The mode of load generation. Valid options are "constant",
-        "poisson", "throughput", and "synchronous".
-    :type mode: LoadGenerationMode
-    :param rate: The rate at which to generate timestamps. This value is
-        interpreted differently depending on the mode.
-    :type rate: Optional[float]
-    :raises ValueError: If an invalid mode is provided, or if the rate is missing
-        or not positive for a mode that requires one.
-    """
-    def __init__(self, mode: LoadGenerationMode, rate: Optional[float] = None):
-        """
-        Initialize the Load Generator with the mode and rate.
-        :param mode: The mode of load generation ("constant", "poisson", "throughput",
-            or "synchronous").
-        :type mode: LoadGenerationMode
-        :param rate: The rate at which to generate timestamps. In the "constant"
-            mode, this represents the frequency of events. In the "poisson" mode,
-            it represents the average frequency.
-        :type rate: Optional[float]
-        """
-        if mode not in get_args(LoadGenerationMode):
-            error = ValueError(
-                f"{mode} is not a valid Load Generation Mode. "
-                f"Valid options are {get_args(LoadGenerationMode)}"
-            )
-            logger.error(error)
-            raise error
-        if mode not in ["synchronous", "throughput"] and (rate is None or rate <= 0):
-            error = ValueError(f"Rate must be > 0 for mode: {mode}. Given: {rate}")
-            logger.error(error)
-            raise error
-        self._mode = mode
-        self._rate = rate
-        logger.debug(
-            "Initialized LoadGenerator with mode: {mode}, rate: {rate}",
-            mode=mode,
-            rate=rate,
-        )
-    @property
-    def mode(self) -> LoadGenerationMode:
-        """
-        Get the mode of load generation.
-        :return: The mode of load generation.
-        :rtype: LoadGenerationMode
-        """
-        return self._mode
-    @property
-    def rate(self) -> Optional[float]:
-        """
-        Get the rate of load generation.
-        :return: The rate of load generation.
-        :rtype: Optional[float]
-        """
-        return self._rate
-    def times(self) -> Generator[float, None, None]:
-        """
-        Generate timestamps for load generation based on the selected mode.
-        :return: A generator that yields timestamps at which each load
-            should be initiated.
-        :rtype: Generator[float, None, None]
-        :raises ValueError: If the mode is invalid.
-        """
-        logger.debug(f"Generating timestamps using mode: {self._mode}")
-        if self._mode == "throughput":
-            yield from self.throughput_times()
-        elif self._mode == "constant":
-            yield from self.constant_times()
-        elif self._mode == "poisson":
-            yield from self.poisson_times()
-        elif self._mode == "synchronous":
-            yield from self.synchronous_times()
-        else:
-            logger.error(f"Invalid mode encountered: {self._mode}")
-            raise ValueError(f"Invalid mode: {self._mode}")
-    def synchronous_times(self) -> Generator[float, None, None]:
-        """
-        Generate invalid timestamps for the "synchronous" mode.
-        :return: A generator that yields a constant invalid timestamp (-1.0).
-        :rtype: Generator[float, None, None]
-        """
-        logger.debug("Generating invalid timestamps for synchronous mode")
-        while True:
-            yield -1.0
-    def throughput_times(self) -> Generator[float, None, None]:
-        """
-        Generate timestamps at the maximum rate possible, returning the current time.
-        :return: A generator that yields the current time in seconds.
-        :rtype: Generator[float, None, None]
-        """
-        logger.debug("Generating timestamps at throughput rate")
-        while True:
-            yield time.time()
-    def constant_times(self) -> Generator[float, None, None]:
-        """
-        Generate timestamps at a constant rate based on the specified rate.
-        :return: A generator that yields timestamps incremented by 1/rate seconds.
-        :rtype: Generator[float, None, None]
-        :raises ValueError: If the rate is missing or not positive.
-        """
-        logger.debug("Generating constant rate timestamps with rate: {}", self._rate)
-        if self._rate is None or self._rate <= 0:
-            # format the message here: exceptions do not interpolate "{}" args
-            raise ValueError(
-                f"Rate must be > 0 for constant mode, given: {self._rate}"
-            )
-        start_time = time.time()
-        time_increment = 1.0 / self._rate
-        counter = 0
-        while True:
-            # multiply rather than accumulate so float error does not build up
-            yield_time = start_time + time_increment * counter
-            logger.debug(f"Yielding timestamp: {yield_time}")
-            yield yield_time
-            counter += 1
-    def poisson_times(self) -> Generator[float, None, None]:
-        """
-        Generate timestamps based on a Poisson process, where the number
-        of requests to be sent per second is drawn from a Poisson distribution.
-        The inter arrival time between requests is exponentially distributed.
-        :return: A generator that yields timestamps based on a Poisson distribution.
-        :rtype: Generator[float, None, None]
-        :raises ValueError: If the rate is missing or not positive.
-        """
-        logger.debug("Generating Poisson rate timestamps with rate: {}", self._rate)
-        if self._rate is None or self._rate <= 0:
-            # format the message here: exceptions do not interpolate "{}" args
-            raise ValueError(f"Rate must be > 0 for poisson mode, given: {self._rate}")
-        time_tracker = time.time()
-        rng = np.random.default_rng()
-        time_increment = 1.0
-        while True:
-            num_requests = rng.poisson(self._rate)
-            if num_requests == 0:
-                # NOTE(review): a timestamp is still emitted at the end of a
-                # window that drew zero requests, which raises the effective
-                # rate above the configured one at low rates - confirm intent
-                yield time_tracker + time_increment
-            else:
-                inter_arrival_times = rng.exponential(1.0 / self._rate, num_requests)
-                logger.debug(
-                    "Calculated new inter-arrival times for poisson process: {}",
-                    inter_arrival_times,
-                )
-                arrival_time_tracker = time_tracker
-                for arrival_time in inter_arrival_times:
-                    arrival_time_tracker += arrival_time
-                    # arrivals that spill past the current window are dropped
-                    if arrival_time_tracker > time_tracker + time_increment:
-                        logger.debug(
-                            "Arrival time tracker: {} is greater than current time",
-                            arrival_time_tracker,
-                        )
-                        break
-                    yield arrival_time_tracker
-            time_tracker += time_increment  # Move on to the next time period

guidellm/utils/injector.py DELETED Viewed

@@ -1,70 +0,0 @@
-from pathlib import Path
-from typing import Union
-from pydantic import BaseModel
-from guidellm.config import settings
-from guidellm.utils.text import load_text
-__all__ = ["create_report", "inject_data"]
-def create_report(model: BaseModel, output_path: Union[str, Path]) -> Path:
-    """
-    Render the HTML report for a model and write it to disk.
-    The template named by settings.report_generation.source is loaded with
-    load_text, the serialized model is injected into it with inject_data, and
-    the result is written out, creating missing parent directories first.
-    :param model: the model to serialize and inject
-    :type model: BaseModel
-    :param output_path: destination file or directory; a path without a file
-        suffix is treated as a directory and the report is saved as
-        "report.html" inside of it
-    :type output_path: Union[str, Path]
-    :return: the path to the saved report
-    :rtype: Path
-    """
-    destination = output_path if isinstance(output_path, Path) else Path(output_path)
-    if not destination.suffix:
-        # no suffix: treat as a directory and fall back to the default file name
-        destination = destination / "report.html"
-    report_settings = settings.report_generation
-    rendered = inject_data(
-        model,
-        load_text(report_settings.source),
-        report_settings.report_html_match,
-        report_settings.report_html_placeholder,
-    )
-    destination.parent.mkdir(parents=True, exist_ok=True)
-    destination.write_text(rendered)
-    return destination
-def inject_data(
-    model: BaseModel,
-    html: str,
-    match: str,
-    placeholder: str,
-) -> str:
-    """
-    Embed the JSON form of the model into an HTML document.
-    The placeholder inside the match string is filled with the serialized
-    model, and every occurrence of the match string in the html is then
-    replaced by that filled-in version.
-    :param model: the model to serialize and inject
-    :type model: BaseModel
-    :param html: the html to inject the data into
-    :type html: str
-    :param match: the string to look for in the html; it contains the placeholder
-    :type match: str
-    :param placeholder: the text within match that is replaced by the model data
-    :type placeholder: str
-    :return: the html with the model data injected
-    :rtype: str
-    """
-    filled_match = match.replace(placeholder, model.json())
-    return html.replace(match, filled_match)

guidellm 0.1.0__py3-none-any.whl → 0.2.0rc20250418__py3-none-any.whl

Potentially problematic release.

guidellm 0.1.0__py3-none-any.whl → 0.2.0rc20250418__py3-none-any.whl