guidellm 0.1.0__py3-none-any.whl → 0.2.0rc20250418__py3-none-any.whl
This diff represents the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the registry.
Potentially problematic release.
This version of guidellm might be problematic.
- guidellm/__init__.py +38 -6
- guidellm/__main__.py +294 -0
- guidellm/backend/__init__.py +19 -6
- guidellm/backend/backend.py +238 -0
- guidellm/backend/openai.py +532 -122
- guidellm/backend/response.py +132 -0
- guidellm/benchmark/__init__.py +73 -0
- guidellm/benchmark/aggregator.py +760 -0
- guidellm/benchmark/benchmark.py +838 -0
- guidellm/benchmark/benchmarker.py +334 -0
- guidellm/benchmark/entrypoints.py +141 -0
- guidellm/benchmark/output.py +946 -0
- guidellm/benchmark/profile.py +409 -0
- guidellm/benchmark/progress.py +720 -0
- guidellm/config.py +34 -56
- guidellm/data/__init__.py +4 -0
- guidellm/data/prideandprejudice.txt.gz +0 -0
- guidellm/dataset/__init__.py +22 -0
- guidellm/dataset/creator.py +213 -0
- guidellm/dataset/entrypoints.py +42 -0
- guidellm/dataset/file.py +90 -0
- guidellm/dataset/hf_datasets.py +62 -0
- guidellm/dataset/in_memory.py +132 -0
- guidellm/dataset/synthetic.py +262 -0
- guidellm/objects/__init__.py +18 -0
- guidellm/objects/pydantic.py +60 -0
- guidellm/objects/statistics.py +947 -0
- guidellm/request/__init__.py +12 -10
- guidellm/request/loader.py +281 -0
- guidellm/request/request.py +79 -0
- guidellm/scheduler/__init__.py +51 -3
- guidellm/scheduler/result.py +137 -0
- guidellm/scheduler/scheduler.py +382 -0
- guidellm/scheduler/strategy.py +493 -0
- guidellm/scheduler/types.py +7 -0
- guidellm/scheduler/worker.py +511 -0
- guidellm/utils/__init__.py +16 -29
- guidellm/utils/colors.py +8 -0
- guidellm/utils/hf_transformers.py +35 -0
- guidellm/utils/random.py +43 -0
- guidellm/utils/text.py +118 -357
- {guidellm-0.1.0.dist-info → guidellm-0.2.0rc20250418.dist-info}/METADATA +96 -79
- guidellm-0.2.0rc20250418.dist-info/RECORD +48 -0
- {guidellm-0.1.0.dist-info → guidellm-0.2.0rc20250418.dist-info}/WHEEL +1 -1
- guidellm-0.2.0rc20250418.dist-info/entry_points.txt +2 -0
- guidellm/backend/base.py +0 -320
- guidellm/core/__init__.py +0 -24
- guidellm/core/distribution.py +0 -190
- guidellm/core/report.py +0 -321
- guidellm/core/request.py +0 -44
- guidellm/core/result.py +0 -545
- guidellm/core/serializable.py +0 -169
- guidellm/executor/__init__.py +0 -10
- guidellm/executor/base.py +0 -213
- guidellm/executor/profile_generator.py +0 -343
- guidellm/main.py +0 -336
- guidellm/request/base.py +0 -194
- guidellm/request/emulated.py +0 -391
- guidellm/request/file.py +0 -76
- guidellm/request/transformers.py +0 -100
- guidellm/scheduler/base.py +0 -374
- guidellm/scheduler/load_generator.py +0 -196
- guidellm/utils/injector.py +0 -70
- guidellm/utils/progress.py +0 -196
- guidellm/utils/transformers.py +0 -151
- guidellm-0.1.0.dist-info/RECORD +0 -35
- guidellm-0.1.0.dist-info/entry_points.txt +0 -3
- {guidellm-0.1.0.dist-info → guidellm-0.2.0rc20250418.dist-info/licenses}/LICENSE +0 -0
- {guidellm-0.1.0.dist-info → guidellm-0.2.0rc20250418.dist-info}/top_level.txt +0 -0
guidellm/benchmark/benchmarker.py
@@ -0,0 +1,334 @@
+import time
+import uuid
+from abc import ABC, abstractmethod
+from collections.abc import AsyncGenerator, Iterable
+from pathlib import Path
+from typing import (
+    Any,
+    Generic,
+    Literal,
+    Optional,
+    Union,
+)
+
+from pydantic import Field
+from transformers import PreTrainedTokenizerBase  # type: ignore  # noqa: PGH003
+
+from guidellm.backend import Backend, ResponseSummary
+from guidellm.benchmark.aggregator import (
+    AggregatorT,
+    BenchmarkT,
+    GenerativeBenchmarkAggregator,
+)
+from guidellm.benchmark.benchmark import BenchmarkArgs, GenerativeBenchmark
+from guidellm.benchmark.profile import Profile
+from guidellm.objects import StandardBaseModel
+from guidellm.request import (
+    GenerationRequest,
+    GenerativeRequestLoaderDescription,
+    RequestLoaderDescription,
+)
+from guidellm.scheduler import (
+    GenerativeRequestsWorker,
+    RequestsWorker,
+    RequestT,
+    ResponseT,
+    Scheduler,
+    SchedulerRequestResult,
+    SchedulingStrategy,
+)
+
+__all__ = ["Benchmarker", "BenchmarkerResult", "GenerativeBenchmarker"]
+
+
+class BenchmarkerResult(
+    StandardBaseModel, Generic[AggregatorT, BenchmarkT, RequestT, ResponseT]
+):
+    type_: Literal[
+        "run_start",
+        "run_complete",
+        "scheduler_start",
+        "scheduler_update",
+        "scheduler_complete",
+        "benchmark_compiled",
+    ]
+    start_time: float
+    end_number: int
+    profile: Profile
+    current_index: int
+    current_strategy: Optional[SchedulingStrategy] = None
+    current_aggregator: Optional[AggregatorT] = None
+    current_benchmark: Optional[BenchmarkT] = None
+    current_result: Optional[SchedulerRequestResult[RequestT, ResponseT]] = None
+
+
+class BenchmarkerStrategyLimits(StandardBaseModel):
+    requests_loader_size: Optional[int] = Field(
+        description="Size of the request loader.",
+    )
+    max_number_per_strategy: Optional[int] = Field(
+        description="Maximum number of requests to process per strategy.",
+        ge=0,
+    )
+    max_duration_per_strategy: Optional[float] = Field(
+        description="Maximum duration (in seconds) to process requests per strategy.",
+        ge=0,
+    )
+    warmup_percent_per_strategy: Optional[float] = Field(
+        description="Percentage of requests to use for warmup.",
+        ge=0,
+        le=1,
+    )
+    cooldown_percent_per_strategy: Optional[float] = Field(
+        description="Percentage of requests to use for cooldown.",
+        ge=0,
+        le=1,
+    )
+
+    @property
+    def max_number(self) -> Optional[int]:
+        if self.max_number_per_strategy is not None:
+            return self.max_number_per_strategy
+
+        if self.requests_loader_size is not None:
+            return self.requests_loader_size
+
+        return None
+
+    @property
+    def max_duration(self) -> Optional[float]:
+        return self.max_duration_per_strategy
+
+    @property
+    def warmup_number(self) -> Optional[int]:
+        if self.warmup_percent_per_strategy is None or self.max_number is None:
+            return None
+
+        return int(self.warmup_percent_per_strategy * self.max_number)
+
+    @property
+    def warmup_duration(self) -> Optional[float]:
+        if self.warmup_percent_per_strategy is None or self.max_duration is None:
+            return None
+
+        return self.warmup_percent_per_strategy * self.max_duration
+
+    @property
+    def cooldown_number(self) -> Optional[int]:
+        if self.cooldown_percent_per_strategy is None or self.max_number is None:
+            return None
+
+        return int(self.cooldown_percent_per_strategy * self.max_number)
+
+    @property
+    def cooldown_duration(self) -> Optional[float]:
+        if self.cooldown_percent_per_strategy is None or self.max_duration is None:
+            return None
+
+        return self.cooldown_percent_per_strategy * self.max_duration
+
+
+class Benchmarker(Generic[AggregatorT, BenchmarkT, RequestT, ResponseT], ABC):
+    def __init__(
+        self,
+        worker: RequestsWorker[RequestT, ResponseT],
+        request_loader: Iterable[RequestT],
+        requests_loader_description: RequestLoaderDescription,
+        benchmark_save_extras: Optional[dict[str, Any]] = None,
+    ):
+        self.worker = worker
+        self.scheduler: Scheduler[RequestT, ResponseT] = Scheduler(
+            worker=worker, request_loader=request_loader
+        )
+        self.requests_loader_description = requests_loader_description
+        self.benchmark_save_extras = benchmark_save_extras
+
+    async def run(
+        self,
+        profile: Profile,
+        max_number_per_strategy: Optional[int],
+        max_duration_per_strategy: Optional[float],
+        warmup_percent_per_strategy: Optional[float],
+        cooldown_percent_per_strategy: Optional[float],
+    ) -> AsyncGenerator[
+        BenchmarkerResult[AggregatorT, BenchmarkT, RequestT, ResponseT], None
+    ]:
+        try:
+            requests_loader_size = len(self.scheduler.request_loader)  # type: ignore[arg-type]
+        except Exception:  # noqa: BLE001
+            requests_loader_size = None
+
+        strategy_limits = BenchmarkerStrategyLimits(
+            requests_loader_size=requests_loader_size,
+            max_number_per_strategy=max_number_per_strategy,
+            max_duration_per_strategy=max_duration_per_strategy,
+            warmup_percent_per_strategy=warmup_percent_per_strategy,
+            cooldown_percent_per_strategy=cooldown_percent_per_strategy,
+        )
+        start_time = time.time()
+        end_number = len(profile.strategy_types)
+        current_index = -1
+        run_id = str(uuid.uuid4())
+
+        yield BenchmarkerResult(
+            type_="run_start",
+            start_time=start_time,
+            end_number=end_number,
+            profile=profile,
+            current_index=current_index,
+            current_strategy=None,
+            current_aggregator=None,
+            current_benchmark=None,
+            current_result=None,
+        )
+
+        while scheduling_strategy := profile.next_strategy():
+            current_index += 1
+            aggregator = self.create_benchmark_aggregator(
+                run_id=run_id,
+                profile=profile,
+                strategy_index=current_index,
+                strategy=scheduling_strategy,
+                limits=strategy_limits,
+            )
+
+            async for result in self.scheduler.run(
+                scheduling_strategy=scheduling_strategy,
+                max_number=max_number_per_strategy,
+                max_duration=max_duration_per_strategy,
+            ):
+                if result.type_ == "run_start":
+                    yield BenchmarkerResult(
+                        type_="scheduler_start",
+                        start_time=start_time,
+                        end_number=end_number,
+                        profile=profile,
+                        current_index=current_index,
+                        current_strategy=scheduling_strategy,
+                        current_aggregator=aggregator,
+                        current_benchmark=None,
+                        current_result=None,
+                    )
+                elif result.type_ == "run_complete":
+                    yield BenchmarkerResult(
+                        type_="scheduler_complete",
+                        start_time=start_time,
+                        end_number=end_number,
+                        profile=profile,
+                        current_index=current_index,
+                        current_strategy=scheduling_strategy,
+                        current_aggregator=aggregator,
+                        current_benchmark=None,
+                        current_result=None,
+                    )
+                elif isinstance(result, SchedulerRequestResult):
+                    aggregator.add_result(result)
+
+                    yield BenchmarkerResult(
+                        type_="scheduler_update",
+                        start_time=start_time,
+                        end_number=end_number,
+                        profile=profile,
+                        current_index=current_index,
+                        current_strategy=scheduling_strategy,
+                        current_aggregator=aggregator,
+                        current_benchmark=None,
+                        current_result=result,
+                    )
+                else:
+                    raise ValueError(f"Unexpected result type: {type(result)}")
+
+            benchmark: BenchmarkT = aggregator.compile()
+            profile.completed_strategy(
+                average_rate=benchmark.metrics.requests_per_second.successful.mean,
+                average_concurrency=benchmark.metrics.request_concurrency.successful.mean,
+            )
+
+            yield BenchmarkerResult(
+                type_="benchmark_compiled",
+                start_time=start_time,
+                end_number=end_number,
+                profile=profile,
+                current_index=current_index,
+                current_strategy=scheduling_strategy,
+                current_aggregator=None,
+                current_benchmark=benchmark,
+                current_result=None,
+            )
+
+        yield BenchmarkerResult(
+            type_="run_complete",
+            start_time=start_time,
+            end_number=end_number,
+            profile=profile,
+            current_index=current_index,
+            current_strategy=None,
+            current_aggregator=None,
+            current_benchmark=None,
+            current_result=None,
+        )
+
+    @abstractmethod
+    def create_benchmark_aggregator(
+        self,
+        run_id: str,
+        profile: Profile,
+        strategy_index: int,
+        strategy: SchedulingStrategy,
+        limits: BenchmarkerStrategyLimits,
+    ) -> AggregatorT: ...
+
+
+class GenerativeBenchmarker(
+    Benchmarker[
+        GenerativeBenchmarkAggregator,
+        GenerativeBenchmark,
+        GenerationRequest,
+        ResponseSummary,
+    ],
+):
+    def __init__(
+        self,
+        backend: Backend,
+        request_loader: Iterable[GenerationRequest],
+        request_loader_description: GenerativeRequestLoaderDescription,
+        benchmark_save_extras: Optional[dict[str, Any]] = None,
+        processor: Optional[Union[str, Path, PreTrainedTokenizerBase]] = None,
+        processor_args: Optional[dict[str, Any]] = None,
+    ):
+        super().__init__(
+            worker=GenerativeRequestsWorker(backend),
+            request_loader=request_loader,
+            requests_loader_description=request_loader_description,
+            benchmark_save_extras=benchmark_save_extras,
+        )
+        self.processor = processor
+        self.processor_args = processor_args
+
+    def create_benchmark_aggregator(
+        self,
+        run_id: str,
+        profile: Profile,
+        strategy_index: int,
+        strategy: SchedulingStrategy,
+        limits: BenchmarkerStrategyLimits,
+    ) -> GenerativeBenchmarkAggregator:
+        return GenerativeBenchmarkAggregator(
+            run_id=run_id,
+            args=BenchmarkArgs(
+                profile=profile,
+                strategy_index=strategy_index,
+                strategy=strategy,
+                max_number=limits.max_number,
+                max_duration=limits.max_duration,
+                warmup_number=limits.warmup_number,
+                warmup_duration=limits.warmup_duration,
+                cooldown_number=limits.cooldown_number,
+                cooldown_duration=limits.cooldown_duration,
+            ),
+            worker_description=self.worker.description,  # type: ignore[arg-type]
+            request_loader_description=self.requests_loader_description,  # type: ignore[arg-type]
+            extras=self.benchmark_save_extras or {},
+            processor=self.processor,
+            processor_args=self.processor_args,
+        )
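
To sanity-check how the per-strategy limits above compose, here is a minimal sketch; it assumes the 0.2.0rc20250418 wheel is installed, and the numeric values are purely illustrative:

from guidellm.benchmark.benchmarker import BenchmarkerStrategyLimits

limits = BenchmarkerStrategyLimits(
    requests_loader_size=1000,
    max_number_per_strategy=None,  # max_number falls back to the loader size
    max_duration_per_strategy=120.0,
    warmup_percent_per_strategy=0.1,
    cooldown_percent_per_strategy=0.05,
)
print(limits.max_number)       # 1000, taken from requests_loader_size
print(limits.warmup_number)    # 100 = int(0.1 * 1000)
print(limits.warmup_duration)  # 12.0 = 0.1 * 120.0
print(limits.cooldown_number)  # 50 = int(0.05 * 1000)

Note that the warmup and cooldown percentages apply to both the request-count and duration budgets, whichever ends up binding for a given strategy.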
guidellm/benchmark/entrypoints.py
@@ -0,0 +1,141 @@
+from collections.abc import Iterable
+from pathlib import Path
+from typing import Any, Literal, Optional, Union
+
+from datasets import Dataset, DatasetDict, IterableDataset, IterableDatasetDict
+from transformers import (  # type: ignore[import]
+    PreTrainedTokenizerBase,
+)
+
+from guidellm.backend import Backend, BackendType
+from guidellm.benchmark.benchmarker import GenerativeBenchmarker
+from guidellm.benchmark.output import (
+    GenerativeBenchmarksConsole,
+    GenerativeBenchmarksReport,
+)
+from guidellm.benchmark.profile import ProfileType, create_profile
+from guidellm.benchmark.progress import GenerativeTextBenchmarkerProgressDisplay
+from guidellm.request import GenerativeRequestLoader
+from guidellm.scheduler import StrategyType
+
+
+async def benchmark_generative_text(
+    target: str,
+    backend_type: BackendType,
+    backend_args: Optional[dict[str, Any]],
+    model: Optional[str],
+    processor: Optional[Optional[Union[str, Path, PreTrainedTokenizerBase]]],
+    processor_args: Optional[dict[str, Any]],
+    data: Union[
+        str,
+        Path,
+        Iterable[Union[str, dict[str, Any]]],
+        Dataset,
+        DatasetDict,
+        IterableDataset,
+        IterableDatasetDict,
+    ],
+    data_args: Optional[dict[str, Any]],
+    data_sampler: Optional[Literal["random"]],
+    rate_type: Union[StrategyType, ProfileType],
+    rate: Optional[Union[int, float, list[Union[int, float]]]],
+    max_seconds: Optional[float],
+    max_requests: Optional[int],
+    warmup_percent: Optional[float],
+    cooldown_percent: Optional[float],
+    show_progress: bool,
+    show_progress_scheduler_stats: bool,
+    output_console: bool,
+    output_path: Optional[Union[str, Path]],
+    output_extras: Optional[dict[str, Any]],
+    output_sampling: Optional[int],
+    random_seed: int,
+) -> tuple[GenerativeBenchmarksReport, Optional[Path]]:
+    console = GenerativeBenchmarksConsole(enabled=show_progress)
+    console.print_line("Creating backend...")
+    backend = Backend.create(
+        backend_type, target=target, model=model, **(backend_args or {})
+    )
+    await backend.validate()
+    console.print_line(
+        f"Backend {backend_type} connected to {target} for model {backend.model}."
+    )
+
+    if processor is None:
+        processor = backend.model
+
+    console.print_line("Creating request loader...")
+    request_loader = GenerativeRequestLoader(
+        data=data,
+        data_args=data_args,
+        processor=processor,
+        processor_args=processor_args,
+        shuffle=data_sampler == "random",
+        iter_type=(
+            "finite"  # assume a finite dataset is our limit
+            if max_requests is None and max_seconds is None
+            else "infinite"  # default to infinite so we don't run out of data
+        ),
+        random_seed=random_seed,
+    )
+    unique_requests = request_loader.num_unique_items(raise_err=False)
+    console.print_line(
+        f"Created loader with {unique_requests} unique requests from {data}.\n\n"
+        if unique_requests > 0
+        else f"Created loader with unknown number unique requests from {data}.\n\n"
+    )
+
+    profile = create_profile(rate_type=rate_type, rate=rate)
+    benchmarker = GenerativeBenchmarker(
+        backend=backend,
+        request_loader=request_loader,
+        request_loader_description=request_loader.description,
+        benchmark_save_extras=output_extras,
+        processor=processor,
+        processor_args=processor_args,
+    )
+    progress = (
+        GenerativeTextBenchmarkerProgressDisplay(
+            display_scheduler_stats=show_progress_scheduler_stats
+        )
+        if show_progress
+        else None
+    )
+    report = GenerativeBenchmarksReport()
+
+    async for result in benchmarker.run(
+        profile=profile,
+        max_number_per_strategy=max_requests,
+        max_duration_per_strategy=max_seconds,
+        warmup_percent_per_strategy=warmup_percent,
+        cooldown_percent_per_strategy=cooldown_percent,
+    ):
+        if progress:
+            progress.update(result)
+
+        if result.type_ == "benchmark_compiled":
+            if result.current_benchmark is None:
+                raise ValueError("Current benchmark is None")
+            report.benchmarks.append(
+                result.current_benchmark.set_sample_size(output_sampling)
+            )
+
+    if output_console:
+        orig_enabled = console.enabled
+        console.enabled = True
+        console.benchmarks = report.benchmarks
+        console.print_benchmarks_metadata()
+        console.print_benchmarks_info()
+        console.print_benchmarks_stats()
+        console.enabled = orig_enabled
+
+    if output_path:
+        console.print_line("\nSaving benchmarks report...")
+        saved_path = report.save_file(output_path)
+        console.print_line(f"Benchmarks report saved to {saved_path}")
+    else:
+        saved_path = None
+
+    console.print_line("\nBenchmarking complete.")
+
+    return report, saved_path
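
Since benchmark_generative_text takes every knob explicitly (no parameter has a default), a single coroutine call drives a full run. A minimal sketch of invoking it, assuming an OpenAI-compatible server at the target URL; the "openai_http" backend type and the synthetic-data string are assumptions inferred from the backend and dataset modules in the file list, not from this hunk:

import asyncio

from guidellm.benchmark.entrypoints import benchmark_generative_text

report, saved_path = asyncio.run(
    benchmark_generative_text(
        target="http://localhost:8000",  # assumed local server
        backend_type="openai_http",      # assumed BackendType value
        backend_args=None,
        model=None,                      # let the backend pick its model
        processor=None,                  # falls back to backend.model
        processor_args=None,
        data="prompt_tokens=256,output_tokens=128",  # assumed synthetic-data syntax
        data_args=None,
        data_sampler=None,
        rate_type="synchronous",
        rate=None,
        max_seconds=60.0,
        max_requests=None,
        warmup_percent=None,
        cooldown_percent=None,
        show_progress=True,
        show_progress_scheduler_stats=False,
        output_console=True,
        output_path="benchmarks.json",
        output_extras=None,
        output_sampling=None,
        random_seed=42,
    )
)
print(f"{len(report.benchmarks)} benchmark(s); report saved to {saved_path}")

The returned tuple mirrors the signature above: the in-memory GenerativeBenchmarksReport plus the path it was saved to, or None when output_path is not given.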