PyPI - guidellm - Versions diffs - 0.3.1__py3-none-any.whl → 0.6.0a5__py3-none-any.whl - Mend

guidellm 0.3.1__py3-none-any.whl → 0.6.0a5__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (141) hide show

guidellm/__init__.py +5 -2
guidellm/__main__.py +524 -255
guidellm/backends/__init__.py +33 -0
guidellm/backends/backend.py +109 -0
guidellm/backends/openai.py +340 -0
guidellm/backends/response_handlers.py +428 -0
guidellm/benchmark/__init__.py +69 -39
guidellm/benchmark/benchmarker.py +160 -316
guidellm/benchmark/entrypoints.py +560 -127
guidellm/benchmark/outputs/__init__.py +24 -0
guidellm/benchmark/outputs/console.py +633 -0
guidellm/benchmark/outputs/csv.py +721 -0
guidellm/benchmark/outputs/html.py +473 -0
guidellm/benchmark/outputs/output.py +169 -0
guidellm/benchmark/outputs/serialized.py +69 -0
guidellm/benchmark/profiles.py +718 -0
guidellm/benchmark/progress.py +553 -556
guidellm/benchmark/scenarios/__init__.py +40 -0
guidellm/benchmark/scenarios/chat.json +6 -0
guidellm/benchmark/scenarios/rag.json +6 -0
guidellm/benchmark/schemas/__init__.py +66 -0
guidellm/benchmark/schemas/base.py +402 -0
guidellm/benchmark/schemas/generative/__init__.py +55 -0
guidellm/benchmark/schemas/generative/accumulator.py +841 -0
guidellm/benchmark/schemas/generative/benchmark.py +163 -0
guidellm/benchmark/schemas/generative/entrypoints.py +381 -0
guidellm/benchmark/schemas/generative/metrics.py +927 -0
guidellm/benchmark/schemas/generative/report.py +158 -0
guidellm/data/__init__.py +34 -4
guidellm/data/builders.py +541 -0
guidellm/data/collators.py +16 -0
guidellm/data/config.py +120 -0
guidellm/data/deserializers/__init__.py +49 -0
guidellm/data/deserializers/deserializer.py +141 -0
guidellm/data/deserializers/file.py +223 -0
guidellm/data/deserializers/huggingface.py +94 -0
guidellm/data/deserializers/memory.py +194 -0
guidellm/data/deserializers/synthetic.py +246 -0
guidellm/data/entrypoints.py +52 -0
guidellm/data/loaders.py +190 -0
guidellm/data/preprocessors/__init__.py +27 -0
guidellm/data/preprocessors/formatters.py +410 -0
guidellm/data/preprocessors/mappers.py +196 -0
guidellm/data/preprocessors/preprocessor.py +30 -0
guidellm/data/processor.py +29 -0
guidellm/data/schemas.py +175 -0
guidellm/data/utils/__init__.py +6 -0
guidellm/data/utils/dataset.py +94 -0
guidellm/extras/__init__.py +4 -0
guidellm/extras/audio.py +220 -0
guidellm/extras/vision.py +242 -0
guidellm/logger.py +2 -2
guidellm/mock_server/__init__.py +8 -0
guidellm/mock_server/config.py +84 -0
guidellm/mock_server/handlers/__init__.py +17 -0
guidellm/mock_server/handlers/chat_completions.py +280 -0
guidellm/mock_server/handlers/completions.py +280 -0
guidellm/mock_server/handlers/tokenizer.py +142 -0
guidellm/mock_server/models.py +510 -0
guidellm/mock_server/server.py +238 -0
guidellm/mock_server/utils.py +302 -0
guidellm/scheduler/__init__.py +69 -26
guidellm/scheduler/constraints/__init__.py +49 -0
guidellm/scheduler/constraints/constraint.py +325 -0
guidellm/scheduler/constraints/error.py +411 -0
guidellm/scheduler/constraints/factory.py +182 -0
guidellm/scheduler/constraints/request.py +312 -0
guidellm/scheduler/constraints/saturation.py +722 -0
guidellm/scheduler/environments.py +252 -0
guidellm/scheduler/scheduler.py +137 -368
guidellm/scheduler/schemas.py +358 -0
guidellm/scheduler/strategies.py +617 -0
guidellm/scheduler/worker.py +413 -419
guidellm/scheduler/worker_group.py +712 -0
guidellm/schemas/__init__.py +65 -0
guidellm/schemas/base.py +417 -0
guidellm/schemas/info.py +188 -0
guidellm/schemas/request.py +235 -0
guidellm/schemas/request_stats.py +349 -0
guidellm/schemas/response.py +124 -0
guidellm/schemas/statistics.py +1018 -0
guidellm/{config.py → settings.py} +31 -24
guidellm/utils/__init__.py +71 -8
guidellm/utils/auto_importer.py +98 -0
guidellm/utils/cli.py +132 -5
guidellm/utils/console.py +566 -0
guidellm/utils/encoding.py +778 -0
guidellm/utils/functions.py +159 -0
guidellm/utils/hf_datasets.py +1 -2
guidellm/utils/hf_transformers.py +4 -4
guidellm/utils/imports.py +9 -0
guidellm/utils/messaging.py +1118 -0
guidellm/utils/mixins.py +115 -0
guidellm/utils/random.py +3 -4
guidellm/utils/registry.py +220 -0
guidellm/utils/singleton.py +133 -0
guidellm/utils/synchronous.py +159 -0
guidellm/utils/text.py +163 -50
guidellm/utils/typing.py +41 -0
guidellm/version.py +2 -2
guidellm-0.6.0a5.dist-info/METADATA +364 -0
guidellm-0.6.0a5.dist-info/RECORD +109 -0
guidellm/backend/__init__.py +0 -23
guidellm/backend/backend.py +0 -259
guidellm/backend/openai.py +0 -708
guidellm/backend/response.py +0 -136
guidellm/benchmark/aggregator.py +0 -760
guidellm/benchmark/benchmark.py +0 -837
guidellm/benchmark/output.py +0 -997
guidellm/benchmark/profile.py +0 -409
guidellm/benchmark/scenario.py +0 -104
guidellm/data/prideandprejudice.txt.gz +0 -0
guidellm/dataset/__init__.py +0 -22
guidellm/dataset/creator.py +0 -213
guidellm/dataset/entrypoints.py +0 -42
guidellm/dataset/file.py +0 -92
guidellm/dataset/hf_datasets.py +0 -62
guidellm/dataset/in_memory.py +0 -132
guidellm/dataset/synthetic.py +0 -287
guidellm/objects/__init__.py +0 -18
guidellm/objects/pydantic.py +0 -89
guidellm/objects/statistics.py +0 -953
guidellm/preprocess/__init__.py +0 -3
guidellm/preprocess/dataset.py +0 -374
guidellm/presentation/__init__.py +0 -28
guidellm/presentation/builder.py +0 -27
guidellm/presentation/data_models.py +0 -232
guidellm/presentation/injector.py +0 -66
guidellm/request/__init__.py +0 -18
guidellm/request/loader.py +0 -284
guidellm/request/request.py +0 -79
guidellm/request/types.py +0 -10
guidellm/scheduler/queues.py +0 -25
guidellm/scheduler/result.py +0 -155
guidellm/scheduler/strategy.py +0 -495
guidellm-0.3.1.dist-info/METADATA +0 -329
guidellm-0.3.1.dist-info/RECORD +0 -62
{guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/WHEEL +0 -0
{guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/entry_points.txt +0 -0
{guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/licenses/LICENSE +0 -0
{guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/top_level.txt +0 -0

guidellm/benchmark/benchmarker.py CHANGED Viewed

@@ -1,334 +1,178 @@
-import time
-import uuid
-from abc import ABC, abstractmethod
-from collections.abc import AsyncGenerator, Iterable
-from pathlib import Path
-from typing import (
-    Any,
-    Generic,
-    Literal,
-    Optional,
-    Union,
-)
+"""
+Benchmark execution orchestration and lifecycle management.
+Provides the core benchmarking engine that coordinates request scheduling,
+data aggregation, and result compilation across execution strategies and
+environments. The Benchmarker manages the complete benchmark lifecycle from
+request submission through result compilation while implementing thread-safe
+singleton operations for consistent state management across concurrent workflows.
+"""
-from pydantic import Field
-from transformers import PreTrainedTokenizerBase  # type: ignore  # noqa: PGH003
+from __future__ import annotations
-from guidellm.backend import Backend, ResponseSummary
-from guidellm.benchmark.aggregator import (
-    AggregatorT,
+import uuid
+from abc import ABC
+from collections.abc import AsyncIterator, Iterable
+from typing import Generic
+from guidellm.benchmark.profiles import Profile
+from guidellm.benchmark.progress import BenchmarkerProgress
+from guidellm.benchmark.schemas import (
+    BenchmarkAccumulatorT,
+    BenchmarkConfig,
     BenchmarkT,
-    GenerativeBenchmarkAggregator,
 )
-from guidellm.benchmark.benchmark import BenchmarkArgs, GenerativeBenchmark
-from guidellm.benchmark.profile import Profile
-from guidellm.objects import StandardBaseModel
-from guidellm.request import (
-    GenerationRequest,
-    GenerativeRequestLoaderDescription,
-    RequestLoaderDescription,
+from guidellm.benchmark.schemas.base import TransientPhaseConfig
+from guidellm.logger import logger
+from guidellm.scheduler import (
+    BackendInterface,
+    Constraint,
+    Environment,
+    MultiTurnRequestT,
     RequestT,
     ResponseT,
-)
-from guidellm.scheduler import (
-    GenerativeRequestsWorker,
-    RequestsWorker,
     Scheduler,
-    SchedulerRequestResult,
     SchedulingStrategy,
 )
+from guidellm.utils import ThreadSafeSingletonMixin
+from guidellm.utils.mixins import InfoMixin
-__all__ = ["Benchmarker", "BenchmarkerResult", "GenerativeBenchmarker"]
+__all__ = ["Benchmarker"]
-class BenchmarkerResult(
-    StandardBaseModel, Generic[AggregatorT, BenchmarkT, RequestT, ResponseT]
+class Benchmarker(
+    Generic[BenchmarkT, RequestT, ResponseT],
+    ABC,
+    ThreadSafeSingletonMixin,
 ):
-    type_: Literal[
-        "run_start",
-        "run_complete",
-        "scheduler_start",
-        "scheduler_update",
-        "scheduler_complete",
-        "benchmark_compiled",
-    ]
-    start_time: float
-    end_number: int
-    profile: Profile
-    current_index: int
-    current_strategy: Optional[SchedulingStrategy] = None
-    current_aggregator: Optional[AggregatorT] = None
-    current_benchmark: Optional[BenchmarkT] = None
-    current_result: Optional[SchedulerRequestResult[RequestT, ResponseT]] = None
-class BenchmarkerStrategyLimits(StandardBaseModel):
-    requests_loader_size: Optional[int] = Field(
-        description="Size of the request loader.",
-    )
-    max_number_per_strategy: Optional[int] = Field(
-        description="Maximum number of requests to process per strategy.",
-        ge=0,
-    )
-    max_duration_per_strategy: Optional[float] = Field(
-        description="Maximum duration (in seconds) to process requests per strategy.",
-        ge=0,
-    )
-    warmup_percent_per_strategy: Optional[float] = Field(
-        description="Percentage of requests to use for warmup.",
-        ge=0,
-        le=1,
-    )
-    cooldown_percent_per_strategy: Optional[float] = Field(
-        description="Percentage of requests to use for cooldown.",
-        ge=0,
-        le=1,
-    )
-    @property
-    def max_number(self) -> Optional[int]:
-        if self.max_number_per_strategy is not None:
-            return self.max_number_per_strategy
-        if self.requests_loader_size is not None:
-            return self.requests_loader_size
-        return None
-    @property
-    def max_duration(self) -> Optional[float]:
-        return self.max_duration_per_strategy
-    @property
-    def warmup_number(self) -> Optional[int]:
-        if self.warmup_percent_per_strategy is None or self.max_number is None:
-            return None
-        return int(self.warmup_percent_per_strategy * self.max_number)
-    @property
-    def warmup_duration(self) -> Optional[float]:
-        if self.warmup_percent_per_strategy is None or self.max_duration is None:
-            return None
-        return self.warmup_percent_per_strategy * self.max_duration
-    @property
-    def cooldown_number(self) -> Optional[int]:
-        if self.cooldown_percent_per_strategy is None or self.max_number is None:
-            return None
-        return int(self.cooldown_percent_per_strategy * self.max_number)
-    @property
-    def cooldown_duration(self) -> Optional[float]:
-        if self.cooldown_percent_per_strategy is None or self.max_duration is None:
-            return None
-        return self.cooldown_percent_per_strategy * self.max_duration
+    """
+    Orchestrates benchmark execution across scheduling strategies.
-class Benchmarker(Generic[AggregatorT, BenchmarkT, RequestT, ResponseT], ABC):
-    def __init__(
-        self,
-        worker: RequestsWorker[RequestT, ResponseT],
-        request_loader: Iterable[RequestT],
-        requests_loader_description: RequestLoaderDescription,
-        benchmark_save_extras: Optional[dict[str, Any]] = None,
-    ):
-        self.worker = worker
-        self.scheduler: Scheduler[RequestT, ResponseT] = Scheduler(
-            worker=worker, request_loader=request_loader
-        )
-        self.requests_loader_description = requests_loader_description
-        self.benchmark_save_extras = benchmark_save_extras
+    Coordinates benchmarking runs by managing request scheduling, metric aggregation,
+    and result compilation. Implements a thread-safe singleton pattern to ensure
+    consistent state management across concurrent operations while supporting multiple
+    scheduling strategies and execution environments.
+    """
     async def run(
         self,
+        accumulator_class: type[BenchmarkAccumulatorT],
+        benchmark_class: type[BenchmarkT],
+        requests: Iterable[RequestT | MultiTurnRequestT[RequestT]],
+        backend: BackendInterface[RequestT, ResponseT],
         profile: Profile,
-        max_number_per_strategy: Optional[int],
-        max_duration_per_strategy: Optional[float],
-        warmup_percent_per_strategy: Optional[float],
-        cooldown_percent_per_strategy: Optional[float],
-    ) -> AsyncGenerator[
-        BenchmarkerResult[AggregatorT, BenchmarkT, RequestT, ResponseT], None
-    ]:
-        try:
-            requests_loader_size = len(self.scheduler.request_loader)  # type: ignore[arg-type]
-        except Exception:  # noqa: BLE001
-            requests_loader_size = None
-        strategy_limits = BenchmarkerStrategyLimits(
-            requests_loader_size=requests_loader_size,
-            max_number_per_strategy=max_number_per_strategy,
-            max_duration_per_strategy=max_duration_per_strategy,
-            warmup_percent_per_strategy=warmup_percent_per_strategy,
-            cooldown_percent_per_strategy=cooldown_percent_per_strategy,
-        )
-        start_time = time.time()
-        end_number = len(profile.strategy_types)
-        current_index = -1
-        run_id = str(uuid.uuid4())
-        yield BenchmarkerResult(
-            type_="run_start",
-            start_time=start_time,
-            end_number=end_number,
-            profile=profile,
-            current_index=current_index,
-            current_strategy=None,
-            current_aggregator=None,
-            current_benchmark=None,
-            current_result=None,
-        )
-        while scheduling_strategy := profile.next_strategy():
-            current_index += 1
-            aggregator = self.create_benchmark_aggregator(
-                run_id=run_id,
-                profile=profile,
-                strategy_index=current_index,
-                strategy=scheduling_strategy,
-                limits=strategy_limits,
-            )
-            async for result in self.scheduler.run(
-                scheduling_strategy=scheduling_strategy,
-                max_number=max_number_per_strategy,
-                max_duration=max_duration_per_strategy,
-            ):
-                if result.type_ == "run_start":
-                    yield BenchmarkerResult(
-                        type_="scheduler_start",
-                        start_time=start_time,
-                        end_number=end_number,
-                        profile=profile,
-                        current_index=current_index,
-                        current_strategy=scheduling_strategy,
-                        current_aggregator=aggregator,
-                        current_benchmark=None,
-                        current_result=None,
-                    )
-                elif result.type_ == "run_complete":
-                    yield BenchmarkerResult(
-                        type_="scheduler_complete",
-                        start_time=start_time,
-                        end_number=end_number,
-                        profile=profile,
-                        current_index=current_index,
-                        current_strategy=scheduling_strategy,
-                        current_aggregator=aggregator,
-                        current_benchmark=None,
-                        current_result=None,
-                    )
-                elif isinstance(result, SchedulerRequestResult):
-                    aggregator.add_result(result)
-                    yield BenchmarkerResult(
-                        type_="scheduler_update",
-                        start_time=start_time,
-                        end_number=end_number,
-                        profile=profile,
-                        current_index=current_index,
-                        current_strategy=scheduling_strategy,
-                        current_aggregator=aggregator,
-                        current_benchmark=None,
-                        current_result=result,
-                    )
-                else:
-                    raise ValueError(f"Unexpected result type: {type(result)}")
-            benchmark: BenchmarkT = aggregator.compile()
-            profile.completed_strategy(
-                average_rate=benchmark.metrics.requests_per_second.successful.mean,
-                average_concurrency=benchmark.metrics.request_concurrency.successful.mean,
-            )
-            yield BenchmarkerResult(
-                type_="benchmark_compiled",
-                start_time=start_time,
-                end_number=end_number,
-                profile=profile,
-                current_index=current_index,
-                current_strategy=scheduling_strategy,
-                current_aggregator=None,
-                current_benchmark=benchmark,
-                current_result=None,
-            )
-        yield BenchmarkerResult(
-            type_="run_complete",
-            start_time=start_time,
-            end_number=end_number,
-            profile=profile,
-            current_index=current_index,
-            current_strategy=None,
-            current_aggregator=None,
-            current_benchmark=None,
-            current_result=None,
-        )
-    @abstractmethod
-    def create_benchmark_aggregator(
-        self,
-        run_id: str,
-        profile: Profile,
-        strategy_index: int,
-        strategy: SchedulingStrategy,
-        limits: BenchmarkerStrategyLimits,
-    ) -> AggregatorT: ...
-class GenerativeBenchmarker(
-    Benchmarker[
-        GenerativeBenchmarkAggregator,
-        GenerativeBenchmark,
-        GenerationRequest,
-        ResponseSummary,
-    ],
-):
-    def __init__(
-        self,
-        backend: Backend,
-        request_loader: Iterable[GenerationRequest],
-        request_loader_description: GenerativeRequestLoaderDescription,
-        benchmark_save_extras: Optional[dict[str, Any]] = None,
-        processor: Optional[Union[str, Path, PreTrainedTokenizerBase]] = None,
-        processor_args: Optional[dict[str, Any]] = None,
-    ):
-        super().__init__(
-            worker=GenerativeRequestsWorker(backend),
-            request_loader=request_loader,
-            requests_loader_description=request_loader_description,
-            benchmark_save_extras=benchmark_save_extras,
-        )
-        self.processor = processor
-        self.processor_args = processor_args
-    def create_benchmark_aggregator(
-        self,
-        run_id: str,
-        profile: Profile,
-        strategy_index: int,
-        strategy: SchedulingStrategy,
-        limits: BenchmarkerStrategyLimits,
-    ) -> GenerativeBenchmarkAggregator:
-        return GenerativeBenchmarkAggregator(
-            run_id=run_id,
-            args=BenchmarkArgs(
-                profile=profile,
-                strategy_index=strategy_index,
-                strategy=strategy,
-                max_number=limits.max_number,
-                max_duration=limits.max_duration,
-                warmup_number=limits.warmup_number,
-                warmup_duration=limits.warmup_duration,
-                cooldown_number=limits.cooldown_number,
-                cooldown_duration=limits.cooldown_duration,
-            ),
-            worker_description=self.worker.description,  # type: ignore[arg-type]
-            request_loader_description=self.requests_loader_description,  # type: ignore[arg-type]
-            extras=self.benchmark_save_extras or {},
-            processor=self.processor,
-            processor_args=self.processor_args,
-        )
+        environment: Environment,
+        warmup: TransientPhaseConfig,
+        cooldown: TransientPhaseConfig,
+        sample_requests: int | None = 20,
+        prefer_response_metrics: bool = True,
+        progress: (
+            BenchmarkerProgress[BenchmarkAccumulatorT, BenchmarkT] | None
+        ) = None,
+    ) -> AsyncIterator[BenchmarkT]:
+        """
+        Execute benchmark runs across scheduling strategies in the profile.
+        :param accumulator_class: Class for accumulating metrics during execution
+        :param benchmark_class: Class for constructing final benchmark results
+        :param requests: Request datasets to process across strategies
+        :param backend: Backend interface for executing requests
+        :param profile: Profile defining scheduling strategies and constraints
+        :param environment: Environment for execution coordination
+        :param warmup: Warmup phase configuration before benchmarking
+        :param cooldown: Cooldown phase configuration after benchmarking
+        :param sample_requests: Number of requests to sample for estimation,
+            defaults to 20
+        :param prefer_response_metrics: Whether to prefer response metrics over
+            request metrics, defaults to True
+        :param progress: Optional tracker for benchmark lifecycle events
+        :yield: Compiled benchmark result for each strategy execution
+        :raises Exception: If benchmark execution or compilation fails
+        """
+        with self.thread_lock:
+            if progress:
+                await progress.on_initialize(profile)
+            run_id = str(uuid.uuid4())
+            strategies_generator = profile.strategies_generator()
+            strategy: SchedulingStrategy | None
+            constraints: dict[str, Constraint] | None
+            strategy, constraints = next(strategies_generator)
+            while strategy is not None:
+                if progress:
+                    await progress.on_benchmark_start(strategy)
+                config = BenchmarkConfig(
+                    run_id=run_id,
+                    run_index=len(profile.completed_strategies),
+                    strategy=strategy,
+                    constraints=(
+                        {
+                            key: InfoMixin.extract_from_obj(val)
+                            for key, val in constraints.items()
+                        }
+                        if isinstance(constraints, dict)
+                        else {"constraint": InfoMixin.extract_from_obj(constraints)}
+                        if constraints
+                        else {}
+                    ),
+                    sample_requests=sample_requests,
+                    warmup=warmup,
+                    cooldown=cooldown,
+                    prefer_response_metrics=prefer_response_metrics,
+                    profile=profile,
+                    requests=InfoMixin.extract_from_obj(requests),
+                    backend=InfoMixin.extract_from_obj(backend),
+                    environment=InfoMixin.extract_from_obj(environment),
+                )
+                accumulator = accumulator_class(config=config)
+                scheduler_state = None
+                scheduler: Scheduler[RequestT, ResponseT] = Scheduler()
+                async for (
+                    response,
+                    request,
+                    request_info,
+                    scheduler_state,
+                ) in scheduler.run(
+                    requests=requests,
+                    backend=backend,
+                    strategy=strategy,
+                    env=environment,
+                    **constraints or {},
+                ):
+                    try:
+                        accumulator.update_estimate(
+                            response,
+                            request,
+                            request_info,
+                            scheduler_state,
+                        )
+                        if progress:
+                            await progress.on_benchmark_update(
+                                accumulator, scheduler_state
+                            )
+                    except Exception as err:  # noqa: BLE001
+                        logger.error(
+                            f"Error updating benchmark estimate/progress: {err}"
+                        )
+                benchmark = benchmark_class.compile(
+                    accumulator=accumulator,
+                    scheduler_state=scheduler_state,
+                )
+                if progress:
+                    await progress.on_benchmark_complete(benchmark)
+                yield benchmark
+                try:
+                    strategy, constraints = strategies_generator.send(benchmark)
+                except StopIteration:
+                    strategy = None
+                    constraints = None
+            if progress:
+                await progress.on_finalize()

guidellm 0.3.1__py3-none-any.whl → 0.6.0a5__py3-none-any.whl

guidellm 0.3.1__py3-none-any.whl → 0.6.0a5__py3-none-any.whl