guidellm 0.4.0a21__py3-none-any.whl → 0.4.0a169__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


Files changed (115)
  1. guidellm/__init__.py +5 -2
  2. guidellm/__main__.py +452 -252
  3. guidellm/backends/__init__.py +33 -0
  4. guidellm/backends/backend.py +110 -0
  5. guidellm/backends/openai.py +355 -0
  6. guidellm/backends/response_handlers.py +455 -0
  7. guidellm/benchmark/__init__.py +53 -39
  8. guidellm/benchmark/benchmarker.py +150 -317
  9. guidellm/benchmark/entrypoints.py +467 -128
  10. guidellm/benchmark/output.py +519 -771
  11. guidellm/benchmark/profile.py +580 -280
  12. guidellm/benchmark/progress.py +568 -549
  13. guidellm/benchmark/scenarios/__init__.py +40 -0
  14. guidellm/benchmark/scenarios/chat.json +6 -0
  15. guidellm/benchmark/scenarios/rag.json +6 -0
  16. guidellm/benchmark/schemas.py +2086 -0
  17. guidellm/data/__init__.py +28 -4
  18. guidellm/data/collators.py +16 -0
  19. guidellm/data/deserializers/__init__.py +53 -0
  20. guidellm/data/deserializers/deserializer.py +144 -0
  21. guidellm/data/deserializers/file.py +222 -0
  22. guidellm/data/deserializers/huggingface.py +94 -0
  23. guidellm/data/deserializers/memory.py +194 -0
  24. guidellm/data/deserializers/synthetic.py +348 -0
  25. guidellm/data/loaders.py +149 -0
  26. guidellm/data/preprocessors/__init__.py +25 -0
  27. guidellm/data/preprocessors/formatters.py +404 -0
  28. guidellm/data/preprocessors/mappers.py +198 -0
  29. guidellm/data/preprocessors/preprocessor.py +31 -0
  30. guidellm/data/processor.py +31 -0
  31. guidellm/data/schemas.py +13 -0
  32. guidellm/data/utils/__init__.py +6 -0
  33. guidellm/data/utils/dataset.py +94 -0
  34. guidellm/extras/__init__.py +4 -0
  35. guidellm/extras/audio.py +215 -0
  36. guidellm/extras/vision.py +242 -0
  37. guidellm/logger.py +2 -2
  38. guidellm/mock_server/__init__.py +8 -0
  39. guidellm/mock_server/config.py +84 -0
  40. guidellm/mock_server/handlers/__init__.py +17 -0
  41. guidellm/mock_server/handlers/chat_completions.py +280 -0
  42. guidellm/mock_server/handlers/completions.py +280 -0
  43. guidellm/mock_server/handlers/tokenizer.py +142 -0
  44. guidellm/mock_server/models.py +510 -0
  45. guidellm/mock_server/server.py +168 -0
  46. guidellm/mock_server/utils.py +302 -0
  47. guidellm/preprocess/dataset.py +23 -26
  48. guidellm/presentation/builder.py +2 -2
  49. guidellm/presentation/data_models.py +25 -21
  50. guidellm/presentation/injector.py +2 -3
  51. guidellm/scheduler/__init__.py +65 -26
  52. guidellm/scheduler/constraints.py +1035 -0
  53. guidellm/scheduler/environments.py +252 -0
  54. guidellm/scheduler/scheduler.py +140 -368
  55. guidellm/scheduler/schemas.py +272 -0
  56. guidellm/scheduler/strategies.py +519 -0
  57. guidellm/scheduler/worker.py +391 -420
  58. guidellm/scheduler/worker_group.py +707 -0
  59. guidellm/schemas/__init__.py +31 -0
  60. guidellm/schemas/info.py +159 -0
  61. guidellm/schemas/request.py +226 -0
  62. guidellm/schemas/response.py +119 -0
  63. guidellm/schemas/stats.py +228 -0
  64. guidellm/{config.py → settings.py} +32 -21
  65. guidellm/utils/__init__.py +95 -8
  66. guidellm/utils/auto_importer.py +98 -0
  67. guidellm/utils/cli.py +71 -2
  68. guidellm/utils/console.py +183 -0
  69. guidellm/utils/encoding.py +778 -0
  70. guidellm/utils/functions.py +134 -0
  71. guidellm/utils/hf_datasets.py +1 -2
  72. guidellm/utils/hf_transformers.py +4 -4
  73. guidellm/utils/imports.py +9 -0
  74. guidellm/utils/messaging.py +1118 -0
  75. guidellm/utils/mixins.py +115 -0
  76. guidellm/utils/pydantic_utils.py +411 -0
  77. guidellm/utils/random.py +3 -4
  78. guidellm/utils/registry.py +220 -0
  79. guidellm/utils/singleton.py +133 -0
  80. guidellm/{objects → utils}/statistics.py +341 -247
  81. guidellm/utils/synchronous.py +159 -0
  82. guidellm/utils/text.py +163 -50
  83. guidellm/utils/typing.py +41 -0
  84. guidellm/version.py +1 -1
  85. {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a169.dist-info}/METADATA +33 -10
  86. guidellm-0.4.0a169.dist-info/RECORD +95 -0
  87. guidellm/backend/__init__.py +0 -23
  88. guidellm/backend/backend.py +0 -259
  89. guidellm/backend/openai.py +0 -705
  90. guidellm/backend/response.py +0 -136
  91. guidellm/benchmark/aggregator.py +0 -760
  92. guidellm/benchmark/benchmark.py +0 -837
  93. guidellm/benchmark/scenario.py +0 -104
  94. guidellm/data/prideandprejudice.txt.gz +0 -0
  95. guidellm/dataset/__init__.py +0 -22
  96. guidellm/dataset/creator.py +0 -213
  97. guidellm/dataset/entrypoints.py +0 -42
  98. guidellm/dataset/file.py +0 -92
  99. guidellm/dataset/hf_datasets.py +0 -62
  100. guidellm/dataset/in_memory.py +0 -132
  101. guidellm/dataset/synthetic.py +0 -287
  102. guidellm/objects/__init__.py +0 -18
  103. guidellm/objects/pydantic.py +0 -89
  104. guidellm/request/__init__.py +0 -18
  105. guidellm/request/loader.py +0 -284
  106. guidellm/request/request.py +0 -79
  107. guidellm/request/types.py +0 -10
  108. guidellm/scheduler/queues.py +0 -25
  109. guidellm/scheduler/result.py +0 -155
  110. guidellm/scheduler/strategy.py +0 -495
  111. guidellm-0.4.0a21.dist-info/RECORD +0 -62
  112. {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a169.dist-info}/WHEEL +0 -0
  113. {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a169.dist-info}/entry_points.txt +0 -0
  114. {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a169.dist-info}/licenses/LICENSE +0 -0
  115. {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a169.dist-info}/top_level.txt +0 -0
guidellm/benchmark/benchmarker.py
@@ -1,334 +1,167 @@
-import time
-import uuid
-from abc import ABC, abstractmethod
-from collections.abc import AsyncGenerator, Iterable
-from pathlib import Path
-from typing import (
-    Any,
-    Generic,
-    Literal,
-    Optional,
-    Union,
-)
+"""
+Benchmark execution orchestration and lifecycle management.
+
+Provides the core benchmarking engine that coordinates request scheduling,
+data aggregation, and result compilation across different execution strategies
+and environments. The Benchmarker acts as the primary workflow coordinator,
+managing the complete benchmark lifecycle from request submission through
+result compilation while supporting thread-safe singleton operations.
+"""
 
-from pydantic import Field
-from transformers import PreTrainedTokenizerBase  # type: ignore  # noqa: PGH003
+from __future__ import annotations
 
-from guidellm.backend import Backend, ResponseSummary
-from guidellm.benchmark.aggregator import (
-    AggregatorT,
+import uuid
+from abc import ABC
+from collections.abc import AsyncIterator, Iterable
+from typing import Generic
+
+from guidellm.benchmark.profile import Profile
+from guidellm.benchmark.progress import BenchmarkerProgress
+from guidellm.benchmark.schemas import (
+    BenchmarkerArgs,
     BenchmarkT,
-    GenerativeBenchmarkAggregator,
+    EstimatedBenchmarkState,
 )
-from guidellm.benchmark.benchmark import BenchmarkArgs, GenerativeBenchmark
-from guidellm.benchmark.profile import Profile
-from guidellm.objects import StandardBaseModel
-from guidellm.request import (
-    GenerationRequest,
-    GenerativeRequestLoaderDescription,
-    RequestLoaderDescription,
+from guidellm.logger import logger
+from guidellm.scheduler import (
+    BackendInterface,
+    Environment,
     RequestT,
     ResponseT,
-)
-from guidellm.scheduler import (
-    GenerativeRequestsWorker,
-    RequestsWorker,
     Scheduler,
-    SchedulerRequestResult,
-    SchedulingStrategy,
 )
+from guidellm.utils import ThreadSafeSingletonMixin
 
-__all__ = ["Benchmarker", "BenchmarkerResult", "GenerativeBenchmarker"]
+__all__ = ["Benchmarker"]
 
 
-class BenchmarkerResult(
-    StandardBaseModel, Generic[AggregatorT, BenchmarkT, RequestT, ResponseT]
+class Benchmarker(
+    Generic[BenchmarkT, RequestT, ResponseT],
+    ABC,
+    ThreadSafeSingletonMixin,
 ):
-    type_: Literal[
-        "run_start",
-        "run_complete",
-        "scheduler_start",
-        "scheduler_update",
-        "scheduler_complete",
-        "benchmark_compiled",
-    ]
-    start_time: float
-    end_number: int
-    profile: Profile
-    current_index: int
-    current_strategy: Optional[SchedulingStrategy] = None
-    current_aggregator: Optional[AggregatorT] = None
-    current_benchmark: Optional[BenchmarkT] = None
-    current_result: Optional[SchedulerRequestResult[RequestT, ResponseT]] = None
-
-
-class BenchmarkerStrategyLimits(StandardBaseModel):
-    requests_loader_size: Optional[int] = Field(
-        description="Size of the request loader.",
-    )
-    max_number_per_strategy: Optional[int] = Field(
-        description="Maximum number of requests to process per strategy.",
-        ge=0,
-    )
-    max_duration_per_strategy: Optional[float] = Field(
-        description="Maximum duration (in seconds) to process requests per strategy.",
-        ge=0,
-    )
-    warmup_percent_per_strategy: Optional[float] = Field(
-        description="Percentage of requests to use for warmup.",
-        ge=0,
-        le=1,
-    )
-    cooldown_percent_per_strategy: Optional[float] = Field(
-        description="Percentage of requests to use for cooldown.",
-        ge=0,
-        le=1,
-    )
-
-    @property
-    def max_number(self) -> Optional[int]:
-        if self.max_number_per_strategy is not None:
-            return self.max_number_per_strategy
-
-        if self.requests_loader_size is not None:
-            return self.requests_loader_size
-
-        return None
-
-    @property
-    def max_duration(self) -> Optional[float]:
-        return self.max_duration_per_strategy
-
-    @property
-    def warmup_number(self) -> Optional[int]:
-        if self.warmup_percent_per_strategy is None or self.max_number is None:
-            return None
-
-        return int(self.warmup_percent_per_strategy * self.max_number)
-
-    @property
-    def warmup_duration(self) -> Optional[float]:
-        if self.warmup_percent_per_strategy is None or self.max_duration is None:
-            return None
-
-        return self.warmup_percent_per_strategy * self.max_duration
-
-    @property
-    def cooldown_number(self) -> Optional[int]:
-        if self.cooldown_percent_per_strategy is None or self.max_number is None:
-            return None
-
-        return int(self.cooldown_percent_per_strategy * self.max_number)
+    """
+    Abstract benchmark orchestrator for request processing workflows.
 
-    @property
-    def cooldown_duration(self) -> Optional[float]:
-        if self.cooldown_percent_per_strategy is None or self.max_duration is None:
-            return None
-
-        return self.cooldown_percent_per_strategy * self.max_duration
-
-
-class Benchmarker(Generic[AggregatorT, BenchmarkT, RequestT, ResponseT], ABC):
-    def __init__(
-        self,
-        worker: RequestsWorker[RequestT, ResponseT],
-        request_loader: Iterable[RequestT],
-        requests_loader_description: RequestLoaderDescription,
-        benchmark_save_extras: Optional[dict[str, Any]] = None,
-    ):
-        self.worker = worker
-        self.scheduler: Scheduler[RequestT, ResponseT] = Scheduler(
-            worker=worker, request_loader=request_loader
-        )
-        self.requests_loader_description = requests_loader_description
-        self.benchmark_save_extras = benchmark_save_extras
+    Coordinates execution of benchmarking runs across different scheduling
+    strategies, aggregating metrics and compiling results. Manages the complete
+    benchmark lifecycle from request submission through result compilation while
+    implementing thread-safe singleton pattern to ensure consistent state across
+    concurrent operations.
+    """
 
     async def run(
         self,
+        benchmark_class: type[BenchmarkT],
+        requests: Iterable[RequestT | Iterable[RequestT | tuple[RequestT, float]]],
+        backend: BackendInterface[RequestT, ResponseT],
         profile: Profile,
-        max_number_per_strategy: Optional[int],
-        max_duration_per_strategy: Optional[float],
-        warmup_percent_per_strategy: Optional[float],
-        cooldown_percent_per_strategy: Optional[float],
-    ) -> AsyncGenerator[
-        BenchmarkerResult[AggregatorT, BenchmarkT, RequestT, ResponseT], None
-    ]:
-        try:
-            requests_loader_size = len(self.scheduler.request_loader)  # type: ignore[arg-type]
-        except Exception:  # noqa: BLE001
-            requests_loader_size = None
-
-        strategy_limits = BenchmarkerStrategyLimits(
-            requests_loader_size=requests_loader_size,
-            max_number_per_strategy=max_number_per_strategy,
-            max_duration_per_strategy=max_duration_per_strategy,
-            warmup_percent_per_strategy=warmup_percent_per_strategy,
-            cooldown_percent_per_strategy=cooldown_percent_per_strategy,
-        )
-        start_time = time.time()
-        end_number = len(profile.strategy_types)
-        current_index = -1
-        run_id = str(uuid.uuid4())
-
-        yield BenchmarkerResult(
-            type_="run_start",
-            start_time=start_time,
-            end_number=end_number,
-            profile=profile,
-            current_index=current_index,
-            current_strategy=None,
-            current_aggregator=None,
-            current_benchmark=None,
-            current_result=None,
-        )
-
-        while scheduling_strategy := profile.next_strategy():
-            current_index += 1
-            aggregator = self.create_benchmark_aggregator(
-                run_id=run_id,
-                profile=profile,
-                strategy_index=current_index,
-                strategy=scheduling_strategy,
-                limits=strategy_limits,
-            )
-
-            async for result in self.scheduler.run(
-                scheduling_strategy=scheduling_strategy,
-                max_number=max_number_per_strategy,
-                max_duration=max_duration_per_strategy,
-            ):
-                if result.type_ == "run_start":
-                    yield BenchmarkerResult(
-                        type_="scheduler_start",
-                        start_time=start_time,
-                        end_number=end_number,
-                        profile=profile,
-                        current_index=current_index,
-                        current_strategy=scheduling_strategy,
-                        current_aggregator=aggregator,
-                        current_benchmark=None,
-                        current_result=None,
-                    )
-                elif result.type_ == "run_complete":
-                    yield BenchmarkerResult(
-                        type_="scheduler_complete",
-                        start_time=start_time,
-                        end_number=end_number,
-                        profile=profile,
-                        current_index=current_index,
-                        current_strategy=scheduling_strategy,
-                        current_aggregator=aggregator,
-                        current_benchmark=None,
-                        current_result=None,
-                    )
-                elif isinstance(result, SchedulerRequestResult):
-                    aggregator.add_result(result)
-
-                    yield BenchmarkerResult(
-                        type_="scheduler_update",
-                        start_time=start_time,
-                        end_number=end_number,
-                        profile=profile,
-                        current_index=current_index,
-                        current_strategy=scheduling_strategy,
-                        current_aggregator=aggregator,
-                        current_benchmark=None,
-                        current_result=result,
-                    )
-                else:
-                    raise ValueError(f"Unexpected result type: {type(result)}")
-
-            benchmark: BenchmarkT = aggregator.compile()
-            profile.completed_strategy(
-                average_rate=benchmark.metrics.requests_per_second.successful.mean,
-                average_concurrency=benchmark.metrics.request_concurrency.successful.mean,
-            )
-
-            yield BenchmarkerResult(
-                type_="benchmark_compiled",
-                start_time=start_time,
-                end_number=end_number,
-                profile=profile,
-                current_index=current_index,
-                current_strategy=scheduling_strategy,
-                current_aggregator=None,
-                current_benchmark=benchmark,
-                current_result=None,
-            )
-
-        yield BenchmarkerResult(
-            type_="run_complete",
-            start_time=start_time,
-            end_number=end_number,
-            profile=profile,
-            current_index=current_index,
-            current_strategy=None,
-            current_aggregator=None,
-            current_benchmark=None,
-            current_result=None,
-        )
-
-    @abstractmethod
-    def create_benchmark_aggregator(
-        self,
-        run_id: str,
-        profile: Profile,
-        strategy_index: int,
-        strategy: SchedulingStrategy,
-        limits: BenchmarkerStrategyLimits,
-    ) -> AggregatorT: ...
-
-
-class GenerativeBenchmarker(
-    Benchmarker[
-        GenerativeBenchmarkAggregator,
-        GenerativeBenchmark,
-        GenerationRequest,
-        ResponseSummary,
-    ],
-):
-    def __init__(
-        self,
-        backend: Backend,
-        request_loader: Iterable[GenerationRequest],
-        request_loader_description: GenerativeRequestLoaderDescription,
-        benchmark_save_extras: Optional[dict[str, Any]] = None,
-        processor: Optional[Union[str, Path, PreTrainedTokenizerBase]] = None,
-        processor_args: Optional[dict[str, Any]] = None,
-    ):
-        super().__init__(
-            worker=GenerativeRequestsWorker(backend),
-            request_loader=request_loader,
-            requests_loader_description=request_loader_description,
-            benchmark_save_extras=benchmark_save_extras,
-        )
-        self.processor = processor
-        self.processor_args = processor_args
-
-    def create_benchmark_aggregator(
-        self,
-        run_id: str,
-        profile: Profile,
-        strategy_index: int,
-        strategy: SchedulingStrategy,
-        limits: BenchmarkerStrategyLimits,
-    ) -> GenerativeBenchmarkAggregator:
-        return GenerativeBenchmarkAggregator(
-            run_id=run_id,
-            args=BenchmarkArgs(
-                profile=profile,
-                strategy_index=strategy_index,
-                strategy=strategy,
-                max_number=limits.max_number,
-                max_duration=limits.max_duration,
-                warmup_number=limits.warmup_number,
-                warmup_duration=limits.warmup_duration,
-                cooldown_number=limits.cooldown_number,
-                cooldown_duration=limits.cooldown_duration,
-            ),
-            worker_description=self.worker.description,  # type: ignore[arg-type]
-            request_loader_description=self.requests_loader_description,  # type: ignore[arg-type]
-            extras=self.benchmark_save_extras or {},
-            processor=self.processor,
-            processor_args=self.processor_args,
-        )
+        environment: Environment,
+        data: list[Any],
+        progress: BenchmarkerProgress[BenchmarkT] | None = None,
+        sample_requests: int | None = 20,
+        warmup: float | None = None,
+        cooldown: float | None = None,
+        prefer_response_metrics: bool = True,
+    ) -> AsyncIterator[BenchmarkT]:
+        """
+        Execute benchmark runs across multiple scheduling strategies.
+
+        Orchestrates the complete benchmark workflow by iterating through scheduling
+        strategies from the profile, executing requests through the scheduler,
+        aggregating metrics, and compiling final benchmark results.
+
+        :param benchmark_class: Class for constructing final benchmark objects
+        :param requests: Request datasets for processing across strategies
+        :param backend: Backend interface for request processing
+        :param profile: Benchmark profile defining strategies and constraints
+        :param environment: Execution environment for coordination
+        :param progress: Optional progress tracker for benchmark lifecycle events
+        :param sample_requests: Number of sample requests to use for estimation
+        :param warmup: Optional warmup duration in seconds before benchmarking
+        :param cooldown: Optional cooldown duration in seconds after benchmarking
+        :param prefer_response_metrics: Whether to prefer response-based metrics over
+            request-based metrics
+        :yield: Compiled benchmark results for each strategy execution
+        :raises Exception: If benchmark execution or compilation fails
+        """
+        with self.thread_lock:
+            if progress:
+                await progress.on_initialize(profile)
+
+            run_id = str(uuid.uuid4())
+            strategies_generator = profile.strategies_generator()
+            strategy, constraints = next(strategies_generator)
+
+            while strategy is not None:
+                if progress:
+                    await progress.on_benchmark_start(strategy)
+
+                args = BenchmarkerArgs(
+                    run_id=run_id,
+                    run_index=len(profile.completed_strategies),
+                    sample_requests=sample_requests,
+                    warmup=warmup,
+                    cooldown=cooldown,
+                    prefer_response_metrics=prefer_response_metrics,
+                )
+                estimated_state = EstimatedBenchmarkState()
+                scheduler_state = None
+                scheduler: Scheduler[RequestT, ResponseT] = Scheduler()
+
+                async for (
+                    response,
+                    request,
+                    request_info,
+                    scheduler_state,
+                ) in scheduler.run(
+                    requests=requests,
+                    backend=backend,
+                    strategy=strategy,
+                    startup_duration=warmup if warmup and warmup >= 1 else 0.0,
+                    env=environment,
+                    **constraints or {},
+                ):
+                    try:
+                        benchmark_class.update_estimate(
+                            args,
+                            estimated_state,
+                            response,
+                            request,
+                            request_info,
+                            scheduler_state,
+                        )
+                        if progress:
+                            await progress.on_benchmark_update(
+                                estimated_state, scheduler_state
+                            )
+                    except Exception as err:  # noqa: BLE001
+                        logger.error(
+                            f"Error updating benchmark estimate/progress: {err}"
+                        )
+
+                benchmark = benchmark_class.compile(
+                    args=args,
+                    estimated_state=estimated_state,
+                    scheduler_state=scheduler_state,
+                    profile=profile,
+                    requests=requests,
+                    backend=backend,
+                    environment=environment,
+                    strategy=strategy,
+                    constraints=constraints,
+                    data=data,
+                )
+                if progress:
+                    await progress.on_benchmark_complete(benchmark)
+
+                yield benchmark
+
+                try:
+                    strategy, constraints = strategies_generator.send(benchmark)
+                except StopIteration:
+                    strategy = None
+                    constraints = None
+
+            if progress:
+                await progress.on_finalize()
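
For orientation, the sketch below shows how the reshaped Benchmarker.run async generator would be consumed. It is a minimal illustration inferred only from the signature in this diff: the drive helper and every argument value are placeholders, not guidellm APIs, and concrete Profile, Environment, and BackendInterface instances would come from the rest of the package.

# Minimal consumption sketch (assumptions: a concrete Benchmarker subclass and
# concrete Profile/Environment/BackendInterface objects exist elsewhere in the
# package; `drive` and all argument values here are illustrative placeholders).
import asyncio


async def drive(benchmarker, benchmark_class, requests, backend, profile, environment):
    # run() is an async generator: it yields one compiled benchmark per
    # scheduling strategy that the profile produces.
    async for benchmark in benchmarker.run(
        benchmark_class=benchmark_class,
        requests=requests,
        backend=backend,
        profile=profile,
        environment=environment,
        data=[],                    # extra data forwarded to compile()
        progress=None,              # optional BenchmarkerProgress hooks
        sample_requests=20,         # sample size used while estimating
        warmup=5.0,                 # seconds; also feeds startup_duration
        cooldown=None,
        prefer_response_metrics=True,
    ):
        print(benchmark)

# asyncio.run(drive(...)) with real objects would execute the full sweep.

The other notable change in the control loop: instead of polling profile.next_strategy(), the profile now exposes strategies_generator(), and each compiled benchmark is fed back via .send(benchmark), so the profile can choose the next strategy adaptively. A toy generator (not guidellm code; the strategy strings and constraint dicts are invented) showing the same send-driven handshake:

# Toy send-driven strategy loop mirroring the handshake in run().
def toy_strategies_generator():
    rate = 1.0
    while rate <= 4.0:
        # The yield expression evaluates to whatever the caller send()s back,
        # letting the generator adapt the next strategy to the last result.
        benchmark = yield (f"constant@{rate:g}rps", {"max_seconds": 30})
        rate *= 2  # a real profile could instead inspect `benchmark`

gen = toy_strategies_generator()
strategy, constraints = next(gen)
while True:
    result = f"ran {strategy} with {constraints}"  # stand-in for a benchmark
    try:
        strategy, constraints = gen.send(result)
    except StopIteration:
        break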