guidellm 0.3.1__py3-none-any.whl → 0.6.0a5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- guidellm/__init__.py +5 -2
- guidellm/__main__.py +524 -255
- guidellm/backends/__init__.py +33 -0
- guidellm/backends/backend.py +109 -0
- guidellm/backends/openai.py +340 -0
- guidellm/backends/response_handlers.py +428 -0
- guidellm/benchmark/__init__.py +69 -39
- guidellm/benchmark/benchmarker.py +160 -316
- guidellm/benchmark/entrypoints.py +560 -127
- guidellm/benchmark/outputs/__init__.py +24 -0
- guidellm/benchmark/outputs/console.py +633 -0
- guidellm/benchmark/outputs/csv.py +721 -0
- guidellm/benchmark/outputs/html.py +473 -0
- guidellm/benchmark/outputs/output.py +169 -0
- guidellm/benchmark/outputs/serialized.py +69 -0
- guidellm/benchmark/profiles.py +718 -0
- guidellm/benchmark/progress.py +553 -556
- guidellm/benchmark/scenarios/__init__.py +40 -0
- guidellm/benchmark/scenarios/chat.json +6 -0
- guidellm/benchmark/scenarios/rag.json +6 -0
- guidellm/benchmark/schemas/__init__.py +66 -0
- guidellm/benchmark/schemas/base.py +402 -0
- guidellm/benchmark/schemas/generative/__init__.py +55 -0
- guidellm/benchmark/schemas/generative/accumulator.py +841 -0
- guidellm/benchmark/schemas/generative/benchmark.py +163 -0
- guidellm/benchmark/schemas/generative/entrypoints.py +381 -0
- guidellm/benchmark/schemas/generative/metrics.py +927 -0
- guidellm/benchmark/schemas/generative/report.py +158 -0
- guidellm/data/__init__.py +34 -4
- guidellm/data/builders.py +541 -0
- guidellm/data/collators.py +16 -0
- guidellm/data/config.py +120 -0
- guidellm/data/deserializers/__init__.py +49 -0
- guidellm/data/deserializers/deserializer.py +141 -0
- guidellm/data/deserializers/file.py +223 -0
- guidellm/data/deserializers/huggingface.py +94 -0
- guidellm/data/deserializers/memory.py +194 -0
- guidellm/data/deserializers/synthetic.py +246 -0
- guidellm/data/entrypoints.py +52 -0
- guidellm/data/loaders.py +190 -0
- guidellm/data/preprocessors/__init__.py +27 -0
- guidellm/data/preprocessors/formatters.py +410 -0
- guidellm/data/preprocessors/mappers.py +196 -0
- guidellm/data/preprocessors/preprocessor.py +30 -0
- guidellm/data/processor.py +29 -0
- guidellm/data/schemas.py +175 -0
- guidellm/data/utils/__init__.py +6 -0
- guidellm/data/utils/dataset.py +94 -0
- guidellm/extras/__init__.py +4 -0
- guidellm/extras/audio.py +220 -0
- guidellm/extras/vision.py +242 -0
- guidellm/logger.py +2 -2
- guidellm/mock_server/__init__.py +8 -0
- guidellm/mock_server/config.py +84 -0
- guidellm/mock_server/handlers/__init__.py +17 -0
- guidellm/mock_server/handlers/chat_completions.py +280 -0
- guidellm/mock_server/handlers/completions.py +280 -0
- guidellm/mock_server/handlers/tokenizer.py +142 -0
- guidellm/mock_server/models.py +510 -0
- guidellm/mock_server/server.py +238 -0
- guidellm/mock_server/utils.py +302 -0
- guidellm/scheduler/__init__.py +69 -26
- guidellm/scheduler/constraints/__init__.py +49 -0
- guidellm/scheduler/constraints/constraint.py +325 -0
- guidellm/scheduler/constraints/error.py +411 -0
- guidellm/scheduler/constraints/factory.py +182 -0
- guidellm/scheduler/constraints/request.py +312 -0
- guidellm/scheduler/constraints/saturation.py +722 -0
- guidellm/scheduler/environments.py +252 -0
- guidellm/scheduler/scheduler.py +137 -368
- guidellm/scheduler/schemas.py +358 -0
- guidellm/scheduler/strategies.py +617 -0
- guidellm/scheduler/worker.py +413 -419
- guidellm/scheduler/worker_group.py +712 -0
- guidellm/schemas/__init__.py +65 -0
- guidellm/schemas/base.py +417 -0
- guidellm/schemas/info.py +188 -0
- guidellm/schemas/request.py +235 -0
- guidellm/schemas/request_stats.py +349 -0
- guidellm/schemas/response.py +124 -0
- guidellm/schemas/statistics.py +1018 -0
- guidellm/{config.py → settings.py} +31 -24
- guidellm/utils/__init__.py +71 -8
- guidellm/utils/auto_importer.py +98 -0
- guidellm/utils/cli.py +132 -5
- guidellm/utils/console.py +566 -0
- guidellm/utils/encoding.py +778 -0
- guidellm/utils/functions.py +159 -0
- guidellm/utils/hf_datasets.py +1 -2
- guidellm/utils/hf_transformers.py +4 -4
- guidellm/utils/imports.py +9 -0
- guidellm/utils/messaging.py +1118 -0
- guidellm/utils/mixins.py +115 -0
- guidellm/utils/random.py +3 -4
- guidellm/utils/registry.py +220 -0
- guidellm/utils/singleton.py +133 -0
- guidellm/utils/synchronous.py +159 -0
- guidellm/utils/text.py +163 -50
- guidellm/utils/typing.py +41 -0
- guidellm/version.py +2 -2
- guidellm-0.6.0a5.dist-info/METADATA +364 -0
- guidellm-0.6.0a5.dist-info/RECORD +109 -0
- guidellm/backend/__init__.py +0 -23
- guidellm/backend/backend.py +0 -259
- guidellm/backend/openai.py +0 -708
- guidellm/backend/response.py +0 -136
- guidellm/benchmark/aggregator.py +0 -760
- guidellm/benchmark/benchmark.py +0 -837
- guidellm/benchmark/output.py +0 -997
- guidellm/benchmark/profile.py +0 -409
- guidellm/benchmark/scenario.py +0 -104
- guidellm/data/prideandprejudice.txt.gz +0 -0
- guidellm/dataset/__init__.py +0 -22
- guidellm/dataset/creator.py +0 -213
- guidellm/dataset/entrypoints.py +0 -42
- guidellm/dataset/file.py +0 -92
- guidellm/dataset/hf_datasets.py +0 -62
- guidellm/dataset/in_memory.py +0 -132
- guidellm/dataset/synthetic.py +0 -287
- guidellm/objects/__init__.py +0 -18
- guidellm/objects/pydantic.py +0 -89
- guidellm/objects/statistics.py +0 -953
- guidellm/preprocess/__init__.py +0 -3
- guidellm/preprocess/dataset.py +0 -374
- guidellm/presentation/__init__.py +0 -28
- guidellm/presentation/builder.py +0 -27
- guidellm/presentation/data_models.py +0 -232
- guidellm/presentation/injector.py +0 -66
- guidellm/request/__init__.py +0 -18
- guidellm/request/loader.py +0 -284
- guidellm/request/request.py +0 -79
- guidellm/request/types.py +0 -10
- guidellm/scheduler/queues.py +0 -25
- guidellm/scheduler/result.py +0 -155
- guidellm/scheduler/strategy.py +0 -495
- guidellm-0.3.1.dist-info/METADATA +0 -329
- guidellm-0.3.1.dist-info/RECORD +0 -62
- {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/WHEEL +0 -0
- {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/entry_points.txt +0 -0
- {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/licenses/LICENSE +0 -0
- {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/top_level.txt +0 -0
guidellm/benchmark/entrypoints.py

@@ -1,165 +1,598 @@
-
+"""
+Primary interface for executing and re-importing generative text benchmarks.
+
+This module orchestrates comprehensive benchmarking workflows by coordinating backend
+initialization, data loading, profile configuration, and output generation. It provides
+two main entry points: `benchmark_generative_text` for executing new benchmarks and
+`reimport_benchmarks_report` for re-exporting existing results. The resolution functions
+convert user-provided arguments into fully configured components, handling backend
+validation, data preprocessing, profile constraints, and output format specifications.
+"""
+
+from __future__ import annotations
+
+from collections.abc import Callable, Mapping, MutableMapping
 from pathlib import Path
-from typing import Any, Literal
+from typing import Any, Literal
 
-from
-from transformers import
-
-)
+from torch.utils.data import Sampler
+from transformers import PreTrainedTokenizerBase
+from typing_extensions import TypeAliasType
 
-from guidellm.
-from guidellm.benchmark.benchmarker import
-from guidellm.benchmark.
-
+from guidellm.backends import Backend, BackendType
+from guidellm.benchmark.benchmarker import Benchmarker
+from guidellm.benchmark.outputs import (
+    GenerativeBenchmarkerConsole,
+    GenerativeBenchmarkerOutput,
+)
+from guidellm.benchmark.profiles import Profile, ProfileType
+from guidellm.benchmark.progress import GenerativeConsoleBenchmarkerProgress
+from guidellm.benchmark.schemas import (
+    BenchmarkGenerativeTextArgs,
+    GenerativeBenchmark,
+    GenerativeBenchmarkAccumulator,
     GenerativeBenchmarksReport,
 )
-from guidellm.benchmark.
-from guidellm.
-
-
-
+from guidellm.benchmark.schemas.base import TransientPhaseConfig
+from guidellm.data import (
+    DataLoader,
+    DatasetPreprocessor,
+    GenerativeRequestCollator,
+    PreprocessorRegistry,
+    ProcessorFactory,
+    RequestFormatter,
+)
+from guidellm.data.preprocessors import GenerativeColumnMapper
+from guidellm.scheduler import (
+    ConstraintInitializer,
+    NonDistributedEnvironment,
+    StrategyType,
+)
+from guidellm.schemas import GenerationRequest, GenerationResponse
+from guidellm.settings import settings
+from guidellm.utils import Console, InfoMixin
+
+__all__ = [
+    "benchmark_generative_text",
+    "reimport_benchmarks_report",
+]
+
+
+# Type Aliases
+
+OutputFormatT = TypeAliasType(
+    "OutputFormatT",
+    tuple[str, ...]
+    | list[str]
+    | Mapping[str, str | dict[str, Any] | GenerativeBenchmarkerOutput]
+    | None,
+)
+"""Output format specification as strings, mappings, or configured output instances"""
+
+ProcessorInputT = TypeAliasType("ProcessorInputT", str | Path | PreTrainedTokenizerBase)
+"""Processor input as model identifier, path to tokenizer, or tokenizer instance"""
+
+
+# Helper Functions
+
+
+async def resolve_backend(
+    backend: BackendType | Backend,
+    target: str,
+    model: str | None,
+    console: Console | None = None,
+    **backend_kwargs: dict[str, Any],
+) -> tuple[Backend, str]:
+    """
+    Initialize and validate a backend instance for benchmarking execution.
+
+    Handles backend creation from type identifiers or pre-configured instances,
+    performs startup validation, and resolves the default model if not specified.
+    The backend is shut down after validation to ensure clean state for subsequent
+    benchmark execution.
+
+    :param backend: Backend type identifier or pre-configured Backend instance
+    :param target: Target endpoint URL or connection string for the backend
+    :param model: Model identifier to use with the backend, or None to use default
+    :param console: Console instance for progress reporting, or None
+    :param backend_kwargs: Additional keyword arguments passed to backend initialization
+    :return: Tuple of initialized Backend instance and resolved model identifier
+    """
+    console_step = (
+        console.print_update_step(title=f"Initializing backend {backend}")
+        if console
+        else None
+    )
+    backend_instance = (
+        Backend.create(backend, target=target, model=model, **(backend_kwargs or {}))
+        if not isinstance(backend, Backend)
+        else backend
+    )
+
+    if console_step:
+        console_step.update(
+            f"{backend_instance.__class__.__name__} backend initialized"
+        )
+
+    await backend_instance.process_startup()
+    await backend_instance.validate()
+
+    if model is None:
+        if console_step:
+            console_step.update(
+                title="Resolving default model from backend.default_model",
+                status_level="info",
+            )
+        model = await backend_instance.default_model()
 
+    await backend_instance.process_shutdown()
 
-
+    if console_step:
+        console_step.finish(
+            title=(
+                f"{backend_instance.__class__.__name__} backend validated "
+                f"with model {model}"
+            ),
+            details=backend_instance.info,
+            status_level="success",
+        )
+
+    return backend_instance, model
+
+
+async def resolve_processor(
+    processor: ProcessorInputT | None,
+    model: str | None,
+    console: Console | None = None,
+) -> ProcessorInputT | None:
     """
-
+    Resolve the tokenization processor, defaulting to model if not provided.
+
+    :param processor: Processor identifier, path, tokenizer instance, or None
+    :param model: Model identifier to use as fallback processor
+    :param console: Console instance for progress reporting, or None
+    :return: Resolved processor or None if neither processor nor model provided
     """
+    console_step = (
+        console.print_update_step(title=f"Resolving processor {processor}")
+        if console
+        else None
+    )
 
-    if
-
+    if processor is not None:
+        if console_step:
+            console_step.finish(
+                title="Processor resolved",
+                details=f"Using processor '{processor}'",
+                status_level="success",
+            )
     else:
-
+        processor = model
+        if console_step:
+            console_step.finish(
+                title="Processor resolved",
+                details=f"Using model '{processor}' as processor",
+                status_level="success",
+            )
 
+    return processor
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    ],
-
-    data_sampler: Optional[Literal["random"]],
-    rate_type: Union[StrategyType, ProfileType],
-    rate: Optional[Union[float, list[float]]],
-    max_seconds: Optional[float],
-    max_requests: Optional[int],
-    warmup_percent: Optional[float],
-    cooldown_percent: Optional[float],
-    output_path: Optional[Union[str, Path]],
-    output_extras: Optional[dict[str, Any]],
-    output_sampling: Optional[int],
+
+async def resolve_request_loader(
+    data: list[Any],
+    model: str,
+    data_args: list[dict[str, Any]] | None,
+    data_samples: int,
+    processor: ProcessorInputT | None,
+    processor_args: dict[str, Any] | None,
+    data_column_mapper: (
+        DatasetPreprocessor
+        | dict[str, str | list[str]]
+        | Literal["generative_column_mapper"]
+    ),
+    data_request_formatter: (RequestFormatter | dict[str, str] | str),
+    data_collator: Callable | Literal["generative"] | None,
+    data_sampler: Sampler[int] | Literal["shuffle"] | None,
+    data_num_workers: int | None,
     random_seed: int,
-
-
-
-
-
-
-
-
-
-
-
-
+    console: Console | None = None,
+    **dataloader_kwargs: dict[str, Any] | None,
+) -> DataLoader[GenerationRequest]:
+    """
+    Construct a DataLoader for GenerationRequest objects from raw data inputs.
+
+    Initializes and configures the data pipeline including column mapping, request
+    formatting, collation, and sampling. Resolves string-based preprocessor identifiers
+    from the PreprocessorRegistry and creates appropriate instances with provided
+    configurations.
+
+    :param data: List of data sources to load requests from
+    :param model: Model identifier for request formatting
+    :param data_args: Arguments for each data source in the data list
+    :param data_samples: Number of samples to draw from the dataset
+    :param processor: Processor for tokenization operations
+    :param processor_args: Arguments for processor initialization
+    :param data_column_mapper: Preprocessor or mapping for standardizing column names
+    :param data_request_formatter: Preprocessor or config for formatting requests
+    :param data_collator: Collation function or type for batching requests
+    :param data_sampler: Sampler instance or type for data sampling
+    :param data_num_workers: Number of worker processes for data loading
+    :param random_seed: Seed for reproducible random operations
+    :param console: Console instance for progress reporting, or None
+    :param dataloader_kwargs: Additional arguments passed to DataLoader initialization
+    :return: Configured DataLoader instance for GenerationRequest objects
+    :raises ValueError: If request formatter type is not registered in
+        PreprocessorRegistry
+    :raises TypeError: If registered request formatter is not a RequestFormatter
+        subclass
+    """
+    console_step = (
+        console.print_update_step(title=f"Initializing request loader from {data}")
+        if console
+        else None
     )
 
-
-
+    data_column_mapper_instance: DatasetPreprocessor
+    if isinstance(data_column_mapper, DatasetPreprocessor):
+        data_column_mapper_instance = data_column_mapper
+    else:
+        column_mappings = (
+            data_column_mapper if isinstance(data_column_mapper, dict) else None
+        )
+        data_column_mapper_instance = GenerativeColumnMapper(
+            column_mappings=column_mappings  # type: ignore[arg-type]
+        )
+
+    data_request_formatter_instance: RequestFormatter
+    if isinstance(data_request_formatter, RequestFormatter):
+        data_request_formatter_instance = data_request_formatter
+    else:
+        if isinstance(data_request_formatter, str):
+            request_type = data_request_formatter
+            formatter_kwargs: dict[str, Any] = {}
+        else:
+            # Extract request_type from formatter dictionary
+            formatter_dict = dict(data_request_formatter)
+            request_type = formatter_dict.pop("request_type", settings.preferred_route)
+            formatter_kwargs = formatter_dict
 
-
-
+        if (
+            formatter_class := PreprocessorRegistry.get_registered_object(request_type)
+        ) is None:
+            raise ValueError(
+                f"Request formatter '{request_type}' is not registered in the "
+                f"PreprocessorRegistry."
+            )
+        if not issubclass(formatter_class, RequestFormatter):
+            raise TypeError(
+                f"Request formatter '{request_type}' is not a subclass of "
+                f"RequestFormatter."
+            )
+
+        data_request_formatter_instance = formatter_class(
+            model=model,
+            **formatter_kwargs,
+        )
+
+    # Cast to proper types for the DataLoader preprocessors list
+    preprocessors_list: list[DatasetPreprocessor] = [
+        data_column_mapper_instance,
+        data_request_formatter_instance,
+    ]
+
+    request_loader: DataLoader[GenerationRequest] = DataLoader(
         data=data,
         data_args=data_args,
-
-
-
-
-        "finite"  # assume a finite dataset is our limit
-        if max_requests is None and max_seconds is None
-        else "infinite"  # default to infinite so we don't run out of data
+        data_samples=data_samples,
+        processor_factory=ProcessorFactory(
+            processor=processor if processor is not None else model,
+            processor_args=processor_args,
        ),
+        preprocessors=preprocessors_list,
+        collator=(
+            data_collator if callable(data_collator) else GenerativeRequestCollator()
+        ),
+        sampler=data_sampler,
+        num_workers=data_num_workers,
        random_seed=random_seed,
+        **(dataloader_kwargs or {}),
    )
-
-
-
-
-
+
+    if console_step:
+        console_step.finish(
+            title=(
+                f"Request loader initialized with "
+                f"{data_samples if data_samples > 0 else 'inf'} "
+                "unique requests"
+            ),
+            details=InfoMixin.extract_from_obj(request_loader),
+            status_level="success",
+        )
+
+    return request_loader
+
+
+async def resolve_profile(
+    profile: StrategyType | ProfileType | Profile,
+    rate: list[float] | None,
+    random_seed: int,
+    rampup: float,
+    constraints: MutableMapping[str, ConstraintInitializer | Any],
+    max_seconds: int | float | None,
+    max_requests: int | None,
+    max_errors: int | None,
+    max_error_rate: float | None,
+    max_global_error_rate: float | None,
+    over_saturation: dict[str, Any] | None = None,
+    console: Console | None = None,
+) -> Profile:
+    """
+    Resolve and configure a benchmark profile with rate and constraint settings.
+
+    Constructs a Profile instance from type identifiers or validates pre-configured
+    profiles. Constraint parameters are merged into the constraints dictionary before
+    profile creation.
+
+    :param profile: Profile type identifier or pre-configured Profile instance
+    :param rate: Request rate(s) for the benchmark execution
+    :param random_seed: Seed for reproducible random operations
+    :param warmup: Warm-up phase configuration for the benchmark execution
+        (used for ramp-up duration calculation)
+    :param constraints: Dictionary of constraint initializers for benchmark limits
+    :param max_seconds: Maximum duration in seconds for the benchmark
+    :param max_requests: Maximum number of requests to process
+    :param max_errors: Maximum number of errors before stopping
+    :param max_error_rate: Maximum error rate threshold before stopping
+    :param max_global_error_rate: Maximum global error rate threshold before stopping
+    :param over_saturation: Over-saturation detection configuration (dict)
+    :param console: Console instance for progress reporting, or None
+    :return: Configured Profile instance ready for benchmarking
+    :raises ValueError: If constraints are provided with a pre-configured Profile
+    """
+    console_step = (
+        console.print_update_step(title=f"Resolving profile {profile}")
+        if console
+        else None
    )
 
-
-
-
-
-
-
+    for key, val in {
+        "max_seconds": max_seconds,
+        "max_requests": max_requests,
+        "max_errors": max_errors,
+        "max_error_rate": max_error_rate,
+        "max_global_error_rate": max_global_error_rate,
+        "over_saturation": over_saturation,
+    }.items():
+        if val is not None:
+            constraints[key] = val
+
+    if not isinstance(profile, Profile):
+        profile = Profile.create(
+            rate_type=profile,
+            rate=rate,
+            random_seed=random_seed,
+            rampup_duration=rampup,
+            constraints={**constraints},
+        )
+    elif constraints:
+        raise ValueError(
+            "Constraints must be empty when providing a Profile instance. "
+            f"Provided constraints: {constraints} ; provided profile: {profile}"
+        )
+    elif rampup > 0.0:
+        raise ValueError(
+            "Ramp-up duration must not be set when providing a Profile instance. "
+            f"Provided rampup: {rampup} ; provided profile: {profile}"
+        )
+
+    if console_step:
+        console_step.finish(
+            title=f"{profile.__class__.__name__} profile resolved",
+            details=InfoMixin.extract_from_obj(profile),
+            status_level="success",
+        )
+
+    return profile
+
+
+async def resolve_output_formats(
+    outputs: list[str] | tuple[str],
+    output_dir: str | Path | None,
+    console: Console | None = None,
+) -> dict[str, GenerativeBenchmarkerOutput]:
+    """
+    Resolve output format specifications into configured output handler instances.
+
+    :param outputs: Specification of desired output files/types
+    :param output_dir: Base path for output file generation, or None for default
+    :param console: Console instance for progress reporting, or None
+    :return: Dictionary mapping format names to configured output handler instances
+    """
+    console_step = (
+        console.print_update_step(title="Resolving output formats") if console else None
+    )
+
+    resolved = GenerativeBenchmarkerOutput.resolve(
+        outputs=outputs, output_dir=output_dir
+    )
+
+    if console_step:
+        console_step.finish(
+            title="Output formats resolved",
+            details={key: str(val) for key, val in resolved.items()},
+            status_level="success",
+        )
+
+    return resolved
+
+
+# Main Entrypoints Functions
+
+
+async def benchmark_generative_text(
+    args: BenchmarkGenerativeTextArgs,
+    progress: GenerativeConsoleBenchmarkerProgress | None = None,
+    console: Console | None = None,
+    **constraints: str | ConstraintInitializer | Any,
+) -> tuple[GenerativeBenchmarksReport, dict[str, Any]]:
+    """
+    Execute a comprehensive generative text benchmarking workflow.
+
+    Orchestrates the full benchmarking pipeline by resolving all components from
+    provided arguments, executing benchmark runs across configured profiles, and
+    finalizing results in specified output formats. Components include backend
+    initialization, data loading, profile configuration, and output generation.
+
+    :param args: Configuration arguments for the benchmark execution
+    :param progress: Progress tracker for benchmark execution, or None for no tracking
+    :param console: Console instance for status reporting, or None for silent operation
+    :param constraints: Additional constraint initializers for benchmark limits
+    :return: Tuple of GenerativeBenchmarksReport and dictionary of output format
+        results
+    """
+    backend, model = await resolve_backend(
+        backend=args.backend,
+        target=args.target,
+        model=args.model,
+        console=console,
+        **(args.backend_kwargs or {}),
+    )
+    processor = await resolve_processor(
+        processor=args.processor, model=model, console=console
+    )
+    request_loader = await resolve_request_loader(
+        data=args.data,
+        model=model,
+        data_args=args.data_args,
+        data_samples=args.data_samples,
        processor=processor,
-        processor_args=processor_args,
+        processor_args=args.processor_args,
+        data_column_mapper=args.data_column_mapper,
+        data_request_formatter=args.data_request_formatter,
+        data_collator=args.data_collator,
+        data_sampler=args.data_sampler,
+        data_num_workers=args.data_num_workers,
+        random_seed=args.random_seed,
+        console=console,
+        **(args.dataloader_kwargs or {}),
    )
-
-
-
+
+    warmup = TransientPhaseConfig.create_from_value(args.warmup)
+    cooldown = TransientPhaseConfig.create_from_value(args.cooldown)
+    if console:
+        console.print_update(
+            title="Resolved transient phase configurations",
+            details="\n".join(
+                [
+                    f"Warmup: {warmup}",
+                    f"Cooldown: {cooldown}",
+                    f"Rampup (Throughput/Concurrent): {args.rampup}",
+                ]
+            ),
+            status="success",
        )
-
-
+
+    profile = await resolve_profile(
+        profile=args.profile,
+        rate=args.rate,
+        random_seed=args.random_seed,
+        rampup=args.rampup,
+        constraints=constraints,
+        max_seconds=args.max_seconds,
+        max_requests=args.max_requests,
+        max_errors=args.max_errors,
+        max_error_rate=args.max_error_rate,
+        max_global_error_rate=args.max_global_error_rate,
+        over_saturation=args.over_saturation,
+        console=console,
    )
-
+    output_formats = await resolve_output_formats(
+        outputs=args.outputs, output_dir=args.output_dir, console=console
+    )
+
+    report = GenerativeBenchmarksReport(args=args)
+    if console:
+        console.print_update(
+            title="Setup complete, starting benchmarks...", status="success"
+        )
+        console.print("\n\n")
 
-
+    benchmarker: Benchmarker[
+        GenerativeBenchmark, GenerationRequest, GenerationResponse
+    ] = Benchmarker()
+    async for benchmark in benchmarker.run(
+        accumulator_class=GenerativeBenchmarkAccumulator,
+        benchmark_class=GenerativeBenchmark,
+        requests=request_loader,
+        backend=backend,
        profile=profile,
-
-
-
-
+        environment=NonDistributedEnvironment(),
+        progress=progress,
+        sample_requests=args.sample_requests,
+        warmup=warmup,
+        cooldown=cooldown,
+        prefer_response_metrics=args.prefer_response_metrics,
    ):
-        if
-
-
-        if result.type_ == "benchmark_compiled":
-            if result.current_benchmark is None:
-                raise ValueError("Current benchmark is None")
-            report.benchmarks.append(
-                result.current_benchmark.set_sample_size(output_sampling)
-            )
-
-    if output_console:
-        console.benchmarks = report.benchmarks
-        console.print_full_report()
+        if benchmark:
+            report.benchmarks.append(benchmark)
 
-
-
-
-
-    else:
-        saved_path = None
+    output_format_results = {}
+    for key, output in output_formats.items():
+        output_result = await output.finalize(report)
+        output_format_results[key] = output_result
 
-    console
+    if console:
+        await GenerativeBenchmarkerConsole(console=console).finalize(report)
+        console.print("\n\n")
+        console.print_update(
+            title=(
+                "Benchmarking complete, generated "
+                f"{len(report.benchmarks)} benchmark(s)"
+            ),
+            status="success",
+        )
+        for key, value in output_format_results.items():
+            console.print_update(title=f" {key:<8}: {value}", status="debug")
 
-    return report,
+    return report, output_format_results
 
 
-def reimport_benchmarks_report(
+async def reimport_benchmarks_report(
+    file: Path,
+    output_path: Path | None,
+    output_formats: OutputFormatT = ("console", "json", "html", "csv"),
+) -> tuple[GenerativeBenchmarksReport, dict[str, Any]]:
    """
-
-
-
+    Load and re-export an existing benchmarks report in specified output formats.
+
+    :param file: Path to the existing benchmark report file to load
+    :param output_path: Base path for output file generation, or None for default
+    :param output_formats: Specification of desired output formats for the report
+    :return: Tuple of loaded GenerativeBenchmarksReport and dictionary of output
+        results
    """
-    console =
-
-    console.
-
-
-
-
-
-
+    console = Console()
+
+    with console.print_update_step(
+        title=f"Loading benchmarks from {file}..."
+    ) as console_step:
+        report = GenerativeBenchmarksReport.load_file(file)
+        console_step.finish(
+            "Import of old benchmarks complete;"
+            f" loaded {len(report.benchmarks)} benchmark(s)"
+        )
+
+    resolved_output_formats = await resolve_output_formats(
+        output_formats,  # type: ignore[arg-type]
+        output_path,
+        console=console,
+    )
+    output_format_results = {}
+    for key, output in resolved_output_formats.items():
+        output_result = await output.finalize(report)
+        output_format_results[key] = output_result
+
+    for key, value in output_format_results.items():
+        console.print_update(title=f" {key:<8}: {value}", status="debug")
+
+    return report, output_format_results
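
For orientation, a minimal usage sketch of the two reworked entrypoints, based only on the signatures visible in the diff above. Both functions are now async, and `benchmark_generative_text` takes a single `BenchmarkGenerativeTextArgs` schema instead of the old flat keyword arguments; the concrete values shown below (`target`, `data`, `profile`, `rate`, file paths) are illustrative assumptions, not values taken from the package.

```python
# Minimal sketch of the 0.6.0a5 entrypoints; signatures come from
# entrypoints.py above, argument values are assumptions for illustration.
import asyncio
from pathlib import Path

from guidellm.benchmark.entrypoints import (
    benchmark_generative_text,
    reimport_benchmarks_report,
)
from guidellm.benchmark.schemas import BenchmarkGenerativeTextArgs


async def main() -> None:
    args = BenchmarkGenerativeTextArgs(
        target="http://localhost:8000",  # assumed OpenAI-compatible endpoint
        data=["prompts.jsonl"],          # assumed local dataset source
        profile="constant",              # assumed profile/strategy identifier
        rate=[10.0],
        max_seconds=60,
    )
    report, output_results = await benchmark_generative_text(args)
    print(f"Generated {len(report.benchmarks)} benchmark(s): {output_results}")

    # Re-export a previously saved report; this entrypoint is also async
    # in 0.6.0a5 (it was a plain def in 0.3.1).
    await reimport_benchmarks_report(
        file=Path("benchmarks.json"),  # assumed path to a saved report
        output_path=None,
        output_formats=("console", "json"),
    )


asyncio.run(main())
```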
|