PyPI - guidellm - Versions diffs - 0.3.1__py3-none-any.whl → 0.6.0a5__py3-none-any.whl - Mend

guidellm 0.3.1py3-none-any.whl → 0.6.0a5py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (141) hide show

guidellm/__init__.py +5 -2
guidellm/__main__.py +524 -255
guidellm/backends/__init__.py +33 -0
guidellm/backends/backend.py +109 -0
guidellm/backends/openai.py +340 -0
guidellm/backends/response_handlers.py +428 -0
guidellm/benchmark/__init__.py +69 -39
guidellm/benchmark/benchmarker.py +160 -316
guidellm/benchmark/entrypoints.py +560 -127
guidellm/benchmark/outputs/__init__.py +24 -0
guidellm/benchmark/outputs/console.py +633 -0
guidellm/benchmark/outputs/csv.py +721 -0
guidellm/benchmark/outputs/html.py +473 -0
guidellm/benchmark/outputs/output.py +169 -0
guidellm/benchmark/outputs/serialized.py +69 -0
guidellm/benchmark/profiles.py +718 -0
guidellm/benchmark/progress.py +553 -556
guidellm/benchmark/scenarios/__init__.py +40 -0
guidellm/benchmark/scenarios/chat.json +6 -0
guidellm/benchmark/scenarios/rag.json +6 -0
guidellm/benchmark/schemas/__init__.py +66 -0
guidellm/benchmark/schemas/base.py +402 -0
guidellm/benchmark/schemas/generative/__init__.py +55 -0
guidellm/benchmark/schemas/generative/accumulator.py +841 -0
guidellm/benchmark/schemas/generative/benchmark.py +163 -0
guidellm/benchmark/schemas/generative/entrypoints.py +381 -0
guidellm/benchmark/schemas/generative/metrics.py +927 -0
guidellm/benchmark/schemas/generative/report.py +158 -0
guidellm/data/__init__.py +34 -4
guidellm/data/builders.py +541 -0
guidellm/data/collators.py +16 -0
guidellm/data/config.py +120 -0
guidellm/data/deserializers/__init__.py +49 -0
guidellm/data/deserializers/deserializer.py +141 -0
guidellm/data/deserializers/file.py +223 -0
guidellm/data/deserializers/huggingface.py +94 -0
guidellm/data/deserializers/memory.py +194 -0
guidellm/data/deserializers/synthetic.py +246 -0
guidellm/data/entrypoints.py +52 -0
guidellm/data/loaders.py +190 -0
guidellm/data/preprocessors/__init__.py +27 -0
guidellm/data/preprocessors/formatters.py +410 -0
guidellm/data/preprocessors/mappers.py +196 -0
guidellm/data/preprocessors/preprocessor.py +30 -0
guidellm/data/processor.py +29 -0
guidellm/data/schemas.py +175 -0
guidellm/data/utils/__init__.py +6 -0
guidellm/data/utils/dataset.py +94 -0
guidellm/extras/__init__.py +4 -0
guidellm/extras/audio.py +220 -0
guidellm/extras/vision.py +242 -0
guidellm/logger.py +2 -2
guidellm/mock_server/__init__.py +8 -0
guidellm/mock_server/config.py +84 -0
guidellm/mock_server/handlers/__init__.py +17 -0
guidellm/mock_server/handlers/chat_completions.py +280 -0
guidellm/mock_server/handlers/completions.py +280 -0
guidellm/mock_server/handlers/tokenizer.py +142 -0
guidellm/mock_server/models.py +510 -0
guidellm/mock_server/server.py +238 -0
guidellm/mock_server/utils.py +302 -0
guidellm/scheduler/__init__.py +69 -26
guidellm/scheduler/constraints/__init__.py +49 -0
guidellm/scheduler/constraints/constraint.py +325 -0
guidellm/scheduler/constraints/error.py +411 -0
guidellm/scheduler/constraints/factory.py +182 -0
guidellm/scheduler/constraints/request.py +312 -0
guidellm/scheduler/constraints/saturation.py +722 -0
guidellm/scheduler/environments.py +252 -0
guidellm/scheduler/scheduler.py +137 -368
guidellm/scheduler/schemas.py +358 -0
guidellm/scheduler/strategies.py +617 -0
guidellm/scheduler/worker.py +413 -419
guidellm/scheduler/worker_group.py +712 -0
guidellm/schemas/__init__.py +65 -0
guidellm/schemas/base.py +417 -0
guidellm/schemas/info.py +188 -0
guidellm/schemas/request.py +235 -0
guidellm/schemas/request_stats.py +349 -0
guidellm/schemas/response.py +124 -0
guidellm/schemas/statistics.py +1018 -0
guidellm/{config.py → settings.py} +31 -24
guidellm/utils/__init__.py +71 -8
guidellm/utils/auto_importer.py +98 -0
guidellm/utils/cli.py +132 -5
guidellm/utils/console.py +566 -0
guidellm/utils/encoding.py +778 -0
guidellm/utils/functions.py +159 -0
guidellm/utils/hf_datasets.py +1 -2
guidellm/utils/hf_transformers.py +4 -4
guidellm/utils/imports.py +9 -0
guidellm/utils/messaging.py +1118 -0
guidellm/utils/mixins.py +115 -0
guidellm/utils/random.py +3 -4
guidellm/utils/registry.py +220 -0
guidellm/utils/singleton.py +133 -0
guidellm/utils/synchronous.py +159 -0
guidellm/utils/text.py +163 -50
guidellm/utils/typing.py +41 -0
guidellm/version.py +2 -2
guidellm-0.6.0a5.dist-info/METADATA +364 -0
guidellm-0.6.0a5.dist-info/RECORD +109 -0
guidellm/backend/__init__.py +0 -23
guidellm/backend/backend.py +0 -259
guidellm/backend/openai.py +0 -708
guidellm/backend/response.py +0 -136
guidellm/benchmark/aggregator.py +0 -760
guidellm/benchmark/benchmark.py +0 -837
guidellm/benchmark/output.py +0 -997
guidellm/benchmark/profile.py +0 -409
guidellm/benchmark/scenario.py +0 -104
guidellm/data/prideandprejudice.txt.gz +0 -0
guidellm/dataset/__init__.py +0 -22
guidellm/dataset/creator.py +0 -213
guidellm/dataset/entrypoints.py +0 -42
guidellm/dataset/file.py +0 -92
guidellm/dataset/hf_datasets.py +0 -62
guidellm/dataset/in_memory.py +0 -132
guidellm/dataset/synthetic.py +0 -287
guidellm/objects/__init__.py +0 -18
guidellm/objects/pydantic.py +0 -89
guidellm/objects/statistics.py +0 -953
guidellm/preprocess/__init__.py +0 -3
guidellm/preprocess/dataset.py +0 -374
guidellm/presentation/__init__.py +0 -28
guidellm/presentation/builder.py +0 -27
guidellm/presentation/data_models.py +0 -232
guidellm/presentation/injector.py +0 -66
guidellm/request/__init__.py +0 -18
guidellm/request/loader.py +0 -284
guidellm/request/request.py +0 -79
guidellm/request/types.py +0 -10
guidellm/scheduler/queues.py +0 -25
guidellm/scheduler/result.py +0 -155
guidellm/scheduler/strategy.py +0 -495
guidellm-0.3.1.dist-info/METADATA +0 -329
guidellm-0.3.1.dist-info/RECORD +0 -62
{guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/WHEEL +0 -0
{guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/entry_points.txt +0 -0
{guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/licenses/LICENSE +0 -0
{guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/top_level.txt +0 -0

guidellm/data/schemas.py ADDED Viewed

@@ -0,0 +1,175 @@
+from __future__ import annotations
+from typing import Literal
+from pydantic import ConfigDict, Field, model_validator
+from guidellm.schemas import StandardBaseModel
+__all__ = [
+    "DataConfig",
+    "DataNotSupportedError",
+    "GenerativeDatasetColumnType",
+    "SyntheticTextDatasetConfig",
+    "SyntheticTextPrefixBucketConfig",
+]
+# Closed set of column-type identifiers. Each literal names one kind of
+# dataset column: the two token-count columns, the prefix column, and the
+# text / image / video / audio columns.
+GenerativeDatasetColumnType = Literal[
+    "prompt_tokens_count_column",
+    "output_tokens_count_column",
+    "prefix_column",
+    "text_column",
+    "image_column",
+    "video_column",
+    "audio_column",
+]
+class DataNotSupportedError(Exception):
+    """
+    Exception raised when the data format is not supported by deserializer or config.
+    """
+# Marker base class: it declares no fields of its own and exists so the
+# data-package configs in this module share one parent type.
+# NOTE(review): if StandardBaseModel is a pydantic model, the docstring below
+# is presumably surfaced as the JSON-schema description — confirm before
+# rewording it.
+class DataConfig(StandardBaseModel):
+    """
+    A generic parent class for various configs for the data package
+    that can be passed in as key-value pairs or JSON.
+    """
+class PreprocessDatasetConfig(DataConfig):
+    prompt_tokens: int = Field(
+        description="The average number of text tokens retained or added to prompts.",
+        gt=0,
+    )
+    prompt_tokens_stdev: int | None = Field(
+        description="The standard deviation of the number of tokens retained in or "
+                    "added to prompts.",
+        gt=0,
+        default=None,
+    )
+    prompt_tokens_min: int | None = Field(
+        description="The minimum number of text tokens retained or added to prompts.",
+        gt=0,
+        default=None,
+    )
+    prompt_tokens_max: int | None = Field(
+        description="The maximum number of text tokens retained or added to prompts.",
+        gt=0,
+        default=None,
+    )
+    output_tokens: int = Field(
+        description="The average number of text tokens retained or added to outputs.",
+        gt=0,
+    )
+    output_tokens_stdev: int | None = Field(
+        description="The standard deviation of the number of tokens retained or "
+                    "added to outputs.",
+        gt=0,
+        default=None,
+    )
+    output_tokens_min: int | None = Field(
+        description="The minimum number of text tokens retained or added to outputs.",
+        gt=0,
+        default=None,
+    )
+    output_tokens_max: int | None = Field(
+        description="The maximum number of text tokens retained or added to outputs.",
+        gt=0,
+        default=None,
+    )
+    prefix_tokens_max: int | None = Field(
+        description="The maximum number of text tokens left in the prefixes.",
+        gt=0,
+        default=None,
+    )
+class SyntheticTextPrefixBucketConfig(StandardBaseModel):
+    bucket_weight: int = Field(
+        description="Weight of this bucket in the overall distribution.",
+        gt=0,
+        default=100,
+    )
+    prefix_count: int = Field(
+        description="The number of unique prefixes to generate for this bucket.",
+        ge=1,
+        default=1,
+    )
+    prefix_tokens: int = Field(
+        description="The number of prefix tokens per-prompt for this bucket.",
+        ge=0,
+        default=0,
+    )
+class SyntheticTextDatasetConfig(DataConfig):
+    prompt_tokens: int = Field(
+        description="The average number of text tokens generated for prompts.",
+        gt=0,
+    )
+    prompt_tokens_stdev: int | None = Field(
+        description="The standard deviation of the tokens generated for prompts.",
+        gt=0,
+        default=None,
+    )
+    prompt_tokens_min: int | None = Field(
+        description="The minimum number of text tokens generated for prompts.",
+        gt=0,
+        default=None,
+    )
+    prompt_tokens_max: int | None = Field(
+        description="The maximum number of text tokens generated for prompts.",
+        gt=0,
+        default=None,
+    )
+    output_tokens: int = Field(
+        description="The average number of text tokens generated for outputs.",
+        gt=0,
+    )
+    output_tokens_stdev: int | None = Field(
+        description="The standard deviation of the tokens generated for outputs.",
+        gt=0,
+        default=None,
+    )
+    output_tokens_min: int | None = Field(
+        description="The minimum number of text tokens generated for outputs.",
+        gt=0,
+        default=None,
+    )
+    output_tokens_max: int | None = Field(
+        description="The maximum number of text tokens generated for outputs.",
+        gt=0,
+        default=None,
+    )
+    model_config = ConfigDict(
+        extra="allow",
+    )
+    prefix_buckets: list[SyntheticTextPrefixBucketConfig] | None = Field(
+        description="Buckets for the prefix tokens distribution.",
+        default=None,
+    )
+    @model_validator(mode="after")
+    def check_prefix_options(self) -> SyntheticTextDatasetConfig:
+        if self.__pydantic_extra__ is not None:
+            prefix_count = self.__pydantic_extra__.get("prefix_count", None)  # type: ignore[attr-defined]
+            prefix_tokens = self.__pydantic_extra__.get("prefix_tokens", None)  # type: ignore[attr-defined]
+            if prefix_count is not None or prefix_tokens is not None:
+                if self.prefix_buckets:
+                    raise ValueError(
+                        "prefix_buckets is mutually exclusive"
+                        " with prefix_count and prefix_tokens"
+                    )
+                self.prefix_buckets = [
+                    SyntheticTextPrefixBucketConfig(
+                        prefix_count=prefix_count or 1,
+                        prefix_tokens=prefix_tokens or 0,
+                    )
+                ]
+        return self

guidellm/data/utils/__init__.py ADDED Viewed

@@ -0,0 +1,6 @@
+from .dataset import DEFAULT_SPLITS, resolve_dataset_split
+# Public names of this package; both are re-exported from the sibling
+# `.dataset` module imported above.
+__all__ = [
+    "DEFAULT_SPLITS",
+    "resolve_dataset_split",
+]

guidellm/data/utils/dataset.py ADDED Viewed

@@ -0,0 +1,94 @@
+from __future__ import annotations
+from typing import Literal
+from datasets import Dataset, DatasetDict, IterableDataset, IterableDatasetDict
+# Public names of this module: the split-alias table and the resolver that
+# consults it.
+__all__ = ["DEFAULT_SPLITS", "resolve_dataset_split"]
+DEFAULT_SPLITS: dict[Literal["train", "calib", "val", "test"], list[str]] = {
+    "train": [
+        "train",
+        "training",
+        "train_set",
+        "training_set",
+        "train_dataset",
+        "training_dataset",
+        "train_data",
+        "training_data",
+        "pretrain",
+        "pretrain_set",
+        "pretrain_dataset",
+        "pretrain_data",
+        "pretraining",
+    ],
+    "calib": [
+        "calibration",
+        "calib",
+        "cal",
+        "calibration_set",
+        "calib_set",
+        "cal_set",
+        "calibration_dataset",
+        "calib_dataset",
+        "cal_set",
+        "calibration_data",
+        "calib_data",
+        "cal_data",
+    ],
+    "val": [
+        "validation",
+        "val",
+        "valid",
+        "validation_set",
+        "val_set",
+        "validation_dataset",
+        "val_dataset",
+        "validation_data",
+        "val_data",
+        "dev",
+        "dev_set",
+        "dev_dataset",
+        "dev_data",
+    ],
+    "test": [
+        "test",
+        "testing",
+        "test_set",
+        "testing_set",
+        "test_dataset",
+        "testing_dataset",
+        "test_data",
+        "testing_data",
+        "eval",
+        "eval_set",
+        "eval_dataset",
+        "eval_data",
+    ],
+}
+def resolve_dataset_split(
+    dataset: Dataset | IterableDataset | DatasetDict | IterableDatasetDict,
+    split: str | None = None,
+) -> Dataset | IterableDataset:
+    if split is not None and isinstance(dataset, DatasetDict | IterableDatasetDict):
+        if split in dataset:
+            return dataset[split]
+        raise ValueError(f"Requested split '{split}' not found in dataset: {dataset}.")
+    elif split is not None:
+        raise ValueError(
+            f"Requested split '{split}' but dataset has no splits: {dataset}."
+        )
+    if isinstance(dataset, Dataset | IterableDataset):
+        return dataset
+    for _, default_splits in DEFAULT_SPLITS.items():
+        for default_split in default_splits:
+            if default_split in dataset:
+                return dataset[default_split]
+    return dataset[list(dataset.keys())[0]]

guidellm/extras/__init__.py ADDED Viewed

@@ -0,0 +1,4 @@
+"""
+Code that depends on optional dependencies.
+Each submodule should be imported lazily, at the point of use, rather than at package import time.
+"""

guidellm/extras/audio.py ADDED Viewed

@@ -0,0 +1,220 @@
+from __future__ import annotations
+import base64
+from pathlib import Path
+from typing import Any, Literal
+import httpx
+import numpy as np
+import torch
+try:
+    from torchcodec import AudioSamples
+    from torchcodec.decoders import AudioDecoder
+    from torchcodec.encoders import AudioEncoder
+except ImportError as e:
+    raise ImportError("Please install guidellm[audio] to use audio features") from e
+# Public names of this module. The underscore-prefixed helpers and
+# get_file_name are defined in this file but are not listed here.
+__all__ = [
+    "encode_audio",
+    "is_url",
+]
+def is_url(text: Any) -> bool:
+    return isinstance(text, str) and text.startswith(("http://", "https://"))
+def encode_audio(
+    audio: AudioDecoder
+    | bytes
+    | str
+    | Path
+    | np.ndarray
+    | torch.Tensor
+    | dict[str, Any],
+    b64encode: bool = False,
+    sample_rate: int | None = None,
+    file_name: str = "audio.wav",
+    encode_sample_rate: int = 16000,
+    max_duration: float | None = None,
+    mono: bool = True,
+    audio_format: str = "mp3",
+    bitrate: str = "64k",
+) -> dict[
+    Literal[
+        "type",
+        "audio",
+        "format",
+        "mimetype",
+        "audio_samples",
+        "audio_seconds",
+        "audio_bytes",
+        "file_name",
+    ],
+    str | int | float | bytes | None,
+]:
+    """Decode audio (if necessary) and re-encode to specified format."""
+    samples = _decode_audio(audio, sample_rate=sample_rate, max_duration=max_duration)
+    bitrate_val = (
+        int(bitrate.rstrip("k")) * 1000 if bitrate.endswith("k") else int(bitrate)
+    )
+    format_val = audio_format.lower()
+    encoded_audio = _encode_audio(
+        samples=samples,
+        resample_rate=encode_sample_rate,
+        bitrate=bitrate_val,
+        audio_format=format_val,
+        mono=mono,
+    )
+    return {
+        "type": "audio_base64" if b64encode else "audio_file",
+        "audio": (
+            base64.b64encode(encoded_audio).decode("utf-8")
+            if b64encode
+            else encoded_audio
+        ),
+        "file_name": get_file_name(audio)
+        if isinstance(audio, str | Path)
+        else file_name,
+        "format": audio_format,
+        "mimetype": f"audio/{format_val}",
+        "audio_samples": samples.sample_rate,
+        "audio_seconds": samples.duration_seconds,
+        "audio_bytes": len(encoded_audio),
+    }
+def _decode_audio(  # noqa: C901, PLR0912
+    audio: AudioDecoder
+    | bytes
+    | str
+    | Path
+    | np.ndarray
+    | torch.Tensor
+    | dict[str, Any],
+    sample_rate: int | None = None,
+    max_duration: float | None = None,
+) -> AudioSamples:
+    """Decode audio from various input types into AudioSamples."""
+    # If input is a dict, unwrap it into a function call
+    if isinstance(audio, dict):
+        sample_rate = audio.get("sample_rate", audio.get("sampling_rate", sample_rate))
+        if "data" not in audio and "url" not in audio:
+            raise ValueError(
+                f"Audio dict must contain either 'data' or 'url' keys, got {audio}"
+            )
+        audio_data = audio["data"] if "data" in audio else audio.get("url")
+        if audio_data is None:
+            raise ValueError(
+                f"Audio dict must contain either 'data' or 'url' keys, got {audio}"
+            )
+        return _decode_audio(
+            audio=audio_data,
+            sample_rate=sample_rate,
+            max_duration=max_duration,
+        )
+    # Convert numpy array to torch tensor and re-call
+    if isinstance(audio, np.ndarray):
+        return _decode_audio(
+            audio=torch.from_numpy(audio),
+            sample_rate=sample_rate,
+            max_duration=max_duration,
+        )
+    samples: AudioSamples
+    data: torch.Tensor | bytes
+    # HF datasets return AudioDecoder for audio column
+    if isinstance(audio, AudioDecoder):
+        samples = audio.get_samples_played_in_range(stop_seconds=max_duration)
+    elif isinstance(audio, torch.Tensor):
+        # If float stream assume decoded audio
+        if torch.is_floating_point(audio):
+            if sample_rate is None:
+                raise ValueError("Sample rate must be set for decoded audio")
+            full_duration = audio.shape[1] / sample_rate
+            # If max_duration is set, trim the audio to that duration
+            if max_duration is not None:
+                num_samples = int(max_duration * sample_rate)
+                duration = min(max_duration, full_duration)
+                data = audio[:, :num_samples]
+            else:
+                duration = full_duration
+                data = audio
+            samples = AudioSamples(
+                data=data,
+                pts_seconds=0.0,
+                duration_seconds=duration,
+                sample_rate=sample_rate,
+            )
+        # If bytes tensor assume encoded audio
+        elif audio.dtype == torch.uint8:
+            decoder = AudioDecoder(
+                source=audio,
+                sample_rate=sample_rate,
+            )
+            samples = decoder.get_samples_played_in_range(stop_seconds=max_duration)
+        else:
+            raise ValueError(f"Unsupported audio type: {type(audio)}")
+    # If bytes, assume encoded audio
+    elif isinstance(audio, bytes):
+        decoder = AudioDecoder(
+            source=audio,
+            sample_rate=sample_rate,
+        )
+        samples = decoder.get_samples_played_in_range(stop_seconds=max_duration)
+    # If str or Path, assume file path or URL to encoded audio
+    elif isinstance(audio, str | Path):
+        if isinstance(audio, str) and is_url(audio):
+            response = httpx.get(audio)
+            response.raise_for_status()
+            data = response.content
+        else:
+            if not Path(audio).exists():
+                raise ValueError(f"Audio file does not exist: {audio}")
+            data = Path(audio).read_bytes()
+        decoder = AudioDecoder(
+            source=data,
+        )
+        samples = decoder.get_samples_played_in_range(stop_seconds=max_duration)
+    else:
+        raise ValueError(f"Unsupported audio type: {type(audio)}")
+    return samples
+def _encode_audio(
+    samples: AudioSamples,
+    resample_rate: int | None = None,
+    bitrate: int = 64000,
+    audio_format: str = "mp3",
+    mono: bool = True,
+) -> bytes:
+    encoder = AudioEncoder(
+        samples=samples.data,
+        sample_rate=samples.sample_rate,
+    )
+    audio_tensor = encoder.to_tensor(
+        format=audio_format,
+        bit_rate=bitrate if audio_format == "mp3" else None,
+        num_channels=1 if mono else None,
+        sample_rate=resample_rate,
+    )
+    return audio_tensor.numpy().tobytes()
+def get_file_name(path: Path | str) -> str:
+    """Get file name from path."""
+    return Path(path).name

guidellm 0.3.1__py3-none-any.whl → 0.6.0a5__py3-none-any.whl

guidellm 0.3.1py3-none-any.whl → 0.6.0a5py3-none-any.whl