PyPI - guidellm - Versions diffs - 0.4.0a21__py3-none-any.whl → 0.4.0a169__py3-none-any.whl - Mend

guidellm 0.4.0a21__py3-none-any.whl → 0.4.0a169__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of guidellm might be problematic. Click here for more details.

Files changed (115) hide show

guidellm/__init__.py +5 -2
guidellm/__main__.py +452 -252
guidellm/backends/__init__.py +33 -0
guidellm/backends/backend.py +110 -0
guidellm/backends/openai.py +355 -0
guidellm/backends/response_handlers.py +455 -0
guidellm/benchmark/__init__.py +53 -39
guidellm/benchmark/benchmarker.py +150 -317
guidellm/benchmark/entrypoints.py +467 -128
guidellm/benchmark/output.py +519 -771
guidellm/benchmark/profile.py +580 -280
guidellm/benchmark/progress.py +568 -549
guidellm/benchmark/scenarios/__init__.py +40 -0
guidellm/benchmark/scenarios/chat.json +6 -0
guidellm/benchmark/scenarios/rag.json +6 -0
guidellm/benchmark/schemas.py +2086 -0
guidellm/data/__init__.py +28 -4
guidellm/data/collators.py +16 -0
guidellm/data/deserializers/__init__.py +53 -0
guidellm/data/deserializers/deserializer.py +144 -0
guidellm/data/deserializers/file.py +222 -0
guidellm/data/deserializers/huggingface.py +94 -0
guidellm/data/deserializers/memory.py +194 -0
guidellm/data/deserializers/synthetic.py +348 -0
guidellm/data/loaders.py +149 -0
guidellm/data/preprocessors/__init__.py +25 -0
guidellm/data/preprocessors/formatters.py +404 -0
guidellm/data/preprocessors/mappers.py +198 -0
guidellm/data/preprocessors/preprocessor.py +31 -0
guidellm/data/processor.py +31 -0
guidellm/data/schemas.py +13 -0
guidellm/data/utils/__init__.py +6 -0
guidellm/data/utils/dataset.py +94 -0
guidellm/extras/__init__.py +4 -0
guidellm/extras/audio.py +215 -0
guidellm/extras/vision.py +242 -0
guidellm/logger.py +2 -2
guidellm/mock_server/__init__.py +8 -0
guidellm/mock_server/config.py +84 -0
guidellm/mock_server/handlers/__init__.py +17 -0
guidellm/mock_server/handlers/chat_completions.py +280 -0
guidellm/mock_server/handlers/completions.py +280 -0
guidellm/mock_server/handlers/tokenizer.py +142 -0
guidellm/mock_server/models.py +510 -0
guidellm/mock_server/server.py +168 -0
guidellm/mock_server/utils.py +302 -0
guidellm/preprocess/dataset.py +23 -26
guidellm/presentation/builder.py +2 -2
guidellm/presentation/data_models.py +25 -21
guidellm/presentation/injector.py +2 -3
guidellm/scheduler/__init__.py +65 -26
guidellm/scheduler/constraints.py +1035 -0
guidellm/scheduler/environments.py +252 -0
guidellm/scheduler/scheduler.py +140 -368
guidellm/scheduler/schemas.py +272 -0
guidellm/scheduler/strategies.py +519 -0
guidellm/scheduler/worker.py +391 -420
guidellm/scheduler/worker_group.py +707 -0
guidellm/schemas/__init__.py +31 -0
guidellm/schemas/info.py +159 -0
guidellm/schemas/request.py +226 -0
guidellm/schemas/response.py +119 -0
guidellm/schemas/stats.py +228 -0
guidellm/{config.py → settings.py} +32 -21
guidellm/utils/__init__.py +95 -8
guidellm/utils/auto_importer.py +98 -0
guidellm/utils/cli.py +71 -2
guidellm/utils/console.py +183 -0
guidellm/utils/encoding.py +778 -0
guidellm/utils/functions.py +134 -0
guidellm/utils/hf_datasets.py +1 -2
guidellm/utils/hf_transformers.py +4 -4
guidellm/utils/imports.py +9 -0
guidellm/utils/messaging.py +1118 -0
guidellm/utils/mixins.py +115 -0
guidellm/utils/pydantic_utils.py +411 -0
guidellm/utils/random.py +3 -4
guidellm/utils/registry.py +220 -0
guidellm/utils/singleton.py +133 -0
guidellm/{objects → utils}/statistics.py +341 -247
guidellm/utils/synchronous.py +159 -0
guidellm/utils/text.py +163 -50
guidellm/utils/typing.py +41 -0
guidellm/version.py +1 -1
{guidellm-0.4.0a21.dist-info → guidellm-0.4.0a169.dist-info}/METADATA +33 -10
guidellm-0.4.0a169.dist-info/RECORD +95 -0
guidellm/backend/__init__.py +0 -23
guidellm/backend/backend.py +0 -259
guidellm/backend/openai.py +0 -705
guidellm/backend/response.py +0 -136
guidellm/benchmark/aggregator.py +0 -760
guidellm/benchmark/benchmark.py +0 -837
guidellm/benchmark/scenario.py +0 -104
guidellm/data/prideandprejudice.txt.gz +0 -0
guidellm/dataset/__init__.py +0 -22
guidellm/dataset/creator.py +0 -213
guidellm/dataset/entrypoints.py +0 -42
guidellm/dataset/file.py +0 -92
guidellm/dataset/hf_datasets.py +0 -62
guidellm/dataset/in_memory.py +0 -132
guidellm/dataset/synthetic.py +0 -287
guidellm/objects/__init__.py +0 -18
guidellm/objects/pydantic.py +0 -89
guidellm/request/__init__.py +0 -18
guidellm/request/loader.py +0 -284
guidellm/request/request.py +0 -79
guidellm/request/types.py +0 -10
guidellm/scheduler/queues.py +0 -25
guidellm/scheduler/result.py +0 -155
guidellm/scheduler/strategy.py +0 -495
guidellm-0.4.0a21.dist-info/RECORD +0 -62
{guidellm-0.4.0a21.dist-info → guidellm-0.4.0a169.dist-info}/WHEEL +0 -0
{guidellm-0.4.0a21.dist-info → guidellm-0.4.0a169.dist-info}/entry_points.txt +0 -0
{guidellm-0.4.0a21.dist-info → guidellm-0.4.0a169.dist-info}/licenses/LICENSE +0 -0
{guidellm-0.4.0a21.dist-info → guidellm-0.4.0a169.dist-info}/top_level.txt +0 -0

guidellm/data/utils/__init__.py ADDED Viewed

@@ -0,0 +1,6 @@
+from .dataset import DEFAULT_SPLITS, resolve_dataset_split
+__all__ = [
+    "DEFAULT_SPLITS",
+    "resolve_dataset_split",
+]

guidellm/data/utils/dataset.py ADDED Viewed

@@ -0,0 +1,94 @@
+from __future__ import annotations
+from typing import Literal
+from datasets import Dataset, DatasetDict, IterableDataset, IterableDatasetDict
+__all__ = ["DEFAULT_SPLITS", "resolve_dataset_split"]
+DEFAULT_SPLITS: dict[Literal["train", "calib", "val", "test"], list[str]] = {
+    "train": [
+        "train",
+        "training",
+        "train_set",
+        "training_set",
+        "train_dataset",
+        "training_dataset",
+        "train_data",
+        "training_data",
+        "pretrain",
+        "pretrain_set",
+        "pretrain_dataset",
+        "pretrain_data",
+        "pretraining",
+    ],
+    "calib": [
+        "calibration",
+        "calib",
+        "cal",
+        "calibration_set",
+        "calib_set",
+        "cal_set",
+        "calibration_dataset",
+        "calib_dataset",
+        "cal_set",
+        "calibration_data",
+        "calib_data",
+        "cal_data",
+    ],
+    "val": [
+        "validation",
+        "val",
+        "valid",
+        "validation_set",
+        "val_set",
+        "validation_dataset",
+        "val_dataset",
+        "validation_data",
+        "val_data",
+        "dev",
+        "dev_set",
+        "dev_dataset",
+        "dev_data",
+    ],
+    "test": [
+        "test",
+        "testing",
+        "test_set",
+        "testing_set",
+        "test_dataset",
+        "testing_dataset",
+        "test_data",
+        "testing_data",
+        "eval",
+        "eval_set",
+        "eval_dataset",
+        "eval_data",
+    ],
+}
+def resolve_dataset_split(
+    dataset: Dataset | IterableDataset | DatasetDict | IterableDatasetDict,
+    split: str | None = None,
+) -> Dataset | IterableDataset:
+    if split is not None and isinstance(dataset, DatasetDict | IterableDatasetDict):
+        if split in dataset:
+            return dataset[split]
+        raise ValueError(f"Requested split '{split}' not found in dataset: {dataset}.")
+    elif split is not None:
+        raise ValueError(
+            f"Requested split '{split}' but dataset has no splits: {dataset}."
+        )
+    if isinstance(dataset, Dataset | IterableDataset):
+        return dataset
+    for _, default_splits in DEFAULT_SPLITS.items():
+        for default_split in default_splits:
+            if default_split in dataset:
+                return dataset[default_split]
+    return dataset[list(dataset.keys())[0]]

guidellm/extras/__init__.py ADDED Viewed

@@ -0,0 +1,4 @@
+"""
+Code that depends on optional dependencies.
+Each submodule should be imported lazily (deferred until first use).
+"""

guidellm/extras/audio.py ADDED Viewed

@@ -0,0 +1,215 @@
+from __future__ import annotations
+import base64
+from pathlib import Path
+from typing import Any, Literal
+import httpx
+import numpy as np
+import torch
+try:
+    from torchcodec import AudioSamples
+    from torchcodec.decoders import AudioDecoder
+    from torchcodec.encoders import AudioEncoder
+except ImportError as e:
+    raise ImportError("Please install guidellm[audio] to use audio features") from e
+__all__ = [
+    "encode_audio",
+    "is_url",
+]
+def is_url(text: Any) -> bool:
+    return isinstance(text, str) and text.startswith(("http://", "https://"))
+def encode_audio(
+    audio: AudioDecoder
+    | bytes
+    | str
+    | Path
+    | np.ndarray
+    | torch.Tensor
+    | dict[str, Any],
+    b64encode: bool = False,
+    sample_rate: int | None = None,
+    file_name: str = "audio.wav",
+    encode_sample_rate: int = 16000,
+    max_duration: float | None = None,
+    mono: bool = True,
+    audio_format: str = "mp3",
+    bitrate: str = "64k",
+) -> dict[
+    Literal[
+        "type",
+        "audio",
+        "format",
+        "mimetype",
+        "audio_samples",
+        "audio_seconds",
+        "audio_bytes",
+        "file_name",
+    ],
+    str | int | float | bytes | None,
+]:
+    """Decode audio (if necessary) and re-encode to specified format."""
+    samples = _decode_audio(audio, sample_rate=sample_rate, max_duration=max_duration)
+    bitrate_val = (
+        int(bitrate.rstrip("k")) * 1000 if bitrate.endswith("k") else int(bitrate)
+    )
+    format_val = audio_format.lower()
+    encoded_audio = _encode_audio(
+        samples=samples,
+        resample_rate=encode_sample_rate,
+        bitrate=bitrate_val,
+        audio_format=format_val,
+        mono=mono,
+    )
+    return {
+        "type": "audio_base64" if b64encode else "audio_file",
+        "audio": (
+            base64.b64encode(encoded_audio).decode("utf-8")
+            if b64encode
+            else encoded_audio
+        ),
+        "file_name": get_file_name(audio)
+        if isinstance(audio, str | Path)
+        else file_name,
+        "format": audio_format,
+        "mimetype": f"audio/{format_val}",
+        "audio_samples": samples.sample_rate,
+        "audio_seconds": samples.duration_seconds,
+        "audio_bytes": len(encoded_audio),
+    }
+def _decode_audio(  # noqa: C901, PLR0912
+    audio: AudioDecoder
+    | bytes
+    | str
+    | Path
+    | np.ndarray
+    | torch.Tensor
+    | dict[str, Any],
+    sample_rate: int | None = None,
+    max_duration: float | None = None,
+) -> AudioSamples:
+    """Decode audio from various input types into AudioSamples."""
+    # If input is a dict, unwrap it into a function call
+    if isinstance(audio, dict):
+        sample_rate = audio.get("sample_rate", audio.get("sampling_rate", sample_rate))
+        if "data" not in audio and "url" not in audio:
+            raise ValueError(
+                f"Audio dict must contain either 'data' or 'url' keys, got {audio}"
+            )
+        return _decode_audio(
+            audio=audio.get("data") or audio.get("url"),
+            sample_rate=sample_rate,
+            max_duration=max_duration,
+        )
+    # Convert numpy array to torch tensor and re-call
+    if isinstance(audio, np.ndarray):
+        return _decode_audio(
+            audio=torch.from_numpy(audio),
+            sample_rate=sample_rate,
+            max_duration=max_duration,
+        )
+    samples: AudioSamples
+    data: torch.Tensor | bytes
+    # HF datasets return AudioDecoder for audio column
+    if isinstance(audio, AudioDecoder):
+        samples = audio.get_samples_played_in_range(stop_seconds=max_duration)
+    elif isinstance(audio, torch.Tensor):
+        # If float stream assume decoded audio
+        if torch.is_floating_point(audio):
+            if sample_rate is None:
+                raise ValueError("Sample rate must be set for decoded audio")
+            full_duration = audio.shape[1] / sample_rate
+            # If max_duration is set, trim the audio to that duration
+            if max_duration is not None:
+                num_samples = int(max_duration * sample_rate)
+                duration = min(max_duration, full_duration)
+                data = audio[:, :num_samples]
+            else:
+                duration = full_duration
+                data = audio
+            samples = AudioSamples(
+                data=data,
+                pts_seconds=0.0,
+                duration_seconds=duration,
+                sample_rate=sample_rate,
+            )
+        # If bytes tensor assume encoded audio
+        elif audio.dtype == torch.uint8:
+            decoder = AudioDecoder(
+                source=audio,
+                sample_rate=sample_rate,
+            )
+            samples = decoder.get_samples_played_in_range(stop_seconds=max_duration)
+        else:
+            raise ValueError(f"Unsupported audio type: {type(audio)}")
+    # If bytes, assume encoded audio
+    elif isinstance(audio, bytes):
+        decoder = AudioDecoder(
+            source=audio,
+            sample_rate=sample_rate,
+        )
+        samples = decoder.get_samples_played_in_range(stop_seconds=max_duration)
+    # If str or Path, assume file path or URL to encoded audio
+    elif isinstance(audio, str | Path):
+        if isinstance(audio, str) and is_url(audio):
+            response = httpx.get(audio)
+            response.raise_for_status()
+            data = response.content
+        else:
+            if not Path(audio).exists():
+                raise ValueError(f"Audio file does not exist: {audio}")
+            data = Path(audio).read_bytes()
+        decoder = AudioDecoder(
+            source=data,
+        )
+        samples = decoder.get_samples_played_in_range(stop_seconds=max_duration)
+    else:
+        raise ValueError(f"Unsupported audio type: {type(audio)}")
+    return samples
+def _encode_audio(
+    samples: AudioSamples,
+    resample_rate: int | None = None,
+    bitrate: int = 64000,
+    audio_format: str = "mp3",
+    mono: bool = True,
+) -> bytes:
+    encoder = AudioEncoder(
+        samples=samples.data,
+        sample_rate=samples.sample_rate,
+    )
+    audio_tensor = encoder.to_tensor(
+        format=audio_format,
+        bit_rate=bitrate if audio_format == "mp3" else None,
+        num_channels=1 if mono else None,
+        sample_rate=resample_rate,
+    )
+    return audio_tensor.numpy().tobytes()
+def get_file_name(path: Path | str) -> str:
+    """Get file name from path."""
+    return Path(path).name

guidellm/extras/vision.py ADDED Viewed

@@ -0,0 +1,242 @@
+from __future__ import annotations
+import base64
+import io
+from pathlib import Path
+from typing import Any, Literal
+import httpx
+import numpy as np
+try:
+    from PIL import Image as PILImage
+except ImportError as e:
+    raise ImportError(
+        "Please install guidellm[vision] to use image/video features"
+    ) from e
+__all__ = [
+    "encode_image",
+    "encode_video",
+    "get_file_format",
+    "is_url",
+    "resize_image",
+]
+def is_url(text: Any) -> bool:
+    return isinstance(text, str) and text.startswith(("http://", "https://"))
+def encode_image(
+    image: bytes | str | Path | np.ndarray | PILImage.Image,
+    width: int | None = None,
+    height: int | None = None,
+    max_size: int | None = None,
+    max_width: int | None = None,
+    max_height: int | None = None,
+    encode_type: Literal["base64", "url"] | None = "base64",
+) -> dict[Literal["type", "image", "image_pixels", "image_bytes"], str | int | None]:
+    """
+    Input image types:
+    - bytes: raw image bytes, decoded with Pillow
+    - str: file path on disk, url, or already base64 encoded image string
+    - pathlib.Path: file path on disk
+    - np.ndarray: image array, decoded with Pillow
+    - PIL.Image.Image: Pillow image
+    - datasets.Image: HuggingFace datasets Image object
+    max_size: maximum size of the longest edge of the image
+    max_width: maximum width of the image
+    max_height: maximum height of the image
+    encode_type: None to return the supported format
+        (url for url, base64 string for others)
+        "base64" to return base64 encoded string (or download URL and encode)
+        "url" to return url (only if input is url, otherwise fails)
+    Returns a str of either:
+    - image url
+    - "data:image/{type};base64, {data}" string
+    """
+    if isinstance(image, str) and is_url(image):
+        if encode_type == "base64":
+            response = httpx.get(image)
+            response.raise_for_status()
+            return encode_image(
+                image=response.content,
+                max_size=max_size,
+                max_width=max_width,
+                max_height=max_height,
+                encode_type="base64",
+            )
+        if any([width, height, max_size, max_width, max_height]):
+            raise ValueError(f"Cannot resize image {image} when encode_type is 'url'")
+        return {
+            "type": "image_url",
+            "image": image,
+            "image_pixels": None,
+            "image_bytes": None,
+        }
+    decoded_image: PILImage.Image
+    if isinstance(image, bytes):
+        decoded_image = PILImage.open(io.BytesIO(image))
+    elif isinstance(image, str) and image.startswith("data:image/"):
+        _, encoded = image.split(",", 1)
+        image_data = base64.b64decode(encoded)
+        decoded_image = PILImage.open(io.BytesIO(image_data))
+    elif isinstance(image, str | Path):
+        decoded_image = PILImage.open(image)
+    elif isinstance(image, np.ndarray):
+        decoded_image = PILImage.fromarray(image)
+    elif isinstance(image, PILImage.Image):
+        decoded_image = image
+    else:
+        raise ValueError(f"Unsupported image type: {type(image)} for {image}")
+    output_image = resize_image(
+        decoded_image,
+        width=width,
+        height=height,
+        max_width=max_width,
+        max_height=max_height,
+        max_size=max_size,
+    )
+    if output_image.mode != "RGB":
+        output_image = output_image.convert("RGB")
+    buffer = io.BytesIO()
+    output_image.save(buffer, format="JPEG")
+    image_bytes = buffer.getvalue()
+    image_base64 = base64.b64encode(image_bytes).decode("utf-8")
+    return {
+        "type": "image_base64",
+        "image": f"data:image/jpeg;base64,{image_base64}",
+        "image_pixels": output_image.width * output_image.height,
+        "image_bytes": len(image_bytes),
+    }
+def resize_image(
+    image: PILImage.Image,
+    width: int | None = None,
+    height: int | None = None,
+    max_width: int | None = None,
+    max_height: int | None = None,
+    max_size: int | None = None,
+) -> PILImage.Image:
+    if not isinstance(image, PILImage.Image):
+        raise ValueError(f"Unsupported image type: {type(image)}")
+    if width is not None and height is not None:
+        return image.resize((width, height), PILImage.Resampling.BILINEAR)
+    orig_w, orig_h = image.size
+    aspect = orig_w / orig_h
+    if width is not None:
+        target_w = width
+        target_h = round(width / aspect)
+    elif height is not None:
+        target_h = height
+        target_w = round(height * aspect)
+    else:
+        target_w, target_h = orig_w, orig_h
+    # Normalize max_size → max_width/max_height
+    if max_size is not None:
+        max_width = max_width or max_size
+        max_height = max_height or max_size
+    # Apply max constraints (preserve aspect ratio)
+    if max_width or max_height:
+        scale_w = max_width / target_w if max_width else 1.0
+        scale_h = max_height / target_h if max_height else 1.0
+        scale = min(scale_w, scale_h, 1.0)  # never upscale
+        target_w = round(target_w * scale)
+        target_h = round(target_h * scale)
+    if (target_w, target_h) != (orig_w, orig_h):
+        image = image.resize((target_w, target_h), PILImage.Resampling.BILINEAR)
+    return image
+def encode_video(
+    video: bytes | str | Path,
+    encode_type: Literal["base64", "url"] | None = "base64",
+) -> dict[
+    Literal["type", "video", "video_frames", "video_seconds", "video_bytes"],
+    str | int | float | None,
+]:
+    """
+    Input video types:
+    - bytes: raw video bytes
+    - str: file path on disk, url, or already base64 encoded video string
+    - pathlib.Path: file path on disk
+    - datasets.Video: HuggingFace datasets Video object
+    encode_type: None to return the supported format
+        (url for url, base64 string for others)
+        "base64" to return base64 encoded string (or download URL and encode)
+        "url" to return url (only if input is url, otherwise fails)
+    Returns a str of either:
+    - video url
+    - "data:video/{type};base64, {data}" string
+    """
+    if isinstance(video, str) and is_url(video):
+        if encode_type == "base64":
+            response = httpx.get(video)
+            response.raise_for_status()
+            return encode_video(video=response.content, encode_type="base64")
+        return {
+            "type": "video_url",
+            "video": video,
+            "video_frames": None,
+            "video_seconds": None,
+            "video_bytes": None,
+        }
+    if isinstance(video, str) and video.startswith("data:video/"):
+        data_str = video.split(",", 1)[1]
+        return {
+            "type": "video_base64",
+            "video": video,
+            "video_frames": None,
+            "video_seconds": None,
+            "video_bytes": len(data_str) * 3 // 4,  # base64 to bytes
+        }
+    if isinstance(video, str | Path):
+        path = Path(video)
+        video_bytes = path.read_bytes()
+        video_format = get_file_format(path)
+    elif isinstance(video, bytes):
+        video_bytes = video
+        video_format = "unknown"
+    else:
+        raise ValueError(f"Unsupported video type: {type(video)} for {video}")
+    video_base64 = base64.b64encode(video_bytes).decode("utf-8")
+    return {
+        "type": "video_base64",
+        "video": f"data:video/{video_format};base64,{video_base64}",
+        "video_frames": None,
+        "video_seconds": None,
+        "video_bytes": len(video_bytes),
+    }
+def get_file_format(path: Path | str) -> str:
+    """Get file format from path extension."""
+    suffix = Path(path).suffix.lower()
+    return suffix[1:] if suffix.startswith(".") else "unknown"

guidellm/logger.py CHANGED Viewed

@@ -41,7 +41,7 @@ import sys
 from loguru import logger
-from guidellm.config import LoggingSettings, settings
+from guidellm.settings import LoggingSettings, settings
 __all__ = ["configure_logger", "logger"]
@@ -72,7 +72,7 @@ def configure_logger(config: LoggingSettings = settings.logging):
         sys.stdout,
         level=config.console_log_level.upper(),
         format="<green>{time:YY-MM-DD HH:mm:ss}</green>|<level>{level: <8}</level> \
-        |<cyan>{name}:{function}</cyan>:<cyan>{line}</cyan> - <level>{message}</level>"
+        |<cyan>{name}:{function}</cyan>:<cyan>{line}</cyan> - <level>{message}</level>",
     )
     if config.log_file or config.log_file_level:

guidellm/mock_server/__init__.py ADDED Viewed

@@ -0,0 +1,8 @@
+"""
+GuideLLM Mock Server for OpenAI and vLLM API compatibility.
+"""
+from .config import MockServerConfig
+from .server import MockServer
+__all__ = ["MockServer", "MockServerConfig"]

guidellm/mock_server/config.py ADDED Viewed

@@ -0,0 +1,84 @@
+"""
+Configuration settings for the mock server component.
+Provides centralized configuration management for mock server behavior including
+network binding, model identification, response timing characteristics, and token
+generation parameters. Supports environment variable configuration for deployment
+flexibility with automatic validation through Pydantic settings.
+"""
+from __future__ import annotations
+from pydantic import Field
+from pydantic_settings import BaseSettings
+__all__ = ["MockServerConfig"]
+class MockServerConfig(BaseSettings):
+    """
+    Configuration settings for mock server behavior and deployment.
+    Centralizes all configurable parameters for mock server operation including
+    network settings, model identification, response timing characteristics, and
+    token generation behavior. Environment variables with GUIDELLM_MOCK_SERVER_
+    prefix override default values for deployment flexibility.
+    Example:
+    ::
+        config = MockServerConfig(host="0.0.0.0", port=8080, model="custom-model")
+        # Use with environment variables:
+        # GUIDELLM_MOCK_SERVER_HOST=127.0.0.1 GUIDELLM_MOCK_SERVER_PORT=9000
+    """
+    host: str = Field(
+        default="127.0.0.1", description="Host address to bind the server to"
+    )
+    port: int = Field(default=8000, description="Port number to bind the server to")
+    workers: int = Field(default=1, description="Number of worker processes to spawn")
+    model: str = Field(
+        default="llama-3.1-8b-instruct",
+        description="Model name to present in API responses",
+    )
+    processor: str | None = Field(
+        default=None,
+        description=(
+            "Processor type to use for token stats, tokenize, and detokenize. "
+            "If None, a mock one is created."
+        ),
+    )
+    request_latency: float = Field(
+        default=3.0,
+        description="Base request latency in seconds for non-streaming responses",
+    )
+    request_latency_std: float = Field(
+        default=0.0,
+        description="Standard deviation for request latency variation",
+    )
+    ttft_ms: float = Field(
+        default=150.0,
+        description="Time to first token in milliseconds for streaming responses",
+    )
+    ttft_ms_std: float = Field(
+        default=0.0,
+        description="Standard deviation for time to first token variation",
+    )
+    itl_ms: float = Field(
+        default=10.0,
+        description="Inter-token latency in milliseconds for streaming responses",
+    )
+    itl_ms_std: float = Field(
+        default=0.0,
+        description="Standard deviation for inter-token latency variation",
+    )
+    output_tokens: int = Field(
+        default=128, description="Number of output tokens to generate in responses"
+    )
+    output_tokens_std: float = Field(
+        default=0.0,
+        description="Standard deviation for output token count variation",
+    )
+    class Config:
+        env_prefix = "GUIDELLM_MOCK_SERVER_"
+        case_sensitive = False

guidellm/mock_server/handlers/__init__.py ADDED Viewed

@@ -0,0 +1,17 @@
+"""
+HTTP request handlers for the GuideLLM mock server.
+This module exposes request handlers that implement OpenAI-compatible API endpoints
+for the mock server. The handlers provide realistic LLM simulation capabilities
+including chat completions, legacy completions, and tokenization services with
+configurable timing characteristics, token counting, and proper error handling to
+support comprehensive benchmarking and testing scenarios.
+"""
+from __future__ import annotations
+from .chat_completions import ChatCompletionsHandler
+from .completions import CompletionsHandler
+from .tokenizer import TokenizerHandler
+__all__ = ["ChatCompletionsHandler", "CompletionsHandler", "TokenizerHandler"]

guidellm 0.4.0a21__py3-none-any.whl → 0.4.0a169__py3-none-any.whl

Potentially problematic release.

guidellm 0.4.0a21__py3-none-any.whl → 0.4.0a169__py3-none-any.whl