guidellm 0.3.1__py3-none-any.whl → 0.6.0a5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (141)
  1. guidellm/__init__.py +5 -2
  2. guidellm/__main__.py +524 -255
  3. guidellm/backends/__init__.py +33 -0
  4. guidellm/backends/backend.py +109 -0
  5. guidellm/backends/openai.py +340 -0
  6. guidellm/backends/response_handlers.py +428 -0
  7. guidellm/benchmark/__init__.py +69 -39
  8. guidellm/benchmark/benchmarker.py +160 -316
  9. guidellm/benchmark/entrypoints.py +560 -127
  10. guidellm/benchmark/outputs/__init__.py +24 -0
  11. guidellm/benchmark/outputs/console.py +633 -0
  12. guidellm/benchmark/outputs/csv.py +721 -0
  13. guidellm/benchmark/outputs/html.py +473 -0
  14. guidellm/benchmark/outputs/output.py +169 -0
  15. guidellm/benchmark/outputs/serialized.py +69 -0
  16. guidellm/benchmark/profiles.py +718 -0
  17. guidellm/benchmark/progress.py +553 -556
  18. guidellm/benchmark/scenarios/__init__.py +40 -0
  19. guidellm/benchmark/scenarios/chat.json +6 -0
  20. guidellm/benchmark/scenarios/rag.json +6 -0
  21. guidellm/benchmark/schemas/__init__.py +66 -0
  22. guidellm/benchmark/schemas/base.py +402 -0
  23. guidellm/benchmark/schemas/generative/__init__.py +55 -0
  24. guidellm/benchmark/schemas/generative/accumulator.py +841 -0
  25. guidellm/benchmark/schemas/generative/benchmark.py +163 -0
  26. guidellm/benchmark/schemas/generative/entrypoints.py +381 -0
  27. guidellm/benchmark/schemas/generative/metrics.py +927 -0
  28. guidellm/benchmark/schemas/generative/report.py +158 -0
  29. guidellm/data/__init__.py +34 -4
  30. guidellm/data/builders.py +541 -0
  31. guidellm/data/collators.py +16 -0
  32. guidellm/data/config.py +120 -0
  33. guidellm/data/deserializers/__init__.py +49 -0
  34. guidellm/data/deserializers/deserializer.py +141 -0
  35. guidellm/data/deserializers/file.py +223 -0
  36. guidellm/data/deserializers/huggingface.py +94 -0
  37. guidellm/data/deserializers/memory.py +194 -0
  38. guidellm/data/deserializers/synthetic.py +246 -0
  39. guidellm/data/entrypoints.py +52 -0
  40. guidellm/data/loaders.py +190 -0
  41. guidellm/data/preprocessors/__init__.py +27 -0
  42. guidellm/data/preprocessors/formatters.py +410 -0
  43. guidellm/data/preprocessors/mappers.py +196 -0
  44. guidellm/data/preprocessors/preprocessor.py +30 -0
  45. guidellm/data/processor.py +29 -0
  46. guidellm/data/schemas.py +175 -0
  47. guidellm/data/utils/__init__.py +6 -0
  48. guidellm/data/utils/dataset.py +94 -0
  49. guidellm/extras/__init__.py +4 -0
  50. guidellm/extras/audio.py +220 -0
  51. guidellm/extras/vision.py +242 -0
  52. guidellm/logger.py +2 -2
  53. guidellm/mock_server/__init__.py +8 -0
  54. guidellm/mock_server/config.py +84 -0
  55. guidellm/mock_server/handlers/__init__.py +17 -0
  56. guidellm/mock_server/handlers/chat_completions.py +280 -0
  57. guidellm/mock_server/handlers/completions.py +280 -0
  58. guidellm/mock_server/handlers/tokenizer.py +142 -0
  59. guidellm/mock_server/models.py +510 -0
  60. guidellm/mock_server/server.py +238 -0
  61. guidellm/mock_server/utils.py +302 -0
  62. guidellm/scheduler/__init__.py +69 -26
  63. guidellm/scheduler/constraints/__init__.py +49 -0
  64. guidellm/scheduler/constraints/constraint.py +325 -0
  65. guidellm/scheduler/constraints/error.py +411 -0
  66. guidellm/scheduler/constraints/factory.py +182 -0
  67. guidellm/scheduler/constraints/request.py +312 -0
  68. guidellm/scheduler/constraints/saturation.py +722 -0
  69. guidellm/scheduler/environments.py +252 -0
  70. guidellm/scheduler/scheduler.py +137 -368
  71. guidellm/scheduler/schemas.py +358 -0
  72. guidellm/scheduler/strategies.py +617 -0
  73. guidellm/scheduler/worker.py +413 -419
  74. guidellm/scheduler/worker_group.py +712 -0
  75. guidellm/schemas/__init__.py +65 -0
  76. guidellm/schemas/base.py +417 -0
  77. guidellm/schemas/info.py +188 -0
  78. guidellm/schemas/request.py +235 -0
  79. guidellm/schemas/request_stats.py +349 -0
  80. guidellm/schemas/response.py +124 -0
  81. guidellm/schemas/statistics.py +1018 -0
  82. guidellm/{config.py → settings.py} +31 -24
  83. guidellm/utils/__init__.py +71 -8
  84. guidellm/utils/auto_importer.py +98 -0
  85. guidellm/utils/cli.py +132 -5
  86. guidellm/utils/console.py +566 -0
  87. guidellm/utils/encoding.py +778 -0
  88. guidellm/utils/functions.py +159 -0
  89. guidellm/utils/hf_datasets.py +1 -2
  90. guidellm/utils/hf_transformers.py +4 -4
  91. guidellm/utils/imports.py +9 -0
  92. guidellm/utils/messaging.py +1118 -0
  93. guidellm/utils/mixins.py +115 -0
  94. guidellm/utils/random.py +3 -4
  95. guidellm/utils/registry.py +220 -0
  96. guidellm/utils/singleton.py +133 -0
  97. guidellm/utils/synchronous.py +159 -0
  98. guidellm/utils/text.py +163 -50
  99. guidellm/utils/typing.py +41 -0
  100. guidellm/version.py +2 -2
  101. guidellm-0.6.0a5.dist-info/METADATA +364 -0
  102. guidellm-0.6.0a5.dist-info/RECORD +109 -0
  103. guidellm/backend/__init__.py +0 -23
  104. guidellm/backend/backend.py +0 -259
  105. guidellm/backend/openai.py +0 -708
  106. guidellm/backend/response.py +0 -136
  107. guidellm/benchmark/aggregator.py +0 -760
  108. guidellm/benchmark/benchmark.py +0 -837
  109. guidellm/benchmark/output.py +0 -997
  110. guidellm/benchmark/profile.py +0 -409
  111. guidellm/benchmark/scenario.py +0 -104
  112. guidellm/data/prideandprejudice.txt.gz +0 -0
  113. guidellm/dataset/__init__.py +0 -22
  114. guidellm/dataset/creator.py +0 -213
  115. guidellm/dataset/entrypoints.py +0 -42
  116. guidellm/dataset/file.py +0 -92
  117. guidellm/dataset/hf_datasets.py +0 -62
  118. guidellm/dataset/in_memory.py +0 -132
  119. guidellm/dataset/synthetic.py +0 -287
  120. guidellm/objects/__init__.py +0 -18
  121. guidellm/objects/pydantic.py +0 -89
  122. guidellm/objects/statistics.py +0 -953
  123. guidellm/preprocess/__init__.py +0 -3
  124. guidellm/preprocess/dataset.py +0 -374
  125. guidellm/presentation/__init__.py +0 -28
  126. guidellm/presentation/builder.py +0 -27
  127. guidellm/presentation/data_models.py +0 -232
  128. guidellm/presentation/injector.py +0 -66
  129. guidellm/request/__init__.py +0 -18
  130. guidellm/request/loader.py +0 -284
  131. guidellm/request/request.py +0 -79
  132. guidellm/request/types.py +0 -10
  133. guidellm/scheduler/queues.py +0 -25
  134. guidellm/scheduler/result.py +0 -155
  135. guidellm/scheduler/strategy.py +0 -495
  136. guidellm-0.3.1.dist-info/METADATA +0 -329
  137. guidellm-0.3.1.dist-info/RECORD +0 -62
  138. {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/WHEEL +0 -0
  139. {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/entry_points.txt +0 -0
  140. {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/licenses/LICENSE +0 -0
  141. {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/top_level.txt +0 -0
guidellm/backends/__init__.py
@@ -0,0 +1,33 @@
+"""
+Backend infrastructure for GuideLLM language model interactions.
+
+Provides abstract base classes, concrete backend implementations, and response
+handlers for standardized communication with generative AI model providers.
+The backend system supports distributed execution across worker processes with
+pluggable response handlers for different API formats. Key components include
+the abstract Backend base class, OpenAI-compatible HTTP backend, and response
+handlers for processing streaming and non-streaming API responses.
+"""
+
+from __future__ import annotations
+
+from .backend import Backend, BackendType
+from .openai import OpenAIHTTPBackend
+from .response_handlers import (
+    AudioResponseHandler,
+    ChatCompletionsResponseHandler,
+    GenerationResponseHandler,
+    GenerationResponseHandlerFactory,
+    TextCompletionsResponseHandler,
+)
+
+__all__ = [
+    "AudioResponseHandler",
+    "Backend",
+    "BackendType",
+    "ChatCompletionsResponseHandler",
+    "GenerationResponseHandler",
+    "GenerationResponseHandlerFactory",
+    "OpenAIHTTPBackend",
+    "TextCompletionsResponseHandler",
+]
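The names exported above come from the two modules shown next plus response_handlers.py (diffed separately in this release). As a quick orientation, a minimal sketch of the two equivalent ways to construct the OpenAI backend exported here, assuming only the signatures visible in this diff:

    from guidellm.backends import Backend, OpenAIHTTPBackend

    # Registry path: "openai_http" is registered by the @Backend.register
    # decorator on OpenAIHTTPBackend, so create() forwards kwargs to it.
    backend = Backend.create("openai_http", target="http://localhost:8000")

    # Direct path: construct the concrete backend explicitly.
    backend = OpenAIHTTPBackend(target="http://localhost:8000", model="gpt-3.5-turbo")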
guidellm/backends/backend.py
@@ -0,0 +1,109 @@
+"""
+Backend interface and registry for generative AI model interactions.
+
+Provides the abstract base class for implementing backends that communicate with
+generative AI models. Backends handle the lifecycle of generation requests and
+provide a standard interface for distributed execution across worker processes.
+"""
+
+from __future__ import annotations
+
+from abc import abstractmethod
+from typing import Literal
+
+from guidellm.scheduler import BackendInterface
+from guidellm.schemas import GenerationRequest, GenerationResponse
+from guidellm.utils import RegistryMixin
+
+__all__ = [
+    "Backend",
+    "BackendType",
+]
+
+
+BackendType = Literal["openai_http"]
+
+
+class Backend(
+    RegistryMixin["type[Backend]"],
+    BackendInterface[GenerationRequest, GenerationResponse],
+):
+    """
+    Base class for generative AI backends with registry and lifecycle management.
+
+    Provides a standard interface for backends that communicate with generative AI
+    models. Combines the registry pattern for automatic discovery with a defined
+    lifecycle for process-based distributed execution. Backend state must be
+    pickleable for distributed execution across process boundaries.
+
+    Backend lifecycle phases:
+    1. Creation and configuration
+    2. Process startup - Initialize resources in worker process
+    3. Validation - Verify backend readiness
+    4. Request resolution - Process generation requests
+    5. Process shutdown - Clean up resources
+
+    Example:
+    ::
+        @Backend.register("my_backend")
+        class MyBackend(Backend):
+            def __init__(self, api_key: str):
+                super().__init__("my_backend")
+                self.api_key = api_key
+
+            async def process_startup(self):
+                self.client = MyAPIClient(self.api_key)
+
+        backend = Backend.create("my_backend", api_key="secret")
+    """
+
+    @classmethod
+    def create(cls, type_: BackendType, **kwargs) -> Backend:
+        """
+        Create a backend instance based on the backend type.
+
+        :param type_: The type of backend to create
+        :param kwargs: Additional arguments for backend initialization
+        :return: An instance of a subclass of Backend
+        :raises ValueError: If the backend type is not registered
+        """
+
+        backend = cls.get_registered_object(type_)
+
+        if backend is None:
+            raise ValueError(
+                f"Backend type '{type_}' is not registered. "
+                f"Available types: {list(cls.registry.keys()) if cls.registry else []}"
+            )
+
+        return backend(**kwargs)
+
+    def __init__(self, type_: BackendType):
+        """
+        Initialize a backend instance.
+
+        :param type_: The backend type identifier
+        """
+        self.type_ = type_
+
+    @property
+    def processes_limit(self) -> int | None:
+        """
+        :return: Maximum number of worker processes supported, None if unlimited
+        """
+        return None
+
+    @property
+    def requests_limit(self) -> int | None:
+        """
+        :return: Maximum number of concurrent requests supported globally,
+            None if unlimited
+        """
+        return None
+
+    @abstractmethod
+    async def default_model(self) -> str:
+        """
+        :return: The default model name or identifier for generation requests
+        """
+        ...
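The five lifecycle phases documented on Backend correspond to the async methods OpenAIHTTPBackend implements in the next file. A hypothetical single-request driver under that reading; building the GenerationRequest and RequestInfo objects is out of scope here:

    from guidellm.backends import Backend

    async def run_once(request, request_info):
        backend = Backend.create("openai_http", target="http://localhost:8000")  # 1. creation
        await backend.process_startup()                # 2. initialize resources in the worker
        try:
            await backend.validate()                   # 3. verify readiness
            async for response, info in backend.resolve(request, request_info):
                ...                                    # 4. consume progressive responses
        finally:
            await backend.process_shutdown()           # 5. release resources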
guidellm/backends/openai.py
@@ -0,0 +1,340 @@
+"""
+OpenAI HTTP backend implementation for GuideLLM.
+
+Provides HTTP-based backend for OpenAI-compatible servers including OpenAI API,
+vLLM servers, and other compatible inference engines. Supports text and chat
+completions with streaming, authentication, and multimodal capabilities.
+Handles request formatting, response parsing, error handling, and token usage
+tracking with flexible parameter customization.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import time
+from collections.abc import AsyncIterator
+from typing import Any
+
+import httpx
+
+from guidellm.backends.backend import Backend
+from guidellm.backends.response_handlers import GenerationResponseHandlerFactory
+from guidellm.schemas import GenerationRequest, GenerationResponse, RequestInfo
+
+__all__ = ["OpenAIHTTPBackend"]
+
+
+@Backend.register("openai_http")
+class OpenAIHTTPBackend(Backend):
+    """
+    HTTP backend for OpenAI-compatible servers.
+
+    Supports OpenAI API, vLLM servers, and other compatible endpoints with
+    text/chat completions, streaming, authentication, and multimodal inputs.
+    Handles request formatting, response parsing, error handling, and token
+    usage tracking with flexible parameter customization.
+
+    Example:
+    ::
+        backend = OpenAIHTTPBackend(
+            target="http://localhost:8000",
+            model="gpt-3.5-turbo",
+            timeout=120.0
+        )
+
+        await backend.process_startup()
+        async for response, request_info in backend.resolve(request, info):
+            process_response(response)
+        await backend.process_shutdown()
+    """
+
+    def __init__(
+        self,
+        target: str,
+        model: str = "",
+        api_routes: dict[str, str] | None = None,
+        response_handlers: dict[str, Any] | None = None,
+        timeout: float = 60.0,
+        http2: bool = True,
+        follow_redirects: bool = True,
+        verify: bool = False,
+        validate_backend: bool | str | dict[str, Any] = True,
+    ):
+        """
+        Initialize OpenAI HTTP backend with server configuration.
+
+        :param target: Base URL of the OpenAI-compatible server
+        :param model: Model identifier for generation requests
+        :param api_routes: Custom API endpoint routes mapping
+        :param response_handlers: Custom response handlers for different request types
+        :param timeout: Request timeout in seconds
+        :param http2: Enable HTTP/2 protocol support
+        :param follow_redirects: Follow HTTP redirects automatically
+        :param verify: Enable SSL certificate verification
+        :param validate_backend: Backend validation configuration
+        """
+        super().__init__(type_="openai_http")
+
+        # Request Values
+        self.target = target.rstrip("/").removesuffix("/v1")
+        self.model = model
+
+        # Store configuration
+        self.api_routes = api_routes or {
+            "health": "health",
+            "models": "v1/models",
+            "text_completions": "v1/completions",
+            "chat_completions": "v1/chat/completions",
+            "audio_transcriptions": "v1/audio/transcriptions",
+            "audio_translations": "v1/audio/translations",
+        }
+        self.response_handlers = response_handlers
+        self.timeout = timeout
+        self.http2 = http2
+        self.follow_redirects = follow_redirects
+        self.verify = verify
+        self.validate_backend: dict[str, Any] | None = self._resolve_validate_kwargs(
+            validate_backend
+        )
+
+        # Runtime state
+        self._in_process = False
+        self._async_client: httpx.AsyncClient | None = None
+
+    @property
+    def info(self) -> dict[str, Any]:
+        """
+        Get backend configuration details.
+
+        :return: Dictionary containing backend configuration details
+        """
+        return {
+            "target": self.target,
+            "model": self.model,
+            "timeout": self.timeout,
+            "http2": self.http2,
+            "follow_redirects": self.follow_redirects,
+            "verify": self.verify,
+            "openai_paths": self.api_routes,
+            "validate_backend": self.validate_backend,
+        }
+
+    async def process_startup(self):
+        """
+        Initialize HTTP client and backend resources.
+
+        :raises RuntimeError: If backend is already initialized
+        :raises httpx.RequestError: If HTTP client cannot be created
+        """
+        if self._in_process:
+            raise RuntimeError("Backend already started up for process.")
+
+        self._async_client = httpx.AsyncClient(
+            http2=self.http2,
+            timeout=self.timeout,
+            follow_redirects=self.follow_redirects,
+            verify=self.verify,
+            # Allow unlimited connections
+            limits=httpx.Limits(
+                max_connections=None,
+                max_keepalive_connections=None,
+                keepalive_expiry=5.0,  # default
+            ),
+        )
+        self._in_process = True
+
+    async def process_shutdown(self):
+        """
+        Clean up HTTP client and backend resources.
+
+        :raises RuntimeError: If backend was not properly initialized
+        :raises httpx.RequestError: If HTTP client cannot be closed
+        """
+        if not self._in_process:
+            raise RuntimeError("Backend not started up for process.")
+
+        await self._async_client.aclose()  # type: ignore [union-attr]
+        self._async_client = None
+        self._in_process = False
+
+    async def validate(self):
+        """
+        Validate backend connectivity and configuration.
+
+        :raises RuntimeError: If backend cannot connect or validate configuration
+        """
+        if self._async_client is None:
+            raise RuntimeError("Backend not started up for process.")
+
+        if not self.validate_backend:
+            return
+
+        try:
+            response = await self._async_client.request(**self.validate_backend)
+            response.raise_for_status()
+        except Exception as exc:
+            raise RuntimeError(
+                "Backend validation request failed. Could not connect to the server "
+                "or validate the backend configuration."
+            ) from exc
+
+    async def available_models(self) -> list[str]:
+        """
+        Get available models from the target server.
+
+        :return: List of model identifiers
+        :raises httpx.HTTPError: If models endpoint returns an error
+        :raises RuntimeError: If backend is not initialized
+        """
+        if self._async_client is None:
+            raise RuntimeError("Backend not started up for process.")
+
+        target = f"{self.target}/{self.api_routes['models']}"
+        response = await self._async_client.get(target)
+        response.raise_for_status()
+
+        return [item["id"] for item in response.json()["data"]]
+
+    async def default_model(self) -> str:
+        """
+        Get the default model for this backend.
+
+        :return: Model name, or an empty string if no model is available
+        """
+        if self.model or not self._in_process:
+            return self.model
+
+        models = await self.available_models()
+        return models[0] if models else ""
+
+    async def resolve(  # type: ignore[override]
+        self,
+        request: GenerationRequest,
+        request_info: RequestInfo,
+        history: list[tuple[GenerationRequest, GenerationResponse]] | None = None,
+    ) -> AsyncIterator[tuple[GenerationResponse, RequestInfo]]:
+        """
+        Process generation request and yield progressive responses.
+
+        Handles request formatting, timing tracking, API communication, and
+        response parsing with streaming support.
+
+        :param request: Generation request with content and parameters
+        :param request_info: Request tracking info updated with timing metadata
+        :param history: Conversation history (currently not supported)
+        :raises NotImplementedError: If history is provided
+        :raises RuntimeError: If backend is not initialized
+        :raises ValueError: If request type is unsupported
+        :yields: Tuples of (response, updated_request_info) as generation progresses
+        """
+        if self._async_client is None:
+            raise RuntimeError("Backend not started up for process.")
+
+        if history is not None:
+            raise NotImplementedError("Multi-turn requests not yet supported")
+
+        if (request_path := self.api_routes.get(request.request_type)) is None:
+            raise ValueError(f"Unsupported request type '{request.request_type}'")
+
+        request_url = f"{self.target}/{request_path}"
+        request_files = (
+            {
+                key: tuple(value) if isinstance(value, list) else value
+                for key, value in request.arguments.files.items()
+            }
+            if request.arguments.files
+            else None
+        )
+        request_json = request.arguments.body if not request_files else None
+        request_data = request.arguments.body if request_files else None
+        response_handler = GenerationResponseHandlerFactory.create(
+            request.request_type, handler_overrides=self.response_handlers
+        )
+
+        if not request.arguments.stream:
+            request_info.timings.request_start = time.time()
+            response = await self._async_client.request(
+                request.arguments.method or "POST",
+                request_url,
+                params=request.arguments.params,
+                headers=request.arguments.headers,
+                json=request_json,
+                data=request_data,
+                files=request_files,
+            )
+            request_info.timings.request_end = time.time()
+            response.raise_for_status()
+            data = response.json()
+            yield response_handler.compile_non_streaming(request, data), request_info
+            return
+
+        try:
+            request_info.timings.request_start = time.time()
+
+            async with self._async_client.stream(
+                request.arguments.method or "POST",
+                request_url,
+                params=request.arguments.params,
+                headers=request.arguments.headers,
+                json=request_json,
+                data=request_data,
+                files=request_files,
+            ) as stream:
+                stream.raise_for_status()
+                end_reached = False
+
+                async for chunk in stream.aiter_lines():
+                    iter_time = time.time()
+
+                    if request_info.timings.first_request_iteration is None:
+                        request_info.timings.first_request_iteration = iter_time
+                    request_info.timings.last_request_iteration = iter_time
+                    request_info.timings.request_iterations += 1
+
+                    iterations = response_handler.add_streaming_line(chunk)
+                    if iterations is None or iterations <= 0 or end_reached:
+                        end_reached = end_reached or iterations is None
+                        continue
+
+                    if request_info.timings.first_token_iteration is None:
+                        request_info.timings.first_token_iteration = iter_time
+                        request_info.timings.token_iterations = 0
+
+                    request_info.timings.last_token_iteration = iter_time
+                    request_info.timings.token_iterations += iterations
+
+            request_info.timings.request_end = time.time()
+            yield response_handler.compile_streaming(request), request_info
+        except asyncio.CancelledError as err:
+            # Yield current result to store iterative results before propagating
+            yield response_handler.compile_streaming(request), request_info
+            raise err
+
+    def _resolve_validate_kwargs(
+        self, validate_backend: bool | str | dict[str, Any]
+    ) -> dict[str, Any] | None:
+        if not (validate_kwargs := validate_backend):
+            return None
+
+        if validate_kwargs is True:
+            validate_kwargs = "health"
+
+        if isinstance(validate_kwargs, str) and validate_kwargs in self.api_routes:
+            validate_kwargs = f"{self.target}/{self.api_routes[validate_kwargs]}"
+
+        if isinstance(validate_kwargs, str):
+            validate_kwargs = {
+                "method": "GET",
+                "url": validate_kwargs,
+            }
+
+        if not isinstance(validate_kwargs, dict) or "url" not in validate_kwargs:
+            raise ValueError(
+                "validate_backend must be a boolean, string, or dictionary and contain "
+                f"a target URL. Got: {validate_kwargs}"
+            )
+
+        if "method" not in validate_kwargs:
+            validate_kwargs["method"] = "GET"
+
+        return validate_kwargs
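_resolve_validate_kwargs normalizes every accepted validate_backend value into the keyword arguments for a single httpx validation request. Reading straight from the branches above, the four forms a caller can pass (the /ping URL is illustrative):

    from guidellm.backends import OpenAIHTTPBackend

    base = "http://localhost:8000"
    OpenAIHTTPBackend(target=base)                             # True (default): GET {target}/health
    OpenAIHTTPBackend(target=base, validate_backend="models")  # route name: GET {target}/v1/models
    OpenAIHTTPBackend(target=base, validate_backend={"method": "POST", "url": f"{base}/ping"})
    OpenAIHTTPBackend(target=base, validate_backend=False)     # disable the validate() check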