guidellm 0.1.0__py3-none-any.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their public registries.

guidellm/__init__.py ADDED
@@ -0,0 +1,19 @@
+ """
+ Guidellm is a package that provides an easy and intuitive interface for
+ evaluating and benchmarking large language models (LLMs).
+ """
+
+ # flake8: noqa
+
+ import os
+ import transformers  # type: ignore
+
+ os.environ["TOKENIZERS_PARALLELISM"] = "false"  # Silence warnings for tokenizers
+ transformers.logging.set_verbosity_error()  # Silence warnings for transformers
+
+
+ from .config import settings
+ from .logger import configure_logger, logger
+ from .main import generate_benchmark_report
+
+ __all__ = ["configure_logger", "logger", "settings", "generate_benchmark_report"]
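
As a quick orientation for the rest of the diff: importing the package silences tokenizer and transformers warnings as a side effect, and the package root re-exports the main entry points. A minimal usage sketch of those exports (assuming configure_logger can be called with no arguments, which this diff does not show):

    # Sketch of the package-level exports; configure_logger's signature is not
    # part of this diff, so the no-argument call below is an assumption.
    from guidellm import configure_logger, logger, settings

    configure_logger()
    logger.info("Loaded guidellm with settings: {}", settings)
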
guidellm/backend/__init__.py ADDED
@@ -0,0 +1,10 @@
+ from .base import Backend, BackendEngine, BackendEnginePublic, GenerativeResponse
+ from .openai import OpenAIBackend
+
+ __all__ = [
+     "Backend",
+     "BackendEngine",
+     "BackendEnginePublic",
+     "GenerativeResponse",
+     "OpenAIBackend",
+ ]
guidellm/backend/base.py ADDED
@@ -0,0 +1,320 @@
+ import asyncio
+ import functools
+ from abc import ABC, abstractmethod
+ from typing import AsyncGenerator, Dict, List, Literal, Optional, Type, Union
+
+ from loguru import logger
+ from pydantic import BaseModel
+ from transformers import (  # type: ignore # noqa: PGH003
+     AutoTokenizer,
+     PreTrainedTokenizer,
+ )
+
+ from guidellm.core import TextGenerationRequest, TextGenerationResult
+
+ __all__ = ["Backend", "BackendEngine", "BackendEnginePublic", "GenerativeResponse"]
+
+
+ BackendEnginePublic = Literal["openai_server"]
+ BackendEngine = Union[BackendEnginePublic, Literal["test"]]
+
+
+ class GenerativeResponse(BaseModel):
+     """
+     A model representing a response from a generative AI backend.
+
+     :param type_: The type of response, either 'token_iter' for intermediate
+         token output or 'final' for the final result.
+     :type type_: Literal["token_iter", "final"]
+     :param add_token: The token to add to the output
+         (only applicable if type_ is 'token_iter').
+     :type add_token: Optional[str]
+     :param prompt: The original prompt sent to the backend.
+     :type prompt: Optional[str]
+     :param output: The final generated output (only applicable if type_ is 'final').
+     :type output: Optional[str]
+     :param prompt_token_count: The number of tokens in the prompt.
+     :type prompt_token_count: Optional[int]
+     :param output_token_count: The number of tokens in the output.
+     :type output_token_count: Optional[int]
+     """
+
+     type_: Literal["token_iter", "final"]
+     add_token: Optional[str] = None
+     prompt: Optional[str] = None
+     output: Optional[str] = None
+     prompt_token_count: Optional[int] = None
+     output_token_count: Optional[int] = None
+
+
+ class Backend(ABC):
+     """
+     Abstract base class for generative AI backends.
+
+     This class provides a common interface for creating and interacting with different
+     generative AI backends. Subclasses should implement the abstract methods to
+     define specific backend behavior.
+
+     :cvar _registry: A dictionary that maps BackendEngine types to backend classes.
+     :type _registry: Dict[BackendEngine, Type[Backend]]
+     :param type_: The type of the backend.
+     :type type_: BackendEngine
+     :param target: The target URL for the backend.
+     :type target: str
+     :param model: The model used by the backend.
+     :type model: str
+     """
+
+     _registry: Dict[BackendEngine, "Type[Backend]"] = {}
+
+     @classmethod
+     def register(cls, backend_type: BackendEngine):
+         """
+         A decorator to register a backend class in the backend registry.
+
+         :param backend_type: The type of backend to register.
+         :type backend_type: BackendEngine
+         :return: The decorated backend class.
+         :rtype: Type[Backend]
+         """
+
+         def inner_wrapper(wrapped_class: Type["Backend"]):
+             cls._registry[backend_type] = wrapped_class
+             logger.info("Registered backend type: {}", backend_type)
+             return wrapped_class
+
+         return inner_wrapper
+
+     @classmethod
+     def create(cls, backend_type: BackendEngine, **kwargs) -> "Backend":
+         """
+         Factory method to create a backend instance based on the backend type.
+
+         :param backend_type: The type of backend to create.
+         :type backend_type: BackendEngine
+         :param kwargs: Additional arguments for backend initialization.
+         :return: An instance of a subclass of Backend.
+         :rtype: Backend
+         :raises ValueError: If the backend type is not registered.
+         """
+
+         logger.info("Creating backend of type {}", backend_type)
+
+         if backend_type not in cls._registry:
+             err = ValueError(f"Unsupported backend type: {backend_type}")
+             logger.error("{}", err)
+             raise err
+
+         return Backend._registry[backend_type](**kwargs)
+
+     def __init__(self, type_: BackendEngine, target: str, model: str):
+         """
+         Base constructor for the Backend class.
+         Calls into test_connection to ensure the backend is reachable.
+         Ensure all setup is done in the subclass constructor before calling super.
+
+         :param type_: The type of the backend.
+         :param target: The target URL for the backend.
+         :param model: The model used by the backend.
+         """
+         self._type = type_
+         self._target = target
+         self._model = model
+
+         self.test_connection()
+
+     @property
+     def default_model(self) -> str:
+         """
+         Get the default model for the backend.
+
+         :return: The default model.
+         :rtype: str
+         :raises ValueError: If no models are available.
+         """
+         return _cachable_default_model(self)
+
+     @property
+     def type_(self) -> BackendEngine:
+         """
+         Get the type of the backend.
+
+         :return: The type of the backend.
+         :rtype: BackendEngine
+         """
+         return self._type
+
+     @property
+     def target(self) -> str:
+         """
+         Get the target URL for the backend.
+
+         :return: The target URL.
+         :rtype: str
+         """
+         return self._target
+
+     @property
+     def model(self) -> str:
+         """
+         Get the model used by the backend.
+
+         :return: The model name.
+         :rtype: str
+         """
+         return self._model
+
+     def model_tokenizer(self) -> PreTrainedTokenizer:
+         """
+         Get the tokenizer for the backend model.
+
+         :return: The tokenizer instance.
+         """
+         return AutoTokenizer.from_pretrained(self.model)
+
+     def test_connection(self) -> bool:
+         """
+         Test the connection to the backend by running a short text generation request.
+         If successful, returns True; otherwise, an exception is raised.
+
+         :return: True if the connection is successful.
+         :rtype: bool
+         :raises RuntimeError: If the connection test fails.
+         """
+         try:
+             asyncio.get_running_loop()
+             is_async = True
+         except RuntimeError:
+             is_async = False
+
+         if is_async:
+             logger.warning("Running in async mode, cannot test connection")
+             return True
+
+         try:
+             request = TextGenerationRequest(
+                 prompt="Test connection", output_token_count=5
+             )
+
+             asyncio.run(self.submit(request))
+             return True
+         except Exception as err:
+             raise_err = RuntimeError(
+                 f"Backend connection test failed for backend type={self.type_} "
+                 f"with target={self.target} and model={self.model} with error: {err}"
+             )
+             logger.error(raise_err)
+             raise raise_err from err
+
+     async def submit(self, request: TextGenerationRequest) -> TextGenerationResult:
+         """
+         Submit a text generation request and return the result.
+
+         This method handles the request submission to the backend and processes
+         the response in a streaming fashion if applicable.
+
+         :param request: The request object containing the prompt
+             and other configurations.
+         :type request: TextGenerationRequest
+         :return: The result of the text generation request.
+         :rtype: TextGenerationResult
+         :raises ValueError: If no final response is received from the backend.
+         """
+
+         logger.debug("Submitting request with prompt: {}", request.prompt)
+
+         result = TextGenerationResult(request=request)
+         result.start(request.prompt)
+         received_final = False
+
+         async for response in self.make_request(request):
+             logger.debug("Received response: {}", response)
+             if response.type_ == "token_iter":
+                 result.output_token(response.add_token if response.add_token else "")
+             elif response.type_ == "final":
+                 if received_final:
+                     err = ValueError(
+                         "Received multiple final responses from the backend."
+                     )
+                     logger.error(err)
+                     raise err
+
+                 result.end(
+                     output=response.output,
+                     prompt_token_count=response.prompt_token_count,
+                     output_token_count=response.output_token_count,
+                 )
+                 received_final = True
+             else:
+                 err = ValueError(
+                     f"Invalid response received from the backend of type: "
+                     f"{response.type_} for {response}"
+                 )
+                 logger.error(err)
+                 raise err
+
+         if not received_final:
+             err = ValueError("No final response received from the backend.")
+             logger.error(err)
+             raise err
+
+         logger.info("Request completed with output: {}", result.output)
+
+         return result
+
+     @abstractmethod
+     async def make_request(
+         self,
+         request: TextGenerationRequest,
+     ) -> AsyncGenerator[GenerativeResponse, None]:
+         """
+         Abstract method to make a request to the backend.
+
+         Subclasses must implement this method to define how requests are handled
+         by the backend.
+
+         :param request: The request object containing the prompt and
+             other configurations.
+         :type request: TextGenerationRequest
+         :yield: A generator yielding responses from the backend.
+         :rtype: AsyncGenerator[GenerativeResponse, None]
+         """
+         yield None  # type: ignore # noqa: PGH003
+
+     @abstractmethod
+     def available_models(self) -> List[str]:
+         """
+         Abstract method to get the available models for the backend.
+
+         Subclasses must implement this method to provide the list of models
+         supported by the backend.
+
+         :return: A list of available models.
+         :rtype: List[str]
+         :raises NotImplementedError: If the method is not implemented by a subclass.
+         """
+         raise NotImplementedError
+
+
+ @functools.lru_cache(maxsize=1)
+ def _cachable_default_model(backend: Backend) -> str:
+     """
+     Get the default model for a backend using LRU caching.
+
+     This function caches the default model to optimize repeated lookups.
+
+     :param backend: The backend instance for which to get the default model.
+     :type backend: Backend
+     :return: The default model.
+     :rtype: str
+     :raises ValueError: If no models are available.
+     """
+     logger.debug("Getting default model for backend: {}", backend)
+     models = backend.available_models()
+     if models:
+         logger.debug("Default model: {}", models[0])
+         return models[0]
+
+     err = ValueError("No models available.")
+     logger.error(err)
+     raise err
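
Taken together, register, create, and the two abstract methods form the complete plug-in surface for a backend. A minimal sketch of a custom backend written against this API follows; the EchoBackend name, its echo behavior, and the "local"/"echo-model" values are invented for illustration, while "test" is the engine literal already reserved in BackendEngine:

    # Hypothetical subclass demonstrating the register/create flow from base.py.
    from typing import AsyncGenerator, List

    from guidellm.backend.base import Backend, GenerativeResponse
    from guidellm.core import TextGenerationRequest


    @Backend.register("test")
    class EchoBackend(Backend):
        """A toy backend that streams the prompt back one character at a time."""

        def __init__(self):
            # All setup happens before calling super(), which runs test_connection().
            super().__init__(type_="test", target="local", model="echo-model")

        async def make_request(
            self, request: TextGenerationRequest
        ) -> AsyncGenerator[GenerativeResponse, None]:
            for char in request.prompt:
                yield GenerativeResponse(type_="token_iter", add_token=char)
            yield GenerativeResponse(
                type_="final",
                prompt=request.prompt,
                output=request.prompt,
                output_token_count=len(request.prompt),
            )

        def available_models(self) -> List[str]:
            return ["echo-model"]


    backend = Backend.create("test")  # resolved through the registry
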
guidellm/backend/openai.py ADDED
@@ -0,0 +1,168 @@
+ from typing import AsyncGenerator, Dict, List, Optional
+
+ from loguru import logger
+ from openai import AsyncOpenAI, OpenAI
+
+ from guidellm.backend.base import Backend, GenerativeResponse
+ from guidellm.config import settings
+ from guidellm.core import TextGenerationRequest
+
+ __all__ = ["OpenAIBackend"]
+
+
+ @Backend.register("openai_server")
+ class OpenAIBackend(Backend):
+     """
+     An OpenAI backend implementation for generative AI results.
+
+     This class provides an interface to communicate with the
+     OpenAI server for generating responses based on given prompts.
+
+     :param openai_api_key: The API key for OpenAI.
+         If not provided, it will default to the key from settings.
+     :type openai_api_key: Optional[str]
+     :param target: The target URL string for the OpenAI server.
+     :type target: Optional[str]
+     :param model: The OpenAI model to use, defaults to the first available model.
+     :type model: Optional[str]
+     :param request_args: Additional arguments for the OpenAI request.
+     :type request_args: Dict[str, Any]
+     """
+
+     def __init__(
+         self,
+         openai_api_key: Optional[str] = None,
+         target: Optional[str] = None,
+         model: Optional[str] = None,
+         **request_args,
+     ):
+         self._request_args: Dict = request_args
+         api_key: str = openai_api_key or settings.openai.api_key
+
+         if not api_key:
+             err = ValueError(
+                 "`GUIDELLM__OPENAI__API_KEY` environment variable or "
+                 "--openai-api-key CLI parameter must be specified for the "
+                 "OpenAI backend."
+             )
+             logger.error("{}", err)
+             raise err
+
+         base_url = target or settings.openai.base_url
+
+         if not base_url:
+             err = ValueError(
+                 "`GUIDELLM__OPENAI__BASE_URL` environment variable or "
+                 "target parameter must be specified for the OpenAI backend."
+             )
+             logger.error("{}", err)
+             raise err
+
+         self._async_client = AsyncOpenAI(api_key=api_key, base_url=base_url)
+         self._client = OpenAI(api_key=api_key, base_url=base_url)
+         self._model = model or self.default_model
+
+         super().__init__(type_="openai_server", target=base_url, model=self._model)
+         logger.info("OpenAI {} Backend listening on {}", self._model, base_url)
+
+     async def make_request(
+         self,
+         request: TextGenerationRequest,
+     ) -> AsyncGenerator[GenerativeResponse, None]:
+         """
+         Make a request to the OpenAI backend.
+
+         This method sends a prompt to the OpenAI backend and streams
+         the response tokens back.
+
+         :param request: The text generation request to submit.
+         :type request: TextGenerationRequest
+         :yield: A stream of GenerativeResponse objects.
+         :rtype: AsyncGenerator[GenerativeResponse, None]
+         """
+
+         logger.debug("Making request to OpenAI backend with prompt: {}", request.prompt)
+
+         request_args: Dict = {
+             "n": 1,  # Number of completions for each prompt
+         }
+
+         if request.output_token_count is not None:
+             request_args.update(
+                 {
+                     "max_tokens": request.output_token_count,
+                     "stop": None,
+                 }
+             )
+         elif settings.openai.max_gen_tokens and settings.openai.max_gen_tokens > 0:
+             request_args.update(
+                 {
+                     "max_tokens": settings.openai.max_gen_tokens,
+                 }
+             )
+
+         request_args.update(self._request_args)
+
+         stream = await self._async_client.chat.completions.create(
+             model=self.model,
+             messages=[
+                 {"role": "system", "content": request.prompt},
+             ],
+             stream=True,
+             **request_args,
+         )
+         token_count = 0
+         async for chunk in stream:
+             choice = chunk.choices[0]
+             token = choice.delta.content or ""
+
+             if choice.finish_reason is not None:
+                 yield GenerativeResponse(
+                     type_="final",
+                     prompt=request.prompt,
+                     prompt_token_count=request.prompt_token_count,
+                     output_token_count=token_count,
+                 )
+                 break
+
+             token_count += 1
+             yield GenerativeResponse(
+                 type_="token_iter",
+                 add_token=token,
+                 prompt=request.prompt,
+                 prompt_token_count=request.prompt_token_count,
+                 output_token_count=token_count,
+             )
+
+     def available_models(self) -> List[str]:
+         """
+         Get the available models for the backend.
+
+         This method queries the OpenAI API to retrieve a list of available models.
+
+         :return: A list of available models.
+         :rtype: List[str]
+         :raises openai.OpenAIError: If an error occurs while retrieving models.
+         """
+
+         try:
+             return [model.id for model in self._client.models.list().data]
+         except Exception as error:
+             logger.error("Failed to retrieve available models: {}", error)
+             raise error
+
+     def validate_connection(self):
+         """
+         Validate the connection to the OpenAI backend.
+
+         This method checks that the OpenAI backend is reachable and
+         the API key is valid.
+
+         :raises openai.OpenAIError: If the connection is invalid.
+         """
+
+         try:
+             self._client.models.list()
+         except Exception as error:
+             logger.error("Failed to validate OpenAI connection: {}", error)
+             raise error
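
Putting it together, a caller drives this backend through the inherited submit() method from base.py. A minimal end-to-end sketch (the URL, model id, and API key below are placeholders, and an OpenAI-compatible server is assumed to be reachable at the target):

    # Hypothetical usage; all connection values are placeholders.
    import asyncio

    from guidellm.backend import Backend
    from guidellm.core import TextGenerationRequest

    backend = Backend.create(
        "openai_server",
        openai_api_key="EMPTY",             # placeholder key
        target="http://localhost:8000/v1",  # placeholder OpenAI-compatible server
        model="my-model",                   # placeholder model id
    )

    request = TextGenerationRequest(prompt="Hello, world!", output_token_count=16)
    result = asyncio.run(backend.submit(request))
    print(result.output)
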