PyPI - guidellm - Versions diffs - 0.1.0__py3-none-any.whl → 0.2.0rc20250418__py3-none-any.whl - Mend

guidellm 0.1.0__py3-none-any.whl → 0.2.0rc20250418__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of guidellm might be problematic. Click here for more details.

Files changed (69) hide show

guidellm/__init__.py +38 -6
guidellm/__main__.py +294 -0
guidellm/backend/__init__.py +19 -6
guidellm/backend/backend.py +238 -0
guidellm/backend/openai.py +532 -122
guidellm/backend/response.py +132 -0
guidellm/benchmark/__init__.py +73 -0
guidellm/benchmark/aggregator.py +760 -0
guidellm/benchmark/benchmark.py +838 -0
guidellm/benchmark/benchmarker.py +334 -0
guidellm/benchmark/entrypoints.py +141 -0
guidellm/benchmark/output.py +946 -0
guidellm/benchmark/profile.py +409 -0
guidellm/benchmark/progress.py +720 -0
guidellm/config.py +34 -56
guidellm/data/__init__.py +4 -0
guidellm/data/prideandprejudice.txt.gz +0 -0
guidellm/dataset/__init__.py +22 -0
guidellm/dataset/creator.py +213 -0
guidellm/dataset/entrypoints.py +42 -0
guidellm/dataset/file.py +90 -0
guidellm/dataset/hf_datasets.py +62 -0
guidellm/dataset/in_memory.py +132 -0
guidellm/dataset/synthetic.py +262 -0
guidellm/objects/__init__.py +18 -0
guidellm/objects/pydantic.py +60 -0
guidellm/objects/statistics.py +947 -0
guidellm/request/__init__.py +12 -10
guidellm/request/loader.py +281 -0
guidellm/request/request.py +79 -0
guidellm/scheduler/__init__.py +51 -3
guidellm/scheduler/result.py +137 -0
guidellm/scheduler/scheduler.py +382 -0
guidellm/scheduler/strategy.py +493 -0
guidellm/scheduler/types.py +7 -0
guidellm/scheduler/worker.py +511 -0
guidellm/utils/__init__.py +16 -29
guidellm/utils/colors.py +8 -0
guidellm/utils/hf_transformers.py +35 -0
guidellm/utils/random.py +43 -0
guidellm/utils/text.py +118 -357
{guidellm-0.1.0.dist-info → guidellm-0.2.0rc20250418.dist-info}/METADATA +96 -79
guidellm-0.2.0rc20250418.dist-info/RECORD +48 -0
{guidellm-0.1.0.dist-info → guidellm-0.2.0rc20250418.dist-info}/WHEEL +1 -1
guidellm-0.2.0rc20250418.dist-info/entry_points.txt +2 -0
guidellm/backend/base.py +0 -320
guidellm/core/__init__.py +0 -24
guidellm/core/distribution.py +0 -190
guidellm/core/report.py +0 -321
guidellm/core/request.py +0 -44
guidellm/core/result.py +0 -545
guidellm/core/serializable.py +0 -169
guidellm/executor/__init__.py +0 -10
guidellm/executor/base.py +0 -213
guidellm/executor/profile_generator.py +0 -343
guidellm/main.py +0 -336
guidellm/request/base.py +0 -194
guidellm/request/emulated.py +0 -391
guidellm/request/file.py +0 -76
guidellm/request/transformers.py +0 -100
guidellm/scheduler/base.py +0 -374
guidellm/scheduler/load_generator.py +0 -196
guidellm/utils/injector.py +0 -70
guidellm/utils/progress.py +0 -196
guidellm/utils/transformers.py +0 -151
guidellm-0.1.0.dist-info/RECORD +0 -35
guidellm-0.1.0.dist-info/entry_points.txt +0 -3
{guidellm-0.1.0.dist-info → guidellm-0.2.0rc20250418.dist-info/licenses}/LICENSE +0 -0
{guidellm-0.1.0.dist-info → guidellm-0.2.0rc20250418.dist-info}/top_level.txt +0 -0

guidellm/backend/base.py DELETED Viewed

@@ -1,320 +0,0 @@
-import asyncio
-import functools
-from abc import ABC, abstractmethod
-from typing import AsyncGenerator, Dict, List, Literal, Optional, Type, Union
-from loguru import logger
-from pydantic import BaseModel
-from transformers import (  # type: ignore  # noqa: PGH003
-    AutoTokenizer,
-    PreTrainedTokenizer,
-)
-from guidellm.core import TextGenerationRequest, TextGenerationResult
-__all__ = ["Backend", "BackendEngine", "BackendEnginePublic", "GenerativeResponse"]
-BackendEnginePublic = Literal["openai_server"]
-BackendEngine = Union[BackendEnginePublic, Literal["test"]]
-class GenerativeResponse(BaseModel):
-    """
-    A model representing a response from a generative AI backend.
-    :param type_: The type of response, either 'token_iter' for intermediate
-        token output or 'final' for the final result.
-    :type type_: Literal["token_iter", "final"]
-    :param add_token: The token to add to the output
-        (only applicable if type_ is 'token_iter').
-    :type add_token: Optional[str]
-    :param prompt: The original prompt sent to the backend.
-    :type prompt: Optional[str]
-    :param output: The final generated output (only applicable if type_ is 'final').
-    :type output: Optional[str]
-    :param prompt_token_count: The number of tokens in the prompt.
-    :type prompt_token_count: Optional[int]
-    :param output_token_count: The number of tokens in the output.
-    :type output_token_count: Optional[int]
-    """
-    type_: Literal["token_iter", "final"]
-    add_token: Optional[str] = None
-    prompt: Optional[str] = None
-    output: Optional[str] = None
-    prompt_token_count: Optional[int] = None
-    output_token_count: Optional[int] = None
-class Backend(ABC):
-    """
-    Abstract base class for generative AI backends.
-    This class provides a common interface for creating and interacting with different
-    generative AI backends. Subclasses should implement the abstract methods to
-    define specific backend behavior.
-    :cvar _registry: A dictionary that maps BackendEngine types to backend classes.
-    :type _registry: Dict[BackendEngine, Type[Backend]]
-    :param type_: The type of the backend.
-    :type type_: BackendEngine
-    :param target: The target URL for the backend.
-    :type target: str
-    :param model: The model used by the backend.
-    :type model: str
-    """
-    _registry: Dict[BackendEngine, "Type[Backend]"] = {}
-    @classmethod
-    def register(cls, backend_type: BackendEngine):
-        """
-        A decorator to register a backend class in the backend registry.
-        :param backend_type: The type of backend to register.
-        :type backend_type: BackendEngine
-        :return: The decorated backend class.
-        :rtype: Type[Backend]
-        """
-        def inner_wrapper(wrapped_class: Type["Backend"]):
-            cls._registry[backend_type] = wrapped_class
-            logger.info("Registered backend type: {}", backend_type)
-            return wrapped_class
-        return inner_wrapper
-    @classmethod
-    def create(cls, backend_type: BackendEngine, **kwargs) -> "Backend":
-        """
-        Factory method to create a backend instance based on the backend type.
-        :param backend_type: The type of backend to create.
-        :type backend_type: BackendEngine
-        :param kwargs: Additional arguments for backend initialization.
-        :return: An instance of a subclass of Backend.
-        :rtype: Backend
-        :raises ValueError: If the backend type is not registered.
-        """
-        logger.info("Creating backend of type {}", backend_type)
-        if backend_type not in cls._registry:
-            err = ValueError(f"Unsupported backend type: {backend_type}")
-            logger.error("{}", err)
-            raise err
-        return Backend._registry[backend_type](**kwargs)
-    def __init__(self, type_: BackendEngine, target: str, model: str):
-        """
-        Base constructor for the Backend class.
-        Calls into test_connection to ensure the backend is reachable.
-        Ensure all setup is done in the subclass constructor before calling super.
-        :param type_: The type of the backend.
-        :param target: The target URL for the backend.
-        :param model: The model used by the backend.
-        """
-        self._type = type_
-        self._target = target
-        self._model = model
-        self.test_connection()
-    @property
-    def default_model(self) -> str:
-        """
-        Get the default model for the backend.
-        :return: The default model.
-        :rtype: str
-        :raises ValueError: If no models are available.
-        """
-        return _cachable_default_model(self)
-    @property
-    def type_(self) -> BackendEngine:
-        """
-        Get the type of the backend.
-        :return: The type of the backend.
-        :rtype: BackendEngine
-        """
-        return self._type
-    @property
-    def target(self) -> str:
-        """
-        Get the target URL for the backend.
-        :return: The target URL.
-        :rtype: str
-        """
-        return self._target
-    @property
-    def model(self) -> str:
-        """
-        Get the model used by the backend.
-        :return: The model name.
-        :rtype: str
-        """
-        return self._model
-    def model_tokenizer(self) -> PreTrainedTokenizer:
-        """
-        Get the tokenizer for the backend model.
-        :return: The tokenizer instance.
-        """
-        return AutoTokenizer.from_pretrained(self.model)
-    def test_connection(self) -> bool:
-        """
-        Test the connection to the backend by running a short text generation request.
-        If successful, returns True, otherwise raises an exception.
-        :return: True if the connection is successful.
-        :rtype: bool
-        :raises ValueError: If the connection test fails.
-        """
-        try:
-            asyncio.get_running_loop()
-            is_async = True
-        except RuntimeError:
-            is_async = False
-        if is_async:
-            logger.warning("Running in async mode, cannot test connection")
-            return True
-        try:
-            request = TextGenerationRequest(
-                prompt="Test connection", output_token_count=5
-            )
-            asyncio.run(self.submit(request))
-            return True
-        except Exception as err:
-            raise_err = RuntimeError(
-                f"Backend connection test failed for backend type={self.type_} "
-                f"with target={self.target} and model={self.model} with error: {err}"
-            )
-            logger.error(raise_err)
-            raise raise_err from err
-    async def submit(self, request: TextGenerationRequest) -> TextGenerationResult:
-        """
-        Submit a text generation request and return the result.
-        This method handles the request submission to the backend and processes
-        the response in a streaming fashion if applicable.
-        :param request: The request object containing the prompt
-            and other configurations.
-        :type request: TextGenerationRequest
-        :return: The result of the text generation request.
-        :rtype: TextGenerationResult
-        :raises ValueError: If no response is received from the backend.
-        """
-        logger.debug("Submitting request with prompt: {}", request.prompt)
-        result = TextGenerationResult(request=request)
-        result.start(request.prompt)
-        received_final = False
-        async for response in self.make_request(request):
-            logger.debug("Received response: {}", response)
-            if response.type_ == "token_iter":
-                result.output_token(response.add_token if response.add_token else "")
-            elif response.type_ == "final":
-                if received_final:
-                    err = ValueError(
-                        "Received multiple final responses from the backend."
-                    )
-                    logger.error(err)
-                    raise err
-                result.end(
-                    output=response.output,
-                    prompt_token_count=response.prompt_token_count,
-                    output_token_count=response.output_token_count,
-                )
-                received_final = True
-            else:
-                err = ValueError(
-                    f"Invalid response received from the backend of type: "
-                    f"{response.type_} for {response}"
-                )
-                logger.error(err)
-                raise err
-        if not received_final:
-            err = ValueError("No final response received from the backend.")
-            logger.error(err)
-            raise err
-        logger.info("Request completed with output: {}", result.output)
-        return result
-    @abstractmethod
-    async def make_request(
-        self,
-        request: TextGenerationRequest,
-    ) -> AsyncGenerator[GenerativeResponse, None]:
-        """
-        Abstract method to make a request to the backend.
-        Subclasses must implement this method to define how requests are handled
-        by the backend.
-        :param request: The request object containing the prompt and
-            other configurations.
-        :type request: TextGenerationRequest
-        :yield: A generator yielding responses from the backend.
-        :rtype: AsyncGenerator[GenerativeResponse, None]
-        """
-        yield None  # type: ignore  # noqa: PGH003
-    @abstractmethod
-    def available_models(self) -> List[str]:
-        """
-        Abstract method to get the available models for the backend.
-        Subclasses must implement this method to provide the list of models
-        supported by the backend.
-        :return: A list of available models.
-        :rtype: List[str]
-        :raises NotImplementedError: If the method is not implemented by a subclass.
-        """
-        raise NotImplementedError
-@functools.lru_cache(maxsize=1)
-def _cachable_default_model(backend: Backend) -> str:
-    """
-    Get the default model for a backend using LRU caching.
-    This function caches the default model to optimize repeated lookups.
-    :param backend: The backend instance for which to get the default model.
-    :type backend: Backend
-    :return: The default model.
-    :rtype: str
-    :raises ValueError: If no models are available.
-    """
-    logger.debug("Getting default model for backend: {}", backend)
-    models = backend.available_models()
-    if models:
-        logger.debug("Default model: {}", models[0])
-        return models[0]
-    err = ValueError("No models available.")
-    logger.error(err)
-    raise err

guidellm/core/__init__.py DELETED Viewed

@@ -1,24 +0,0 @@
-from .distribution import Distribution
-from .report import GuidanceReport
-from .request import TextGenerationRequest
-from .result import (
-    RequestConcurrencyMeasurement,
-    TextGenerationBenchmark,
-    TextGenerationBenchmarkReport,
-    TextGenerationError,
-    TextGenerationResult,
-)
-from .serializable import Serializable, SerializableFileType
-__all__ = [
-    "Distribution",
-    "GuidanceReport",
-    "RequestConcurrencyMeasurement",
-    "Serializable",
-    "SerializableFileType",
-    "TextGenerationBenchmark",
-    "TextGenerationBenchmarkReport",
-    "TextGenerationError",
-    "TextGenerationRequest",
-    "TextGenerationResult",
-]

guidellm/core/distribution.py DELETED Viewed

@@ -1,190 +0,0 @@
-from typing import List, Sequence
-import numpy as np
-from loguru import logger
-from pydantic import Field
-from guidellm.core.serializable import Serializable
-__all__ = ["Distribution"]
-class Distribution(Serializable):
-    """
-    A class to represent a statistical distribution and perform various
-    statistical analyses.
-    """
-    data: Sequence[float] = Field(
-        default_factory=list,
-        description="The data points of the distribution.",
-    )
-    def __str__(self):
-        return f"Distribution({self.describe()})"
-    def __len__(self):
-        return len(self.data)
-    @property
-    def mean(self) -> float:
-        """
-        Calculate and return the mean of the distribution.
-        :return: The mean of the distribution.
-        """
-        if not self.data:
-            logger.warning("No data points available to calculate mean.")
-            return 0.0
-        mean_value = np.mean(self.data).item()
-        logger.debug(f"Calculated mean: {mean_value}")
-        return mean_value
-    @property
-    def median(self) -> float:
-        """
-        Calculate and return the median of the distribution.
-        :return: The median of the distribution.
-        """
-        if not self.data:
-            logger.warning("No data points available to calculate median.")
-            return 0.0
-        median_value = np.median(self.data).item()
-        logger.debug(f"Calculated median: {median_value}")
-        return median_value
-    @property
-    def variance(self) -> float:
-        """
-        Calculate and return the variance of the distribution.
-        :return: The variance of the distribution.
-        """
-        if not self.data:
-            logger.warning("No data points available to calculate variance.")
-            return 0.0
-        variance_value = np.var(self.data).item()
-        logger.debug(f"Calculated variance: {variance_value}")
-        return variance_value
-    @property
-    def std_deviation(self) -> float:
-        """
-        Calculate and return the standard deviation of the distribution.
-        :return: The standard deviation of the distribution.
-        """
-        if not self.data:
-            logger.warning("No data points available to calculate standard deviation.")
-            return 0.0
-        std_deviation_value = np.std(self.data).item()
-        logger.debug(f"Calculated standard deviation: {std_deviation_value}")
-        return std_deviation_value
-    def percentile(self, percentile: float) -> float:
-        """
-        Calculate and return the specified percentile of the distribution.
-        :param percentile: The desired percentile to calculate (0-100).
-        :return: The specified percentile of the distribution.
-        """
-        if not self.data:
-            logger.warning("No data points available to calculate percentile.")
-            return 0.0
-        percentile_value = np.percentile(self.data, percentile).item()
-        logger.debug(f"Calculated {percentile}th percentile: {percentile_value}")
-        return percentile_value
-    def percentiles(self, percentiles: List[float]) -> List[float]:
-        """
-        Calculate and return the specified percentiles of the distribution.
-        :param percentiles: A list of desired percentiles to calculate (0-100).
-        :return: A list of the specified percentiles of the distribution.
-        """
-        if not self.data:
-            logger.warning("No data points available to calculate percentiles.")
-            return [0.0] * len(percentiles)
-        percentiles_values: List[float] = np.percentile(self.data, percentiles).tolist()  # type: ignore  # noqa: PGH003
-        logger.debug(f"Calculated percentiles {percentiles}: {percentiles_values}")
-        return percentiles_values
-    @property
-    def min(self) -> float:
-        """
-        Return the minimum value of the distribution.
-        :return: The minimum value of the distribution.
-        """
-        if not self.data:
-            logger.warning("No data points available to calculate minimum.")
-            return 0.0
-        min_value: float = np.min(self.data).item()  # type: ignore  # noqa: PGH003
-        logger.debug(f"Calculated min: {min_value}")
-        return min_value
-    @property
-    def max(self) -> float:
-        """
-        Return the maximum value of the distribution.
-        :return: The maximum value of the distribution.
-        """
-        if not self.data:
-            logger.warning("No data points available to calculate maximum.")
-            return 0.0
-        max_value: float = np.max(self.data).item()  # type: ignore  # noqa: PGH003
-        logger.debug(f"Calculated max: {max_value}")
-        return max_value
-    @property
-    def range(self) -> float:
-        """
-        Calculate and return the range of the distribution (max - min).
-        :return: The range of the distribution.
-        """
-        if not self.data:
-            logger.warning("No data points available to calculate range.")
-            return 0.0
-        range_value = self.max - self.min
-        logger.debug(f"Calculated range: {range_value}")
-        return range_value
-    def describe(self) -> dict:
-        """
-        Return a dictionary describing various statistics of the distribution.
-        :return: A dictionary with statistical summaries of the distribution.
-        """
-        description = {
-            "mean": self.mean,
-            "median": self.median,
-            "variance": self.variance,
-            "std_deviation": self.std_deviation,
-            "percentile_indices": [10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 99],
-            "percentile_values": self.percentiles(
-                [10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 99],
-            ),
-            "min": self.min,
-            "max": self.max,
-            "range": self.range,
-        }
-        logger.debug(f"Generated description: {description}")
-        return description
-    def add_data(self, new_data: Sequence[float]):
-        """
-        Add new data points to the distribution.
-        :param new_data: A list of new numerical data points to add.
-        """
-        self.data = list(self.data) + list(new_data)
-        logger.debug(f"Added new data: {new_data}")
-    def remove_data(self, remove_data: Sequence[float]):
-        """
-        Remove specified data points from the distribution.
-        :param remove_data: A list of numerical data points to remove.
-        """
-        self.data = [item for item in self.data if item not in remove_data]
-        logger.debug(f"Removed data: {remove_data}")

guidellm 0.1.0__py3-none-any.whl → 0.2.0rc20250418__py3-none-any.whl

Potentially problematic release.

guidellm 0.1.0__py3-none-any.whl → 0.2.0rc20250418__py3-none-any.whl