guidellm 0.1.0__py3-none-any.whl → 0.2.0rc20250418__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of guidellm has been flagged as potentially problematic in its registry listing.
- guidellm/__init__.py +38 -6
- guidellm/__main__.py +294 -0
- guidellm/backend/__init__.py +19 -6
- guidellm/backend/backend.py +238 -0
- guidellm/backend/openai.py +532 -122
- guidellm/backend/response.py +132 -0
- guidellm/benchmark/__init__.py +73 -0
- guidellm/benchmark/aggregator.py +760 -0
- guidellm/benchmark/benchmark.py +838 -0
- guidellm/benchmark/benchmarker.py +334 -0
- guidellm/benchmark/entrypoints.py +141 -0
- guidellm/benchmark/output.py +946 -0
- guidellm/benchmark/profile.py +409 -0
- guidellm/benchmark/progress.py +720 -0
- guidellm/config.py +34 -56
- guidellm/data/__init__.py +4 -0
- guidellm/data/prideandprejudice.txt.gz +0 -0
- guidellm/dataset/__init__.py +22 -0
- guidellm/dataset/creator.py +213 -0
- guidellm/dataset/entrypoints.py +42 -0
- guidellm/dataset/file.py +90 -0
- guidellm/dataset/hf_datasets.py +62 -0
- guidellm/dataset/in_memory.py +132 -0
- guidellm/dataset/synthetic.py +262 -0
- guidellm/objects/__init__.py +18 -0
- guidellm/objects/pydantic.py +60 -0
- guidellm/objects/statistics.py +947 -0
- guidellm/request/__init__.py +12 -10
- guidellm/request/loader.py +281 -0
- guidellm/request/request.py +79 -0
- guidellm/scheduler/__init__.py +51 -3
- guidellm/scheduler/result.py +137 -0
- guidellm/scheduler/scheduler.py +382 -0
- guidellm/scheduler/strategy.py +493 -0
- guidellm/scheduler/types.py +7 -0
- guidellm/scheduler/worker.py +511 -0
- guidellm/utils/__init__.py +16 -29
- guidellm/utils/colors.py +8 -0
- guidellm/utils/hf_transformers.py +35 -0
- guidellm/utils/random.py +43 -0
- guidellm/utils/text.py +118 -357
- {guidellm-0.1.0.dist-info → guidellm-0.2.0rc20250418.dist-info}/METADATA +96 -79
- guidellm-0.2.0rc20250418.dist-info/RECORD +48 -0
- {guidellm-0.1.0.dist-info → guidellm-0.2.0rc20250418.dist-info}/WHEEL +1 -1
- guidellm-0.2.0rc20250418.dist-info/entry_points.txt +2 -0
- guidellm/backend/base.py +0 -320
- guidellm/core/__init__.py +0 -24
- guidellm/core/distribution.py +0 -190
- guidellm/core/report.py +0 -321
- guidellm/core/request.py +0 -44
- guidellm/core/result.py +0 -545
- guidellm/core/serializable.py +0 -169
- guidellm/executor/__init__.py +0 -10
- guidellm/executor/base.py +0 -213
- guidellm/executor/profile_generator.py +0 -343
- guidellm/main.py +0 -336
- guidellm/request/base.py +0 -194
- guidellm/request/emulated.py +0 -391
- guidellm/request/file.py +0 -76
- guidellm/request/transformers.py +0 -100
- guidellm/scheduler/base.py +0 -374
- guidellm/scheduler/load_generator.py +0 -196
- guidellm/utils/injector.py +0 -70
- guidellm/utils/progress.py +0 -196
- guidellm/utils/transformers.py +0 -151
- guidellm-0.1.0.dist-info/RECORD +0 -35
- guidellm-0.1.0.dist-info/entry_points.txt +0 -3
- {guidellm-0.1.0.dist-info → guidellm-0.2.0rc20250418.dist-info/licenses}/LICENSE +0 -0
- {guidellm-0.1.0.dist-info → guidellm-0.2.0rc20250418.dist-info}/top_level.txt +0 -0
guidellm/core/result.py
DELETED
@@ -1,545 +0,0 @@
from time import time
from typing import Any, Dict, List, Literal, Optional, Union

from loguru import logger
from pydantic import Field

from guidellm.core.distribution import Distribution
from guidellm.core.request import TextGenerationRequest
from guidellm.core.serializable import Serializable

__all__ = [
    "RequestConcurrencyMeasurement",
    "TextGenerationBenchmark",
    "TextGenerationBenchmarkReport",
    "TextGenerationError",
    "TextGenerationResult",
]


class TextGenerationResult(Serializable):
    """
    A class to represent the result of a text generation request
    for generative AI workloads.
    """

    request: TextGenerationRequest = Field(
        description="The text generation request used to generate the result.",
    )
    prompt: str = Field(
        default_factory=str,
        description="The input prompt for the text generation.",
    )
    prompt_word_count: int = Field(
        default=0,
        description="The number of words in the input prompt.",
    )
    prompt_token_count: int = Field(
        default=0,
        description="The number of tokens in the input prompt.",
    )
    output: str = Field(
        default_factory=str,
        description="The generated output for the text generation.",
    )
    output_word_count: int = Field(
        default=0,
        description="The number of words in the output.",
    )
    output_token_count: int = Field(
        default=0,
        description="The number of tokens in the output.",
    )
    last_time: Optional[float] = Field(
        default=None,
        description="The last time recorded.",
    )
    first_token_set: bool = Field(
        default=False,
        description="Whether the first token time is set.",
    )
    start_time: Optional[float] = Field(
        default=None,
        description="The start time of the text generation.",
    )
    end_time: Optional[float] = Field(
        default=None,
        description="The end time of the text generation.",
    )
    first_token_time: Optional[float] = Field(
        default=None,
        description="The time taken to decode the first token.",
    )
    decode_times: Distribution = Field(
        default_factory=Distribution,
        description="The distribution of decode times.",
    )

    def start(self, prompt: str):
        """
        Start the text generation by recording the prompt and start time.

        :param prompt: The input prompt for the text generation.
        :type prompt: str
        """
        self.prompt = prompt
        self.prompt_word_count = len(prompt.split())
        self.prompt_token_count = len(prompt)  # Token count placeholder
        self.start_time = time()
        self.last_time = time()
        self.first_token_set = False

        logger.info("Text generation started with prompt: '{}'", prompt)

    def output_token(self, token: str):
        """
        Add a token to the output and record the decode time.

        :param token: The decoded token.
        :type token: str
        """
        self._check_recording_started()

        if self.last_time is None:
            raise ValueError(
                "last time is not specified. "
                "Did you call `text_generation_benchmark.start()`?"
            )

        current_counter = time()

        if not self.first_token_set:
            self.first_token_time = current_counter - self.last_time
            self.first_token_set = True
            logger.debug(f"First token decode time: {self.first_token_time}")
        else:
            decode_time = current_counter - self.last_time
            self.decode_times.add_data([decode_time])
            logger.debug(f"Token '{token}' decoded in {decode_time} seconds")

        self.last_time = current_counter
        self.output += token
        logger.debug("Added token {} to output", token)

    def end(
        self,
        output: Optional[str] = None,
        prompt_token_count: Optional[int] = None,
        output_token_count: Optional[int] = None,
    ):
        """
        End the text generation by recording the output and end time.

        :param output: The generated output for the text generation.
        :type output: str
        :param prompt_token_count: Optional token count for the prompt,
            defaults to word count.
        :type prompt_token_count: Optional[int]
        :param output_token_count: Optional token count for the output,
            defaults to word count.
        :type output_token_count: Optional[int]
        """
        self._check_recording_started()
        self.end_time = time()

        if output:
            self.output = output

        self.output_word_count = len(self.output.split())
        self.output_token_count = output_token_count or self.output_word_count
        self.prompt_token_count = prompt_token_count or self.prompt_word_count

        logger.info(f"Text generation ended with output: '{self.output}'")

    def _check_recording_started(
        self,
    ):
        if self.start_time is None:
            raise ValueError(
                "start time is not specified. "
                "Did you make the `text_generation_benchmark.start()`?",
            )


class TextGenerationError(Serializable):
    """
    A class to represent an error that occurred during a text generation request
    for generative AI workloads.
    """

    request: TextGenerationRequest = Field(
        description="The text generation request that resulted in an error.",
    )
    message: str = Field(
        description="The error message that occurred during text generation.",
    )


class RequestConcurrencyMeasurement(Serializable):
    """
    A dataclass to represent the concurrency measurement of a request.
    """

    time: float = Field(description="The time of the measurement.")
    completed: int = Field(description="The number of completed requests.")
    errored: int = Field(description="The number of errored requests.")
    processing: int = Field(description="The number of processing requests.")


class TextGenerationBenchmark(Serializable):
    """
    A class to represent a report of text generation requests
    (results and errors) for generative AI workloads.
    This is a set of results and errors for a specific mode and rate.
    """

    mode: Literal["asynchronous", "synchronous", "throughput"] = Field(
        description="The generation mode, one of 'async', 'sync', or 'throughput'."
    )
    rate: Optional[float] = Field(
        default=None,
        description="The requested rate of requests per second.",
    )
    results: List[TextGenerationResult] = Field(
        default_factory=list,
        description="The results of the text generation requests.",
    )
    errors: List[TextGenerationError] = Field(
        default_factory=list,
        description="The errors of the text generation requests.",
    )
    concurrencies: List[RequestConcurrencyMeasurement] = Field(
        default_factory=list,
        description="The concurrency measurements of the requests.",
    )

    def __iter__(self):
        """
        Provide an iterator interface to iterate over the results.

        :return: An iterator over the results.
        """
        return iter(self.results)

    @property
    def request_count(self) -> int:
        """
        Get the number of requests in the result.

        :return: The number of requests.
        :rtype: int
        """
        return len(self.results)

    @property
    def error_count(self) -> int:
        """
        Get the number of errors in the result.

        :return: The number of errors.
        :rtype: int
        """
        return len(self.errors)

    @property
    def total_count(self) -> int:
        """
        Get the total number of requests in the result.

        :return: The total number of requests.
        :rtype: int
        """
        return self.request_count + self.error_count

    @property
    def start_time(self) -> Optional[float]:
        """
        Get the start time of the first request in the result.

        :return: The start time of the first request.
        :rtype: Optional[float]
        """
        if not self.results:
            return None

        return self.results[0].start_time

    @property
    def end_time(self) -> Optional[float]:
        """
        Get the end time of the last request in the result.

        :return: The end time of the last request.
        :rtype: Optional[float]
        """
        if not self.results:
            return None

        return self.results[-1].end_time

    @property
    def duration(self) -> float:
        """
        Get the duration of the result in seconds.

        :return: The duration of the result.
        :rtype: float
        """
        if not self.results or not self.start_time or not self.end_time:
            return 0.0

        return self.end_time - self.start_time

    @property
    def completed_request_rate(self) -> float:
        """
        Get the rate of requests per second in the result.

        :return: The rate of requests per second.
        :rtype: float
        """
        if not self.results or not self.duration:
            return 0.0

        return len(self.results) / self.duration

    @property
    def request_latency(self) -> float:
        """
        Get the average request latency in seconds.

        :return: The average request latency in seconds.
        :rtype: float
        """
        if not self.results:
            return 0.0

        return self.request_latency_distribution.mean

    @property
    def request_latency_distribution(self) -> Distribution:
        """
        Get the distribution of request latencies.

        :return: The distribution of request latencies.
        :rtype: Distribution
        """
        return Distribution(
            data=[
                result.end_time - result.start_time
                for result in self.results
                if result.end_time is not None and result.start_time is not None
            ]
        )

    @property
    def time_to_first_token(self) -> float:
        """
        Get the time taken to decode the first token in milliseconds.

        :return: The time taken to decode the first token in milliseconds.
        :rtype: float
        """
        if not self.results:
            return 0.0

        return 1000 * self.ttft_distribution.mean

    @property
    def ttft_distribution(self) -> Distribution:
        """
        Get the distribution of time taken to decode the first token.

        :return: The distribution of time taken to decode the first token.
        :rtype: Distribution
        """
        return Distribution(
            data=[
                result.first_token_time
                for result in self.results
                if result.first_token_time is not None
            ]
        )

    @property
    def inter_token_latency(self) -> float:
        """
        Get the average time between tokens in milliseconds.

        :return: The average time between tokens.
        :rtype: float
        """
        if not self.results:
            return 0.0

        return 1000 * self.itl_distribution.mean

    @property
    def itl_distribution(self) -> Distribution:
        """
        Get the distribution of time between tokens.

        :return: The distribution of time between tokens.
        :rtype: Distribution
        """
        return Distribution(
            data=[
                decode for result in self.results for decode in result.decode_times.data
            ]
        )

    @property
    def output_token_throughput(self) -> float:
        """
        Get the average token throughput in tokens per second.

        :return: The average token throughput.
        :rtype: float
        """
        if not self.results or not self.duration:
            return 0.0

        total_tokens = sum(result.output_token_count for result in self.results)

        return total_tokens / self.duration

    @property
    def prompt_token_distribution(self) -> Distribution:
        """
        Get the distribution of prompt token counts.

        :return: The distribution of prompt token counts.
        :rtype: Distribution
        """
        return Distribution(data=[result.prompt_token_count for result in self.results])

    @property
    def output_token_distribution(self) -> Distribution:
        """
        Get the distribution of output token counts.

        :return: The distribution of output token counts.
        :rtype: Distribution
        """
        return Distribution(data=[result.output_token_count for result in self.results])

    @property
    def overloaded(self) -> bool:
        if (
            self.rate is None
            or not self.results
            or not self.concurrencies
            or len(self.concurrencies) < 2  # noqa: PLR2004
        ):
            # if rate was not set, sync mode is assumed,
            # or we have less than 2 data points,
            # then we cannot be overloaded by definition
            return False

        # if the calculated rate is less than 75% of the requested rate,
        # safe to assume the system is overloaded
        return self.completed_request_rate < 0.75 * self.rate

    def request_started(self):
        """
        Record the start of a generation request.
        """
        if not self.concurrencies:
            self.concurrencies = [
                RequestConcurrencyMeasurement(
                    time=time(),
                    completed=0,
                    errored=0,
                    processing=1,
                ),
            ]
        else:
            last = self.concurrencies[-1]
            self.concurrencies.append(
                RequestConcurrencyMeasurement(
                    time=time(),
                    completed=last.completed,
                    errored=last.errored,
                    processing=last.processing + 1,
                ),
            )

        logger.info("Text generation request started")

    def request_completed(
        self,
        result: Union[TextGenerationResult, TextGenerationError],
    ):
        """
        Record the completion of a text generation request.

        :param result: The completed result or error.
        :type result: Union[TextGenerationResult, TextGenerationError]
        """
        if not self.concurrencies:
            raise ValueError("Request completed without starting")

        if isinstance(result, TextGenerationError):
            is_error = True
            self.errors.append(result)
            logger.info(
                "Text generation request resulted in error: {}",
                result.message,
            )
        else:
            if not result.start_time or not result.end_time:
                raise ValueError("Start time and End time are not defined")

            is_error = False
            self.results.append(result)
            logger.info("Text generation request completed successfully: {}", result)

        last = self.concurrencies[-1]
        self.concurrencies.append(
            RequestConcurrencyMeasurement(
                time=time(),
                completed=last.completed + (not is_error),
                errored=last.errored + is_error,
                processing=last.processing - 1,
            )
        )


class TextGenerationBenchmarkReport(Serializable):
    """
    A class to represent a report of text generation benchmarks
    for generative AI workloads.
    This is a collection of benchmarks for different modes and rates.
    """

    benchmarks: List[TextGenerationBenchmark] = Field(
        default_factory=list,
        description="The benchmarks of text generation requests.",
    )
    args: Dict[str, Any] = Field(
        default_factory=dict,
        description="The arguments used for the benchmarks.",
    )

    def __iter__(self):
        return iter(self.benchmarks)

    @property
    def benchmarks_sorted(self) -> List[TextGenerationBenchmark]:
        """
        Get the list of benchmarks sorted by request rate.

        :return: The sorted list of benchmarks.
        :rtype: List[TextGenerationBenchmark]
        """
        return sorted(self.benchmarks, key=lambda x: x.completed_request_rate)

    def add_benchmark(self, benchmark: TextGenerationBenchmark):
        """
        Add a result to the report.

        :param benchmark: The result to add.
        :type benchmark: TextGenerationBenchmark
        """
        self.benchmarks.append(benchmark)
        logger.debug("Added result: {}", benchmark)
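For context on what this deletion removes: the 0.1.0 result API was driven through a start → output_token → end lifecycle per request, with a TextGenerationBenchmark aggregating the recorded results. A minimal sketch against the API shown above (the TextGenerationRequest construction is illustrative; its exact fields are not visible in this diff):

from guidellm.core.request import TextGenerationRequest
from guidellm.core.result import TextGenerationBenchmark, TextGenerationResult

# Assumed request shape, for illustration only; see guidellm/core/request.py in 0.1.0.
request = TextGenerationRequest(prompt="Say hello")
result = TextGenerationResult(request=request)

benchmark = TextGenerationBenchmark(mode="synchronous")
benchmark.request_started()             # appends a RequestConcurrencyMeasurement

result.start("Say hello")               # records prompt, start_time, last_time
for token in ("Hello", ",", " world"):
    result.output_token(token)          # first call sets first_token_time; later calls feed decode_times
result.end()                            # records end_time; token counts default to word counts

benchmark.request_completed(result)     # moves processing -> completed
print(benchmark.request_latency, benchmark.time_to_first_token)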
guidellm/core/serializable.py
DELETED
@@ -1,169 +0,0 @@
from pathlib import Path
from typing import Any, Literal, Union, get_args

import yaml
from loguru import logger
from pydantic import BaseModel, ConfigDict

__all__ = ["Serializable", "SerializableFileType"]


SerializableFileType = Literal["yaml", "json"]


class Serializable(BaseModel):
    """
    A base class for models that require YAML and JSON serialization and
    deserialization.
    """

    model_config = ConfigDict(
        extra="forbid",
        use_enum_values=True,
        validate_assignment=True,
        from_attributes=True,
    )

    def __init__(self, /, **data: Any) -> None:
        super().__init__(**data)
        logger.debug(
            "Initialized new instance of {} with data: {}",
            self.__class__.__name__,
            data,
        )

    def to_yaml(self) -> str:
        """
        Serialize the model to a YAML string.

        :return: YAML string representation of the model.
        """
        logger.debug("Serializing to YAML... {}", self)

        return yaml.dump(self.model_dump())

    @classmethod
    def from_yaml(cls, data: str):
        """
        Deserialize a YAML string to a model instance.

        :param data: YAML string to deserialize.
        :return: An instance of the model.
        """
        logger.debug("Deserializing from YAML... {}", data)

        return cls.model_validate(yaml.safe_load(data))

    def to_json(self) -> str:
        """
        Serialize the model to a JSON string.

        :return: JSON string representation of the model.
        """
        logger.debug("Serializing to JSON... {}", self)

        return self.model_dump_json()

    @classmethod
    def from_json(cls, data: str):
        """
        Deserialize a JSON string to a model instance.

        :param data: JSON string to deserialize.
        :return: An instance of the model.
        """
        logger.debug("Deserializing from JSON... {}", data)

        return cls.model_validate_json(data)

    def save_file(
        self,
        path: Union[str, Path],
        type_: SerializableFileType = "yaml",
    ) -> str:
        """
        Save the model to a file in either YAML or JSON format.

        :param path: Path to the exact file or the containing directory.
            If it is a directory, the file name will be inferred from the class name.
        :param type_: Optional type to save ('yaml' or 'json').
            If not provided and the path has an extension,
            it will be inferred to save in that format.
            If not provided and the path does not have an extension,
            it will save in YAML format.
        :return: The path to the saved file.
        """
        logger.debug("Saving to file... {} with format: {}", path, type_)

        if isinstance(path, str):
            path = Path(path)

        if path.suffix:
            # is a file
            ext = path.suffix[1:].lower()
            if type_ not in get_args(SerializableFileType):
                raise ValueError(
                    f"Unsupported file extension: {type_}. "
                    f"Expected one of {SerializableFileType} "
                    f"for {path}"
                )
            type_ = ext  # type: ignore  # noqa: PGH003
        else:
            # is a directory
            file_name = f"{self.__class__.__name__.lower()}.{type_}"
            path = path / file_name

        path.parent.mkdir(parents=True, exist_ok=True)

        with path.open("w") as file:
            if type_ == "yaml":
                file.write(self.to_yaml())
            elif type_ == "json":
                file.write(self.to_json())
            else:
                raise ValueError(
                    f"Unsupported file extension: {type_}"
                    f"Expected one of {SerializableFileType} "
                    f"for {path}"
                )

        logger.info("Successfully saved {} to {}", self.__class__.__name__, path)

        return str(path)

    @classmethod
    def load_file(cls, path: Union[str, Path]):
        """
        Load a model from a file in either YAML or JSON format.

        :param path: Path to the file.
        :return: An instance of the model.
        """
        logger.debug("Loading from file... {}", path)

        if isinstance(path, str):
            path = Path(path)

        if not path.exists():
            raise FileNotFoundError(f"File not found: {path}")

        if not path.is_file():
            raise ValueError(f"Path is not a file: {path}")

        extension = path.suffix[1:].lower()

        with path.open() as file:
            data = file.read()

        if extension == "yaml":
            obj = cls.from_yaml(data)
        elif extension == "json":
            obj = cls.from_json(data)
        else:
            raise ValueError(
                f"Unsupported file extension: {extension}"
                f"Expected one of {SerializableFileType} "
                f"for {path}"
            )

        return obj
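This base class is what gave every 0.1.0 core model its YAML/JSON round trip; the added guidellm/objects/pydantic.py in the file list above appears to take over that role in 0.2.0. A minimal sketch of the removed behavior, using a hypothetical subclass defined purely for illustration:

from guidellm.core.serializable import Serializable

class ExampleModel(Serializable):  # hypothetical subclass for illustration
    name: str = "demo"
    value: int = 42

obj = ExampleModel()
# A path without a suffix is treated as a directory; the file name is
# inferred from the class name, e.g. examplemodel.json.
saved_path = obj.save_file("/tmp/guidellm-example", type_="json")
restored = ExampleModel.load_file(saved_path)  # format inferred from the extension
assert restored == obj                         # pydantic models compare by field values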