guidellm 0.4.0a21__py3-none-any.whl → 0.4.0a169__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of guidellm has been flagged as potentially problematic.

Files changed (115)
  1. guidellm/__init__.py +5 -2
  2. guidellm/__main__.py +452 -252
  3. guidellm/backends/__init__.py +33 -0
  4. guidellm/backends/backend.py +110 -0
  5. guidellm/backends/openai.py +355 -0
  6. guidellm/backends/response_handlers.py +455 -0
  7. guidellm/benchmark/__init__.py +53 -39
  8. guidellm/benchmark/benchmarker.py +150 -317
  9. guidellm/benchmark/entrypoints.py +467 -128
  10. guidellm/benchmark/output.py +519 -771
  11. guidellm/benchmark/profile.py +580 -280
  12. guidellm/benchmark/progress.py +568 -549
  13. guidellm/benchmark/scenarios/__init__.py +40 -0
  14. guidellm/benchmark/scenarios/chat.json +6 -0
  15. guidellm/benchmark/scenarios/rag.json +6 -0
  16. guidellm/benchmark/schemas.py +2086 -0
  17. guidellm/data/__init__.py +28 -4
  18. guidellm/data/collators.py +16 -0
  19. guidellm/data/deserializers/__init__.py +53 -0
  20. guidellm/data/deserializers/deserializer.py +144 -0
  21. guidellm/data/deserializers/file.py +222 -0
  22. guidellm/data/deserializers/huggingface.py +94 -0
  23. guidellm/data/deserializers/memory.py +194 -0
  24. guidellm/data/deserializers/synthetic.py +348 -0
  25. guidellm/data/loaders.py +149 -0
  26. guidellm/data/preprocessors/__init__.py +25 -0
  27. guidellm/data/preprocessors/formatters.py +404 -0
  28. guidellm/data/preprocessors/mappers.py +198 -0
  29. guidellm/data/preprocessors/preprocessor.py +31 -0
  30. guidellm/data/processor.py +31 -0
  31. guidellm/data/schemas.py +13 -0
  32. guidellm/data/utils/__init__.py +6 -0
  33. guidellm/data/utils/dataset.py +94 -0
  34. guidellm/extras/__init__.py +4 -0
  35. guidellm/extras/audio.py +215 -0
  36. guidellm/extras/vision.py +242 -0
  37. guidellm/logger.py +2 -2
  38. guidellm/mock_server/__init__.py +8 -0
  39. guidellm/mock_server/config.py +84 -0
  40. guidellm/mock_server/handlers/__init__.py +17 -0
  41. guidellm/mock_server/handlers/chat_completions.py +280 -0
  42. guidellm/mock_server/handlers/completions.py +280 -0
  43. guidellm/mock_server/handlers/tokenizer.py +142 -0
  44. guidellm/mock_server/models.py +510 -0
  45. guidellm/mock_server/server.py +168 -0
  46. guidellm/mock_server/utils.py +302 -0
  47. guidellm/preprocess/dataset.py +23 -26
  48. guidellm/presentation/builder.py +2 -2
  49. guidellm/presentation/data_models.py +25 -21
  50. guidellm/presentation/injector.py +2 -3
  51. guidellm/scheduler/__init__.py +65 -26
  52. guidellm/scheduler/constraints.py +1035 -0
  53. guidellm/scheduler/environments.py +252 -0
  54. guidellm/scheduler/scheduler.py +140 -368
  55. guidellm/scheduler/schemas.py +272 -0
  56. guidellm/scheduler/strategies.py +519 -0
  57. guidellm/scheduler/worker.py +391 -420
  58. guidellm/scheduler/worker_group.py +707 -0
  59. guidellm/schemas/__init__.py +31 -0
  60. guidellm/schemas/info.py +159 -0
  61. guidellm/schemas/request.py +226 -0
  62. guidellm/schemas/response.py +119 -0
  63. guidellm/schemas/stats.py +228 -0
  64. guidellm/{config.py → settings.py} +32 -21
  65. guidellm/utils/__init__.py +95 -8
  66. guidellm/utils/auto_importer.py +98 -0
  67. guidellm/utils/cli.py +71 -2
  68. guidellm/utils/console.py +183 -0
  69. guidellm/utils/encoding.py +778 -0
  70. guidellm/utils/functions.py +134 -0
  71. guidellm/utils/hf_datasets.py +1 -2
  72. guidellm/utils/hf_transformers.py +4 -4
  73. guidellm/utils/imports.py +9 -0
  74. guidellm/utils/messaging.py +1118 -0
  75. guidellm/utils/mixins.py +115 -0
  76. guidellm/utils/pydantic_utils.py +411 -0
  77. guidellm/utils/random.py +3 -4
  78. guidellm/utils/registry.py +220 -0
  79. guidellm/utils/singleton.py +133 -0
  80. guidellm/{objects → utils}/statistics.py +341 -247
  81. guidellm/utils/synchronous.py +159 -0
  82. guidellm/utils/text.py +163 -50
  83. guidellm/utils/typing.py +41 -0
  84. guidellm/version.py +1 -1
  85. {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a169.dist-info}/METADATA +33 -10
  86. guidellm-0.4.0a169.dist-info/RECORD +95 -0
  87. guidellm/backend/__init__.py +0 -23
  88. guidellm/backend/backend.py +0 -259
  89. guidellm/backend/openai.py +0 -705
  90. guidellm/backend/response.py +0 -136
  91. guidellm/benchmark/aggregator.py +0 -760
  92. guidellm/benchmark/benchmark.py +0 -837
  93. guidellm/benchmark/scenario.py +0 -104
  94. guidellm/data/prideandprejudice.txt.gz +0 -0
  95. guidellm/dataset/__init__.py +0 -22
  96. guidellm/dataset/creator.py +0 -213
  97. guidellm/dataset/entrypoints.py +0 -42
  98. guidellm/dataset/file.py +0 -92
  99. guidellm/dataset/hf_datasets.py +0 -62
  100. guidellm/dataset/in_memory.py +0 -132
  101. guidellm/dataset/synthetic.py +0 -287
  102. guidellm/objects/__init__.py +0 -18
  103. guidellm/objects/pydantic.py +0 -89
  104. guidellm/request/__init__.py +0 -18
  105. guidellm/request/loader.py +0 -284
  106. guidellm/request/request.py +0 -79
  107. guidellm/request/types.py +0 -10
  108. guidellm/scheduler/queues.py +0 -25
  109. guidellm/scheduler/result.py +0 -155
  110. guidellm/scheduler/strategy.py +0 -495
  111. guidellm-0.4.0a21.dist-info/RECORD +0 -62
  112. {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a169.dist-info}/WHEEL +0 -0
  113. {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a169.dist-info}/entry_points.txt +0 -0
  114. {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a169.dist-info}/licenses/LICENSE +0 -0
  115. {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a169.dist-info}/top_level.txt +0 -0
@@ -1,12 +1,24 @@
+ """
+ Statistical analysis utilities for distribution calculations and running metrics.
+
+ Provides comprehensive statistical computation tools for analyzing numerical
+ distributions, percentiles, and streaming data. Includes specialized support for
+ request timing analysis, concurrency measurement, and rate calculations. Integrates
+ with Pydantic for serializable statistical models and supports both weighted and
+ unweighted distributions with cumulative distribution function (CDF) generation.
+ """
+
+ from __future__ import annotations
+
  import math
  import time as timer
  from collections import defaultdict
- from typing import Any, Literal, Optional
+ from typing import Any, Literal

  import numpy as np
  from pydantic import Field, computed_field

- from guidellm.objects.pydantic import StandardBaseModel, StatusBreakdown
+ from guidellm.utils.pydantic_utils import StandardBaseModel, StatusBreakdown

  __all__ = [
  "DistributionSummary",
@@ -19,7 +31,11 @@ __all__ = [

  class Percentiles(StandardBaseModel):
  """
- A pydantic model representing the standard percentiles of a distribution.
+ Standard percentiles model for statistical distribution analysis.
+
+ Provides complete percentile coverage from 0.1th to 99.9th percentiles for
+ statistical distribution characterization. Used as a component within
+ DistributionSummary to provide detailed distribution shape analysis.
  """

  p001: float = Field(
@@ -59,8 +75,25 @@ class Percentiles(StandardBaseModel):

  class DistributionSummary(StandardBaseModel):
  """
- A pydantic model representing a statistical summary for a given
- distribution of numerical values.
+ Comprehensive statistical summary for numerical value distributions.
+
+ Calculates and stores complete statistical metrics including central tendency,
+ dispersion, extremes, and percentiles for any numerical distribution. Supports
+ both weighted and unweighted data with optional cumulative distribution function
+ generation. Primary statistical analysis tool for request timing, performance
+ metrics, and benchmark result characterization.
+
+ Example:
+ ::
+ # Create from simple values
+ summary = DistributionSummary.from_values([1.0, 2.0, 3.0, 4.0, 5.0])
+ print(f"Mean: {summary.mean}, P95: {summary.percentiles.p95}")
+
+ # Create from request timings for concurrency analysis
+ requests = [(0.0, 1.0), (0.5, 2.0), (1.0, 2.5)]
+ concurrency = DistributionSummary.from_request_times(
+ requests, "concurrency"
+ )
  """

  mean: float = Field(
@@ -93,7 +126,7 @@ class DistributionSummary(StandardBaseModel):
  percentiles: Percentiles = Field(
  description="The percentiles of the distribution.",
  )
- cumulative_distribution_function: Optional[list[tuple[float, float]]] = Field(
+ cumulative_distribution_function: list[tuple[float, float]] | None = Field(
  description="The cumulative distribution function (CDF) of the distribution.",
  default=None,
  )
@@ -102,24 +135,21 @@ class DistributionSummary(StandardBaseModel):
  def from_distribution_function(
  distribution: list[tuple[float, float]],
  include_cdf: bool = False,
- ) -> "DistributionSummary":
- """
- Create a statistical summary for a given distribution of weighted numerical
- values or a probability distribution function (PDF).
- 1. If the distribution is a PDF, it is expected to be a list of tuples
- where each tuple contains (value, probability). The sum of the
- probabilities should be 1. If it is not, it will be normalized.
- 2. If the distribution is a values distribution function, it is expected
- to be a list of tuples where each tuple contains (value, weight).
- The weights are normalized to a probability distribution function.
-
- :param distribution: A list of tuples representing the distribution.
- Each tuple contains (value, weight) or (value, probability).
- :param include_cdf: Whether to include the calculated cumulative distribution
- function (CDF) in the output DistributionSummary.
- :return: An instance of DistributionSummary with calculated values.
- """
- values, weights = zip(*distribution) if distribution else ([], [])
+ ) -> DistributionSummary:
+ """
+ Create statistical summary from weighted distribution or probability function.
+
+ Converts weighted numerical values or probability distribution function (PDF)
+ into comprehensive statistical summary. Normalizes weights to probabilities
+ and calculates all statistical metrics including percentiles.
+
+ :param distribution: List of (value, weight) or (value, probability) tuples
+ representing the distribution
+ :param include_cdf: Whether to include cumulative distribution function
+ in the output
+ :return: DistributionSummary instance with calculated statistical metrics
+ """
+ values, weights = zip(*distribution, strict=True) if distribution else ([], [])
  values = np.array(values) # type: ignore[assignment]
  weights = np.array(weights) # type: ignore[assignment]
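
Per the new docstring, from_distribution_function normalizes the (value, weight) pairs into probabilities before computing the summary statistics. A standalone sketch of that normalization step as described (illustrative only, not copied from the implementation):

    import numpy as np

    distribution = [(1.0, 1.0), (2.0, 3.0)]  # (value, weight) pairs
    values, weights = zip(*distribution, strict=True)
    probabilities = np.array(weights) / np.sum(weights)  # [0.25, 0.75]
    weighted_mean = float(np.sum(probabilities * np.array(values)))
    print(weighted_mean)  # 1.75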
@@ -190,20 +220,23 @@ class DistributionSummary(StandardBaseModel):
  @staticmethod
  def from_values(
  values: list[float],
- weights: Optional[list[float]] = None,
+ weights: list[float] | None = None,
  include_cdf: bool = False,
- ) -> "DistributionSummary":
+ ) -> DistributionSummary:
  """
- Create a statistical summary for a given distribution of numerical values.
- This is a wrapper around from_distribution_function to handle the optional case
- of including weights for the values. If weights are not provided, they are
- automatically set to 1.0 for each value, so each value is equally weighted.
-
- :param values: A list of numerical values representing the distribution.
- :param weights: A list of weights for each value in the distribution.
- If not provided, all values are equally weighted.
- :param include_cdf: Whether to include the calculated cumulative distribution
- function (CDF) in the output DistributionSummary.
+ Create statistical summary from numerical values with optional weights.
+
+ Wrapper around from_distribution_function for simple value lists. If weights
+ are not provided, all values are equally weighted. Enables statistical
+ analysis of any numerical dataset.
+
+ :param values: Numerical values representing the distribution
+ :param weights: Optional weights for each value. If not provided, all values
+ are equally weighted
+ :param include_cdf: Whether to include cumulative distribution function in
+ the output DistributionSummary
+ :return: DistributionSummary instance with calculated statistical metrics
+ :raises ValueError: If values and weights lists have different lengths
  """
  if weights is None:
  weights = [1.0] * len(values)
@@ -214,7 +247,7 @@ class DistributionSummary(StandardBaseModel):
  )

  return DistributionSummary.from_distribution_function(
- distribution=list(zip(values, weights)),
+ distribution=list(zip(values, weights, strict=True)),
  include_cdf=include_cdf,
  )
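
This hunk, like many below, threads strict=True through zip(). That flag (Python 3.10+) turns silent truncation on mismatched lengths into a ValueError, which is what backs the new ":raises ValueError:" docstring entries. A quick sketch of the difference:

    values = [1.0, 2.0, 3.0]
    weights = [1.0, 1.0]  # deliberately mismatched

    print(list(zip(values, weights)))  # [(1.0, 1.0), (2.0, 1.0)], silently truncated
    try:
        list(zip(values, weights, strict=True))
    except ValueError as err:
        print(err)  # zip() argument 2 is shorter than argument 1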
@@ -222,122 +255,154 @@ class DistributionSummary(StandardBaseModel):
  def from_request_times(
  requests: list[tuple[float, float]],
  distribution_type: Literal["concurrency", "rate"],
+ weights: list[float] | None = None,
  include_cdf: bool = False,
  epsilon: float = 1e-6,
- ) -> "DistributionSummary":
- """
- Create a statistical summary for a given distribution of request times.
- Specifically, this is used to measure concurrency or rate of requests
- given an input list containing the start and end time of each request.
- This will first convert the request times into a distribution function
- and then calculate the statistics with from_distribution_function.
-
- :param requests: A list of tuples representing the start and end times of
- each request. Example: [(start_1, end_1), (start_2, end_2), ...]
- :param distribution_type: The type of distribution to calculate.
- Either "concurrency" or "rate".
- :param include_cdf: Whether to include the calculated cumulative distribution
- function (CDF) in the output DistributionSummary.
- :param epsilon: The epsilon value for merging close events.
- :return: An instance of DistributionSummary with calculated values.
+ ) -> DistributionSummary:
  """
- if distribution_type == "concurrency":
- # convert to delta changes based on when requests were running
- events = [(start, 1) for start, _ in requests] + [
- (end, -1) for _, end in requests
- ]
- elif distribution_type == "rate":
- # convert to events for when requests finished
- global_start = min(start for start, _ in requests) if requests else 0
- events = [(global_start, 1)] + [(end, 1) for _, end in requests]
- else:
- raise ValueError(
- f"Invalid distribution_type '{distribution_type}'. "
- "Must be 'concurrency' or 'rate'."
- )
+ Create statistical summary from request timing data.
+
+ Analyzes request start/end times to calculate concurrency or rate
+ distributions. Converts timing events into statistical metrics for
+ performance analysis and load characterization.
+
+ :param requests: List of (start_time, end_time) tuples for each request
+ :param distribution_type: Type of analysis - "concurrency" for simultaneous
+ requests or "rate" for completion rates
+ :param include_cdf: Whether to include cumulative distribution function
+ :param epsilon: Threshold for merging close timing events
+ :return: DistributionSummary with timing-based statistical metrics
+ :raises ValueError: If distribution_type is not "concurrency" or "rate"
+ """
+ if not weights:
+ weights = [1.0] * len(requests)

- # combine any events that are very close together
- flattened_events: list[tuple[float, float]] = []
- for time, val in sorted(events):
- last_time, last_val = (
- flattened_events[-1] if flattened_events else (None, None)
+ if len(requests) != len(weights):
+ raise ValueError(
+ "The length of requests and weights must be the same.",
  )

- if (
- last_time is not None
- and last_val is not None
- and abs(last_time - time) <= epsilon
- ):
- flattened_events[-1] = (last_time, last_val + val)
- else:
- flattened_events.append((time, val))
-
- if distribution_type == "concurrency":
- # convert to the events over time measuring concurrency changes
- events_over_time: list[tuple[float, float]] = []
- active = 0
- for time, delta in flattened_events:
- active += delta # type: ignore [assignment]
- events_over_time.append((time, active))
+ # First convert to timing events based on type
+ events = DistributionSummary._convert_to_timing_events(
+ requests, distribution_type, weights
+ )

- flattened_events = events_over_time
+ # Combine any events within epsilon of each other for stability
+ flattened_events = DistributionSummary._combine_events(events, epsilon)

- # convert to value distribution function
+ # Convert events to value distribution function
  distribution: dict[float, float] = defaultdict(float)

- for ind in range(len(flattened_events) - 1):
- start_time, value = flattened_events[ind]
- end_time, _ = flattened_events[ind + 1]
- duration = end_time - start_time
-
- if distribution_type == "concurrency":
- # weight the concurrency value by the duration
+ if distribution_type == "concurrency":
+ # For concurrency, convert to active concurrency over time
+ active = 0.0
+ for ind in range(len(flattened_events)):
+ time, change = flattened_events[ind]
+ active += change
+ flattened_events[ind] = (time, active)
+
+ # Then convert to distribution by weighting each concurrency
+ # by duration to next event (last event is 0 concurrency)
+ for ind in range(len(flattened_events) - 1):
+ time, value = flattened_events[ind]
+ next_time = flattened_events[ind + 1][0]
+ duration = next_time - time
  distribution[value] += duration
- elif distribution_type == "rate":
- # weight the rate value by the duration
- rate = value / duration
+ elif distribution_type == "rate":
+ # For rate, convert to distribution by converting each value
+ # to a rate (value/duration) weighted by duration from previous
+ # (first event is 0 rate)
+ for ind in range(1, len(flattened_events)):
+ time, value = flattened_events[ind]
+ prev_time = flattened_events[ind - 1][0]
+ duration = time - prev_time
+ rate = value / duration if duration > 0 else 0.0
  distribution[rate] += duration
-
- distribution_list: list[tuple[float, float]] = sorted(distribution.items())
+ else:
+ raise ValueError(
+ f"Invalid distribution_type '{distribution_type}'. "
+ "Must be 'concurrency' or 'rate'."
+ )

  return DistributionSummary.from_distribution_function(
- distribution=distribution_list,
+ distribution=sorted(distribution.items()),
  include_cdf=include_cdf,
  )

+ @staticmethod
+ def _convert_to_timing_events(
+ requests: list[tuple[float, float]],
+ distribution_type: Literal["concurrency", "rate"],
+ weights: list[float],
+ ) -> list[tuple[float, float]]:
+ events: list[tuple[float, float]] = []
+
+ if distribution_type == "concurrency":
+ # For concurrency, each request adds to concurrency at start
+ # and subtracts at end
+ for (start, end), weight in zip(requests, weights, strict=False):
+ events.append((start, weight))
+ events.append((end, -1 * weight))
+ elif distribution_type == "rate":
+ # For rate, each request is added at the end time only
+ global_start = min(start for start, _ in requests) if requests else 0.0
+ events.append((global_start, 0.0))
+ for (_, end), weight in zip(requests, weights, strict=False):
+ events.append((end, weight))
+ else:
+ raise ValueError(
+ f"Invalid distribution_type '{distribution_type}'. "
+ "Must be 'concurrency' or 'rate'."
+ )
+ return events
+
+ @staticmethod
+ def _combine_events(
+ events: list[tuple[float, float]],
+ epsilon: float,
+ ) -> list[tuple[float, float]]:
+ sorted_events = sorted(events, key=lambda event: event[0])
+ flattened_events: list[tuple[float, float]] = (
+ [sorted_events.pop(0)] if sorted_events else []
+ )
+ last_time = flattened_events[0][0] if flattened_events else 0.0
+
+ for time, val in sorted_events:
+ if abs(time - last_time) <= epsilon:
+ last_val = flattened_events[-1][1]
+ flattened_events[-1] = (last_time, last_val + val)
+ else:
+ last_time = time
+ flattened_events.append((time, val))
+ return flattened_events
+
  @staticmethod
  def from_iterable_request_times(
  requests: list[tuple[float, float]],
  first_iter_times: list[float],
  iter_counts: list[int],
- first_iter_counts: Optional[list[int]] = None,
+ first_iter_counts: list[int] | None = None,
  include_cdf: bool = False,
  epsilon: float = 1e-6,
- ) -> "DistributionSummary":
- """
- Create a statistical summary for a given distribution of request times
- for a request with iterable responses between the start and end.
- For example, this is used to measure auto regressive requests where
- a request is started and at some later point, iterative responses are
- received. This will convert the request times and iterable values into
- a distribution function and then calculate the statistics with
- from_distribution_function.
-
- :param requests: A list of tuples representing the start and end times of
- each request. Example: [(start_1, end_1), (start_2, end_2), ...]
- :param first_iter_times: A list of times when the first iteration of
- each request was received. Must be the same length as requests.
- :param iter_counts: A list of the total number of iterations for each
- request that occurred starting at the first iteration and ending
- at the request end time. Must be the same length as requests.
- :param first_iter_counts: A list of the number of iterations to log
- for the first iteration of each request. For example, when calculating
- total number of tokens processed, this is set to the prompt tokens number.
- If not provided, defaults to 1 for each request.
- :param include_cdf: Whether to include the calculated cumulative distribution
- function (CDF) in the output DistributionSummary.
- :param epsilon: The epsilon value for merging close events.
- :return: An instance of DistributionSummary with calculated values.
+ ) -> DistributionSummary:
+ """
+ Create statistical summary from iterative request timing data.
+
+ Analyzes autoregressive or streaming requests with multiple iterations
+ between start and end times. Calculates rate distributions based on
+ iteration timing patterns for LLM token generation analysis.
+
+ :param requests: List of (start_time, end_time) tuples for each request
+ :param first_iter_times: Times when first iteration was received for
+ each request
+ :param iter_counts: Total iteration counts for each request from first
+ iteration to end
+ :param first_iter_counts: Iteration counts for first iteration (defaults
+ to 1 for each request)
+ :param include_cdf: Whether to include cumulative distribution function
+ :param epsilon: Threshold for merging close timing events
+ :return: DistributionSummary with iteration rate statistical metrics
+ :raises ValueError: If input lists have mismatched lengths
  """

  if first_iter_counts is None:
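
The reworked from_request_times above is a classic event sweep: _convert_to_timing_events emits +weight at each request start and -weight at each end (for concurrency), _combine_events merges events closer together than epsilon, and the running sum between consecutive events gives the concurrency held over that interval. A standalone sketch of the same sweep on toy data (illustrative only, not guidellm code):

    requests = [(0.0, 1.0), (0.5, 2.0), (1.0, 2.5)]  # (start, end) per request

    # +1 at each start, -1 at each end, ordered by time
    events = sorted([(s, 1.0) for s, _ in requests] + [(e, -1.0) for _, e in requests])

    active = 0.0
    durations: dict[float, float] = {}  # concurrency level -> time spent at it
    for (time, delta), (next_time, _) in zip(events, events[1:]):
        active += delta
        durations[active] = durations.get(active, 0.0) + (next_time - time)

    print(durations)  # {1.0: 1.0, 2.0: 1.5}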
@@ -363,7 +428,7 @@ class DistributionSummary(StandardBaseModel):
  events[global_end] = 0

  for (_, end), first_iter, first_iter_count, total_count in zip(
- requests, first_iter_times, first_iter_counts, iter_counts
+ requests, first_iter_times, first_iter_counts, iter_counts, strict=True
  ):
  events[first_iter] += first_iter_count

@@ -416,36 +481,45 @@ class StatusDistributionSummary(
  ]
  ):
  """
- A pydantic model representing a statistical summary for a given
- distribution of numerical values grouped by status.
- Specifically used to represent the total, successful, incomplete,
- and errored values for a benchmark or other statistical summary.
+ Status-grouped statistical summary for request processing analysis.
+
+ Provides comprehensive statistical analysis grouped by request status (total,
+ successful, incomplete, errored). Enables performance analysis across different
+ request outcomes for benchmarking and monitoring applications. Each status
+ category maintains complete DistributionSummary metrics.
+
+ Example:
+ ::
+ status_summary = StatusDistributionSummary.from_values(
+ value_types=["successful", "error", "successful"],
+ values=[1.5, 10.0, 2.1]
+ )
+ print(f"Success mean: {status_summary.successful.mean}")
+ print(f"Error rate: {status_summary.errored.count}")
  """

  @staticmethod
  def from_values(
  value_types: list[Literal["successful", "incomplete", "error"]],
  values: list[float],
- weights: Optional[list[float]] = None,
+ weights: list[float] | None = None,
  include_cdf: bool = False,
- ) -> "StatusDistributionSummary":
- """
- Create a statistical summary by status for a given distribution of numerical
- values. This is used to measure the distribution of values for different
- statuses (e.g., successful, incomplete, error) and calculate the statistics
- for each status. Weights are optional to weight the probability distribution
- for each value by. If not provided, all values are equally weighted.
-
- :param value_types: A list of status types for each value in the distribution.
- Must be one of 'successful', 'incomplete', or 'error'.
- :param values: A list of numerical values representing the distribution.
- Must be the same length as value_types.
- :param weights: A list of weights for each value in the distribution.
- If not provided, all values are equally weighted (set to 1).
- Must be the same length as value_types.
- :param include_cdf: Whether to include the calculated cumulative distribution
- function (CDF) in the output StatusDistributionSummary.
- :return: An instance of StatusDistributionSummary with calculated values.
+ ) -> StatusDistributionSummary:
+ """
+ Create status-grouped statistical summary from values and status types.
+
+ Groups numerical values by request status and calculates complete
+ statistical summaries for each category. Enables performance analysis
+ across different request outcomes.
+
+ :param value_types: Status type for each value ("successful", "incomplete",
+ or "error")
+ :param values: Numerical values representing the distribution
+ :param weights: Optional weights for each value (defaults to equal weighting)
+ :param include_cdf: Whether to include cumulative distribution functions
+ :return: StatusDistributionSummary with statistics grouped by status
+ :raises ValueError: If input lists have mismatched lengths or invalid
+ status types
  """
  if any(
  type_ not in {"successful", "incomplete", "error"} for type_ in value_types
@@ -464,36 +538,36 @@ class StatusDistributionSummary(
  )

  _, successful_values, successful_weights = (
- zip(*successful)
+ zip(*successful, strict=True)
  if (
  successful := list(
  filter(
  lambda val: val[0] == "successful",
- zip(value_types, values, weights),
+ zip(value_types, values, weights, strict=True),
  )
  )
  )
  else ([], [], [])
  )
  _, incomplete_values, incomplete_weights = (
- zip(*incomplete)
+ zip(*incomplete, strict=True)
  if (
  incomplete := list(
  filter(
  lambda val: val[0] == "incomplete",
- zip(value_types, values, weights),
+ zip(value_types, values, weights, strict=True),
  )
  )
  )
  else ([], [], [])
  )
  _, errored_values, errored_weights = (
- zip(*errored)
+ zip(*errored, strict=True)
  if (
  errored := list(
  filter(
  lambda val: val[0] == "error",
- zip(value_types, values, weights),
+ zip(value_types, values, weights, strict=True),
  )
  )
  )
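
The grouping above leans on a dense idiom: a walrus assignment inside a conditional expression filters the (type, value, weight) rows for one status, then zip(*group, strict=True) transposes the surviving rows back into parallel columns. A toy walkthrough of the same idiom (illustrative only):

    value_types = ["successful", "error", "successful"]
    values = [1.5, 10.0, 2.1]
    weights = [1.0, 1.0, 1.0]

    # The condition binds `successful` first, so the zip branch can reuse it.
    _, successful_values, successful_weights = (
        zip(*successful, strict=True)
        if (successful := [row for row in zip(value_types, values, weights, strict=True)
                           if row[0] == "successful"])
        else ([], [], [])
    )
    print(successful_values, successful_weights)  # (1.5, 2.1) (1.0, 1.0)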
@@ -528,27 +602,25 @@ class StatusDistributionSummary(
  request_types: list[Literal["successful", "incomplete", "error"]],
  requests: list[tuple[float, float]],
  distribution_type: Literal["concurrency", "rate"],
+ weights: list[float] | None = None,
  include_cdf: bool = False,
  epsilon: float = 1e-6,
- ) -> "StatusDistributionSummary":
- """
- Create a statistical summary by status for given distribution of request times.
- This is used to measure the distribution of request times for different statuses
- (e.g., successful, incomplete, error) for concurrency and rates.
- This will call into DistributionSummary.from_request_times to calculate
- the statistics for each status.
-
- :param request_types: List of status types for each request in the distribution.
- Must be one of 'successful', 'incomplete', or 'error'.
- :param requests: A list of tuples representing the start and end times of
- each request. Example: [(start_1, end_1), (start_2, end_2), ...].
- Must be the same length as request_types.
- :param distribution_type: The type of distribution to calculate.
- Either "concurrency" or "rate".
- :param include_cdf: Whether to include the calculated cumulative distribution
- function (CDF) in the output StatusDistributionSummary.
- :param epsilon: The epsilon value for merging close events.
- :return: An instance of StatusDistributionSummary with calculated values.
+ ) -> StatusDistributionSummary:
+ """
+ Create status-grouped statistical summary from request timing data.
+
+ Analyzes request timings grouped by status to calculate concurrency or
+ rate distributions for each outcome category. Enables comparative
+ performance analysis across successful, incomplete, and errored requests.
+
+ :param request_types: Status type for each request ("successful",
+ "incomplete", or "error")
+ :param requests: List of (start_time, end_time) tuples for each request
+ :param distribution_type: Analysis type - "concurrency" or "rate"
+ :param include_cdf: Whether to include cumulative distribution functions
+ :param epsilon: Threshold for merging close timing events
+ :return: StatusDistributionSummary with timing statistics by status
+ :raises ValueError: If input lists have mismatched lengths or invalid types
  """
  if distribution_type not in {"concurrency", "rate"}:
  raise ValueError(
@@ -571,65 +643,78 @@ class StatusDistributionSummary(
  f"Got {len(request_types)} and {len(requests)} instead.",
  )

- _, successful_requests = (
- zip(*successful)
+ if weights is None:
+ weights = [1.0] * len(requests)
+
+ if len(requests) != len(weights):
+ raise ValueError(
+ "The length of requests and weights must be the same."
+ f"Got {len(requests)} and {len(weights)} instead.",
+ )
+
+ _, successful_requests, successful_weights = (
+ zip(*successful, strict=False)
  if (
  successful := list(
  filter(
  lambda val: val[0] == "successful",
- zip(request_types, requests),
+ zip(request_types, requests, weights, strict=False),
  )
  )
  )
- else ([], [])
+ else ([], [], [])
  )
- _, incomplete_requests = (
- zip(*incomplete)
+ _, incomplete_requests, incomplete_weights = (
+ zip(*incomplete, strict=False)
  if (
  incomplete := list(
  filter(
  lambda val: val[0] == "incomplete",
- zip(request_types, requests),
+ zip(request_types, requests, weights, strict=False),
  )
  )
  )
- else ([], [])
+ else ([], [], [])
  )
- _, errored_requests = (
- zip(*errored)
+ _, errored_requests, errored_weights = (
+ zip(*errored, strict=False)
  if (
  errored := list(
  filter(
  lambda val: val[0] == "error",
- zip(request_types, requests),
+ zip(request_types, requests, weights, strict=False),
  )
  )
  )
- else ([], [])
+ else ([], [], [])
  )

  return StatusDistributionSummary(
  total=DistributionSummary.from_request_times(
  requests,
  distribution_type=distribution_type,
+ weights=weights,
  include_cdf=include_cdf,
  epsilon=epsilon,
  ),
  successful=DistributionSummary.from_request_times(
  successful_requests, # type: ignore[arg-type]
  distribution_type=distribution_type,
+ weights=successful_weights, # type: ignore[arg-type]
  include_cdf=include_cdf,
  epsilon=epsilon,
  ),
  incomplete=DistributionSummary.from_request_times(
  incomplete_requests, # type: ignore[arg-type]
  distribution_type=distribution_type,
+ weights=incomplete_weights, # type: ignore[arg-type]
  include_cdf=include_cdf,
  epsilon=epsilon,
  ),
  errored=DistributionSummary.from_request_times(
  errored_requests, # type: ignore[arg-type]
  distribution_type=distribution_type,
+ weights=errored_weights, # type: ignore[arg-type]
  include_cdf=include_cdf,
  epsilon=epsilon,
  ),
@@ -640,38 +725,31 @@ class StatusDistributionSummary(
  request_types: list[Literal["successful", "incomplete", "error"]],
  requests: list[tuple[float, float]],
  first_iter_times: list[float],
- iter_counts: Optional[list[int]] = None,
- first_iter_counts: Optional[list[int]] = None,
+ iter_counts: list[int] | None = None,
+ first_iter_counts: list[int] | None = None,
  include_cdf: bool = False,
  epsilon: float = 1e-6,
- ) -> "StatusDistributionSummary":
- """
- Create a statistical summary by status for given distribution of request times
- for a request with iterable responses between the start and end.
- For example, this is used to measure auto regressive requests where
- a request is started and at some later point, iterative responses are
- received. This will call into DistributionSummary.from_iterable_request_times
- to calculate the statistics for each status.
-
- :param request_types: List of status types for each request in the distribution.
- Must be one of 'successful', 'incomplete', or 'error'.
- :param requests: A list of tuples representing the start and end times of
- each request. Example: [(start_1, end_1), (start_2, end_2), ...].
- Must be the same length as request_types.
- :param first_iter_times: A list of times when the first iteration of
- each request was received. Must be the same length as requests.
- :param iter_counts: A list of the total number of iterations for each
- request that occurred starting at the first iteration and ending
- at the request end time. Must be the same length as requests.
- If not provided, defaults to 1 for each request.
- :param first_iter_counts: A list of the number of iterations to log
- for the first iteration of each request. For example, when calculating
- total number of tokens processed, this is set to the prompt tokens number.
- If not provided, defaults to 1 for each request.
- :param include_cdf: Whether to include the calculated cumulative distribution
- function (CDF) in the output StatusDistributionSummary.
- :param epsilon: The epsilon value for merging close events.
- :return: An instance of StatusDistributionSummary with calculated values.
+ ) -> StatusDistributionSummary:
+ """
+ Create status-grouped statistical summary from iterative request timing data.
+
+ Analyzes autoregressive request timings grouped by status to calculate
+ iteration rate distributions for each outcome category. Enables comparative
+ analysis of token generation or streaming response performance across
+ different request statuses.
+
+ :param request_types: Status type for each request ("successful",
+ "incomplete", or "error")
+ :param requests: List of (start_time, end_time) tuples for each request
+ :param first_iter_times: Times when first iteration was received for
+ each request
+ :param iter_counts: Total iteration counts for each request (defaults to 1)
+ :param first_iter_counts: Iteration counts for first iteration (defaults
+ to 1)
+ :param include_cdf: Whether to include cumulative distribution functions
+ :param epsilon: Threshold for merging close timing events
+ :return: StatusDistributionSummary with iteration statistics by status
+ :raises ValueError: If input lists have mismatched lengths or invalid types
  """
  if any(
  type_ not in {"successful", "incomplete", "error"}
@@ -709,7 +787,7 @@ class StatusDistributionSummary(
  successful_iter_counts,
  successful_first_iter_counts,
  ) = (
- zip(*successful)
+ zip(*successful, strict=True)
  if (
  successful := list(
  filter(
@@ -720,6 +798,7 @@ class StatusDistributionSummary(
  first_iter_times,
  iter_counts,
  first_iter_counts,
+ strict=True,
  ),
  )
  )
@@ -733,7 +812,7 @@ class StatusDistributionSummary(
  incomplete_iter_counts,
  incomplete_first_iter_counts,
  ) = (
- zip(*incomplete)
+ zip(*incomplete, strict=True)
  if (
  incomplete := list(
  filter(
@@ -744,6 +823,7 @@ class StatusDistributionSummary(
  first_iter_times,
  iter_counts,
  first_iter_counts,
+ strict=True,
  ),
  )
  )
@@ -757,7 +837,7 @@ class StatusDistributionSummary(
  errored_iter_counts,
  errored_first_iter_counts,
  ) = (
- zip(*errored)
+ zip(*errored, strict=True)
  if (
  errored := list(
  filter(
@@ -768,6 +848,7 @@ class StatusDistributionSummary(
  first_iter_times,
  iter_counts,
  first_iter_counts,
+ strict=True,
  ),
  )
  )
@@ -813,13 +894,19 @@ class StatusDistributionSummary(

  class RunningStats(StandardBaseModel):
  """
- Create a running statistics object to track the mean, rate, and other
- statistics of a stream of values.
- 1. The start time is set to the time the object is created.
- 2. The count is set to 0.
- 3. The total is set to 0.
- 4. The last value is set to 0.
- 5. The mean is calculated as the total / count.
+ Real-time statistics tracking for streaming numerical data.
+
+ Maintains mean, rate, and cumulative statistics for continuous data streams
+ without storing individual values. Optimized for memory efficiency in
+ long-running monitoring applications. Supports arithmetic operators for
+ convenient value addition and provides computed properties for derived metrics.
+
+ Example:
+ ::
+ stats = RunningStats()
+ stats += 10.5 # Add value using operator
+ stats.update(20.0, count=3) # Add value with custom count
+ print(f"Mean: {stats.mean}, Rate: {stats.rate}")
  """

  start_time: float = Field(
@@ -867,12 +954,13 @@ class RunningStats(StandardBaseModel):

  def __add__(self, value: Any) -> float:
  """
- Enable the use of the + operator to add a value to the running statistics.
+ Add value using + operator and return current mean.

- :param value: The value to add to the running statistics.
- :return: The mean of the running statistics.
+ :param value: Numerical value to add to the running statistics
+ :return: Updated mean after adding the value
+ :raises ValueError: If value is not numeric (int or float)
  """
- if not isinstance(value, (int, float)):
+ if not isinstance(value, int | float):
  raise ValueError(
  f"Value must be an int or float, got {type(value)} instead.",
  )
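
The isinstance(value, int | float) form adopted here and in __iadd__ below is the PEP 604 union syntax, which isinstance accepts from Python 3.10 on; on 3.9 and earlier the expression int | float itself raises TypeError. It is behaviorally identical to the older tuple form:

    # Equivalence check (requires Python >= 3.10)
    for value in (3, 1.25, "not a number"):
        assert isinstance(value, int | float) == isinstance(value, (int, float))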
@@ -881,14 +969,15 @@ class RunningStats(StandardBaseModel):
  self.update(value)
  return self.mean

- def __iadd__(self, value: Any) -> "RunningStats":
+ def __iadd__(self, value: Any) -> RunningStats:
  """
- Enable the use of the += operator to add a value to the running statistics.
+ Add value using += operator and return updated instance.

- :param value: The value to add to the running statistics.
- :return: The running statistics object.
+ :param value: Numerical value to add to the running statistics
+ :return: Self reference for method chaining
+ :raises ValueError: If value is not numeric (int or float)
  """
- if not isinstance(value, (int, float)):
+ if not isinstance(value, int | float):
  raise ValueError(
  f"Value must be an int or float, got {type(value)} instead.",
  )
@@ -899,11 +988,10 @@ class RunningStats(StandardBaseModel):

  def update(self, value: float, count: int = 1) -> None:
  """
- Update the running statistics with a new value.
+ Update running statistics with new value and count.

- :param value: The new value to add to the running statistics.
- :param count: The number of times to 'count' for the value.
- If not provided, defaults to 1.
+ :param value: Numerical value to add to the running statistics
+ :param count: Number of occurrences to count for this value (defaults to 1)
  """
  self.count += count
  self.total += value
@@ -912,11 +1000,17 @@ class RunningStats(StandardBaseModel):

  class TimeRunningStats(RunningStats):
  """
- Create a running statistics object to track the mean, rate, and other
- statistics of a stream of time values. This is used to track time values
- in milliseconds and seconds.
+ Specialized running statistics for time-based measurements.
+
+ Extends RunningStats with time-specific computed properties for millisecond
+ conversions. Designed for tracking latency, duration, and timing metrics in
+ performance monitoring applications.

- Adds time specific computed_fields such as measurements in milliseconds and seconds.
+ Example:
+ ::
+ time_stats = TimeRunningStats()
+ time_stats += 0.125 # Add 125ms in seconds
+ print(f"Mean: {time_stats.mean_ms}ms, Total: {time_stats.total_ms}ms")
  """

  @computed_field # type: ignore[misc]