guidellm 0.4.0a21__py3-none-any.whl → 0.4.0a169__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of guidellm has been flagged as potentially problematic.

Files changed (115)
  1. guidellm/__init__.py +5 -2
  2. guidellm/__main__.py +452 -252
  3. guidellm/backends/__init__.py +33 -0
  4. guidellm/backends/backend.py +110 -0
  5. guidellm/backends/openai.py +355 -0
  6. guidellm/backends/response_handlers.py +455 -0
  7. guidellm/benchmark/__init__.py +53 -39
  8. guidellm/benchmark/benchmarker.py +150 -317
  9. guidellm/benchmark/entrypoints.py +467 -128
  10. guidellm/benchmark/output.py +519 -771
  11. guidellm/benchmark/profile.py +580 -280
  12. guidellm/benchmark/progress.py +568 -549
  13. guidellm/benchmark/scenarios/__init__.py +40 -0
  14. guidellm/benchmark/scenarios/chat.json +6 -0
  15. guidellm/benchmark/scenarios/rag.json +6 -0
  16. guidellm/benchmark/schemas.py +2086 -0
  17. guidellm/data/__init__.py +28 -4
  18. guidellm/data/collators.py +16 -0
  19. guidellm/data/deserializers/__init__.py +53 -0
  20. guidellm/data/deserializers/deserializer.py +144 -0
  21. guidellm/data/deserializers/file.py +222 -0
  22. guidellm/data/deserializers/huggingface.py +94 -0
  23. guidellm/data/deserializers/memory.py +194 -0
  24. guidellm/data/deserializers/synthetic.py +348 -0
  25. guidellm/data/loaders.py +149 -0
  26. guidellm/data/preprocessors/__init__.py +25 -0
  27. guidellm/data/preprocessors/formatters.py +404 -0
  28. guidellm/data/preprocessors/mappers.py +198 -0
  29. guidellm/data/preprocessors/preprocessor.py +31 -0
  30. guidellm/data/processor.py +31 -0
  31. guidellm/data/schemas.py +13 -0
  32. guidellm/data/utils/__init__.py +6 -0
  33. guidellm/data/utils/dataset.py +94 -0
  34. guidellm/extras/__init__.py +4 -0
  35. guidellm/extras/audio.py +215 -0
  36. guidellm/extras/vision.py +242 -0
  37. guidellm/logger.py +2 -2
  38. guidellm/mock_server/__init__.py +8 -0
  39. guidellm/mock_server/config.py +84 -0
  40. guidellm/mock_server/handlers/__init__.py +17 -0
  41. guidellm/mock_server/handlers/chat_completions.py +280 -0
  42. guidellm/mock_server/handlers/completions.py +280 -0
  43. guidellm/mock_server/handlers/tokenizer.py +142 -0
  44. guidellm/mock_server/models.py +510 -0
  45. guidellm/mock_server/server.py +168 -0
  46. guidellm/mock_server/utils.py +302 -0
  47. guidellm/preprocess/dataset.py +23 -26
  48. guidellm/presentation/builder.py +2 -2
  49. guidellm/presentation/data_models.py +25 -21
  50. guidellm/presentation/injector.py +2 -3
  51. guidellm/scheduler/__init__.py +65 -26
  52. guidellm/scheduler/constraints.py +1035 -0
  53. guidellm/scheduler/environments.py +252 -0
  54. guidellm/scheduler/scheduler.py +140 -368
  55. guidellm/scheduler/schemas.py +272 -0
  56. guidellm/scheduler/strategies.py +519 -0
  57. guidellm/scheduler/worker.py +391 -420
  58. guidellm/scheduler/worker_group.py +707 -0
  59. guidellm/schemas/__init__.py +31 -0
  60. guidellm/schemas/info.py +159 -0
  61. guidellm/schemas/request.py +226 -0
  62. guidellm/schemas/response.py +119 -0
  63. guidellm/schemas/stats.py +228 -0
  64. guidellm/{config.py → settings.py} +32 -21
  65. guidellm/utils/__init__.py +95 -8
  66. guidellm/utils/auto_importer.py +98 -0
  67. guidellm/utils/cli.py +71 -2
  68. guidellm/utils/console.py +183 -0
  69. guidellm/utils/encoding.py +778 -0
  70. guidellm/utils/functions.py +134 -0
  71. guidellm/utils/hf_datasets.py +1 -2
  72. guidellm/utils/hf_transformers.py +4 -4
  73. guidellm/utils/imports.py +9 -0
  74. guidellm/utils/messaging.py +1118 -0
  75. guidellm/utils/mixins.py +115 -0
  76. guidellm/utils/pydantic_utils.py +411 -0
  77. guidellm/utils/random.py +3 -4
  78. guidellm/utils/registry.py +220 -0
  79. guidellm/utils/singleton.py +133 -0
  80. guidellm/{objects → utils}/statistics.py +341 -247
  81. guidellm/utils/synchronous.py +159 -0
  82. guidellm/utils/text.py +163 -50
  83. guidellm/utils/typing.py +41 -0
  84. guidellm/version.py +1 -1
  85. {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a169.dist-info}/METADATA +33 -10
  86. guidellm-0.4.0a169.dist-info/RECORD +95 -0
  87. guidellm/backend/__init__.py +0 -23
  88. guidellm/backend/backend.py +0 -259
  89. guidellm/backend/openai.py +0 -705
  90. guidellm/backend/response.py +0 -136
  91. guidellm/benchmark/aggregator.py +0 -760
  92. guidellm/benchmark/benchmark.py +0 -837
  93. guidellm/benchmark/scenario.py +0 -104
  94. guidellm/data/prideandprejudice.txt.gz +0 -0
  95. guidellm/dataset/__init__.py +0 -22
  96. guidellm/dataset/creator.py +0 -213
  97. guidellm/dataset/entrypoints.py +0 -42
  98. guidellm/dataset/file.py +0 -92
  99. guidellm/dataset/hf_datasets.py +0 -62
  100. guidellm/dataset/in_memory.py +0 -132
  101. guidellm/dataset/synthetic.py +0 -287
  102. guidellm/objects/__init__.py +0 -18
  103. guidellm/objects/pydantic.py +0 -89
  104. guidellm/request/__init__.py +0 -18
  105. guidellm/request/loader.py +0 -284
  106. guidellm/request/request.py +0 -79
  107. guidellm/request/types.py +0 -10
  108. guidellm/scheduler/queues.py +0 -25
  109. guidellm/scheduler/result.py +0 -155
  110. guidellm/scheduler/strategy.py +0 -495
  111. guidellm-0.4.0a21.dist-info/RECORD +0 -62
  112. {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a169.dist-info}/WHEEL +0 -0
  113. {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a169.dist-info}/entry_points.txt +0 -0
  114. {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a169.dist-info}/licenses/LICENSE +0 -0
  115. {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a169.dist-info}/top_level.txt +0 -0
@@ -1,12 +1,24 @@
+ """
+ Statistical analysis utilities for distribution calculations and running metrics.
+
+ Provides comprehensive statistical computation tools for analyzing numerical
+ distributions, percentiles, and streaming data. Includes specialized support for
+ request timing analysis, concurrency measurement, and rate calculations. Integrates
+ with Pydantic for serializable statistical models and supports both weighted and
+ unweighted distributions with cumulative distribution function (CDF) generation.
+ """
+
+ from __future__ import annotations
+
  import math
  import time as timer
  from collections import defaultdict
- from typing import Any, Literal, Optional
+ from typing import Any, Literal

  import numpy as np
  from pydantic import Field, computed_field

- from guidellm.objects.pydantic import StandardBaseModel, StatusBreakdown
+ from guidellm.utils.pydantic_utils import StandardBaseModel, StatusBreakdown

  __all__ = [
  "DistributionSummary",
@@ -19,7 +31,11 @@ __all__ = [

  class Percentiles(StandardBaseModel):
  """
- A pydantic model representing the standard percentiles of a distribution.
+ Standard percentiles model for statistical distribution analysis.
+
+ Provides complete percentile coverage from 0.1th to 99.9th percentiles for
+ statistical distribution characterization. Used as a component within
+ DistributionSummary to provide detailed distribution shape analysis.
  """

  p001: float = Field(
@@ -59,8 +75,25 @@ class Percentiles(StandardBaseModel):

  class DistributionSummary(StandardBaseModel):
  """
- A pydantic model representing a statistical summary for a given
- distribution of numerical values.
+ Comprehensive statistical summary for numerical value distributions.
+
+ Calculates and stores complete statistical metrics including central tendency,
+ dispersion, extremes, and percentiles for any numerical distribution. Supports
+ both weighted and unweighted data with optional cumulative distribution function
+ generation. Primary statistical analysis tool for request timing, performance
+ metrics, and benchmark result characterization.
+
+ Example:
+ ::
+ # Create from simple values
+ summary = DistributionSummary.from_values([1.0, 2.0, 3.0, 4.0, 5.0])
+ print(f"Mean: {summary.mean}, P95: {summary.percentiles.p95}")
+
+ # Create from request timings for concurrency analysis
+ requests = [(0.0, 1.0), (0.5, 2.0), (1.0, 2.5)]
+ concurrency = DistributionSummary.from_request_times(
+ requests, "concurrency"
+ )
  """

  mean: float = Field(
@@ -93,7 +126,7 @@ class DistributionSummary(StandardBaseModel):
  percentiles: Percentiles = Field(
  description="The percentiles of the distribution.",
  )
- cumulative_distribution_function: Optional[list[tuple[float, float]]] = Field(
+ cumulative_distribution_function: list[tuple[float, float]] | None = Field(
  description="The cumulative distribution function (CDF) of the distribution.",
  default=None,
  )
@@ -102,24 +135,21 @@ class DistributionSummary(StandardBaseModel):
  def from_distribution_function(
  distribution: list[tuple[float, float]],
  include_cdf: bool = False,
- ) -> "DistributionSummary":
- """
- Create a statistical summary for a given distribution of weighted numerical
- values or a probability distribution function (PDF).
- 1. If the distribution is a PDF, it is expected to be a list of tuples
- where each tuple contains (value, probability). The sum of the
- probabilities should be 1. If it is not, it will be normalized.
- 2. If the distribution is a values distribution function, it is expected
- to be a list of tuples where each tuple contains (value, weight).
- The weights are normalized to a probability distribution function.
-
- :param distribution: A list of tuples representing the distribution.
- Each tuple contains (value, weight) or (value, probability).
- :param include_cdf: Whether to include the calculated cumulative distribution
- function (CDF) in the output DistributionSummary.
- :return: An instance of DistributionSummary with calculated values.
- """
- values, weights = zip(*distribution) if distribution else ([], [])
+ ) -> DistributionSummary:
+ """
+ Create statistical summary from weighted distribution or probability function.
+
+ Converts weighted numerical values or probability distribution function (PDF)
+ into comprehensive statistical summary. Normalizes weights to probabilities
+ and calculates all statistical metrics including percentiles.
+
+ :param distribution: List of (value, weight) or (value, probability) tuples
+ representing the distribution
+ :param include_cdf: Whether to include cumulative distribution function
+ in the output
+ :return: DistributionSummary instance with calculated statistical metrics
+ """
+ values, weights = zip(*distribution, strict=True) if distribution else ([], [])
  values = np.array(values) # type: ignore[assignment]
  weights = np.array(weights) # type: ignore[assignment]
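
Per the new docstring, from_distribution_function normalizes the (value, weight) pairs into probabilities before computing the summary statistics. A standalone sketch of that normalization step as described (illustrative only, not copied from the implementation):

    import numpy as np

    distribution = [(1.0, 1.0), (2.0, 3.0)]  # (value, weight) pairs
    values, weights = zip(*distribution, strict=True)
    probabilities = np.array(weights) / np.sum(weights)  # [0.25, 0.75]
    weighted_mean = float(np.sum(probabilities * np.array(values)))
    print(weighted_mean)  # 1.75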
@@ -190,20 +220,23 @@ class DistributionSummary(StandardBaseModel):
  @staticmethod
  def from_values(
  values: list[float],
- weights: Optional[list[float]] = None,
+ weights: list[float] | None = None,
  include_cdf: bool = False,
- ) -> "DistributionSummary":
+ ) -> DistributionSummary:
  """
- Create a statistical summary for a given distribution of numerical values.
- This is a wrapper around from_distribution_function to handle the optional case
- of including weights for the values. If weights are not provided, they are
- automatically set to 1.0 for each value, so each value is equally weighted.
-
- :param values: A list of numerical values representing the distribution.
- :param weights: A list of weights for each value in the distribution.
- If not provided, all values are equally weighted.
- :param include_cdf: Whether to include the calculated cumulative distribution
- function (CDF) in the output DistributionSummary.
+ Create statistical summary from numerical values with optional weights.
+
+ Wrapper around from_distribution_function for simple value lists. If weights
+ are not provided, all values are equally weighted. Enables statistical
+ analysis of any numerical dataset.
+
+ :param values: Numerical values representing the distribution
+ :param weights: Optional weights for each value. If not provided, all values
+ are equally weighted
+ :param include_cdf: Whether to include cumulative distribution function in
+ the output DistributionSummary
+ :return: DistributionSummary instance with calculated statistical metrics
+ :raises ValueError: If values and weights lists have different lengths
  """
  if weights is None:
  weights = [1.0] * len(values)
@@ -214,7 +247,7 @@ class DistributionSummary(StandardBaseModel):
  )

  return DistributionSummary.from_distribution_function(
- distribution=list(zip(values, weights)),
+ distribution=list(zip(values, weights, strict=True)),
  include_cdf=include_cdf,
  )
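
This hunk, like many below, threads strict=True through zip(). That flag (Python 3.10+) turns silent truncation on mismatched lengths into a ValueError, which is what backs the new ":raises ValueError:" docstring entries. A quick sketch of the difference:

    values = [1.0, 2.0, 3.0]
    weights = [1.0, 1.0]  # deliberately mismatched

    print(list(zip(values, weights)))  # [(1.0, 1.0), (2.0, 1.0)], silently truncated
    try:
        list(zip(values, weights, strict=True))
    except ValueError as err:
        print(err)  # zip() argument 2 is shorter than argument 1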
@@ -222,122 +255,154 @@ class DistributionSummary(StandardBaseModel):
  def from_request_times(
  requests: list[tuple[float, float]],
  distribution_type: Literal["concurrency", "rate"],
+ weights: list[float] | None = None,
  include_cdf: bool = False,
  epsilon: float = 1e-6,
- ) -> "DistributionSummary":
- """
- Create a statistical summary for a given distribution of request times.
- Specifically, this is used to measure concurrency or rate of requests
- given an input list containing the start and end time of each request.
- This will first convert the request times into a distribution function
- and then calculate the statistics with from_distribution_function.
-
- :param requests: A list of tuples representing the start and end times of
- each request. Example: [(start_1, end_1), (start_2, end_2), ...]
- :param distribution_type: The type of distribution to calculate.
- Either "concurrency" or "rate".
- :param include_cdf: Whether to include the calculated cumulative distribution
- function (CDF) in the output DistributionSummary.
- :param epsilon: The epsilon value for merging close events.
- :return: An instance of DistributionSummary with calculated values.
+ ) -> DistributionSummary:
  """
- if distribution_type == "concurrency":
- # convert to delta changes based on when requests were running
- events = [(start, 1) for start, _ in requests] + [
- (end, -1) for _, end in requests
- ]
- elif distribution_type == "rate":
- # convert to events for when requests finished
- global_start = min(start for start, _ in requests) if requests else 0
- events = [(global_start, 1)] + [(end, 1) for _, end in requests]
- else:
- raise ValueError(
- f"Invalid distribution_type '{distribution_type}'. "
- "Must be 'concurrency' or 'rate'."
- )
+ Create statistical summary from request timing data.
+
+ Analyzes request start/end times to calculate concurrency or rate
+ distributions. Converts timing events into statistical metrics for
+ performance analysis and load characterization.
+
+ :param requests: List of (start_time, end_time) tuples for each request
+ :param distribution_type: Type of analysis - "concurrency" for simultaneous
+ requests or "rate" for completion rates
+ :param include_cdf: Whether to include cumulative distribution function
+ :param epsilon: Threshold for merging close timing events
+ :return: DistributionSummary with timing-based statistical metrics
+ :raises ValueError: If distribution_type is not "concurrency" or "rate"
+ """
+ if not weights:
+ weights = [1.0] * len(requests)

- # combine any events that are very close together
- flattened_events: list[tuple[float, float]] = []
- for time, val in sorted(events):
- last_time, last_val = (
- flattened_events[-1] if flattened_events else (None, None)
+ if len(requests) != len(weights):
+ raise ValueError(
+ "The length of requests and weights must be the same.",
  )

- if (
- last_time is not None
- and last_val is not None
- and abs(last_time - time) <= epsilon
- ):
- flattened_events[-1] = (last_time, last_val + val)
- else:
- flattened_events.append((time, val))
-
- if distribution_type == "concurrency":
- # convert to the events over time measuring concurrency changes
- events_over_time: list[tuple[float, float]] = []
- active = 0
- for time, delta in flattened_events:
- active += delta # type: ignore [assignment]
- events_over_time.append((time, active))
+ # First convert to timing events based on type
+ events = DistributionSummary._convert_to_timing_events(
+ requests, distribution_type, weights
+ )

- flattened_events = events_over_time
+ # Combine any events within epsilon of each other for stability
+ flattened_events = DistributionSummary._combine_events(events, epsilon)

- # convert to value distribution function
+ # Convert events to value distribution function
  distribution: dict[float, float] = defaultdict(float)

- for ind in range(len(flattened_events) - 1):
- start_time, value = flattened_events[ind]
- end_time, _ = flattened_events[ind + 1]
- duration = end_time - start_time
-
- if distribution_type == "concurrency":
- # weight the concurrency value by the duration
+ if distribution_type == "concurrency":
+ # For concurrency, convert to active concurrency over time
+ active = 0.0
+ for ind in range(len(flattened_events)):
+ time, change = flattened_events[ind]
+ active += change
+ flattened_events[ind] = (time, active)
+
+ # Then convert to distribution by weighting each concurrency
+ # by duration to next event (last event is 0 concurrency)
+ for ind in range(len(flattened_events) - 1):
+ time, value = flattened_events[ind]
+ next_time = flattened_events[ind + 1][0]
+ duration = next_time - time
  distribution[value] += duration
- elif distribution_type == "rate":
- # weight the rate value by the duration
- rate = value / duration
+ elif distribution_type == "rate":
+ # For rate, convert to distribution by converting each value
+ # to a rate (value/duration) weighted by duration from previous
+ # (first event is 0 rate)
+ for ind in range(1, len(flattened_events)):
+ time, value = flattened_events[ind]
+ prev_time = flattened_events[ind - 1][0]
+ duration = time - prev_time
+ rate = value / duration if duration > 0 else 0.0
  distribution[rate] += duration
-
- distribution_list: list[tuple[float, float]] = sorted(distribution.items())
+ else:
+ raise ValueError(
+ f"Invalid distribution_type '{distribution_type}'. "
+ "Must be 'concurrency' or 'rate'."
+ )

  return DistributionSummary.from_distribution_function(
- distribution=distribution_list,
+ distribution=sorted(distribution.items()),
  include_cdf=include_cdf,
  )

+ @staticmethod
+ def _convert_to_timing_events(
+ requests: list[tuple[float, float]],
+ distribution_type: Literal["concurrency", "rate"],
+ weights: list[float],
+ ) -> list[tuple[float, float]]:
+ events: list[tuple[float, float]] = []
+
+ if distribution_type == "concurrency":
+ # For concurrency, each request adds to concurrency at start
+ # and subtracts at end
+ for (start, end), weight in zip(requests, weights, strict=False):
+ events.append((start, weight))
+ events.append((end, -1 * weight))
+ elif distribution_type == "rate":
+ # For rate, each request is added at the end time only
+ global_start = min(start for start, _ in requests) if requests else 0.0
+ events.append((global_start, 0.0))
+ for (_, end), weight in zip(requests, weights, strict=False):
+ events.append((end, weight))
+ else:
+ raise ValueError(
+ f"Invalid distribution_type '{distribution_type}'. "
+ "Must be 'concurrency' or 'rate'."
+ )
+ return events
+
+ @staticmethod
+ def _combine_events(
+ events: list[tuple[float, float]],
+ epsilon: float,
+ ) -> list[tuple[float, float]]:
+ sorted_events = sorted(events, key=lambda event: event[0])
+ flattened_events: list[tuple[float, float]] = (
+ [sorted_events.pop(0)] if sorted_events else []
+ )
+ last_time = flattened_events[0][0] if flattened_events else 0.0
+
+ for time, val in sorted_events:
+ if abs(time - last_time) <= epsilon:
+ last_val = flattened_events[-1][1]
+ flattened_events[-1] = (last_time, last_val + val)
+ else:
+ last_time = time
+ flattened_events.append((time, val))
+ return flattened_events
+
  @staticmethod
  def from_iterable_request_times(
  requests: list[tuple[float, float]],
  first_iter_times: list[float],
  iter_counts: list[int],
- first_iter_counts: Optional[list[int]] = None,
+ first_iter_counts: list[int] | None = None,
  include_cdf: bool = False,
  epsilon: float = 1e-6,
- ) -> "DistributionSummary":
- """
- Create a statistical summary for a given distribution of request times
- for a request with iterable responses between the start and end.
- For example, this is used to measure auto regressive requests where
- a request is started and at some later point, iterative responses are
- received. This will convert the request times and iterable values into
- a distribution function and then calculate the statistics with
- from_distribution_function.
-
- :param requests: A list of tuples representing the start and end times of
- each request. Example: [(start_1, end_1), (start_2, end_2), ...]
- :param first_iter_times: A list of times when the first iteration of
- each request was received. Must be the same length as requests.
- :param iter_counts: A list of the total number of iterations for each
- request that occurred starting at the first iteration and ending
- at the request end time. Must be the same length as requests.
- :param first_iter_counts: A list of the number of iterations to log
- for the first iteration of each request. For example, when calculating
- total number of tokens processed, this is set to the prompt tokens number.
- If not provided, defaults to 1 for each request.
- :param include_cdf: Whether to include the calculated cumulative distribution
- function (CDF) in the output DistributionSummary.
- :param epsilon: The epsilon value for merging close events.
- :return: An instance of DistributionSummary with calculated values.
+ ) -> DistributionSummary:
+ """
+ Create statistical summary from iterative request timing data.
+
+ Analyzes autoregressive or streaming requests with multiple iterations
+ between start and end times. Calculates rate distributions based on
+ iteration timing patterns for LLM token generation analysis.
+
+ :param requests: List of (start_time, end_time) tuples for each request
+ :param first_iter_times: Times when first iteration was received for
+ each request
+ :param iter_counts: Total iteration counts for each request from first
+ iteration to end
+ :param first_iter_counts: Iteration counts for first iteration (defaults
+ to 1 for each request)
+ :param include_cdf: Whether to include cumulative distribution function
+ :param epsilon: Threshold for merging close timing events
+ :return: DistributionSummary with iteration rate statistical metrics
+ :raises ValueError: If input lists have mismatched lengths
  """

  if first_iter_counts is None:
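
The reworked from_request_times above is a classic event sweep: _convert_to_timing_events emits +weight at each request start and -weight at each end (for concurrency), _combine_events merges events closer together than epsilon, and the running sum between consecutive events gives the concurrency held over that interval. A standalone sketch of the same sweep on toy data (illustrative only, not guidellm code):

    requests = [(0.0, 1.0), (0.5, 2.0), (1.0, 2.5)]  # (start, end) per request

    # +1 at each start, -1 at each end, ordered by time
    events = sorted([(s, 1.0) for s, _ in requests] + [(e, -1.0) for _, e in requests])

    active = 0.0
    durations: dict[float, float] = {}  # concurrency level -> time spent at it
    for (time, delta), (next_time, _) in zip(events, events[1:]):
        active += delta
        durations[active] = durations.get(active, 0.0) + (next_time - time)

    print(durations)  # {1.0: 1.0, 2.0: 1.5}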
@@ -363,7 +428,7 @@ class DistributionSummary(StandardBaseModel):
  events[global_end] = 0

  for (_, end), first_iter, first_iter_count, total_count in zip(
- requests, first_iter_times, first_iter_counts, iter_counts
+ requests, first_iter_times, first_iter_counts, iter_counts, strict=True
  ):
  events[first_iter] += first_iter_count

@@ -416,36 +481,45 @@ class StatusDistributionSummary(
  ]
  ):
  """
- A pydantic model representing a statistical summary for a given
- distribution of numerical values grouped by status.
- Specifically used to represent the total, successful, incomplete,
- and errored values for a benchmark or other statistical summary.
+ Status-grouped statistical summary for request processing analysis.
+
+ Provides comprehensive statistical analysis grouped by request status (total,
+ successful, incomplete, errored). Enables performance analysis across different
+ request outcomes for benchmarking and monitoring applications. Each status
+ category maintains complete DistributionSummary metrics.
+
+ Example:
+ ::
+ status_summary = StatusDistributionSummary.from_values(
+ value_types=["successful", "error", "successful"],
+ values=[1.5, 10.0, 2.1]
+ )
+ print(f"Success mean: {status_summary.successful.mean}")
+ print(f"Error rate: {status_summary.errored.count}")
  """

  @staticmethod
  def from_values(
  value_types: list[Literal["successful", "incomplete", "error"]],
  values: list[float],
- weights: Optional[list[float]] = None,
+ weights: list[float] | None = None,
  include_cdf: bool = False,
- ) -> "StatusDistributionSummary":
- """
- Create a statistical summary by status for a given distribution of numerical
- values. This is used to measure the distribution of values for different
- statuses (e.g., successful, incomplete, error) and calculate the statistics
- for each status. Weights are optional to weight the probability distribution
- for each value by. If not provided, all values are equally weighted.
-
- :param value_types: A list of status types for each value in the distribution.
- Must be one of 'successful', 'incomplete', or 'error'.
- :param values: A list of numerical values representing the distribution.
- Must be the same length as value_types.
- :param weights: A list of weights for each value in the distribution.
- If not provided, all values are equally weighted (set to 1).
- Must be the same length as value_types.
- :param include_cdf: Whether to include the calculated cumulative distribution
- function (CDF) in the output StatusDistributionSummary.
- :return: An instance of StatusDistributionSummary with calculated values.
+ ) -> StatusDistributionSummary:
+ """
+ Create status-grouped statistical summary from values and status types.
+
+ Groups numerical values by request status and calculates complete
+ statistical summaries for each category. Enables performance analysis
+ across different request outcomes.
+
+ :param value_types: Status type for each value ("successful", "incomplete",
+ or "error")
+ :param values: Numerical values representing the distribution
+ :param weights: Optional weights for each value (defaults to equal weighting)
+ :param include_cdf: Whether to include cumulative distribution functions
+ :return: StatusDistributionSummary with statistics grouped by status
+ :raises ValueError: If input lists have mismatched lengths or invalid
+ status types
  """
  if any(
  type_ not in {"successful", "incomplete", "error"} for type_ in value_types
@@ -464,36 +538,36 @@ class StatusDistributionSummary(
  )

  _, successful_values, successful_weights = (
- zip(*successful)
+ zip(*successful, strict=True)
  if (
  successful := list(
  filter(
  lambda val: val[0] == "successful",
- zip(value_types, values, weights),
+ zip(value_types, values, weights, strict=True),
  )
  )
  )
  else ([], [], [])
  )
  _, incomplete_values, incomplete_weights = (
- zip(*incomplete)
+ zip(*incomplete, strict=True)
  if (
  incomplete := list(
  filter(
  lambda val: val[0] == "incomplete",
- zip(value_types, values, weights),
+ zip(value_types, values, weights, strict=True),
  )
  )
  )
  else ([], [], [])
  )
  _, errored_values, errored_weights = (
- zip(*errored)
+ zip(*errored, strict=True)
  if (
  errored := list(
  filter(
  lambda val: val[0] == "error",
- zip(value_types, values, weights),
+ zip(value_types, values, weights, strict=True),
  )
  )
  )
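
The grouping above leans on a dense idiom: a walrus assignment inside a conditional expression filters the (type, value, weight) rows for one status, then zip(*group, strict=True) transposes the surviving rows back into parallel columns. A toy walkthrough of the same idiom (illustrative only):

    value_types = ["successful", "error", "successful"]
    values = [1.5, 10.0, 2.1]
    weights = [1.0, 1.0, 1.0]

    # The condition binds `successful` first, so the zip branch can reuse it.
    _, successful_values, successful_weights = (
        zip(*successful, strict=True)
        if (successful := [row for row in zip(value_types, values, weights, strict=True)
                           if row[0] == "successful"])
        else ([], [], [])
    )
    print(successful_values, successful_weights)  # (1.5, 2.1) (1.0, 1.0)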
@@ -528,27 +602,25 @@ class StatusDistributionSummary(
  request_types: list[Literal["successful", "incomplete", "error"]],
  requests: list[tuple[float, float]],
  distribution_type: Literal["concurrency", "rate"],
+ weights: list[float] | None = None,
  include_cdf: bool = False,
  epsilon: float = 1e-6,
- ) -> "StatusDistributionSummary":
- """
- Create a statistical summary by status for given distribution of request times.
- This is used to measure the distribution of request times for different statuses
- (e.g., successful, incomplete, error) for concurrency and rates.
- This will call into DistributionSummary.from_request_times to calculate
- the statistics for each status.
-
- :param request_types: List of status types for each request in the distribution.
- Must be one of 'successful', 'incomplete', or 'error'.
- :param requests: A list of tuples representing the start and end times of
- each request. Example: [(start_1, end_1), (start_2, end_2), ...].
- Must be the same length as request_types.
- :param distribution_type: The type of distribution to calculate.
- Either "concurrency" or "rate".
- :param include_cdf: Whether to include the calculated cumulative distribution
- function (CDF) in the output StatusDistributionSummary.
- :param epsilon: The epsilon value for merging close events.
- :return: An instance of StatusDistributionSummary with calculated values.
+ ) -> StatusDistributionSummary:
+ """
+ Create status-grouped statistical summary from request timing data.
+
+ Analyzes request timings grouped by status to calculate concurrency or
+ rate distributions for each outcome category. Enables comparative
+ performance analysis across successful, incomplete, and errored requests.
+
+ :param request_types: Status type for each request ("successful",
+ "incomplete", or "error")
+ :param requests: List of (start_time, end_time) tuples for each request
+ :param distribution_type: Analysis type - "concurrency" or "rate"
+ :param include_cdf: Whether to include cumulative distribution functions
+ :param epsilon: Threshold for merging close timing events
+ :return: StatusDistributionSummary with timing statistics by status
+ :raises ValueError: If input lists have mismatched lengths or invalid types
  """
  if distribution_type not in {"concurrency", "rate"}:
  raise ValueError(
@@ -571,65 +643,78 @@ class StatusDistributionSummary(
  f"Got {len(request_types)} and {len(requests)} instead.",
  )

- _, successful_requests = (
- zip(*successful)
+ if weights is None:
+ weights = [1.0] * len(requests)
+
+ if len(requests) != len(weights):
+ raise ValueError(
+ "The length of requests and weights must be the same."
+ f"Got {len(requests)} and {len(weights)} instead.",
+ )
+
+ _, successful_requests, successful_weights = (
+ zip(*successful, strict=False)
  if (
  successful := list(
  filter(
  lambda val: val[0] == "successful",
- zip(request_types, requests),
+ zip(request_types, requests, weights, strict=False),
  )
  )
  )
- else ([], [])
+ else ([], [], [])
  )
- _, incomplete_requests = (
- zip(*incomplete)
+ _, incomplete_requests, incomplete_weights = (
+ zip(*incomplete, strict=False)
  if (
  incomplete := list(
  filter(
  lambda val: val[0] == "incomplete",
- zip(request_types, requests),
+ zip(request_types, requests, weights, strict=False),
  )
  )
  )
- else ([], [])
+ else ([], [], [])
  )
- _, errored_requests = (
- zip(*errored)
+ _, errored_requests, errored_weights = (
+ zip(*errored, strict=False)
  if (
  errored := list(
  filter(
  lambda val: val[0] == "error",
- zip(request_types, requests),
+ zip(request_types, requests, weights, strict=False),
  )
  )
  )
- else ([], [])
+ else ([], [], [])
  )

  return StatusDistributionSummary(
  total=DistributionSummary.from_request_times(
  requests,
  distribution_type=distribution_type,
+ weights=weights,
  include_cdf=include_cdf,
  epsilon=epsilon,
  ),
  successful=DistributionSummary.from_request_times(
  successful_requests, # type: ignore[arg-type]
  distribution_type=distribution_type,
+ weights=successful_weights, # type: ignore[arg-type]
  include_cdf=include_cdf,
  epsilon=epsilon,
  ),
  incomplete=DistributionSummary.from_request_times(
  incomplete_requests, # type: ignore[arg-type]
  distribution_type=distribution_type,
+ weights=incomplete_weights, # type: ignore[arg-type]
  include_cdf=include_cdf,
  epsilon=epsilon,
  ),
  errored=DistributionSummary.from_request_times(
  errored_requests, # type: ignore[arg-type]
  distribution_type=distribution_type,
+ weights=errored_weights, # type: ignore[arg-type]
  include_cdf=include_cdf,
  epsilon=epsilon,
  ),
@@ -640,38 +725,31 @@ class StatusDistributionSummary(
  request_types: list[Literal["successful", "incomplete", "error"]],
  requests: list[tuple[float, float]],
  first_iter_times: list[float],
- iter_counts: Optional[list[int]] = None,
- first_iter_counts: Optional[list[int]] = None,
+ iter_counts: list[int] | None = None,
+ first_iter_counts: list[int] | None = None,
  include_cdf: bool = False,
  epsilon: float = 1e-6,
- ) -> "StatusDistributionSummary":
- """
- Create a statistical summary by status for given distribution of request times
- for a request with iterable responses between the start and end.
- For example, this is used to measure auto regressive requests where
- a request is started and at some later point, iterative responses are
- received. This will call into DistributionSummary.from_iterable_request_times
- to calculate the statistics for each status.
-
- :param request_types: List of status types for each request in the distribution.
- Must be one of 'successful', 'incomplete', or 'error'.
- :param requests: A list of tuples representing the start and end times of
- each request. Example: [(start_1, end_1), (start_2, end_2), ...].
- Must be the same length as request_types.
- :param first_iter_times: A list of times when the first iteration of
- each request was received. Must be the same length as requests.
- :param iter_counts: A list of the total number of iterations for each
- request that occurred starting at the first iteration and ending
- at the request end time. Must be the same length as requests.
- If not provided, defaults to 1 for each request.
- :param first_iter_counts: A list of the number of iterations to log
- for the first iteration of each request. For example, when calculating
- total number of tokens processed, this is set to the prompt tokens number.
- If not provided, defaults to 1 for each request.
- :param include_cdf: Whether to include the calculated cumulative distribution
- function (CDF) in the output StatusDistributionSummary.
- :param epsilon: The epsilon value for merging close events.
- :return: An instance of StatusDistributionSummary with calculated values.
+ ) -> StatusDistributionSummary:
+ """
+ Create status-grouped statistical summary from iterative request timing data.
+
+ Analyzes autoregressive request timings grouped by status to calculate
+ iteration rate distributions for each outcome category. Enables comparative
+ analysis of token generation or streaming response performance across
+ different request statuses.
+
+ :param request_types: Status type for each request ("successful",
+ "incomplete", or "error")
+ :param requests: List of (start_time, end_time) tuples for each request
+ :param first_iter_times: Times when first iteration was received for
+ each request
+ :param iter_counts: Total iteration counts for each request (defaults to 1)
+ :param first_iter_counts: Iteration counts for first iteration (defaults
+ to 1)
+ :param include_cdf: Whether to include cumulative distribution functions
+ :param epsilon: Threshold for merging close timing events
+ :return: StatusDistributionSummary with iteration statistics by status
+ :raises ValueError: If input lists have mismatched lengths or invalid types
  """
  if any(
  type_ not in {"successful", "incomplete", "error"}
@@ -709,7 +787,7 @@ class StatusDistributionSummary(
  successful_iter_counts,
  successful_first_iter_counts,
  ) = (
- zip(*successful)
+ zip(*successful, strict=True)
  if (
  successful := list(
  filter(
@@ -720,6 +798,7 @@ class StatusDistributionSummary(
  first_iter_times,
  iter_counts,
  first_iter_counts,
+ strict=True,
  ),
  )
  )
@@ -733,7 +812,7 @@ class StatusDistributionSummary(
  incomplete_iter_counts,
  incomplete_first_iter_counts,
  ) = (
- zip(*incomplete)
+ zip(*incomplete, strict=True)
  if (
  incomplete := list(
  filter(
@@ -744,6 +823,7 @@ class StatusDistributionSummary(
  first_iter_times,
  iter_counts,
  first_iter_counts,
+ strict=True,
  ),
  )
  )
@@ -757,7 +837,7 @@ class StatusDistributionSummary(
  errored_iter_counts,
  errored_first_iter_counts,
  ) = (
- zip(*errored)
+ zip(*errored, strict=True)
  if (
  errored := list(
  filter(
@@ -768,6 +848,7 @@ class StatusDistributionSummary(
  first_iter_times,
  iter_counts,
  first_iter_counts,
+ strict=True,
  ),
  )
  )
@@ -813,13 +894,19 @@ class StatusDistributionSummary(

  class RunningStats(StandardBaseModel):
  """
- Create a running statistics object to track the mean, rate, and other
- statistics of a stream of values.
- 1. The start time is set to the time the object is created.
- 2. The count is set to 0.
- 3. The total is set to 0.
- 4. The last value is set to 0.
- 5. The mean is calculated as the total / count.
+ Real-time statistics tracking for streaming numerical data.
+
+ Maintains mean, rate, and cumulative statistics for continuous data streams
+ without storing individual values. Optimized for memory efficiency in
+ long-running monitoring applications. Supports arithmetic operators for
+ convenient value addition and provides computed properties for derived metrics.
+
+ Example:
+ ::
+ stats = RunningStats()
+ stats += 10.5 # Add value using operator
+ stats.update(20.0, count=3) # Add value with custom count
+ print(f"Mean: {stats.mean}, Rate: {stats.rate}")
  """

  start_time: float = Field(
@@ -867,12 +954,13 @@ class RunningStats(StandardBaseModel):

  def __add__(self, value: Any) -> float:
  """
- Enable the use of the + operator to add a value to the running statistics.
+ Add value using + operator and return current mean.

- :param value: The value to add to the running statistics.
- :return: The mean of the running statistics.
+ :param value: Numerical value to add to the running statistics
+ :return: Updated mean after adding the value
+ :raises ValueError: If value is not numeric (int or float)
  """
- if not isinstance(value, (int, float)):
+ if not isinstance(value, int | float):
  raise ValueError(
  f"Value must be an int or float, got {type(value)} instead.",
  )
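
The isinstance(value, int | float) form adopted here and in __iadd__ below is the PEP 604 union syntax, which isinstance accepts from Python 3.10 on; on 3.9 and earlier the expression int | float itself raises TypeError. It is behaviorally identical to the older tuple form:

    # Equivalence check (requires Python >= 3.10)
    for value in (3, 1.25, "not a number"):
        assert isinstance(value, int | float) == isinstance(value, (int, float))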
@@ -881,14 +969,15 @@ class RunningStats(StandardBaseModel):
  self.update(value)
  return self.mean

- def __iadd__(self, value: Any) -> "RunningStats":
+ def __iadd__(self, value: Any) -> RunningStats:
  """
- Enable the use of the += operator to add a value to the running statistics.
+ Add value using += operator and return updated instance.

- :param value: The value to add to the running statistics.
- :return: The running statistics object.
+ :param value: Numerical value to add to the running statistics
+ :return: Self reference for method chaining
+ :raises ValueError: If value is not numeric (int or float)
  """
- if not isinstance(value, (int, float)):
+ if not isinstance(value, int | float):
  raise ValueError(
  f"Value must be an int or float, got {type(value)} instead.",
  )
@@ -899,11 +988,10 @@ class RunningStats(StandardBaseModel):

  def update(self, value: float, count: int = 1) -> None:
  """
- Update the running statistics with a new value.
+ Update running statistics with new value and count.

- :param value: The new value to add to the running statistics.
- :param count: The number of times to 'count' for the value.
- If not provided, defaults to 1.
+ :param value: Numerical value to add to the running statistics
+ :param count: Number of occurrences to count for this value (defaults to 1)
  """
  self.count += count
  self.total += value
@@ -912,11 +1000,17 @@ class RunningStats(StandardBaseModel):

  class TimeRunningStats(RunningStats):
  """
- Create a running statistics object to track the mean, rate, and other
- statistics of a stream of time values. This is used to track time values
- in milliseconds and seconds.
+ Specialized running statistics for time-based measurements.
+
+ Extends RunningStats with time-specific computed properties for millisecond
+ conversions. Designed for tracking latency, duration, and timing metrics in
+ performance monitoring applications.

- Adds time specific computed_fields such as measurements in milliseconds and seconds.
+ Example:
+ ::
+ time_stats = TimeRunningStats()
+ time_stats += 0.125 # Add 125ms in seconds
+ print(f"Mean: {time_stats.mean_ms}ms, Total: {time_stats.total_ms}ms")
  """

  @computed_field # type: ignore[misc]