guidellm 0.3.1__py3-none-any.whl → 0.6.0a5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (141) hide show
  1. guidellm/__init__.py +5 -2
  2. guidellm/__main__.py +524 -255
  3. guidellm/backends/__init__.py +33 -0
  4. guidellm/backends/backend.py +109 -0
  5. guidellm/backends/openai.py +340 -0
  6. guidellm/backends/response_handlers.py +428 -0
  7. guidellm/benchmark/__init__.py +69 -39
  8. guidellm/benchmark/benchmarker.py +160 -316
  9. guidellm/benchmark/entrypoints.py +560 -127
  10. guidellm/benchmark/outputs/__init__.py +24 -0
  11. guidellm/benchmark/outputs/console.py +633 -0
  12. guidellm/benchmark/outputs/csv.py +721 -0
  13. guidellm/benchmark/outputs/html.py +473 -0
  14. guidellm/benchmark/outputs/output.py +169 -0
  15. guidellm/benchmark/outputs/serialized.py +69 -0
  16. guidellm/benchmark/profiles.py +718 -0
  17. guidellm/benchmark/progress.py +553 -556
  18. guidellm/benchmark/scenarios/__init__.py +40 -0
  19. guidellm/benchmark/scenarios/chat.json +6 -0
  20. guidellm/benchmark/scenarios/rag.json +6 -0
  21. guidellm/benchmark/schemas/__init__.py +66 -0
  22. guidellm/benchmark/schemas/base.py +402 -0
  23. guidellm/benchmark/schemas/generative/__init__.py +55 -0
  24. guidellm/benchmark/schemas/generative/accumulator.py +841 -0
  25. guidellm/benchmark/schemas/generative/benchmark.py +163 -0
  26. guidellm/benchmark/schemas/generative/entrypoints.py +381 -0
  27. guidellm/benchmark/schemas/generative/metrics.py +927 -0
  28. guidellm/benchmark/schemas/generative/report.py +158 -0
  29. guidellm/data/__init__.py +34 -4
  30. guidellm/data/builders.py +541 -0
  31. guidellm/data/collators.py +16 -0
  32. guidellm/data/config.py +120 -0
  33. guidellm/data/deserializers/__init__.py +49 -0
  34. guidellm/data/deserializers/deserializer.py +141 -0
  35. guidellm/data/deserializers/file.py +223 -0
  36. guidellm/data/deserializers/huggingface.py +94 -0
  37. guidellm/data/deserializers/memory.py +194 -0
  38. guidellm/data/deserializers/synthetic.py +246 -0
  39. guidellm/data/entrypoints.py +52 -0
  40. guidellm/data/loaders.py +190 -0
  41. guidellm/data/preprocessors/__init__.py +27 -0
  42. guidellm/data/preprocessors/formatters.py +410 -0
  43. guidellm/data/preprocessors/mappers.py +196 -0
  44. guidellm/data/preprocessors/preprocessor.py +30 -0
  45. guidellm/data/processor.py +29 -0
  46. guidellm/data/schemas.py +175 -0
  47. guidellm/data/utils/__init__.py +6 -0
  48. guidellm/data/utils/dataset.py +94 -0
  49. guidellm/extras/__init__.py +4 -0
  50. guidellm/extras/audio.py +220 -0
  51. guidellm/extras/vision.py +242 -0
  52. guidellm/logger.py +2 -2
  53. guidellm/mock_server/__init__.py +8 -0
  54. guidellm/mock_server/config.py +84 -0
  55. guidellm/mock_server/handlers/__init__.py +17 -0
  56. guidellm/mock_server/handlers/chat_completions.py +280 -0
  57. guidellm/mock_server/handlers/completions.py +280 -0
  58. guidellm/mock_server/handlers/tokenizer.py +142 -0
  59. guidellm/mock_server/models.py +510 -0
  60. guidellm/mock_server/server.py +238 -0
  61. guidellm/mock_server/utils.py +302 -0
  62. guidellm/scheduler/__init__.py +69 -26
  63. guidellm/scheduler/constraints/__init__.py +49 -0
  64. guidellm/scheduler/constraints/constraint.py +325 -0
  65. guidellm/scheduler/constraints/error.py +411 -0
  66. guidellm/scheduler/constraints/factory.py +182 -0
  67. guidellm/scheduler/constraints/request.py +312 -0
  68. guidellm/scheduler/constraints/saturation.py +722 -0
  69. guidellm/scheduler/environments.py +252 -0
  70. guidellm/scheduler/scheduler.py +137 -368
  71. guidellm/scheduler/schemas.py +358 -0
  72. guidellm/scheduler/strategies.py +617 -0
  73. guidellm/scheduler/worker.py +413 -419
  74. guidellm/scheduler/worker_group.py +712 -0
  75. guidellm/schemas/__init__.py +65 -0
  76. guidellm/schemas/base.py +417 -0
  77. guidellm/schemas/info.py +188 -0
  78. guidellm/schemas/request.py +235 -0
  79. guidellm/schemas/request_stats.py +349 -0
  80. guidellm/schemas/response.py +124 -0
  81. guidellm/schemas/statistics.py +1018 -0
  82. guidellm/{config.py → settings.py} +31 -24
  83. guidellm/utils/__init__.py +71 -8
  84. guidellm/utils/auto_importer.py +98 -0
  85. guidellm/utils/cli.py +132 -5
  86. guidellm/utils/console.py +566 -0
  87. guidellm/utils/encoding.py +778 -0
  88. guidellm/utils/functions.py +159 -0
  89. guidellm/utils/hf_datasets.py +1 -2
  90. guidellm/utils/hf_transformers.py +4 -4
  91. guidellm/utils/imports.py +9 -0
  92. guidellm/utils/messaging.py +1118 -0
  93. guidellm/utils/mixins.py +115 -0
  94. guidellm/utils/random.py +3 -4
  95. guidellm/utils/registry.py +220 -0
  96. guidellm/utils/singleton.py +133 -0
  97. guidellm/utils/synchronous.py +159 -0
  98. guidellm/utils/text.py +163 -50
  99. guidellm/utils/typing.py +41 -0
  100. guidellm/version.py +2 -2
  101. guidellm-0.6.0a5.dist-info/METADATA +364 -0
  102. guidellm-0.6.0a5.dist-info/RECORD +109 -0
  103. guidellm/backend/__init__.py +0 -23
  104. guidellm/backend/backend.py +0 -259
  105. guidellm/backend/openai.py +0 -708
  106. guidellm/backend/response.py +0 -136
  107. guidellm/benchmark/aggregator.py +0 -760
  108. guidellm/benchmark/benchmark.py +0 -837
  109. guidellm/benchmark/output.py +0 -997
  110. guidellm/benchmark/profile.py +0 -409
  111. guidellm/benchmark/scenario.py +0 -104
  112. guidellm/data/prideandprejudice.txt.gz +0 -0
  113. guidellm/dataset/__init__.py +0 -22
  114. guidellm/dataset/creator.py +0 -213
  115. guidellm/dataset/entrypoints.py +0 -42
  116. guidellm/dataset/file.py +0 -92
  117. guidellm/dataset/hf_datasets.py +0 -62
  118. guidellm/dataset/in_memory.py +0 -132
  119. guidellm/dataset/synthetic.py +0 -287
  120. guidellm/objects/__init__.py +0 -18
  121. guidellm/objects/pydantic.py +0 -89
  122. guidellm/objects/statistics.py +0 -953
  123. guidellm/preprocess/__init__.py +0 -3
  124. guidellm/preprocess/dataset.py +0 -374
  125. guidellm/presentation/__init__.py +0 -28
  126. guidellm/presentation/builder.py +0 -27
  127. guidellm/presentation/data_models.py +0 -232
  128. guidellm/presentation/injector.py +0 -66
  129. guidellm/request/__init__.py +0 -18
  130. guidellm/request/loader.py +0 -284
  131. guidellm/request/request.py +0 -79
  132. guidellm/request/types.py +0 -10
  133. guidellm/scheduler/queues.py +0 -25
  134. guidellm/scheduler/result.py +0 -155
  135. guidellm/scheduler/strategy.py +0 -495
  136. guidellm-0.3.1.dist-info/METADATA +0 -329
  137. guidellm-0.3.1.dist-info/RECORD +0 -62
  138. {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/WHEEL +0 -0
  139. {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/entry_points.txt +0 -0
  140. {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/licenses/LICENSE +0 -0
  141. {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/top_level.txt +0 -0
@@ -1,953 +0,0 @@
1
- import math
2
- import time as timer
3
- from collections import defaultdict
4
- from typing import Any, Literal, Optional
5
-
6
- import numpy as np
7
- from pydantic import Field, computed_field
8
-
9
- from guidellm.objects.pydantic import StandardBaseModel, StatusBreakdown
10
-
11
- __all__ = [
12
- "DistributionSummary",
13
- "Percentiles",
14
- "RunningStats",
15
- "StatusDistributionSummary",
16
- "TimeRunningStats",
17
- ]
18
-
19
-
20
class Percentiles(StandardBaseModel):
    """
    A pydantic model representing the standard percentiles of a distribution.
    """

    # Field names encode the percentile: p001 -> 0.1%, p01 -> 1%, p999 -> 99.9%.

    # Lower tail.
    p001: float = Field(description="The 0.1th percentile of the distribution.")
    p01: float = Field(description="The 1st percentile of the distribution.")
    p05: float = Field(description="The 5th percentile of the distribution.")
    p10: float = Field(description="The 10th percentile of the distribution.")

    # Quartiles (p50 is the median).
    p25: float = Field(description="The 25th percentile of the distribution.")
    p50: float = Field(description="The 50th percentile of the distribution.")
    p75: float = Field(description="The 75th percentile of the distribution.")

    # Upper tail.
    p90: float = Field(description="The 90th percentile of the distribution.")
    p95: float = Field(description="The 95th percentile of the distribution.")
    p99: float = Field(description="The 99th percentile of the distribution.")
    p999: float = Field(description="The 99.9th percentile of the distribution.")
58
-
59
-
60
class DistributionSummary(StandardBaseModel):
    """
    A pydantic model representing a statistical summary for a given
    distribution of numerical values.
    """

    mean: float = Field(
        description="The mean/average of the distribution.",
    )
    median: float = Field(
        description="The median of the distribution.",
    )
    mode: float = Field(
        description="The mode of the distribution.",
    )
    variance: float = Field(
        description="The variance of the distribution.",
    )
    std_dev: float = Field(
        description="The standard deviation of the distribution.",
    )
    min: float = Field(
        description="The minimum value of the distribution.",
    )
    max: float = Field(
        description="The maximum value of the distribution.",
    )
    count: int = Field(
        description="The number of values in the distribution.",
    )
    total_sum: float = Field(
        description="The total sum of the values in the distribution.",
    )
    percentiles: Percentiles = Field(
        description="The percentiles of the distribution.",
    )
    cumulative_distribution_function: Optional[list[tuple[float, float]]] = Field(
        description="The cumulative distribution function (CDF) of the distribution.",
        default=None,
    )

    @staticmethod
    def _merge_close_events(
        sorted_events: list[tuple[float, float]],
        epsilon: float,
    ) -> list[tuple[float, float]]:
        """
        Collapse time-sorted (time, value) events that lie within epsilon of the
        first event of the current group, summing their values.

        :param sorted_events: (time, value) pairs already sorted by time.
        :param epsilon: Maximum distance from the group's first timestamp for an
            event to be folded into that group.
        :return: The merged events; each keeps the timestamp of its group's
            first event.
        """
        merged: list[tuple[float, float]] = []
        for event_time, value in sorted_events:
            if merged and abs(merged[-1][0] - event_time) <= epsilon:
                anchor_time, anchor_value = merged[-1]
                merged[-1] = (anchor_time, anchor_value + value)
            else:
                merged.append((event_time, value))
        return merged

    @staticmethod
    def from_distribution_function(
        distribution: list[tuple[float, float]],
        include_cdf: bool = False,
    ) -> "DistributionSummary":
        """
        Create a statistical summary for a given distribution of weighted numerical
        values or a probability distribution function (PDF).
        1.  If the distribution is a PDF, it is expected to be a list of tuples
            where each tuple contains (value, probability). The sum of the
            probabilities should be 1. If it is not, it will be normalized.
        2.  If the distribution is a values distribution function, it is expected
            to be a list of tuples where each tuple contains (value, weight).
            The weights are normalized to a probability distribution function.

        An empty distribution yields a summary whose statistics are all zero.
        ``count`` is the number of (value, weight) entries and ``total_sum`` is
        the unweighted sum of the entry values.

        :param distribution: A list of tuples representing the distribution.
            Each tuple contains (value, weight) or (value, probability).
        :param include_cdf: Whether to include the calculated cumulative distribution
            function (CDF) in the output DistributionSummary.
        :return: An instance of DistributionSummary with calculated values.
        """
        values, weights = zip(*distribution) if distribution else ([], [])
        values = np.array(values)  # type: ignore[assignment]
        weights = np.array(weights)  # type: ignore[assignment]

        # create the PDF, sorted by value
        probabilities = weights / np.sum(weights)  # type: ignore[operator]
        pdf = np.column_stack((values, probabilities))
        pdf = pdf[np.argsort(pdf[:, 0])]
        values = pdf[:, 0]  # type: ignore[assignment]
        probabilities = pdf[:, 1]

        # calculate the CDF
        cumulative_probabilities = np.cumsum(probabilities)
        cdf = np.column_stack((values, cumulative_probabilities))
        has_values = len(values) > 0

        def value_at(quantile: float) -> float:
            # First value whose cumulative probability reaches the quantile;
            # an empty distribution reports 0 for every quantile.
            if not has_values:
                return 0.0
            return cdf[np.argmax(cdf[:, 1] >= quantile), 0].item()

        # calculate statistics
        mean = np.sum(values * probabilities).item()  # type: ignore[attr-defined]
        median = value_at(0.5)
        if has_values:
            # Repeated values (e.g. equally weighted samples from from_values)
            # each carry only part of their value's probability, so sum the mass
            # per distinct value before picking the most probable one. Without
            # this, equally weighted input always reports the minimum as mode.
            distinct_values, inverse = np.unique(values, return_inverse=True)
            mass = np.bincount(inverse, weights=probabilities)
            mode = distinct_values[np.argmax(mass)].item()
        else:
            mode = 0.0
        variance = np.sum((values - mean) ** 2 * probabilities).item()  # type: ignore[attr-defined]
        std_dev = math.sqrt(variance)
        minimum = values[0].item() if has_values else 0
        maximum = values[-1].item() if has_values else 0
        count = len(values)
        total_sum = np.sum(values).item()  # type: ignore[attr-defined]

        return DistributionSummary(
            mean=mean,
            median=median,
            mode=mode,
            variance=variance,
            std_dev=std_dev,
            min=minimum,
            max=maximum,
            count=count,
            total_sum=total_sum,
            percentiles=Percentiles(
                p001=value_at(0.001),
                p01=value_at(0.01),
                p05=value_at(0.05),
                p10=value_at(0.1),
                p25=value_at(0.25),
                p50=value_at(0.50),
                p75=value_at(0.75),
                p90=value_at(0.9),
                p95=value_at(0.95),
                p99=value_at(0.99),
                p999=value_at(0.999),
            ),
            cumulative_distribution_function=cdf.tolist() if include_cdf else None,
        )

    @staticmethod
    def from_values(
        values: list[float],
        weights: Optional[list[float]] = None,
        include_cdf: bool = False,
    ) -> "DistributionSummary":
        """
        Create a statistical summary for a given distribution of numerical values.
        This is a wrapper around from_distribution_function to handle the optional case
        of including weights for the values. If weights are not provided, they are
        automatically set to 1.0 for each value, so each value is equally weighted.

        :param values: A list of numerical values representing the distribution.
        :param weights: A list of weights for each value in the distribution.
            If not provided, all values are equally weighted.
        :param include_cdf: Whether to include the calculated cumulative distribution
            function (CDF) in the output DistributionSummary.
        :return: An instance of DistributionSummary with calculated values.
        :raises ValueError: If values and weights differ in length.
        """
        if weights is None:
            weights = [1.0] * len(values)

        if len(values) != len(weights):
            raise ValueError(
                "The length of values and weights must be the same.",
            )

        return DistributionSummary.from_distribution_function(
            distribution=list(zip(values, weights)),
            include_cdf=include_cdf,
        )

    @staticmethod
    def from_request_times(
        requests: list[tuple[float, float]],
        distribution_type: Literal["concurrency", "rate"],
        include_cdf: bool = False,
        epsilon: float = 1e-6,
    ) -> "DistributionSummary":
        """
        Create a statistical summary for a given distribution of request times.
        Specifically, this is used to measure concurrency or rate of requests
        given an input list containing the start and end time of each request.
        This will first convert the request times into a distribution function
        and then calculate the statistics with from_distribution_function.

        For "concurrency", each interval between consecutive events is weighted
        by its duration and valued by the number of requests active during it.
        For "rate", each interval is valued by the event count recorded at its
        start divided by its duration, and weighted by that duration.

        :param requests: A list of tuples representing the start and end times of
            each request. Example: [(start_1, end_1), (start_2, end_2), ...]
        :param distribution_type: The type of distribution to calculate.
            Either "concurrency" or "rate".
        :param include_cdf: Whether to include the calculated cumulative distribution
            function (CDF) in the output DistributionSummary.
        :param epsilon: The epsilon value for merging close events.
        :return: An instance of DistributionSummary with calculated values.
        :raises ValueError: If distribution_type is not "concurrency" or "rate".
        """
        if distribution_type == "concurrency":
            # convert to delta changes based on when requests were running
            events = [(start, 1) for start, _ in requests] + [
                (end, -1) for _, end in requests
            ]
        elif distribution_type == "rate":
            # convert to events for when requests finished
            global_start = min(start for start, _ in requests) if requests else 0
            events = [(global_start, 1)] + [(end, 1) for _, end in requests]
        else:
            raise ValueError(
                f"Invalid distribution_type '{distribution_type}'. "
                "Must be 'concurrency' or 'rate'."
            )

        # combine any events that are very close together
        flattened_events = DistributionSummary._merge_close_events(
            sorted(events), epsilon
        )

        if distribution_type == "concurrency":
            # convert the deltas into the running number of active requests
            events_over_time: list[tuple[float, float]] = []
            active = 0
            for event_time, delta in flattened_events:
                active += delta  # type: ignore [assignment]
                events_over_time.append((event_time, active))

            flattened_events = events_over_time

        # convert to value distribution function, weighting by interval duration
        distribution: dict[float, float] = defaultdict(float)

        for (start_time, value), (end_time, _) in zip(
            flattened_events, flattened_events[1:]
        ):
            duration = end_time - start_time

            if distribution_type == "concurrency":
                # weight the concurrency value by the duration
                distribution[value] += duration
            elif distribution_type == "rate":
                # weight the rate value by the duration
                distribution[value / duration] += duration

        distribution_list: list[tuple[float, float]] = sorted(distribution.items())

        return DistributionSummary.from_distribution_function(
            distribution=distribution_list,
            include_cdf=include_cdf,
        )

    @staticmethod
    def from_iterable_request_times(
        requests: list[tuple[float, float]],
        first_iter_times: list[float],
        iter_counts: list[int],
        first_iter_counts: Optional[list[int]] = None,
        include_cdf: bool = False,
        epsilon: float = 1e-6,
    ) -> "DistributionSummary":
        """
        Create a statistical summary for a given distribution of request times
        for a request with iterable responses between the start and end.
        For example, this is used to measure auto regressive requests where
        a request is started and at some later point, iterative responses are
        received. This will convert the request times and iterable values into
        a distribution function and then calculate the statistics with
        from_distribution_function.

        Iterations after the first are placed at evenly spaced times between
        the first iteration time and the request end time.

        :param requests: A list of tuples representing the start and end times of
            each request. Example: [(start_1, end_1), (start_2, end_2), ...]
        :param first_iter_times: A list of times when the first iteration of
            each request was received. Must be the same length as requests.
        :param iter_counts: A list of the total number of iterations for each
            request that occurred starting at the first iteration and ending
            at the request end time. Must be the same length as requests.
        :param first_iter_counts: A list of the number of iterations to log
            for the first iteration of each request. For example, when calculating
            total number of tokens processed, this is set to the prompt tokens number.
            If not provided, defaults to 1 for each request.
        :param include_cdf: Whether to include the calculated cumulative distribution
            function (CDF) in the output DistributionSummary.
        :param epsilon: The epsilon value for merging close events.
        :return: An instance of DistributionSummary with calculated values.
        :raises ValueError: If the input lists differ in length.
        """

        if first_iter_counts is None:
            first_iter_counts = [1] * len(requests)

        if (
            len(requests) != len(first_iter_times)
            or len(requests) != len(iter_counts)
            or len(requests) != len(first_iter_counts)
        ):
            # Adjacent literals are concatenated verbatim, so each fragment
            # carries its own trailing space.
            raise ValueError(
                "requests, first_iter_times, iter_counts, and first_iter_counts "
                "must be the same length. "
                f"Given {len(requests)}, {len(first_iter_times)}, {len(iter_counts)}, "
                f"{len(first_iter_counts)}",
            )

        # first break up the requests into individual iterable events
        events = defaultdict(int)
        global_start = min(start for start, _ in requests) if requests else 0
        global_end = max(end for _, end in requests) if requests else 0
        # anchor the timeline so the full span from first start to last end counts
        events[global_start] = 0
        events[global_end] = 0

        for (_, end), first_iter, first_iter_count, total_count in zip(
            requests, first_iter_times, first_iter_counts, iter_counts
        ):
            events[first_iter] += first_iter_count

            if total_count > 1:
                # spread the remaining iterations evenly up to the request end
                iter_latency = (end - first_iter) / (total_count - 1)
                for ind in range(1, total_count):
                    events[first_iter + ind * iter_latency] += 1

        # combine any events that are very close together
        flattened_events = DistributionSummary._merge_close_events(
            sorted(events.items()), epsilon
        )

        # convert to value distribution function, weighting by interval duration
        distribution: dict[float, float] = defaultdict(float)

        for (start_time, count), (end_time, _) in zip(
            flattened_events, flattened_events[1:]
        ):
            duration = end_time - start_time
            distribution[count / duration] += duration

        distribution_list = sorted(distribution.items())

        return DistributionSummary.from_distribution_function(
            distribution=distribution_list,
            include_cdf=include_cdf,
        )
408
-
409
-
410
- class StatusDistributionSummary(
411
- StatusBreakdown[
412
- DistributionSummary,
413
- DistributionSummary,
414
- DistributionSummary,
415
- DistributionSummary,
416
- ]
417
- ):
418
- """
419
- A pydantic model representing a statistical summary for a given
420
- distribution of numerical values grouped by status.
421
- Specifically used to represent the total, successful, incomplete,
422
- and errored values for a benchmark or other statistical summary.
423
- """
424
-
425
@staticmethod
def from_values(
    value_types: list[Literal["successful", "incomplete", "error"]],
    values: list[float],
    weights: Optional[list[float]] = None,
    include_cdf: bool = False,
) -> "StatusDistributionSummary":
    """
    Build per-status statistical summaries for a distribution of numerical
    values. The values are split by their status and a DistributionSummary
    is computed for all values together ("total") and for each of the
    successful, incomplete, and errored subsets via
    DistributionSummary.from_values.

    :param value_types: The status of each value; every entry must be one of
        'successful', 'incomplete', or 'error'.
    :param values: The numerical values of the distribution.
        Must be the same length as value_types.
    :param weights: Optional weight for each value. When omitted, every
        value is weighted 1.0. Must be the same length as value_types.
    :param include_cdf: Whether to include the calculated cumulative distribution
        function (CDF) in each resulting DistributionSummary.
    :return: An instance of StatusDistributionSummary with calculated values.
    :raises ValueError: If a status is not recognized or the input lengths
        do not match.
    """
    allowed_statuses = {"successful", "incomplete", "error"}
    if not all(status in allowed_statuses for status in value_types):
        raise ValueError(
            "value_types must be one of 'successful', 'incomplete', or 'error'. "
            f"Got {value_types} instead.",
        )

    if weights is None:
        weights = [1.0] * len(values)

    if not (len(value_types) == len(values) == len(weights)):
        raise ValueError(
            "The length of value_types, values, and weights must be the same.",
        )

    # Bucket values and weights by status in one pass; statuses were validated
    # above, so every lookup hits an existing bucket.
    bucket_values: dict[str, list[float]] = {
        "successful": [],
        "incomplete": [],
        "error": [],
    }
    bucket_weights: dict[str, list[float]] = {
        "successful": [],
        "incomplete": [],
        "error": [],
    }
    for status, value, weight in zip(value_types, values, weights):
        bucket_values[status].append(value)
        bucket_weights[status].append(weight)

    total_summary = DistributionSummary.from_values(
        values,
        weights,
        include_cdf=include_cdf,
    )
    successful_summary = DistributionSummary.from_values(
        bucket_values["successful"],
        bucket_weights["successful"],
        include_cdf=include_cdf,
    )
    incomplete_summary = DistributionSummary.from_values(
        bucket_values["incomplete"],
        bucket_weights["incomplete"],
        include_cdf=include_cdf,
    )
    errored_summary = DistributionSummary.from_values(
        bucket_values["error"],
        bucket_weights["error"],
        include_cdf=include_cdf,
    )

    return StatusDistributionSummary(
        total=total_summary,
        successful=successful_summary,
        incomplete=incomplete_summary,
        errored=errored_summary,
    )
525
-
526
@staticmethod
def from_request_times(
    request_types: list[Literal["successful", "incomplete", "error"]],
    requests: list[tuple[float, float]],
    distribution_type: Literal["concurrency", "rate"],
    include_cdf: bool = False,
    epsilon: float = 1e-6,
) -> "StatusDistributionSummary":
    """
    Build per-status statistical summaries of concurrency or rate from request
    start/end times. The requests are split by status and
    DistributionSummary.from_request_times is applied to all requests
    together ("total") and to each of the successful, incomplete, and
    errored subsets.

    :param request_types: The status of each request; every entry must be one
        of 'successful', 'incomplete', or 'error'.
    :param requests: A list of tuples representing the start and end times of
        each request. Example: [(start_1, end_1), (start_2, end_2), ...].
        Must be the same length as request_types.
    :param distribution_type: The type of distribution to calculate.
        Either "concurrency" or "rate".
    :param include_cdf: Whether to include the calculated cumulative distribution
        function (CDF) in each resulting DistributionSummary.
    :param epsilon: The epsilon value for merging close events.
    :return: An instance of StatusDistributionSummary with calculated values.
    :raises ValueError: If distribution_type or a status is not recognized,
        or the input lengths do not match.
    """
    if distribution_type not in {"concurrency", "rate"}:
        raise ValueError(
            f"Invalid distribution_type '{distribution_type}'. "
            "Must be 'concurrency' or 'rate'."
        )

    allowed_statuses = {"successful", "incomplete", "error"}
    if not all(status in allowed_statuses for status in request_types):
        raise ValueError(
            "request_types must be one of 'successful', 'incomplete', or 'error'. "
            f"Got {request_types} instead.",
        )

    if len(request_types) != len(requests):
        raise ValueError(
            "The length of request_types and requests must be the same. "
            f"Got {len(request_types)} and {len(requests)} instead.",
        )

    # Bucket the (start, end) pairs by status in one pass; statuses were
    # validated above, so every lookup hits an existing bucket.
    buckets: dict[str, list[tuple[float, float]]] = {
        "successful": [],
        "incomplete": [],
        "error": [],
    }
    for status, request in zip(request_types, requests):
        buckets[status].append(request)

    total_summary = DistributionSummary.from_request_times(
        requests,
        distribution_type=distribution_type,
        include_cdf=include_cdf,
        epsilon=epsilon,
    )
    successful_summary = DistributionSummary.from_request_times(
        buckets["successful"],
        distribution_type=distribution_type,
        include_cdf=include_cdf,
        epsilon=epsilon,
    )
    incomplete_summary = DistributionSummary.from_request_times(
        buckets["incomplete"],
        distribution_type=distribution_type,
        include_cdf=include_cdf,
        epsilon=epsilon,
    )
    errored_summary = DistributionSummary.from_request_times(
        buckets["error"],
        distribution_type=distribution_type,
        include_cdf=include_cdf,
        epsilon=epsilon,
    )

    return StatusDistributionSummary(
        total=total_summary,
        successful=successful_summary,
        incomplete=incomplete_summary,
        errored=errored_summary,
    )
637
-
638
@staticmethod
def from_iterable_request_times(
    request_types: list[Literal["successful", "incomplete", "error"]],
    requests: list[tuple[float, float]],
    first_iter_times: list[float],
    iter_counts: Optional[list[int]] = None,
    first_iter_counts: Optional[list[int]] = None,
    include_cdf: bool = False,
    epsilon: float = 1e-6,
) -> "StatusDistributionSummary":
    """
    Create a statistical summary by status for a distribution of request times
    where each request produces iterable responses between its start and end
    (for example, auto regressive requests that stream iterations after the
    first one arrives). The work is delegated to
    DistributionSummary.from_iterable_request_times, once for all requests and
    once for each status.

    :param request_types: List of status types for each request in the
        distribution. Must be one of 'successful', 'incomplete', or 'error'.
    :param requests: A list of tuples representing the start and end times of
        each request. Example: [(start_1, end_1), (start_2, end_2), ...].
        Must be the same length as request_types.
    :param first_iter_times: A list of times when the first iteration of
        each request was received. Must be the same length as requests.
    :param iter_counts: A list of the total number of iterations for each
        request. Must be the same length as requests.
        If not provided, defaults to 1 for each request.
    :param first_iter_counts: A list of the number of iterations to log
        for the first iteration of each request. Must be the same length as
        requests. If not provided, defaults to 1 for each request.
    :param include_cdf: Forwarded to
        DistributionSummary.from_iterable_request_times for every summary.
    :param epsilon: Forwarded to
        DistributionSummary.from_iterable_request_times for every summary.
    :return: An instance of StatusDistributionSummary with the total,
        successful, incomplete, and errored summaries populated.
    :raises ValueError: If request_types contains an unknown status or if the
        input lists do not all have the same length.
    """
    if any(
        type_ not in {"successful", "incomplete", "error"}
        for type_ in request_types
    ):
        raise ValueError(
            "request_types must be one of 'successful', 'incomplete', or 'error'. "
            f"Got {request_types} instead.",
        )

    if iter_counts is None:
        iter_counts = [1] * len(requests)

    if first_iter_counts is None:
        first_iter_counts = [1] * len(requests)

    if (
        len(request_types) != len(requests)
        or len(requests) != len(first_iter_times)
        or len(requests) != len(iter_counts)
        or len(requests) != len(first_iter_counts)
    ):
        raise ValueError(
            "request_types, requests, first_iter_times, iter_counts, and "
            "first_iter_counts must be the same length. "
            f"Given {len(request_types)}, {len(requests)}, "
            f"{len(first_iter_times)}, {len(iter_counts)}, "
            f"{len(first_iter_counts)}",
        )

    def _columns_for(status: str):
        """Return the four per-request columns restricted to one status."""
        rows = [
            (request, first_iter_time, iter_count, first_iter_count)
            for (
                type_,
                request,
                first_iter_time,
                iter_count,
                first_iter_count,
            ) in zip(
                request_types,
                requests,
                first_iter_times,
                iter_counts,
                first_iter_counts,
            )
            if type_ == status
        ]
        if not rows:
            # zip(*[]) yields nothing to unpack, so hand back empty columns.
            return [], [], [], []
        # Transpose the row tuples back into one tuple per column.
        return tuple(zip(*rows))

    def _summarize(columns):
        """Build one DistributionSummary from a set of four columns."""
        return DistributionSummary.from_iterable_request_times(
            *columns,
            include_cdf=include_cdf,
            epsilon=epsilon,
        )

    return StatusDistributionSummary(
        total=_summarize(
            (requests, first_iter_times, iter_counts, first_iter_counts)
        ),
        successful=_summarize(_columns_for("successful")),
        incomplete=_summarize(_columns_for("incomplete")),
        errored=_summarize(_columns_for("error")),
    )
812
-
813
-
814
class RunningStats(StandardBaseModel):
    """
    Running statistics over a stream of values.

    Tracks how many values were added (count), their sum (total), and the
    most recent value (last). The mean and rate are derived on access:

    1. start_time defaults to the result of timer.time() at construction.
    2. count, total, and last default to 0.
    3. mean is total / count.
    4. rate is total / (timer.time() - start_time).
    """

    start_time: float = Field(
        default_factory=timer.time,
        description=(
            "The time the running statistics object was created. "
            "This is used to calculate the rate of the statistics."
        ),
    )
    count: int = Field(
        default=0,
        description="The number of values added to the running statistics.",
    )
    total: float = Field(
        default=0.0,
        description="The total sum of the values added to the running statistics.",
    )
    last: float = Field(
        default=0.0,
        description="The last value added to the running statistics.",
    )

    @computed_field  # type: ignore[misc]
    @property
    def mean(self) -> float:
        """
        :return: The mean of the running statistics (total / count).
            If count is 0, return 0.0.
        """
        if self.count == 0:
            return 0.0
        return self.total / self.count

    @computed_field  # type: ignore[misc]
    @property
    def rate(self) -> float:
        """
        :return: The rate of the running statistics
            (total / (timer.time() - start_time)).
            If count is 0, or if no time has elapsed since start_time,
            return 0.0.
        """
        if self.count == 0:
            return 0.0

        elapsed = timer.time() - self.start_time
        if elapsed <= 0.0:
            # Reading the rate within the same clock tick as start_time (or
            # with a start_time in the future) would otherwise divide by zero
            # or report a negative rate.
            return 0.0
        return self.total / elapsed

    def __add__(self, value: Any) -> float:
        """
        Enable the use of the + operator to add a value to the running statistics.
        Note that this updates the object in place before returning.

        :param value: The value to add to the running statistics.
        :return: The mean of the running statistics after the update.
        :raises ValueError: If value is not an int or float.
        """
        if not isinstance(value, (int, float)):
            raise ValueError(
                f"Value must be an int or float, got {type(value)} instead.",
            )

        self.update(value)

        return self.mean

    def __iadd__(self, value: Any) -> "RunningStats":
        """
        Enable the use of the += operator to add a value to the running statistics.

        :param value: The value to add to the running statistics.
        :return: The running statistics object.
        :raises ValueError: If value is not an int or float.
        """
        if not isinstance(value, (int, float)):
            raise ValueError(
                f"Value must be an int or float, got {type(value)} instead.",
            )

        self.update(value)

        return self

    def update(self, value: float, count: int = 1) -> None:
        """
        Update the running statistics with a new value.

        :param value: The new value to add to the running statistics. It is
            added to the total once, regardless of count.
        :param count: The number of times to 'count' for the value.
            If not provided, defaults to 1.
        """
        self.count += count
        self.total += value
        self.last = value
911
-
912
-
913
class TimeRunningStats(RunningStats):
    """
    Running statistics for a stream of time values.

    Extends RunningStats with computed fields that expose the total, last,
    mean, and rate values scaled by 1000.0, i.e. converted to milliseconds
    (this presumes the recorded values are in seconds).
    """

    @computed_field  # type: ignore[misc]
    @property
    def total_ms(self) -> float:
        """
        :return: The summed time scaled by 1000.0 (milliseconds).
        """
        total_time = self.total
        return total_time * 1000.0

    @computed_field  # type: ignore[misc]
    @property
    def last_ms(self) -> float:
        """
        :return: The most recently added time scaled by 1000.0 (milliseconds).
        """
        last_time = self.last
        return last_time * 1000.0

    @computed_field  # type: ignore[misc]
    @property
    def mean_ms(self) -> float:
        """
        :return: The mean time scaled by 1000.0 (milliseconds).
        """
        mean_time = self.mean
        return mean_time * 1000.0

    @computed_field  # type: ignore[misc]
    @property
    def rate_ms(self) -> float:
        """
        :return: The rate of the running statistics scaled by 1000.0.
        """
        current_rate = self.rate
        return current_rate * 1000.0