guidellm 0.3.1__py3-none-any.whl → 0.6.0a5__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registries. It is provided for informational purposes only.
Files changed (141)
  1. guidellm/__init__.py +5 -2
  2. guidellm/__main__.py +524 -255
  3. guidellm/backends/__init__.py +33 -0
  4. guidellm/backends/backend.py +109 -0
  5. guidellm/backends/openai.py +340 -0
  6. guidellm/backends/response_handlers.py +428 -0
  7. guidellm/benchmark/__init__.py +69 -39
  8. guidellm/benchmark/benchmarker.py +160 -316
  9. guidellm/benchmark/entrypoints.py +560 -127
  10. guidellm/benchmark/outputs/__init__.py +24 -0
  11. guidellm/benchmark/outputs/console.py +633 -0
  12. guidellm/benchmark/outputs/csv.py +721 -0
  13. guidellm/benchmark/outputs/html.py +473 -0
  14. guidellm/benchmark/outputs/output.py +169 -0
  15. guidellm/benchmark/outputs/serialized.py +69 -0
  16. guidellm/benchmark/profiles.py +718 -0
  17. guidellm/benchmark/progress.py +553 -556
  18. guidellm/benchmark/scenarios/__init__.py +40 -0
  19. guidellm/benchmark/scenarios/chat.json +6 -0
  20. guidellm/benchmark/scenarios/rag.json +6 -0
  21. guidellm/benchmark/schemas/__init__.py +66 -0
  22. guidellm/benchmark/schemas/base.py +402 -0
  23. guidellm/benchmark/schemas/generative/__init__.py +55 -0
  24. guidellm/benchmark/schemas/generative/accumulator.py +841 -0
  25. guidellm/benchmark/schemas/generative/benchmark.py +163 -0
  26. guidellm/benchmark/schemas/generative/entrypoints.py +381 -0
  27. guidellm/benchmark/schemas/generative/metrics.py +927 -0
  28. guidellm/benchmark/schemas/generative/report.py +158 -0
  29. guidellm/data/__init__.py +34 -4
  30. guidellm/data/builders.py +541 -0
  31. guidellm/data/collators.py +16 -0
  32. guidellm/data/config.py +120 -0
  33. guidellm/data/deserializers/__init__.py +49 -0
  34. guidellm/data/deserializers/deserializer.py +141 -0
  35. guidellm/data/deserializers/file.py +223 -0
  36. guidellm/data/deserializers/huggingface.py +94 -0
  37. guidellm/data/deserializers/memory.py +194 -0
  38. guidellm/data/deserializers/synthetic.py +246 -0
  39. guidellm/data/entrypoints.py +52 -0
  40. guidellm/data/loaders.py +190 -0
  41. guidellm/data/preprocessors/__init__.py +27 -0
  42. guidellm/data/preprocessors/formatters.py +410 -0
  43. guidellm/data/preprocessors/mappers.py +196 -0
  44. guidellm/data/preprocessors/preprocessor.py +30 -0
  45. guidellm/data/processor.py +29 -0
  46. guidellm/data/schemas.py +175 -0
  47. guidellm/data/utils/__init__.py +6 -0
  48. guidellm/data/utils/dataset.py +94 -0
  49. guidellm/extras/__init__.py +4 -0
  50. guidellm/extras/audio.py +220 -0
  51. guidellm/extras/vision.py +242 -0
  52. guidellm/logger.py +2 -2
  53. guidellm/mock_server/__init__.py +8 -0
  54. guidellm/mock_server/config.py +84 -0
  55. guidellm/mock_server/handlers/__init__.py +17 -0
  56. guidellm/mock_server/handlers/chat_completions.py +280 -0
  57. guidellm/mock_server/handlers/completions.py +280 -0
  58. guidellm/mock_server/handlers/tokenizer.py +142 -0
  59. guidellm/mock_server/models.py +510 -0
  60. guidellm/mock_server/server.py +238 -0
  61. guidellm/mock_server/utils.py +302 -0
  62. guidellm/scheduler/__init__.py +69 -26
  63. guidellm/scheduler/constraints/__init__.py +49 -0
  64. guidellm/scheduler/constraints/constraint.py +325 -0
  65. guidellm/scheduler/constraints/error.py +411 -0
  66. guidellm/scheduler/constraints/factory.py +182 -0
  67. guidellm/scheduler/constraints/request.py +312 -0
  68. guidellm/scheduler/constraints/saturation.py +722 -0
  69. guidellm/scheduler/environments.py +252 -0
  70. guidellm/scheduler/scheduler.py +137 -368
  71. guidellm/scheduler/schemas.py +358 -0
  72. guidellm/scheduler/strategies.py +617 -0
  73. guidellm/scheduler/worker.py +413 -419
  74. guidellm/scheduler/worker_group.py +712 -0
  75. guidellm/schemas/__init__.py +65 -0
  76. guidellm/schemas/base.py +417 -0
  77. guidellm/schemas/info.py +188 -0
  78. guidellm/schemas/request.py +235 -0
  79. guidellm/schemas/request_stats.py +349 -0
  80. guidellm/schemas/response.py +124 -0
  81. guidellm/schemas/statistics.py +1018 -0
  82. guidellm/{config.py → settings.py} +31 -24
  83. guidellm/utils/__init__.py +71 -8
  84. guidellm/utils/auto_importer.py +98 -0
  85. guidellm/utils/cli.py +132 -5
  86. guidellm/utils/console.py +566 -0
  87. guidellm/utils/encoding.py +778 -0
  88. guidellm/utils/functions.py +159 -0
  89. guidellm/utils/hf_datasets.py +1 -2
  90. guidellm/utils/hf_transformers.py +4 -4
  91. guidellm/utils/imports.py +9 -0
  92. guidellm/utils/messaging.py +1118 -0
  93. guidellm/utils/mixins.py +115 -0
  94. guidellm/utils/random.py +3 -4
  95. guidellm/utils/registry.py +220 -0
  96. guidellm/utils/singleton.py +133 -0
  97. guidellm/utils/synchronous.py +159 -0
  98. guidellm/utils/text.py +163 -50
  99. guidellm/utils/typing.py +41 -0
  100. guidellm/version.py +2 -2
  101. guidellm-0.6.0a5.dist-info/METADATA +364 -0
  102. guidellm-0.6.0a5.dist-info/RECORD +109 -0
  103. guidellm/backend/__init__.py +0 -23
  104. guidellm/backend/backend.py +0 -259
  105. guidellm/backend/openai.py +0 -708
  106. guidellm/backend/response.py +0 -136
  107. guidellm/benchmark/aggregator.py +0 -760
  108. guidellm/benchmark/benchmark.py +0 -837
  109. guidellm/benchmark/output.py +0 -997
  110. guidellm/benchmark/profile.py +0 -409
  111. guidellm/benchmark/scenario.py +0 -104
  112. guidellm/data/prideandprejudice.txt.gz +0 -0
  113. guidellm/dataset/__init__.py +0 -22
  114. guidellm/dataset/creator.py +0 -213
  115. guidellm/dataset/entrypoints.py +0 -42
  116. guidellm/dataset/file.py +0 -92
  117. guidellm/dataset/hf_datasets.py +0 -62
  118. guidellm/dataset/in_memory.py +0 -132
  119. guidellm/dataset/synthetic.py +0 -287
  120. guidellm/objects/__init__.py +0 -18
  121. guidellm/objects/pydantic.py +0 -89
  122. guidellm/objects/statistics.py +0 -953
  123. guidellm/preprocess/__init__.py +0 -3
  124. guidellm/preprocess/dataset.py +0 -374
  125. guidellm/presentation/__init__.py +0 -28
  126. guidellm/presentation/builder.py +0 -27
  127. guidellm/presentation/data_models.py +0 -232
  128. guidellm/presentation/injector.py +0 -66
  129. guidellm/request/__init__.py +0 -18
  130. guidellm/request/loader.py +0 -284
  131. guidellm/request/request.py +0 -79
  132. guidellm/request/types.py +0 -10
  133. guidellm/scheduler/queues.py +0 -25
  134. guidellm/scheduler/result.py +0 -155
  135. guidellm/scheduler/strategy.py +0 -495
  136. guidellm-0.3.1.dist-info/METADATA +0 -329
  137. guidellm-0.3.1.dist-info/RECORD +0 -62
  138. {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/WHEEL +0 -0
  139. {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/entry_points.txt +0 -0
  140. {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/licenses/LICENSE +0 -0
  141. {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/top_level.txt +0 -0
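
Several of the changes above are package renames that affect import paths: guidellm/backend/ is replaced by guidellm/backends/, and guidellm/config.py is renamed to guidellm/settings.py. As a hedged sketch of the resulting module paths, based only on the file names listed above (the symbols each module exports are not part of this listing):

import guidellm.backends.backend  # 0.6.0a5; replaces guidellm.backend.backend
import guidellm.backends.openai  # 0.6.0a5; replaces guidellm.backend.openai
import guidellm.settings  # 0.6.0a5; replaces guidellm.config
import guidellm.benchmark.schemas.generative.metrics  # new module shown in the hunk below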
guidellm/benchmark/schemas/generative/metrics.py (new in 0.6.0a5)
@@ -0,0 +1,927 @@
+ """
+ Metrics schemas for generative AI benchmark results and performance analysis.
+
+ This module defines comprehensive metric structures for tracking and analyzing
+ generative AI benchmark performance across multiple dimensions including request
+ statistics, token metrics, and domain-specific measurements for text, image, video,
+ and audio generation. It provides statistical summaries with distribution analysis
+ across successful, incomplete, and errored requests, along with scheduler-level
+ performance metrics for request processing and queueing behavior.
+ """
+
+ from __future__ import annotations
+
+ from typing import Literal
+
+ from pydantic import Field
+
+ from guidellm.benchmark.schemas.generative.accumulator import (
+     GenerativeBenchmarkAccumulator,
+ )
+ from guidellm.scheduler import SchedulerState
+ from guidellm.schemas import (
+     GenerativeRequestStats,
+     StandardBaseDict,
+     StatusBreakdown,
+     StatusDistributionSummary,
+ )
+
+ __all__ = [
+     "GenerativeAudioMetricsSummary",
+     "GenerativeImageMetricsSummary",
+     "GenerativeMetrics",
+     "GenerativeMetricsSummary",
+     "GenerativeTextMetricsSummary",
+     "GenerativeVideoMetricsSummary",
+     "SchedulerMetrics",
+     "StatusTypes",
+     "TimedMetricTypeAlias",
+ ]
+
+
+ TimedMetricTypeAlias = (
+     tuple[float, float, int | float | None, int | float | None] | None
+ )
+ """Timed metric tuple containing start_time, end_time, input_value, and output_value."""
+
+ StatusTypes = Literal["successful", "incomplete", "errored"]
+ """Request status category for metric compilation."""
+
+ # Constants for tuple indexing
+ _TIMED_METRIC_START_TIME_INDEX = 0
+ _TIMED_METRIC_END_TIME_INDEX = 1
+ _TIMED_METRIC_INPUT_VALUE_INDEX = 2
+ _TIMED_METRIC_OUTPUT_VALUE_INDEX = 3
+
+
+ class SchedulerMetrics(StandardBaseDict):
+     """
+     Scheduler timing and performance statistics.
+
+     Tracks overall benchmark timing, request counts by status, and detailed internal
+     scheduler performance metrics including queue times, processing delays, and
+     request execution statistics. Used to analyze scheduler efficiency and identify
+     bottlenecks in request processing pipelines.
+     """
+
+     # Overall timings for the scheduler
+     start_time: float = Field(
+         description="Unix timestamp when the benchmark run started"
+     )
+     request_start_time: float = Field(
+         description="Unix timestamp when first request was made"
+     )
+     measure_start_time: float = Field(
+         description="Unix timestamp when measurement period started"
+     )
+     measure_end_time: float = Field(
+         description="Unix timestamp when measurement period ended"
+     )
+     request_end_time: float = Field(
+         description="Unix timestamp when last request completed"
+     )
+     end_time: float = Field(description="Unix timestamp when the benchmark run ended")
+
+     # Request details tracked by the scheduler
+     requests_made: StatusBreakdown[int, int, int, int] = Field(
+         description="Request counts by status: successful, incomplete, errored, total"
+     )
+
+     # Scheduler internal performance timings
+     queued_time_avg: float = Field(
+         description="Avg time requests spent in the queue (seconds)"
+     )
+     resolve_start_delay_avg: float = Field(
+         description="Avg delay before worker begins resolving req after dequeue (sec)"
+     )
+     resolve_targeted_start_delay_avg: float = Field(
+         description="Avg delay to targeted resolve start time (seconds)"
+     )
+     request_start_delay_avg: float = Field(
+         description="Avg delay before request starts after resolve (seconds)"
+     )
+     request_targeted_start_delay_avg: float = Field(
+         description="Avg delay to targeted request start time (seconds)"
+     )
+     request_time_avg: float = Field(description="Avg request execution time (seconds)")
+     resolve_end_delay_avg: float = Field(
+         description="Avg delay after request completes before resolve ends (seconds)"
+     )
+     resolve_time_avg: float = Field(
+         description="Avg total resolve time including request (seconds)"
+     )
+     finalized_delay_avg: float = Field(
+         description="Avg delay from resolve end to request finalization (seconds)"
+     )
+     processed_delay_avg: float = Field(
+         description="Avg delay from finalization to processing completion (seconds)"
+     )
+
+     @classmethod
+     def compile(
+         cls,
+         accumulator: GenerativeBenchmarkAccumulator,
+         scheduler_state: SchedulerState,
+     ) -> SchedulerMetrics:
+         """
+         Compile scheduler metrics from accumulator and scheduler state.
+
+         :param accumulator: Benchmark accumulator containing timing and metric data
+         :param scheduler_state: Scheduler state with execution timing information
+         :return: Compiled scheduler metrics with performance statistics
+         """
+         return SchedulerMetrics(
+             # Overall timings for the scheduler
+             start_time=scheduler_state.start_time,
+             request_start_time=accumulator.timings.finalized_request_start,
+             measure_start_time=accumulator.timings.finalized_measure_start,
+             measure_end_time=accumulator.timings.finalized_measure_end,
+             request_end_time=accumulator.timings.finalized_request_end,
+             end_time=scheduler_state.end_time or -1.0,
+             # Request details tracked by the scheduler
+             requests_made=accumulator.scheduler_metrics.requests_made,
+             # Scheduler internal performance timings
+             queued_time_avg=accumulator.scheduler_metrics.queued_time.mean or -1.0,
+             resolve_start_delay_avg=(
+                 accumulator.scheduler_metrics.resolve_start_delay.mean or -1.0
+             ),
+             resolve_targeted_start_delay_avg=(
+                 accumulator.scheduler_metrics.resolve_targeted_start_delay.mean or -1.0
+             ),
+             request_start_delay_avg=(
+                 accumulator.scheduler_metrics.request_start_delay.mean or -1.0
+             ),
+             request_targeted_start_delay_avg=(
+                 accumulator.scheduler_metrics.request_targeted_start_delay.mean or -1.0
+             ),
+             request_time_avg=accumulator.scheduler_metrics.request_time.mean or -1.0,
+             resolve_end_delay_avg=(
+                 accumulator.scheduler_metrics.resolve_end_delay.mean or -1.0
+             ),
+             resolve_time_avg=accumulator.scheduler_metrics.resolve_time.mean or -1.0,
+             finalized_delay_avg=(
+                 accumulator.scheduler_metrics.finalized_delay.mean or -1.0
+             ),
+             processed_delay_avg=(
+                 accumulator.scheduler_metrics.processed_delay.mean or -1.0
+             ),
+         )
+
+
+ class GenerativeMetricsSummary(StandardBaseDict):
+     """
+     Statistical summaries for input, output, and total metrics.
+
+     Provides distribution summaries across successful, incomplete, and errored
+     requests for absolute values, per-second rates, and concurrency levels.
+     """
+
+     input: StatusDistributionSummary | None = Field(
+         description="Distribution of input metric values"
+     )
+     input_per_second: StatusDistributionSummary | None = Field(
+         description="Distribution of input metric rates per second"
+     )
+     input_concurrency: StatusDistributionSummary | None = Field(
+         description="Distribution of concurrent input metric values"
+     )
+
+     output: StatusDistributionSummary | None = Field(
+         description="Distribution of output metric values"
+     )
+     output_per_second: StatusDistributionSummary | None = Field(
+         description="Distribution of output metric rates per second"
+     )
+     output_concurrency: StatusDistributionSummary | None = Field(
+         description="Distribution of concurrent output metric values"
+     )
+
+     total: StatusDistributionSummary | None = Field(
+         description="Distribution of total metric values (input + output)"
+     )
+     total_per_second: StatusDistributionSummary | None = Field(
+         description="Distribution of total metric rates per second"
+     )
+     total_concurrency: StatusDistributionSummary | None = Field(
+         description="Distribution of concurrent total metric values"
+     )
+
+     @classmethod
+     def compile(
+         cls,
+         property_name: str,
+         successful: list[GenerativeRequestStats],
+         incomplete: list[GenerativeRequestStats],
+         errored: list[GenerativeRequestStats],
+     ) -> GenerativeMetricsSummary | None:
+         """
+         Compile metrics summary from request statistics for a specific property.
+
+         :param property_name: Name of the property to extract from request metrics
+         :param successful: Successfully completed request statistics
+         :param incomplete: Incomplete or cancelled request statistics
+         :param errored: Failed request statistics
+         :return: Compiled metrics summary or None if no data available
+         """
+         successful_metrics = cls.extract_property_metrics_for_summary(
+             successful, property_name
+         )
+         incomplete_metrics = cls.extract_property_metrics_for_summary(
+             incomplete, property_name
+         )
+         errored_metrics = cls.extract_property_metrics_for_summary(
+             errored, property_name
+         )
+
+         return cls.compile_timed_metrics(
+             successful=successful_metrics,
+             incomplete=incomplete_metrics,
+             errored=errored_metrics,
+         )
+
+     @classmethod
+     def compile_timed_metrics(
+         cls,
+         successful: list[TimedMetricTypeAlias],
+         incomplete: list[TimedMetricTypeAlias],
+         errored: list[TimedMetricTypeAlias],
+     ) -> GenerativeMetricsSummary | None:
+         """
+         Compile metrics summary from timed metric tuples.
+
+         :param successful: Timed metrics from successful requests
+         :param incomplete: Timed metrics from incomplete requests
+         :param errored: Timed metrics from errored requests
+         :return: Compiled metrics summary or None if no data available
+         """
+
+         def _compile_metric_distributions(
+             metrics_by_status: dict[StatusTypes, list[TimedMetricTypeAlias]],
+             value_index: int,
+         ) -> tuple[
+             StatusDistributionSummary | None,
+             StatusDistributionSummary | None,
+             StatusDistributionSummary | None,
+             dict[StatusTypes, list[float]],
+             dict[StatusTypes, list[tuple[float, float]]],
+             dict[StatusTypes, list[tuple[float, float, float]]],
+         ]:
+             """Helper to compile value, rate, and concurrency distributions."""
+             value_lists: dict[StatusTypes, list[float]] = {
+                 status: [
+                     float(metric[value_index] or 0.0)
+                     for metric in metrics
+                     if metric is not None
+                 ]
+                 for status, metrics in metrics_by_status.items()
+             }
+             value_dist = StatusDistributionSummary.from_values(
+                 successful=value_lists["successful"],
+                 incomplete=value_lists["incomplete"],
+                 errored=value_lists["errored"],
+             )
+
+             if value_dist.total_sum == 0.0:
+                 return None, None, None, value_lists, {}, {}
+
+             rate_lists: dict[StatusTypes, list[tuple[float, float]]] = {
+                 status: [
+                     (  # type: ignore[misc]
+                         metric[_TIMED_METRIC_END_TIME_INDEX],
+                         float(metric[value_index] or 0.0),
+                     )
+                     for metric in metrics
+                     if metric is not None
+                 ]
+                 for status, metrics in metrics_by_status.items()
+             }
+             rate_dist = StatusDistributionSummary.rate_distribution_from_timings(
+                 successful=rate_lists["successful"],
+                 incomplete=rate_lists["incomplete"],
+                 errored=rate_lists["errored"],
+             )
+
+             concurrency_lists: dict[StatusTypes, list[tuple[float, float, float]]] = {
+                 status: [
+                     (  # type: ignore[misc]
+                         metric[_TIMED_METRIC_START_TIME_INDEX],
+                         metric[_TIMED_METRIC_END_TIME_INDEX],
+                         float(metric[value_index] or 0.0),
+                     )
+                     for metric in metrics
+                     if metric is not None
+                 ]
+                 for status, metrics in metrics_by_status.items()
+             }
+             concurrency_dist = (
+                 StatusDistributionSummary.concurrency_distribution_from_timings(
+                     successful=concurrency_lists["successful"],
+                     incomplete=concurrency_lists["incomplete"],
+                     errored=concurrency_lists["errored"],
+                 )
+             )
+
+             return (
+                 value_dist,
+                 rate_dist,
+                 concurrency_dist,
+                 value_lists,
+                 rate_lists,
+                 concurrency_lists,
+             )
+
+         metrics_by_status: dict[StatusTypes, list[TimedMetricTypeAlias]] = {
+             "successful": successful,
+             "incomplete": incomplete,
+             "errored": errored,
+         }
+
+         # Calculate input distributions
+         (
+             input_value_dist,
+             input_rate_dist,
+             input_concurrency_dist,
+             input_value_lists,
+             input_rate_lists,
+             input_concurrency_lists,
+         ) = _compile_metric_distributions(
+             metrics_by_status, _TIMED_METRIC_INPUT_VALUE_INDEX
+         )
+
+         # Calculate output distributions
+         (
+             output_value_dist,
+             output_rate_dist,
+             output_concurrency_dist,
+             output_value_lists,
+             output_rate_lists,
+             output_concurrency_lists,
+         ) = _compile_metric_distributions(
+             metrics_by_status, _TIMED_METRIC_OUTPUT_VALUE_INDEX
+         )
+
+         # Calculate total distributions if both input and output have data
+         if input_value_dist is not None and output_value_dist is not None:
+             total_value_dist = StatusDistributionSummary.from_values(
+                 successful=(
+                     input_value_lists["successful"] + output_value_lists["successful"]
+                 ),
+                 incomplete=(
+                     input_value_lists["incomplete"] + output_value_lists["incomplete"]
+                 ),
+                 errored=input_value_lists["errored"] + output_value_lists["errored"],
+             )
+             total_rate_dist = StatusDistributionSummary.rate_distribution_from_timings(
+                 successful=(
+                     input_rate_lists["successful"] + output_rate_lists["successful"]
+                 ),
+                 incomplete=(
+                     input_rate_lists["incomplete"] + output_rate_lists["incomplete"]
+                 ),
+                 errored=input_rate_lists["errored"] + output_rate_lists["errored"],
+             )
+             total_concurrency_dist = (
+                 StatusDistributionSummary.concurrency_distribution_from_timings(
+                     successful=(
+                         input_concurrency_lists["successful"]
+                         + output_concurrency_lists["successful"]
+                     ),
+                     incomplete=(
+                         input_concurrency_lists["incomplete"]
+                         + output_concurrency_lists["incomplete"]
+                     ),
+                     errored=(
+                         input_concurrency_lists["errored"]
+                         + output_concurrency_lists["errored"]
+                     ),
+                 )
+             )
+         else:
+             total_value_dist = None
+             total_rate_dist = None
+             total_concurrency_dist = None
+
+         return GenerativeMetricsSummary(
+             input=input_value_dist,
+             input_per_second=input_rate_dist,
+             input_concurrency=input_concurrency_dist,
+             output=output_value_dist,
+             output_per_second=output_rate_dist,
+             output_concurrency=output_concurrency_dist,
+             total=total_value_dist,
+             total_per_second=total_rate_dist,
+             total_concurrency=total_concurrency_dist,
+         )
+
+     @classmethod
+     def extract_property_metrics_for_summary(
+         cls, stats_list: list[GenerativeRequestStats], property_name: str
+     ) -> list[TimedMetricTypeAlias]:
+         """
+         Extract timed metrics for a specific property from request statistics.
+
+         :param stats_list: List of request statistics to extract from
+         :param property_name: Name of the property to extract from metrics
+         :return: List of tuples containing
+             (start_time, end_time, input_value, output_value)
+         """
+         return [
+             (
+                 stats.request_start_time,
+                 stats.request_end_time,
+                 getattr(stats.input_metrics, property_name),
+                 getattr(stats.output_metrics, property_name),
+             )
+             for stats in stats_list
+             if (
+                 stats.request_start_time
+                 and stats.request_end_time
+                 and (
+                     getattr(stats.input_metrics, property_name) is not None
+                     or getattr(stats.output_metrics, property_name) is not None
+                 )
+             )
+         ]
+
+
+ class GenerativeTextMetricsSummary(StandardBaseDict):
+     """
+     Text-specific metric summaries for generative benchmarks.
+
+     Tracks token, word, and character-level metrics across input, output, and
+     total usage for text generation workloads.
+     """
+
+     tokens: GenerativeMetricsSummary | None = Field(
+         description="Token count metrics and distributions"
+     )
+     words: GenerativeMetricsSummary | None = Field(
+         description="Word count metrics and distributions"
+     )
+     characters: GenerativeMetricsSummary | None = Field(
+         description="Character count metrics and distributions"
+     )
+
+     @classmethod
+     def compile(
+         cls,
+         successful: list[GenerativeRequestStats],
+         incomplete: list[GenerativeRequestStats],
+         errored: list[GenerativeRequestStats],
+     ) -> GenerativeTextMetricsSummary:
+         """
+         Compile text metrics summary from request statistics.
+
+         :param successful: Successfully completed request statistics
+         :param incomplete: Incomplete/cancelled request statistics
+         :param errored: Failed request statistics
+         :return: Compiled text metrics summary
+         """
+         return GenerativeTextMetricsSummary(
+             tokens=GenerativeMetricsSummary.compile(
+                 property_name="text_tokens",
+                 successful=successful,
+                 incomplete=incomplete,
+                 errored=errored,
+             ),
+             words=GenerativeMetricsSummary.compile(
+                 property_name="text_words",
+                 successful=successful,
+                 incomplete=incomplete,
+                 errored=errored,
+             ),
+             characters=GenerativeMetricsSummary.compile(
+                 property_name="text_characters",
+                 successful=successful,
+                 incomplete=incomplete,
+                 errored=errored,
+             ),
+         )
+
+
+ class GenerativeImageMetricsSummary(StandardBaseDict):
+     """
+     Image-specific metric summaries for generative benchmarks.
+
+     Tracks token, image count, pixel, and byte-level metrics across input, output,
+     and total usage for image generation workloads.
+     """
+
+     tokens: GenerativeMetricsSummary | None = Field(
+         description="Image token count metrics and distributions"
+     )
+     images: GenerativeMetricsSummary | None = Field(
+         description="Image count metrics and distributions"
+     )
+     pixels: GenerativeMetricsSummary | None = Field(
+         description="Pixel count metrics and distributions"
+     )
+     bytes: GenerativeMetricsSummary | None = Field(
+         description="Byte size metrics and distributions"
+     )
+
+     @classmethod
+     def compile(
+         cls,
+         successful: list[GenerativeRequestStats],
+         incomplete: list[GenerativeRequestStats],
+         errored: list[GenerativeRequestStats],
+     ) -> GenerativeImageMetricsSummary:
+         """
+         Compile image metrics summary from request statistics.
+
+         :param successful: Successfully completed request statistics
+         :param incomplete: Incomplete/cancelled request statistics
+         :param errored: Failed request statistics
+         :return: Compiled image metrics summary
+         """
+         return GenerativeImageMetricsSummary(
+             tokens=GenerativeMetricsSummary.compile(
+                 property_name="image_tokens",
+                 successful=successful,
+                 incomplete=incomplete,
+                 errored=errored,
+             ),
+             images=GenerativeMetricsSummary.compile(
+                 property_name="image_count",
+                 successful=successful,
+                 incomplete=incomplete,
+                 errored=errored,
+             ),
+             pixels=GenerativeMetricsSummary.compile(
+                 property_name="image_pixels",
+                 successful=successful,
+                 incomplete=incomplete,
+                 errored=errored,
+             ),
+             bytes=GenerativeMetricsSummary.compile(
+                 property_name="image_bytes",
+                 successful=successful,
+                 incomplete=incomplete,
+                 errored=errored,
+             ),
+         )
+
+
+ class GenerativeVideoMetricsSummary(StandardBaseDict):
+     """
+     Video-specific metric summaries for generative benchmarks.
+
+     Tracks token, frame count, duration, and byte-level metrics across input,
+     output, and total usage for video generation workloads.
+     """
+
+     tokens: GenerativeMetricsSummary | None = Field(
+         description="Video token count metrics and distributions"
+     )
+     frames: GenerativeMetricsSummary | None = Field(
+         description="Frame count metrics and distributions"
+     )
+     seconds: GenerativeMetricsSummary | None = Field(
+         description="Duration metrics in seconds and distributions"
+     )
+     bytes: GenerativeMetricsSummary | None = Field(
+         description="Byte size metrics and distributions"
+     )
+
+     @classmethod
+     def compile(
+         cls,
+         successful: list[GenerativeRequestStats],
+         incomplete: list[GenerativeRequestStats],
+         errored: list[GenerativeRequestStats],
+     ) -> GenerativeVideoMetricsSummary:
+         """
+         Compile video metrics summary from request statistics.
+
+         :param successful: Successfully completed request statistics
+         :param incomplete: Incomplete/cancelled request statistics
+         :param errored: Failed request statistics
+         :return: Compiled video metrics summary
+         """
+         return GenerativeVideoMetricsSummary(
+             tokens=GenerativeMetricsSummary.compile(
+                 property_name="video_tokens",
+                 successful=successful,
+                 incomplete=incomplete,
+                 errored=errored,
+             ),
+             frames=GenerativeMetricsSummary.compile(
+                 property_name="video_frames",
+                 successful=successful,
+                 incomplete=incomplete,
+                 errored=errored,
+             ),
+             seconds=GenerativeMetricsSummary.compile(
+                 property_name="video_seconds",
+                 successful=successful,
+                 incomplete=incomplete,
+                 errored=errored,
+             ),
+             bytes=GenerativeMetricsSummary.compile(
+                 property_name="video_bytes",
+                 successful=successful,
+                 incomplete=incomplete,
+                 errored=errored,
+             ),
+         )
+
+
+ class GenerativeAudioMetricsSummary(StandardBaseDict):
+     """
+     Audio-specific metric summaries for generative benchmarks.
+
+     Tracks token, sample count, duration, and byte-level metrics across input,
+     output, and total usage for audio generation workloads.
+     """
+
+     tokens: GenerativeMetricsSummary | None = Field(
+         description="Audio token count metrics and distributions"
+     )
+     samples: GenerativeMetricsSummary | None = Field(
+         description="Sample count metrics and distributions"
+     )
+     seconds: GenerativeMetricsSummary | None = Field(
+         description="Duration metrics in seconds and distributions"
+     )
+     bytes: GenerativeMetricsSummary | None = Field(
+         description="Byte size metrics and distributions"
+     )
+
+     @classmethod
+     def compile(
+         cls,
+         successful: list[GenerativeRequestStats],
+         incomplete: list[GenerativeRequestStats],
+         errored: list[GenerativeRequestStats],
+     ) -> GenerativeAudioMetricsSummary:
+         """
+         Compile audio metrics summary from request statistics.
+
+         :param successful: Successfully completed request statistics
+         :param incomplete: Incomplete/cancelled request statistics
+         :param errored: Failed request statistics
+         :return: Compiled audio metrics summary
+         """
+         return GenerativeAudioMetricsSummary(
+             tokens=GenerativeMetricsSummary.compile(
+                 property_name="audio_tokens",
+                 successful=successful,
+                 incomplete=incomplete,
+                 errored=errored,
+             ),
+             samples=GenerativeMetricsSummary.compile(
+                 property_name="audio_samples",
+                 successful=successful,
+                 incomplete=incomplete,
+                 errored=errored,
+             ),
+             seconds=GenerativeMetricsSummary.compile(
+                 property_name="audio_seconds",
+                 successful=successful,
+                 incomplete=incomplete,
+                 errored=errored,
+             ),
+             bytes=GenerativeMetricsSummary.compile(
+                 property_name="audio_bytes",
+                 successful=successful,
+                 incomplete=incomplete,
+                 errored=errored,
+             ),
+         )
+
+
+ class GenerativeMetrics(StandardBaseDict):
+     """
+     Comprehensive metrics for generative AI benchmarks.
+
+     Aggregates request statistics, token metrics, timing distributions, and
+     domain-specific measurements across text, image, video, and audio modalities.
+     Provides detailed statistical summaries including distribution analysis for
+     throughput, latency, concurrency, and resource utilization metrics across
+     successful, incomplete, and errored requests.
+     """
+
+     # Request stats
+     request_totals: StatusBreakdown[int, int, int, int] = Field(
+         description="Request counts by status: successful, incomplete, errored, total"
+     )
+     requests_per_second: StatusDistributionSummary = Field(
+         description="Distribution of requests per second across benchmark execution"
+     )
+     request_concurrency: StatusDistributionSummary = Field(
+         description="Distribution of concurrent request counts during execution"
+     )
+     request_latency: StatusDistributionSummary = Field(
+         description="Distribution of request latencies for completed requests"
+     )
+     request_streaming_iterations_count: StatusDistributionSummary = Field(
+         description="Distribution of stream iterations for completed requests"
+     )
+
+     # General token stats
+     prompt_token_count: StatusDistributionSummary = Field(
+         description="Distribution of prompt token counts by request status"
+     )
+     output_token_count: StatusDistributionSummary = Field(
+         description="Distribution of output token counts by request status"
+     )
+     total_token_count: StatusDistributionSummary = Field(
+         description="Distribution of total token counts by request status"
+     )
+     time_to_first_token_ms: StatusDistributionSummary = Field(
+         description="Distribution of first token latencies in milliseconds"
+     )
+     time_per_output_token_ms: StatusDistributionSummary = Field(
+         description="Distribution of average time per output token in milliseconds"
+     )
+     inter_token_latency_ms: StatusDistributionSummary = Field(
+         description="Distribution of inter-token latencies in milliseconds"
+     )
+     prompt_tokens_per_second: StatusDistributionSummary = Field(
+         description="Distribution of prompt token processing rates"
+     )
+     output_tokens_per_second: StatusDistributionSummary = Field(
+         description="Distribution of output token generation rates"
+     )
+     tokens_per_second: StatusDistributionSummary = Field(
+         description="Distribution of total token throughput including prompt and output"
+     )
+     output_tokens_per_iteration: StatusDistributionSummary = Field(
+         description="Distribution of output tokens generated per streaming iteration"
+     )
+     iter_tokens_per_iteration: StatusDistributionSummary = Field(
+         description=(
+             "Distribution of output tokens (without first) generated per "
+             "streaming iteration"
+         )
+     )
+
+     # Domain specific stats
+     text: GenerativeTextMetricsSummary = Field(
+         description="Text-specific metrics for tokens, words, and characters"
+     )
+     image: GenerativeImageMetricsSummary = Field(
+         description="Image-specific metrics for tokens, images, pixels, and bytes"
+     )
+     video: GenerativeVideoMetricsSummary = Field(
+         description="Video-specific metrics for tokens, frames, duration, and bytes"
+     )
+     audio: GenerativeAudioMetricsSummary = Field(
+         description="Audio-specific metrics for tokens, samples, duration, and bytes"
+     )
+
+     @classmethod
+     def compile(cls, accumulator: GenerativeBenchmarkAccumulator) -> GenerativeMetrics:
+         """
+         Compile comprehensive generative metrics from benchmark accumulator.
+
+         :param accumulator: Benchmark accumulator with completed request statistics
+         :return: Compiled generative metrics with all distributions and summaries
+         :raises ValueError: If measure_start and measure_end/request_end are not set
+         """
+         start_time = accumulator.timings.finalized_measure_start
+         end_time = accumulator.timings.finalized_measure_end
+
+         if start_time == -1.0 or end_time == -1.0:
+             raise ValueError(
+                 "Cannot compile GenerativeMetrics: "
+                 "No measurement start or end times available."
+             )
+
+         successful = accumulator.completed.get_within_range(start_time, end_time)
+         incomplete = accumulator.incomplete.get_within_range(start_time, end_time)
+         errored = accumulator.errored.get_within_range(start_time, end_time)
+
+         return GenerativeMetrics(
+             # Request stats
+             request_totals=StatusBreakdown(
+                 successful=len(successful),
+                 incomplete=len(incomplete),
+                 errored=len(errored),
+                 total=(len(successful) + len(incomplete) + len(errored)),
+             ),
+             requests_per_second=StatusDistributionSummary.rate_distribution_from_timings_function(
+                 function=lambda req: req.request_end_time,
+                 successful=successful,
+                 incomplete=incomplete,
+                 errored=errored,
+                 start_time=start_time,
+                 end_time=end_time,
+             ),
+             request_concurrency=StatusDistributionSummary.concurrency_distribution_from_timings_function(
+                 function=(
+                     lambda req: (req.request_start_time, req.request_end_time)
+                     if req.request_start_time is not None
+                     and req.request_end_time is not None
+                     else None
+                 ),
+                 successful=successful,
+                 incomplete=incomplete,
+                 errored=errored,
+                 start_time=start_time,
+                 end_time=end_time,
+             ),
+             request_latency=StatusDistributionSummary.from_values_function(
+                 function=lambda req: req.request_latency or 0.0,
+                 successful=successful,
+                 incomplete=incomplete,
+                 errored=errored,
+             ),
+             request_streaming_iterations_count=StatusDistributionSummary.from_values_function(
+                 function=lambda req: req.info.timings.request_iterations or 0.0,
+                 successful=successful,
+                 incomplete=incomplete,
+                 errored=errored,
+             ),
+             # General token stats
+             prompt_token_count=StatusDistributionSummary.from_values_function(
+                 function=lambda req: req.prompt_tokens or 0.0,
+                 successful=successful,
+                 incomplete=incomplete,
+                 errored=errored,
+             ),
+             output_token_count=StatusDistributionSummary.from_values_function(
+                 function=lambda req: req.output_tokens or 0.0,
+                 successful=successful,
+                 incomplete=incomplete,
+                 errored=errored,
+             ),
+             total_token_count=StatusDistributionSummary.from_values_function(
+                 function=lambda req: req.total_tokens or 0.0,
+                 successful=successful,
+                 incomplete=incomplete,
+                 errored=errored,
+             ),
+             time_to_first_token_ms=StatusDistributionSummary.from_values_function(
+                 function=lambda req: req.time_to_first_token_ms or 0.0,
+                 successful=successful,
+                 incomplete=incomplete,
+                 errored=errored,
+             ),
+             time_per_output_token_ms=StatusDistributionSummary.from_values_function(
+                 function=lambda req: (
+                     req.time_per_output_token_ms or 0.0,
+                     req.output_tokens or 0.0,
+                 ),
+                 successful=successful,
+                 incomplete=incomplete,
+                 errored=errored,
+             ),
+             inter_token_latency_ms=StatusDistributionSummary.from_values_function(
+                 function=lambda req: (
+                     req.inter_token_latency_ms or 0.0,
+                     (req.output_tokens or 1.0) - 1.0,
+                 ),
+                 successful=successful,
+                 incomplete=incomplete,
+                 errored=errored,
+             ),
+             prompt_tokens_per_second=StatusDistributionSummary.rate_distribution_from_timings_function(
+                 function=lambda req: req.prompt_tokens_timing,
+                 successful=successful,
+                 incomplete=incomplete,
+                 errored=errored,
+             ),
+             output_tokens_per_second=StatusDistributionSummary.rate_distribution_from_timings_function(
+                 function=lambda req: req.output_tokens_timings,
+                 successful=successful,
+                 incomplete=incomplete,
+                 errored=errored,
+             ),
+             tokens_per_second=StatusDistributionSummary.rate_distribution_from_timings_function(
+                 function=lambda req: req.total_tokens_timings,
+                 successful=successful,
+                 incomplete=incomplete,
+                 errored=errored,
+             ),
+             output_tokens_per_iteration=StatusDistributionSummary.from_values_function(
+                 function=lambda req: [
+                     tokens for (_timing, tokens) in req.output_tokens_timings
+                 ],
+                 successful=successful,
+                 incomplete=incomplete,
+                 errored=errored,
+             ),
+             iter_tokens_per_iteration=StatusDistributionSummary.from_values_function(
+                 function=lambda req: [
+                     tokens for (_timing, tokens) in req.iter_tokens_timings
+                 ],
+                 successful=successful,
+                 incomplete=incomplete,
+                 errored=errored,
+             ),
+             # Domain-specific stats
+             text=GenerativeTextMetricsSummary.compile(
+                 successful=successful, incomplete=incomplete, errored=errored
+             ),
+             image=GenerativeImageMetricsSummary.compile(
+                 successful=successful, incomplete=incomplete, errored=errored
+             ),
+             video=GenerativeVideoMetricsSummary.compile(
+                 successful=successful, incomplete=incomplete, errored=errored
+             ),
+             audio=GenerativeAudioMetricsSummary.compile(
+                 successful=successful, incomplete=incomplete, errored=errored
+             ),
+         )
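
The hunk above is the entire new metrics.py module. As a rough illustration of how its public entry points fit together, here is a minimal, hedged sketch that relies only on the signatures visible in this diff (compile_timed_metrics and the TimedMetricTypeAlias tuple shape); the distribution math lives in StatusDistributionSummary under guidellm/schemas/statistics.py, which is listed above but not shown here, so treat the output shapes as indicative only:

# Minimal sketch, assuming guidellm 0.6.0a5 is installed.
# Each tuple follows TimedMetricTypeAlias:
# (start_time, end_time, input_value, output_value).
from guidellm.benchmark.schemas.generative.metrics import GenerativeMetricsSummary

timed_metrics = [
    (1000.0, 1001.5, 512, 128),   # e.g. 512 input tokens, 128 output tokens
    (1000.5, 1003.0, 256, None),  # missing values are coerced to 0.0 by the helpers above
]

summary = GenerativeMetricsSummary.compile_timed_metrics(
    successful=timed_metrics,
    incomplete=[],
    errored=[],
)

# Per the code above, a field is None when its value distribution sums to zero;
# otherwise it holds per-status distributions for values, rates, and concurrency.
if summary is not None and summary.input is not None:
    print(summary.input)
    print(summary.output_per_second)
    print(summary.total_concurrency)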