guidellm 0.3.1__py3-none-any.whl → 0.6.0a5__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (141)
  1. guidellm/__init__.py +5 -2
  2. guidellm/__main__.py +524 -255
  3. guidellm/backends/__init__.py +33 -0
  4. guidellm/backends/backend.py +109 -0
  5. guidellm/backends/openai.py +340 -0
  6. guidellm/backends/response_handlers.py +428 -0
  7. guidellm/benchmark/__init__.py +69 -39
  8. guidellm/benchmark/benchmarker.py +160 -316
  9. guidellm/benchmark/entrypoints.py +560 -127
  10. guidellm/benchmark/outputs/__init__.py +24 -0
  11. guidellm/benchmark/outputs/console.py +633 -0
  12. guidellm/benchmark/outputs/csv.py +721 -0
  13. guidellm/benchmark/outputs/html.py +473 -0
  14. guidellm/benchmark/outputs/output.py +169 -0
  15. guidellm/benchmark/outputs/serialized.py +69 -0
  16. guidellm/benchmark/profiles.py +718 -0
  17. guidellm/benchmark/progress.py +553 -556
  18. guidellm/benchmark/scenarios/__init__.py +40 -0
  19. guidellm/benchmark/scenarios/chat.json +6 -0
  20. guidellm/benchmark/scenarios/rag.json +6 -0
  21. guidellm/benchmark/schemas/__init__.py +66 -0
  22. guidellm/benchmark/schemas/base.py +402 -0
  23. guidellm/benchmark/schemas/generative/__init__.py +55 -0
  24. guidellm/benchmark/schemas/generative/accumulator.py +841 -0
  25. guidellm/benchmark/schemas/generative/benchmark.py +163 -0
  26. guidellm/benchmark/schemas/generative/entrypoints.py +381 -0
  27. guidellm/benchmark/schemas/generative/metrics.py +927 -0
  28. guidellm/benchmark/schemas/generative/report.py +158 -0
  29. guidellm/data/__init__.py +34 -4
  30. guidellm/data/builders.py +541 -0
  31. guidellm/data/collators.py +16 -0
  32. guidellm/data/config.py +120 -0
  33. guidellm/data/deserializers/__init__.py +49 -0
  34. guidellm/data/deserializers/deserializer.py +141 -0
  35. guidellm/data/deserializers/file.py +223 -0
  36. guidellm/data/deserializers/huggingface.py +94 -0
  37. guidellm/data/deserializers/memory.py +194 -0
  38. guidellm/data/deserializers/synthetic.py +246 -0
  39. guidellm/data/entrypoints.py +52 -0
  40. guidellm/data/loaders.py +190 -0
  41. guidellm/data/preprocessors/__init__.py +27 -0
  42. guidellm/data/preprocessors/formatters.py +410 -0
  43. guidellm/data/preprocessors/mappers.py +196 -0
  44. guidellm/data/preprocessors/preprocessor.py +30 -0
  45. guidellm/data/processor.py +29 -0
  46. guidellm/data/schemas.py +175 -0
  47. guidellm/data/utils/__init__.py +6 -0
  48. guidellm/data/utils/dataset.py +94 -0
  49. guidellm/extras/__init__.py +4 -0
  50. guidellm/extras/audio.py +220 -0
  51. guidellm/extras/vision.py +242 -0
  52. guidellm/logger.py +2 -2
  53. guidellm/mock_server/__init__.py +8 -0
  54. guidellm/mock_server/config.py +84 -0
  55. guidellm/mock_server/handlers/__init__.py +17 -0
  56. guidellm/mock_server/handlers/chat_completions.py +280 -0
  57. guidellm/mock_server/handlers/completions.py +280 -0
  58. guidellm/mock_server/handlers/tokenizer.py +142 -0
  59. guidellm/mock_server/models.py +510 -0
  60. guidellm/mock_server/server.py +238 -0
  61. guidellm/mock_server/utils.py +302 -0
  62. guidellm/scheduler/__init__.py +69 -26
  63. guidellm/scheduler/constraints/__init__.py +49 -0
  64. guidellm/scheduler/constraints/constraint.py +325 -0
  65. guidellm/scheduler/constraints/error.py +411 -0
  66. guidellm/scheduler/constraints/factory.py +182 -0
  67. guidellm/scheduler/constraints/request.py +312 -0
  68. guidellm/scheduler/constraints/saturation.py +722 -0
  69. guidellm/scheduler/environments.py +252 -0
  70. guidellm/scheduler/scheduler.py +137 -368
  71. guidellm/scheduler/schemas.py +358 -0
  72. guidellm/scheduler/strategies.py +617 -0
  73. guidellm/scheduler/worker.py +413 -419
  74. guidellm/scheduler/worker_group.py +712 -0
  75. guidellm/schemas/__init__.py +65 -0
  76. guidellm/schemas/base.py +417 -0
  77. guidellm/schemas/info.py +188 -0
  78. guidellm/schemas/request.py +235 -0
  79. guidellm/schemas/request_stats.py +349 -0
  80. guidellm/schemas/response.py +124 -0
  81. guidellm/schemas/statistics.py +1018 -0
  82. guidellm/{config.py → settings.py} +31 -24
  83. guidellm/utils/__init__.py +71 -8
  84. guidellm/utils/auto_importer.py +98 -0
  85. guidellm/utils/cli.py +132 -5
  86. guidellm/utils/console.py +566 -0
  87. guidellm/utils/encoding.py +778 -0
  88. guidellm/utils/functions.py +159 -0
  89. guidellm/utils/hf_datasets.py +1 -2
  90. guidellm/utils/hf_transformers.py +4 -4
  91. guidellm/utils/imports.py +9 -0
  92. guidellm/utils/messaging.py +1118 -0
  93. guidellm/utils/mixins.py +115 -0
  94. guidellm/utils/random.py +3 -4
  95. guidellm/utils/registry.py +220 -0
  96. guidellm/utils/singleton.py +133 -0
  97. guidellm/utils/synchronous.py +159 -0
  98. guidellm/utils/text.py +163 -50
  99. guidellm/utils/typing.py +41 -0
  100. guidellm/version.py +2 -2
  101. guidellm-0.6.0a5.dist-info/METADATA +364 -0
  102. guidellm-0.6.0a5.dist-info/RECORD +109 -0
  103. guidellm/backend/__init__.py +0 -23
  104. guidellm/backend/backend.py +0 -259
  105. guidellm/backend/openai.py +0 -708
  106. guidellm/backend/response.py +0 -136
  107. guidellm/benchmark/aggregator.py +0 -760
  108. guidellm/benchmark/benchmark.py +0 -837
  109. guidellm/benchmark/output.py +0 -997
  110. guidellm/benchmark/profile.py +0 -409
  111. guidellm/benchmark/scenario.py +0 -104
  112. guidellm/data/prideandprejudice.txt.gz +0 -0
  113. guidellm/dataset/__init__.py +0 -22
  114. guidellm/dataset/creator.py +0 -213
  115. guidellm/dataset/entrypoints.py +0 -42
  116. guidellm/dataset/file.py +0 -92
  117. guidellm/dataset/hf_datasets.py +0 -62
  118. guidellm/dataset/in_memory.py +0 -132
  119. guidellm/dataset/synthetic.py +0 -287
  120. guidellm/objects/__init__.py +0 -18
  121. guidellm/objects/pydantic.py +0 -89
  122. guidellm/objects/statistics.py +0 -953
  123. guidellm/preprocess/__init__.py +0 -3
  124. guidellm/preprocess/dataset.py +0 -374
  125. guidellm/presentation/__init__.py +0 -28
  126. guidellm/presentation/builder.py +0 -27
  127. guidellm/presentation/data_models.py +0 -232
  128. guidellm/presentation/injector.py +0 -66
  129. guidellm/request/__init__.py +0 -18
  130. guidellm/request/loader.py +0 -284
  131. guidellm/request/request.py +0 -79
  132. guidellm/request/types.py +0 -10
  133. guidellm/scheduler/queues.py +0 -25
  134. guidellm/scheduler/result.py +0 -155
  135. guidellm/scheduler/strategy.py +0 -495
  136. guidellm-0.3.1.dist-info/METADATA +0 -329
  137. guidellm-0.3.1.dist-info/RECORD +0 -62
  138. {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/WHEEL +0 -0
  139. {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/entry_points.txt +0 -0
  140. {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/licenses/LICENSE +0 -0
  141. {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/top_level.txt +0 -0
guidellm/benchmark/outputs/csv.py
@@ -0,0 +1,721 @@
+ """
+ CSV output formatter for benchmark results.
+
+ This module provides the GenerativeBenchmarkerCSV class which exports benchmark
+ reports to CSV format with comprehensive metrics including timing, throughput,
+ latency, modality data, and scheduler information. The CSV output uses multi-row
+ headers to organize metrics hierarchically and includes both summary statistics
+ and distribution percentiles.
+ """
+
+ from __future__ import annotations
+
+ import csv
+ import json
+ from pathlib import Path
+ from typing import Annotated, Any, ClassVar, Literal
+
+ from pydantic import Field
+
+ from guidellm.benchmark.outputs.output import GenerativeBenchmarkerOutput
+ from guidellm.benchmark.schemas import GenerativeBenchmark, GenerativeBenchmarksReport
+ from guidellm.schemas import DistributionSummary, StatusDistributionSummary
+ from guidellm.utils import safe_format_timestamp
+
+ __all__ = ["GenerativeBenchmarkerCSV"]
+
+ TIMESTAMP_FORMAT: Annotated[str, "Format string for timestamp output in CSV files"] = (
+     "%Y-%m-%d %H:%M:%S"
+ )
+ MODALITY_METRICS: Annotated[
+     dict[str, list[tuple[str, str]]],
+     "Mapping of modality types to their metric names and display labels",
+ ] = {
+     "text": [
+         ("tokens", "Tokens"),
+         ("words", "Words"),
+         ("characters", "Characters"),
+     ],
+     "image": [
+         ("tokens", "Tokens"),
+         ("images", "Images"),
+         ("pixels", "Pixels"),
+         ("bytes", "Bytes"),
+     ],
+     "video": [
+         ("tokens", "Tokens"),
+         ("frames", "Frames"),
+         ("seconds", "Seconds"),
+         ("bytes", "Bytes"),
+     ],
+     "audio": [
+         ("tokens", "Tokens"),
+         ("samples", "Samples"),
+         ("seconds", "Seconds"),
+         ("bytes", "Bytes"),
+     ],
+ }
+
+
+ @GenerativeBenchmarkerOutput.register("csv")
+ class GenerativeBenchmarkerCSV(GenerativeBenchmarkerOutput):
+     """
+     CSV output formatter for benchmark results.
+
+     Exports comprehensive benchmark data to CSV format with multi-row headers
+     organizing metrics into categories including run information, timing, request
+     counts, latency, throughput, modality-specific data, and scheduler state. Each
+     benchmark run becomes a row with statistical distributions represented as
+     mean, median, standard deviation, and percentiles.
+
+     :cvar DEFAULT_FILE: Default filename for CSV output
+     """
+
+     DEFAULT_FILE: ClassVar[str] = "benchmarks.csv"
+
+     @classmethod
+     def validated_kwargs(
+         cls, output_path: str | Path | None, **_kwargs
+     ) -> dict[str, Any]:
+         """
+         Validate and normalize constructor keyword arguments.
+
+         :param output_path: Path for CSV output file or directory
+         :param _kwargs: Additional keyword arguments (ignored)
+         :return: Normalized keyword arguments dictionary
+         """
+         new_kwargs = {}
+         if output_path is not None:
+             new_kwargs["output_path"] = (
+                 Path(output_path) if not isinstance(output_path, Path) else output_path
+             )
+         return new_kwargs
+
+     output_path: Path = Field(
+         default_factory=lambda: Path.cwd(),
+         description=(
+             "Path where the CSV file will be saved, defaults to current directory"
+         ),
+     )
+
+     async def finalize(self, report: GenerativeBenchmarksReport) -> Path:
+         """
+         Save the benchmark report as a CSV file.
+
+         :param report: The completed benchmark report
+         :return: Path to the saved CSV file
+         """
+         output_path = self.output_path
+         if output_path.is_dir():
+             output_path = output_path / GenerativeBenchmarkerCSV.DEFAULT_FILE
+         output_path.parent.mkdir(parents=True, exist_ok=True)
+
+         with output_path.open("w", newline="") as file:
+             writer = csv.writer(file)
+             headers: list[list[str]] = []
+             rows: list[list[str | int | float]] = []
+
+             for benchmark in report.benchmarks:
+                 benchmark_headers: list[list[str]] = []
+                 benchmark_values: list[str | int | float] = []
+
+                 self._add_run_info(benchmark, benchmark_headers, benchmark_values)
+                 self._add_benchmark_info(benchmark, benchmark_headers, benchmark_values)
+                 self._add_timing_info(benchmark, benchmark_headers, benchmark_values)
+                 self._add_request_counts(benchmark, benchmark_headers, benchmark_values)
+                 self._add_request_latency_metrics(
+                     benchmark, benchmark_headers, benchmark_values
+                 )
+                 self._add_server_throughput_metrics(
+                     benchmark, benchmark_headers, benchmark_values
+                 )
+                 for modality_name in ["text", "image", "video", "audio"]:
+                     self._add_modality_metrics(
+                         benchmark,
+                         modality_name,  # type: ignore[arg-type]
+                         benchmark_headers,
+                         benchmark_values,
+                     )
+                 self._add_scheduler_info(benchmark, benchmark_headers, benchmark_values)
+                 self._add_runtime_info(report, benchmark_headers, benchmark_values)
+
+                 if not headers:
+                     headers = benchmark_headers
+                 rows.append(benchmark_values)
+
+             self._write_multirow_header(writer, headers)
+             for row in rows:
+                 writer.writerow(row)
+
+         return output_path
+
+     def _write_multirow_header(self, writer: Any, headers: list[list[str]]) -> None:
+         """
+         Write multi-row header to CSV for hierarchical metric organization.
+
+         :param writer: CSV writer instance
+         :param headers: List of column header hierarchies as string lists
+         """
+         max_rows = max((len(col) for col in headers), default=0)
+         for row_idx in range(max_rows):
+             row = [col[row_idx] if row_idx < len(col) else "" for col in headers]
+             writer.writerow(row)
+
+     def _add_field(
+         self,
+         headers: list[list[str]],
+         values: list[str | int | float],
+         group: str,
+         field_name: str,
+         value: Any,
+         units: str = "",
+     ) -> None:
+         """
+         Add a single field to headers and values lists.
+
+         :param headers: List of header hierarchies to append to
+         :param values: List of values to append to
+         :param group: Top-level category for the field
+         :param field_name: Name of the field
+         :param value: Value for the field
+         :param units: Optional units for the field
+         """
+         headers.append([group, field_name, units])
+         values.append(value)
+
+     def _add_runtime_info(
+         self,
+         report: GenerativeBenchmarksReport,
+         headers: list[list[str]],
+         values: list[str | int | float],
+     ) -> None:
+         """
+         Add global metadata and environment information.
+
+         :param report: Benchmark report to extract global info from
+         :param headers: List of header hierarchies to append to
+         :param values: List of values to append to
+         """
+         self._add_field(
+             headers,
+             values,
+             "Runtime Info",
+             "Metadata",
+             report.metadata.model_dump_json(),
+         )
+         self._add_field(
+             headers,
+             values,
+             "Runtime Info",
+             "Arguments",
+             report.args.model_dump_json(),
+         )
+
+     def _add_run_info(
+         self,
+         benchmark: GenerativeBenchmark,
+         headers: list[list[str]],
+         values: list[str | int | float],
+     ) -> None:
+         """
+         Add overall run identification and configuration information.
+
+         :param benchmark: Benchmark data to extract run info from
+         :param headers: List of header hierarchies to append to
+         :param values: List of values to append to
+         """
+         self._add_field(headers, values, "Run Info", "Run ID", benchmark.config.run_id)
+         self._add_field(
+             headers, values, "Run Info", "Run Index", benchmark.config.run_index
+         )
+         self._add_field(
+             headers,
+             values,
+             "Run Info",
+             "Profile",
+             benchmark.config.profile.model_dump_json(),
+         )
+         self._add_field(
+             headers,
+             values,
+             "Run Info",
+             "Requests",
+             json.dumps(benchmark.config.requests),
+         )
+         self._add_field(
+             headers, values, "Run Info", "Backend", json.dumps(benchmark.config.backend)
+         )
+         self._add_field(
+             headers,
+             values,
+             "Run Info",
+             "Environment",
+             json.dumps(benchmark.config.environment),
+         )
+
+     def _add_benchmark_info(
+         self,
+         benchmark: GenerativeBenchmark,
+         headers: list[list[str]],
+         values: list[str | int | float],
+     ) -> None:
+         """
+         Add individual benchmark configuration details.
+
+         :param benchmark: Benchmark data to extract configuration from
+         :param headers: List of header hierarchies to append to
+         :param values: List of values to append to
+         """
+         self._add_field(headers, values, "Benchmark", "Type", benchmark.type_)
+         self._add_field(headers, values, "Benchmark", "ID", benchmark.config.id_)
+         self._add_field(
+             headers, values, "Benchmark", "Strategy", benchmark.config.strategy.type_
+         )
+         self._add_field(
+             headers,
+             values,
+             "Benchmark",
+             "Constraints",
+             json.dumps(benchmark.config.constraints),
+         )
+
+     def _add_timing_info(
+         self,
+         benchmark: GenerativeBenchmark,
+         headers: list[list[str]],
+         values: list[str | int | float],
+     ) -> None:
+         """
+         Add timing information including start, end, duration, warmup, and cooldown.
+
+         :param benchmark: Benchmark data to extract timing from
+         :param headers: List of header hierarchies to append to
+         :param values: List of values to append to
+         """
+         timing_fields: list[tuple[str, Any]] = [
+             ("Start Time", benchmark.scheduler_metrics.start_time),
+             ("Request Start Time", benchmark.scheduler_metrics.request_start_time),
+             ("Measure Start Time", benchmark.scheduler_metrics.measure_start_time),
+             ("Measure End Time", benchmark.scheduler_metrics.measure_end_time),
+             ("Request End Time", benchmark.scheduler_metrics.request_end_time),
+             ("End Time", benchmark.scheduler_metrics.end_time),
+         ]
+         for field_name, timestamp in timing_fields:
+             self._add_field(
+                 headers,
+                 values,
+                 "Timings",
+                 field_name,
+                 safe_format_timestamp(timestamp, TIMESTAMP_FORMAT),
+             )
+
+         duration_fields: list[tuple[str, float | str]] = [
+             ("Duration", benchmark.duration),
+             ("Warmup", benchmark.warmup_duration),
+             ("Cooldown", benchmark.cooldown_duration),
+         ]
+         for field_name, duration_value in duration_fields:
+             self._add_field(
+                 headers, values, "Timings", field_name, duration_value, "Sec"
+             )
+
+     def _add_request_counts(
+         self,
+         benchmark: GenerativeBenchmark,
+         headers: list[list[str]],
+         values: list[str | int | float],
+     ) -> None:
+         """
+         Add request count totals by status.
+
+         :param benchmark: Benchmark data to extract request counts from
+         :param headers: List of header hierarchies to append to
+         :param values: List of values to append to
+         """
+         for status in ["successful", "incomplete", "errored", "total"]:
+             self._add_field(
+                 headers,
+                 values,
+                 "Request Counts",
+                 status.capitalize(),
+                 getattr(benchmark.metrics.request_totals, status),
+             )
+
+     def _add_request_latency_metrics(
+         self,
+         benchmark: GenerativeBenchmark,
+         headers: list[list[str]],
+         values: list[str | int | float],
+     ) -> None:
+         """
+         Add request latency and streaming metrics.
+
+         :param benchmark: Benchmark data to extract latency metrics from
+         :param headers: List of header hierarchies to append to
+         :param values: List of values to append to
+         """
+         self._add_stats_for_metric(
+             headers, values, benchmark.metrics.request_latency, "Request Latency", "Sec"
+         )
+         self._add_stats_for_metric(
+             headers,
+             values,
+             benchmark.metrics.request_streaming_iterations_count,
+             "Streaming Iterations",
+             "Count",
+         )
+         self._add_stats_for_metric(
+             headers,
+             values,
+             benchmark.metrics.time_to_first_token_ms,
+             "Time to First Token",
+             "ms",
+         )
+         self._add_stats_for_metric(
+             headers,
+             values,
+             benchmark.metrics.time_per_output_token_ms,
+             "Time per Output Token",
+             "ms",
+         )
+         self._add_stats_for_metric(
+             headers,
+             values,
+             benchmark.metrics.inter_token_latency_ms,
+             "Inter Token Latency",
+             "ms",
+         )
+
+     def _add_server_throughput_metrics(
+         self,
+         benchmark: GenerativeBenchmark,
+         headers: list[list[str]],
+         values: list[str | int | float],
+     ) -> None:
+         """
+         Add server throughput metrics including requests, tokens, and concurrency.
+
+         :param benchmark: Benchmark data to extract throughput metrics from
+         :param headers: List of header hierarchies to append to
+         :param values: List of values to append to
+         """
+         self._add_stats_for_metric(
+             headers,
+             values,
+             benchmark.metrics.requests_per_second,
+             "Server Throughput",
+             "Requests/Sec",
+         )
+         self._add_stats_for_metric(
+             headers,
+             values,
+             benchmark.metrics.request_concurrency,
+             "Server Throughput",
+             "Concurrency",
+         )
+         self._add_stats_for_metric(
+             headers,
+             values,
+             benchmark.metrics.prompt_token_count,
+             "Token Metrics",
+             "Input Tokens",
+         )
+         self._add_stats_for_metric(
+             headers,
+             values,
+             benchmark.metrics.output_token_count,
+             "Token Metrics",
+             "Output Tokens",
+         )
+         self._add_stats_for_metric(
+             headers,
+             values,
+             benchmark.metrics.total_token_count,
+             "Token Metrics",
+             "Total Tokens",
+         )
+         self._add_stats_for_metric(
+             headers,
+             values,
+             benchmark.metrics.prompt_tokens_per_second,
+             "Token Throughput",
+             "Input Tokens/Sec",
+         )
+         self._add_stats_for_metric(
+             headers,
+             values,
+             benchmark.metrics.output_tokens_per_second,
+             "Token Throughput",
+             "Output Tokens/Sec",
+         )
+         self._add_stats_for_metric(
+             headers,
+             values,
+             benchmark.metrics.tokens_per_second,
+             "Token Throughput",
+             "Total Tokens/Sec",
+         )
+         self._add_stats_for_metric(
+             headers,
+             values,
+             benchmark.metrics.output_tokens_per_iteration,
+             "Token Streaming",
+             "Output Tokens/Iter",
+         )
+         self._add_stats_for_metric(
+             headers,
+             values,
+             benchmark.metrics.iter_tokens_per_iteration,
+             "Token Streaming",
+             "Iter Tokens/Iter",
+         )
+
+     def _add_modality_metrics(
+         self,
+         benchmark: GenerativeBenchmark,
+         modality: Literal["text", "image", "video", "audio"],
+         headers: list[list[str]],
+         values: list[str | int | float],
+     ) -> None:
+         """
+         Add modality-specific metrics for text, image, video, or audio data.
+
+         :param benchmark: Benchmark data to extract modality metrics from
+         :param modality: Type of modality to extract metrics for
+         :param headers: List of header hierarchies to append to
+         :param values: List of values to append to
+         """
+         modality_summary = getattr(benchmark.metrics, modality)
+         metric_definitions = MODALITY_METRICS[modality]
+
+         for metric_name, display_name in metric_definitions:
+             metric_obj = getattr(modality_summary, metric_name, None)
+             if metric_obj is None:
+                 continue
+
+             for io_type in ["input", "output", "total"]:
+                 dist_summary = getattr(metric_obj, io_type, None)
+                 if dist_summary is None:
+                     continue
+
+                 if not self._has_distribution_data(dist_summary):
+                     continue
+
+                 self._add_stats_for_metric(
+                     headers,
+                     values,
+                     dist_summary,
+                     f"{modality.capitalize()} {display_name}",
+                     io_type.capitalize(),
+                 )
+
+     def _has_distribution_data(self, dist_summary: StatusDistributionSummary) -> bool:
+         """
+         Check if distribution summary contains any data.
+
+         :param dist_summary: Distribution summary to check
+         :return: True if summary contains data, False otherwise
+         """
+         return any(
+             getattr(dist_summary, status, None) is not None
+             and getattr(dist_summary, status).total_sum > 0.0
+             for status in ["successful", "incomplete", "errored"]
+         )
+
+     def _add_scheduler_info(
+         self,
+         benchmark: GenerativeBenchmark,
+         headers: list[list[str]],
+         values: list[str | int | float],
+     ) -> None:
+         """
+         Add scheduler state and performance information.
+
+         :param benchmark: Benchmark data to extract scheduler info from
+         :param headers: List of header hierarchies to append to
+         :param values: List of values to append to
+         """
+         self._add_scheduler_state(benchmark, headers, values)
+         self._add_scheduler_metrics(benchmark, headers, values)
+
+     def _add_scheduler_state(
+         self,
+         benchmark: GenerativeBenchmark,
+         headers: list[list[str]],
+         values: list[str | int | float],
+     ) -> None:
+         """
+         Add scheduler state information including request counts and timing.
+
+         :param benchmark: Benchmark data to extract scheduler state from
+         :param headers: List of header hierarchies to append to
+         :param values: List of values to append to
+         """
+         state = benchmark.scheduler_state
+
+         state_fields: list[tuple[str, Any]] = [
+             ("Node ID", state.node_id),
+             ("Num Processes", state.num_processes),
+             ("Created Requests", state.created_requests),
+             ("Processed Requests", state.processed_requests),
+             ("Successful Requests", state.successful_requests),
+             ("Errored Requests", state.errored_requests),
+             ("Cancelled Requests", state.cancelled_requests),
+         ]
+
+         for field_name, value in state_fields:
+             self._add_field(headers, values, "Scheduler State", field_name, value)
+
+         if state.end_queuing_time:
+             self._add_field(
+                 headers,
+                 values,
+                 "Scheduler State",
+                 "End Queuing Time",
+                 safe_format_timestamp(state.end_queuing_time, TIMESTAMP_FORMAT),
+             )
+             end_queuing_constraints_dict = {
+                 key: constraint.model_dump()
+                 for key, constraint in state.end_queuing_constraints.items()
+             }
+             self._add_field(
+                 headers,
+                 values,
+                 "Scheduler State",
+                 "End Queuing Constraints",
+                 json.dumps(end_queuing_constraints_dict),
+             )
+
+         if state.end_processing_time:
+             self._add_field(
+                 headers,
+                 values,
+                 "Scheduler State",
+                 "End Processing Time",
+                 safe_format_timestamp(state.end_processing_time, TIMESTAMP_FORMAT),
+             )
+             end_processing_constraints_dict = {
+                 key: constraint.model_dump()
+                 for key, constraint in state.end_processing_constraints.items()
+             }
+             self._add_field(
+                 headers,
+                 values,
+                 "Scheduler State",
+                 "End Processing Constraints",
+                 json.dumps(end_processing_constraints_dict),
+             )
+
+     def _add_scheduler_metrics(
+         self,
+         benchmark: GenerativeBenchmark,
+         headers: list[list[str]],
+         values: list[str | int | float],
+     ) -> None:
+         """
+         Add scheduler performance metrics including delays and processing times.
+
+         :param benchmark: Benchmark data to extract scheduler metrics from
+         :param headers: List of header hierarchies to append to
+         :param values: List of values to append to
+         """
+         metrics = benchmark.scheduler_metrics
+
+         requests_made_fields: list[tuple[str, int]] = [
+             ("Requests Made Successful", metrics.requests_made.successful),
+             ("Requests Made Incomplete", metrics.requests_made.incomplete),
+             ("Requests Made Errored", metrics.requests_made.errored),
+             ("Requests Made Total", metrics.requests_made.total),
+         ]
+         for field_name, value in requests_made_fields:
+             self._add_field(headers, values, "Scheduler Metrics", field_name, value)
+
+         timing_metrics: list[tuple[str, float]] = [
+             ("Queued Time Avg", metrics.queued_time_avg),
+             ("Resolve Start Delay Avg", metrics.resolve_start_delay_avg),
+             (
+                 "Resolve Targeted Start Delay Avg",
+                 metrics.resolve_targeted_start_delay_avg,
+             ),
+             ("Request Start Delay Avg", metrics.request_start_delay_avg),
+             (
+                 "Request Targeted Start Delay Avg",
+                 metrics.request_targeted_start_delay_avg,
+             ),
+             ("Request Time Avg", metrics.request_time_avg),
+             ("Resolve End Delay Avg", metrics.resolve_end_delay_avg),
+             ("Resolve Time Avg", metrics.resolve_time_avg),
+             ("Finalized Delay Avg", metrics.finalized_delay_avg),
+             ("Processed Delay Avg", metrics.processed_delay_avg),
+         ]
+         for field_name, timing in timing_metrics:
+             self._add_field(
+                 headers, values, "Scheduler Metrics", field_name, timing, "Sec"
+             )
+
+     def _add_stats_for_metric(
+         self,
+         headers: list[list[str]],
+         values: list[str | int | float],
+         metric: StatusDistributionSummary | DistributionSummary,
+         group: str,
+         units: str,
+     ) -> None:
+         """
+         Add statistical summaries for a metric across all statuses.
+
+         :param headers: List of header hierarchies to append to
+         :param values: List of values to append to
+         :param metric: Distribution summary to extract statistics from
+         :param group: Top-level category for the metric
+         :param units: Units for the metric values
+         """
+         if isinstance(metric, StatusDistributionSummary):
+             for status in ["successful", "incomplete", "errored"]:
+                 dist = getattr(metric, status, None)
+                 if dist is None or dist.total_sum == 0.0:
+                     continue
+                 self._add_distribution_stats(
+                     headers, values, dist, group, units, status
+                 )
+         else:
+             self._add_distribution_stats(headers, values, metric, group, units, None)
+
+     def _add_distribution_stats(
+         self,
+         headers: list[list[str]],
+         values: list[str | int | float],
+         dist: DistributionSummary,
+         group: str,
+         units: str,
+         status: str | None,
+     ) -> None:
+         """
+         Add distribution statistics including mean, median, and percentiles.
+
+         :param headers: List of header hierarchies to append to
+         :param values: List of values to append to
+         :param dist: Distribution summary with statistical data
+         :param group: Top-level category for the metric
+         :param units: Units for the metric values
+         :param status: Request status (successful, incomplete, errored) or None
+         """
+         status_prefix = f"{status.capitalize()} " if status else ""
+
+         headers.append([group, f"{status_prefix}{units}", "Mean"])
+         values.append(dist.mean)
+
+         headers.append([group, f"{status_prefix}{units}", "Median"])
+         values.append(dist.median)
+
+         headers.append([group, f"{status_prefix}{units}", "Std Dev"])
+         values.append(dist.std_dev)
+
+         headers.append([group, f"{status_prefix}{units}", "Percentiles"])
+         percentiles_str = (
+             f"[{dist.min}, {dist.percentiles.p001}, {dist.percentiles.p01}, "
+             f"{dist.percentiles.p05}, {dist.percentiles.p10}, {dist.percentiles.p25}, "
+             f"{dist.percentiles.p75}, {dist.percentiles.p90}, {dist.percentiles.p95}, "
+             f"{dist.percentiles.p99}, {dist.max}]"
+         )
+         values.append(percentiles_str)
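
For orientation, the sketch below shows one way the new CSV formatter could be driven directly. It is illustrative only: it assumes a populated GenerativeBenchmarksReport (here called report) is already available from a benchmark run, assumes GenerativeBenchmarkerCSV can be constructed with its defaults, and uses only names visible in the diff above; in normal use the formatter is presumably selected via the "csv" key it registers with GenerativeBenchmarkerOutput rather than constructed by hand.

import asyncio

from guidellm.benchmark.outputs.csv import GenerativeBenchmarkerCSV

async def write_benchmarks_csv(report):
    # Illustrative sketch; `report` is assumed to be a GenerativeBenchmarksReport.
    # output_path defaults to the current working directory.
    formatter = GenerativeBenchmarkerCSV()
    # Because the default output_path is a directory, finalize() appends
    # DEFAULT_FILE ("benchmarks.csv"), writes the multi-row header, then one
    # data row per benchmark in the report, and returns the file path.
    return await formatter.finalize(report)

# csv_path = asyncio.run(write_benchmarks_csv(report))

The resulting file carries three header rows (group, field, units or statistic) followed by one data row per benchmark, which is what the multi-row header logic above produces.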