guidellm 0.3.1__py3-none-any.whl → 0.6.0a5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- guidellm/__init__.py +5 -2
- guidellm/__main__.py +524 -255
- guidellm/backends/__init__.py +33 -0
- guidellm/backends/backend.py +109 -0
- guidellm/backends/openai.py +340 -0
- guidellm/backends/response_handlers.py +428 -0
- guidellm/benchmark/__init__.py +69 -39
- guidellm/benchmark/benchmarker.py +160 -316
- guidellm/benchmark/entrypoints.py +560 -127
- guidellm/benchmark/outputs/__init__.py +24 -0
- guidellm/benchmark/outputs/console.py +633 -0
- guidellm/benchmark/outputs/csv.py +721 -0
- guidellm/benchmark/outputs/html.py +473 -0
- guidellm/benchmark/outputs/output.py +169 -0
- guidellm/benchmark/outputs/serialized.py +69 -0
- guidellm/benchmark/profiles.py +718 -0
- guidellm/benchmark/progress.py +553 -556
- guidellm/benchmark/scenarios/__init__.py +40 -0
- guidellm/benchmark/scenarios/chat.json +6 -0
- guidellm/benchmark/scenarios/rag.json +6 -0
- guidellm/benchmark/schemas/__init__.py +66 -0
- guidellm/benchmark/schemas/base.py +402 -0
- guidellm/benchmark/schemas/generative/__init__.py +55 -0
- guidellm/benchmark/schemas/generative/accumulator.py +841 -0
- guidellm/benchmark/schemas/generative/benchmark.py +163 -0
- guidellm/benchmark/schemas/generative/entrypoints.py +381 -0
- guidellm/benchmark/schemas/generative/metrics.py +927 -0
- guidellm/benchmark/schemas/generative/report.py +158 -0
- guidellm/data/__init__.py +34 -4
- guidellm/data/builders.py +541 -0
- guidellm/data/collators.py +16 -0
- guidellm/data/config.py +120 -0
- guidellm/data/deserializers/__init__.py +49 -0
- guidellm/data/deserializers/deserializer.py +141 -0
- guidellm/data/deserializers/file.py +223 -0
- guidellm/data/deserializers/huggingface.py +94 -0
- guidellm/data/deserializers/memory.py +194 -0
- guidellm/data/deserializers/synthetic.py +246 -0
- guidellm/data/entrypoints.py +52 -0
- guidellm/data/loaders.py +190 -0
- guidellm/data/preprocessors/__init__.py +27 -0
- guidellm/data/preprocessors/formatters.py +410 -0
- guidellm/data/preprocessors/mappers.py +196 -0
- guidellm/data/preprocessors/preprocessor.py +30 -0
- guidellm/data/processor.py +29 -0
- guidellm/data/schemas.py +175 -0
- guidellm/data/utils/__init__.py +6 -0
- guidellm/data/utils/dataset.py +94 -0
- guidellm/extras/__init__.py +4 -0
- guidellm/extras/audio.py +220 -0
- guidellm/extras/vision.py +242 -0
- guidellm/logger.py +2 -2
- guidellm/mock_server/__init__.py +8 -0
- guidellm/mock_server/config.py +84 -0
- guidellm/mock_server/handlers/__init__.py +17 -0
- guidellm/mock_server/handlers/chat_completions.py +280 -0
- guidellm/mock_server/handlers/completions.py +280 -0
- guidellm/mock_server/handlers/tokenizer.py +142 -0
- guidellm/mock_server/models.py +510 -0
- guidellm/mock_server/server.py +238 -0
- guidellm/mock_server/utils.py +302 -0
- guidellm/scheduler/__init__.py +69 -26
- guidellm/scheduler/constraints/__init__.py +49 -0
- guidellm/scheduler/constraints/constraint.py +325 -0
- guidellm/scheduler/constraints/error.py +411 -0
- guidellm/scheduler/constraints/factory.py +182 -0
- guidellm/scheduler/constraints/request.py +312 -0
- guidellm/scheduler/constraints/saturation.py +722 -0
- guidellm/scheduler/environments.py +252 -0
- guidellm/scheduler/scheduler.py +137 -368
- guidellm/scheduler/schemas.py +358 -0
- guidellm/scheduler/strategies.py +617 -0
- guidellm/scheduler/worker.py +413 -419
- guidellm/scheduler/worker_group.py +712 -0
- guidellm/schemas/__init__.py +65 -0
- guidellm/schemas/base.py +417 -0
- guidellm/schemas/info.py +188 -0
- guidellm/schemas/request.py +235 -0
- guidellm/schemas/request_stats.py +349 -0
- guidellm/schemas/response.py +124 -0
- guidellm/schemas/statistics.py +1018 -0
- guidellm/{config.py → settings.py} +31 -24
- guidellm/utils/__init__.py +71 -8
- guidellm/utils/auto_importer.py +98 -0
- guidellm/utils/cli.py +132 -5
- guidellm/utils/console.py +566 -0
- guidellm/utils/encoding.py +778 -0
- guidellm/utils/functions.py +159 -0
- guidellm/utils/hf_datasets.py +1 -2
- guidellm/utils/hf_transformers.py +4 -4
- guidellm/utils/imports.py +9 -0
- guidellm/utils/messaging.py +1118 -0
- guidellm/utils/mixins.py +115 -0
- guidellm/utils/random.py +3 -4
- guidellm/utils/registry.py +220 -0
- guidellm/utils/singleton.py +133 -0
- guidellm/utils/synchronous.py +159 -0
- guidellm/utils/text.py +163 -50
- guidellm/utils/typing.py +41 -0
- guidellm/version.py +2 -2
- guidellm-0.6.0a5.dist-info/METADATA +364 -0
- guidellm-0.6.0a5.dist-info/RECORD +109 -0
- guidellm/backend/__init__.py +0 -23
- guidellm/backend/backend.py +0 -259
- guidellm/backend/openai.py +0 -708
- guidellm/backend/response.py +0 -136
- guidellm/benchmark/aggregator.py +0 -760
- guidellm/benchmark/benchmark.py +0 -837
- guidellm/benchmark/output.py +0 -997
- guidellm/benchmark/profile.py +0 -409
- guidellm/benchmark/scenario.py +0 -104
- guidellm/data/prideandprejudice.txt.gz +0 -0
- guidellm/dataset/__init__.py +0 -22
- guidellm/dataset/creator.py +0 -213
- guidellm/dataset/entrypoints.py +0 -42
- guidellm/dataset/file.py +0 -92
- guidellm/dataset/hf_datasets.py +0 -62
- guidellm/dataset/in_memory.py +0 -132
- guidellm/dataset/synthetic.py +0 -287
- guidellm/objects/__init__.py +0 -18
- guidellm/objects/pydantic.py +0 -89
- guidellm/objects/statistics.py +0 -953
- guidellm/preprocess/__init__.py +0 -3
- guidellm/preprocess/dataset.py +0 -374
- guidellm/presentation/__init__.py +0 -28
- guidellm/presentation/builder.py +0 -27
- guidellm/presentation/data_models.py +0 -232
- guidellm/presentation/injector.py +0 -66
- guidellm/request/__init__.py +0 -18
- guidellm/request/loader.py +0 -284
- guidellm/request/request.py +0 -79
- guidellm/request/types.py +0 -10
- guidellm/scheduler/queues.py +0 -25
- guidellm/scheduler/result.py +0 -155
- guidellm/scheduler/strategy.py +0 -495
- guidellm-0.3.1.dist-info/METADATA +0 -329
- guidellm-0.3.1.dist-info/RECORD +0 -62
- {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/WHEEL +0 -0
- {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/entry_points.txt +0 -0
- {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/licenses/LICENSE +0 -0
- {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/top_level.txt +0 -0
guidellm/benchmark/outputs/csv.py
@@ -0,0 +1,721 @@
+"""
+CSV output formatter for benchmark results.
+
+This module provides the GenerativeBenchmarkerCSV class which exports benchmark
+reports to CSV format with comprehensive metrics including timing, throughput,
+latency, modality data, and scheduler information. The CSV output uses multi-row
+headers to organize metrics hierarchically and includes both summary statistics
+and distribution percentiles.
+"""
+
+from __future__ import annotations
+
+import csv
+import json
+from pathlib import Path
+from typing import Annotated, Any, ClassVar, Literal
+
+from pydantic import Field
+
+from guidellm.benchmark.outputs.output import GenerativeBenchmarkerOutput
+from guidellm.benchmark.schemas import GenerativeBenchmark, GenerativeBenchmarksReport
+from guidellm.schemas import DistributionSummary, StatusDistributionSummary
+from guidellm.utils import safe_format_timestamp
+
+__all__ = ["GenerativeBenchmarkerCSV"]
+
+TIMESTAMP_FORMAT: Annotated[str, "Format string for timestamp output in CSV files"] = (
+    "%Y-%m-%d %H:%M:%S"
+)
+MODALITY_METRICS: Annotated[
+    dict[str, list[tuple[str, str]]],
+    "Mapping of modality types to their metric names and display labels",
+] = {
+    "text": [
+        ("tokens", "Tokens"),
+        ("words", "Words"),
+        ("characters", "Characters"),
+    ],
+    "image": [
+        ("tokens", "Tokens"),
+        ("images", "Images"),
+        ("pixels", "Pixels"),
+        ("bytes", "Bytes"),
+    ],
+    "video": [
+        ("tokens", "Tokens"),
+        ("frames", "Frames"),
+        ("seconds", "Seconds"),
+        ("bytes", "Bytes"),
+    ],
+    "audio": [
+        ("tokens", "Tokens"),
+        ("samples", "Samples"),
+        ("seconds", "Seconds"),
+        ("bytes", "Bytes"),
+    ],
+}
+
+
+@GenerativeBenchmarkerOutput.register("csv")
+class GenerativeBenchmarkerCSV(GenerativeBenchmarkerOutput):
+    """
+    CSV output formatter for benchmark results.
+
+    Exports comprehensive benchmark data to CSV format with multi-row headers
+    organizing metrics into categories including run information, timing, request
+    counts, latency, throughput, modality-specific data, and scheduler state. Each
+    benchmark run becomes a row with statistical distributions represented as
+    mean, median, standard deviation, and percentiles.
+
+    :cvar DEFAULT_FILE: Default filename for CSV output
+    """
+
+    DEFAULT_FILE: ClassVar[str] = "benchmarks.csv"
+
+    @classmethod
+    def validated_kwargs(
+        cls, output_path: str | Path | None, **_kwargs
+    ) -> dict[str, Any]:
+        """
+        Validate and normalize constructor keyword arguments.
+
+        :param output_path: Path for CSV output file or directory
+        :param _kwargs: Additional keyword arguments (ignored)
+        :return: Normalized keyword arguments dictionary
+        """
+        new_kwargs = {}
+        if output_path is not None:
+            new_kwargs["output_path"] = (
+                Path(output_path) if not isinstance(output_path, Path) else output_path
+            )
+        return new_kwargs
+
+    output_path: Path = Field(
+        default_factory=lambda: Path.cwd(),
+        description=(
+            "Path where the CSV file will be saved, defaults to current directory"
+        ),
+    )
+
+    async def finalize(self, report: GenerativeBenchmarksReport) -> Path:
+        """
+        Save the benchmark report as a CSV file.
+
+        :param report: The completed benchmark report
+        :return: Path to the saved CSV file
+        """
+        output_path = self.output_path
+        if output_path.is_dir():
+            output_path = output_path / GenerativeBenchmarkerCSV.DEFAULT_FILE
+        output_path.parent.mkdir(parents=True, exist_ok=True)
+
+        with output_path.open("w", newline="") as file:
+            writer = csv.writer(file)
+            headers: list[list[str]] = []
+            rows: list[list[str | int | float]] = []
+
+            for benchmark in report.benchmarks:
+                benchmark_headers: list[list[str]] = []
+                benchmark_values: list[str | int | float] = []
+
+                self._add_run_info(benchmark, benchmark_headers, benchmark_values)
+                self._add_benchmark_info(benchmark, benchmark_headers, benchmark_values)
+                self._add_timing_info(benchmark, benchmark_headers, benchmark_values)
+                self._add_request_counts(benchmark, benchmark_headers, benchmark_values)
+                self._add_request_latency_metrics(
+                    benchmark, benchmark_headers, benchmark_values
+                )
+                self._add_server_throughput_metrics(
+                    benchmark, benchmark_headers, benchmark_values
+                )
+                for modality_name in ["text", "image", "video", "audio"]:
+                    self._add_modality_metrics(
+                        benchmark,
+                        modality_name,  # type: ignore[arg-type]
+                        benchmark_headers,
+                        benchmark_values,
+                    )
+                self._add_scheduler_info(benchmark, benchmark_headers, benchmark_values)
+                self._add_runtime_info(report, benchmark_headers, benchmark_values)
+
+                if not headers:
+                    headers = benchmark_headers
+                rows.append(benchmark_values)
+
+            self._write_multirow_header(writer, headers)
+            for row in rows:
+                writer.writerow(row)
+
+        return output_path
+
+    def _write_multirow_header(self, writer: Any, headers: list[list[str]]) -> None:
+        """
+        Write multi-row header to CSV for hierarchical metric organization.
+
+        :param writer: CSV writer instance
+        :param headers: List of column header hierarchies as string lists
+        """
+        max_rows = max((len(col) for col in headers), default=0)
+        for row_idx in range(max_rows):
+            row = [col[row_idx] if row_idx < len(col) else "" for col in headers]
+            writer.writerow(row)
+
+    def _add_field(
+        self,
+        headers: list[list[str]],
+        values: list[str | int | float],
+        group: str,
+        field_name: str,
+        value: Any,
+        units: str = "",
+    ) -> None:
+        """
+        Add a single field to headers and values lists.
+
+        :param headers: List of header hierarchies to append to
+        :param values: List of values to append to
+        :param group: Top-level category for the field
+        :param field_name: Name of the field
+        :param value: Value for the field
+        :param units: Optional units for the field
+        """
+        headers.append([group, field_name, units])
+        values.append(value)
+
+    def _add_runtime_info(
+        self,
+        report: GenerativeBenchmarksReport,
+        headers: list[list[str]],
+        values: list[str | int | float],
+    ) -> None:
+        """
+        Add global metadata and environment information.
+
+        :param report: Benchmark report to extract global info from
+        :param headers: List of header hierarchies to append to
+        :param values: List of values to append to
+        """
+        self._add_field(
+            headers,
+            values,
+            "Runtime Info",
+            "Metadata",
+            report.metadata.model_dump_json(),
+        )
+        self._add_field(
+            headers,
+            values,
+            "Runtime Info",
+            "Arguments",
+            report.args.model_dump_json(),
+        )
+
+    def _add_run_info(
+        self,
+        benchmark: GenerativeBenchmark,
+        headers: list[list[str]],
+        values: list[str | int | float],
+    ) -> None:
+        """
+        Add overall run identification and configuration information.
+
+        :param benchmark: Benchmark data to extract run info from
+        :param headers: List of header hierarchies to append to
+        :param values: List of values to append to
+        """
+        self._add_field(headers, values, "Run Info", "Run ID", benchmark.config.run_id)
+        self._add_field(
+            headers, values, "Run Info", "Run Index", benchmark.config.run_index
+        )
+        self._add_field(
+            headers,
+            values,
+            "Run Info",
+            "Profile",
+            benchmark.config.profile.model_dump_json(),
+        )
+        self._add_field(
+            headers,
+            values,
+            "Run Info",
+            "Requests",
+            json.dumps(benchmark.config.requests),
+        )
+        self._add_field(
+            headers, values, "Run Info", "Backend", json.dumps(benchmark.config.backend)
+        )
+        self._add_field(
+            headers,
+            values,
+            "Run Info",
+            "Environment",
+            json.dumps(benchmark.config.environment),
+        )
+
+    def _add_benchmark_info(
+        self,
+        benchmark: GenerativeBenchmark,
+        headers: list[list[str]],
+        values: list[str | int | float],
+    ) -> None:
+        """
+        Add individual benchmark configuration details.
+
+        :param benchmark: Benchmark data to extract configuration from
+        :param headers: List of header hierarchies to append to
+        :param values: List of values to append to
+        """
+        self._add_field(headers, values, "Benchmark", "Type", benchmark.type_)
+        self._add_field(headers, values, "Benchmark", "ID", benchmark.config.id_)
+        self._add_field(
+            headers, values, "Benchmark", "Strategy", benchmark.config.strategy.type_
+        )
+        self._add_field(
+            headers,
+            values,
+            "Benchmark",
+            "Constraints",
+            json.dumps(benchmark.config.constraints),
+        )
+
+    def _add_timing_info(
+        self,
+        benchmark: GenerativeBenchmark,
+        headers: list[list[str]],
+        values: list[str | int | float],
+    ) -> None:
+        """
+        Add timing information including start, end, duration, warmup, and cooldown.
+
+        :param benchmark: Benchmark data to extract timing from
+        :param headers: List of header hierarchies to append to
+        :param values: List of values to append to
+        """
+        timing_fields: list[tuple[str, Any]] = [
+            ("Start Time", benchmark.scheduler_metrics.start_time),
+            ("Request Start Time", benchmark.scheduler_metrics.request_start_time),
+            ("Measure Start Time", benchmark.scheduler_metrics.measure_start_time),
+            ("Measure End Time", benchmark.scheduler_metrics.measure_end_time),
+            ("Request End Time", benchmark.scheduler_metrics.request_end_time),
+            ("End Time", benchmark.scheduler_metrics.end_time),
+        ]
+        for field_name, timestamp in timing_fields:
+            self._add_field(
+                headers,
+                values,
+                "Timings",
+                field_name,
+                safe_format_timestamp(timestamp, TIMESTAMP_FORMAT),
+            )
+
+        duration_fields: list[tuple[str, float | str]] = [
+            ("Duration", benchmark.duration),
+            ("Warmup", benchmark.warmup_duration),
+            ("Cooldown", benchmark.cooldown_duration),
+        ]
+        for field_name, duration_value in duration_fields:
+            self._add_field(
+                headers, values, "Timings", field_name, duration_value, "Sec"
+            )
+
+    def _add_request_counts(
+        self,
+        benchmark: GenerativeBenchmark,
+        headers: list[list[str]],
+        values: list[str | int | float],
+    ) -> None:
+        """
+        Add request count totals by status.
+
+        :param benchmark: Benchmark data to extract request counts from
+        :param headers: List of header hierarchies to append to
+        :param values: List of values to append to
+        """
+        for status in ["successful", "incomplete", "errored", "total"]:
+            self._add_field(
+                headers,
+                values,
+                "Request Counts",
+                status.capitalize(),
+                getattr(benchmark.metrics.request_totals, status),
+            )
+
+    def _add_request_latency_metrics(
+        self,
+        benchmark: GenerativeBenchmark,
+        headers: list[list[str]],
+        values: list[str | int | float],
+    ) -> None:
+        """
+        Add request latency and streaming metrics.
+
+        :param benchmark: Benchmark data to extract latency metrics from
+        :param headers: List of header hierarchies to append to
+        :param values: List of values to append to
+        """
+        self._add_stats_for_metric(
+            headers, values, benchmark.metrics.request_latency, "Request Latency", "Sec"
+        )
+        self._add_stats_for_metric(
+            headers,
+            values,
+            benchmark.metrics.request_streaming_iterations_count,
+            "Streaming Iterations",
+            "Count",
+        )
+        self._add_stats_for_metric(
+            headers,
+            values,
+            benchmark.metrics.time_to_first_token_ms,
+            "Time to First Token",
+            "ms",
+        )
+        self._add_stats_for_metric(
+            headers,
+            values,
+            benchmark.metrics.time_per_output_token_ms,
+            "Time per Output Token",
+            "ms",
+        )
+        self._add_stats_for_metric(
+            headers,
+            values,
+            benchmark.metrics.inter_token_latency_ms,
+            "Inter Token Latency",
+            "ms",
+        )
+
+    def _add_server_throughput_metrics(
+        self,
+        benchmark: GenerativeBenchmark,
+        headers: list[list[str]],
+        values: list[str | int | float],
+    ) -> None:
+        """
+        Add server throughput metrics including requests, tokens, and concurrency.
+
+        :param benchmark: Benchmark data to extract throughput metrics from
+        :param headers: List of header hierarchies to append to
+        :param values: List of values to append to
+        """
+        self._add_stats_for_metric(
+            headers,
+            values,
+            benchmark.metrics.requests_per_second,
+            "Server Throughput",
+            "Requests/Sec",
+        )
+        self._add_stats_for_metric(
+            headers,
+            values,
+            benchmark.metrics.request_concurrency,
+            "Server Throughput",
+            "Concurrency",
+        )
+        self._add_stats_for_metric(
+            headers,
+            values,
+            benchmark.metrics.prompt_token_count,
+            "Token Metrics",
+            "Input Tokens",
+        )
+        self._add_stats_for_metric(
+            headers,
+            values,
+            benchmark.metrics.output_token_count,
+            "Token Metrics",
+            "Output Tokens",
+        )
+        self._add_stats_for_metric(
+            headers,
+            values,
+            benchmark.metrics.total_token_count,
+            "Token Metrics",
+            "Total Tokens",
+        )
+        self._add_stats_for_metric(
+            headers,
+            values,
+            benchmark.metrics.prompt_tokens_per_second,
+            "Token Throughput",
+            "Input Tokens/Sec",
+        )
+        self._add_stats_for_metric(
+            headers,
+            values,
+            benchmark.metrics.output_tokens_per_second,
+            "Token Throughput",
+            "Output Tokens/Sec",
+        )
+        self._add_stats_for_metric(
+            headers,
+            values,
+            benchmark.metrics.tokens_per_second,
+            "Token Throughput",
+            "Total Tokens/Sec",
+        )
+        self._add_stats_for_metric(
+            headers,
+            values,
+            benchmark.metrics.output_tokens_per_iteration,
+            "Token Streaming",
+            "Output Tokens/Iter",
+        )
+        self._add_stats_for_metric(
+            headers,
+            values,
+            benchmark.metrics.iter_tokens_per_iteration,
+            "Token Streaming",
+            "Iter Tokens/Iter",
+        )
+
+    def _add_modality_metrics(
+        self,
+        benchmark: GenerativeBenchmark,
+        modality: Literal["text", "image", "video", "audio"],
+        headers: list[list[str]],
+        values: list[str | int | float],
+    ) -> None:
+        """
+        Add modality-specific metrics for text, image, video, or audio data.
+
+        :param benchmark: Benchmark data to extract modality metrics from
+        :param modality: Type of modality to extract metrics for
+        :param headers: List of header hierarchies to append to
+        :param values: List of values to append to
+        """
+        modality_summary = getattr(benchmark.metrics, modality)
+        metric_definitions = MODALITY_METRICS[modality]
+
+        for metric_name, display_name in metric_definitions:
+            metric_obj = getattr(modality_summary, metric_name, None)
+            if metric_obj is None:
+                continue
+
+            for io_type in ["input", "output", "total"]:
+                dist_summary = getattr(metric_obj, io_type, None)
+                if dist_summary is None:
+                    continue
+
+                if not self._has_distribution_data(dist_summary):
+                    continue
+
+                self._add_stats_for_metric(
+                    headers,
+                    values,
+                    dist_summary,
+                    f"{modality.capitalize()} {display_name}",
+                    io_type.capitalize(),
+                )
+
+    def _has_distribution_data(self, dist_summary: StatusDistributionSummary) -> bool:
+        """
+        Check if distribution summary contains any data.
+
+        :param dist_summary: Distribution summary to check
+        :return: True if summary contains data, False otherwise
+        """
+        return any(
+            getattr(dist_summary, status, None) is not None
+            and getattr(dist_summary, status).total_sum > 0.0
+            for status in ["successful", "incomplete", "errored"]
+        )
+
+    def _add_scheduler_info(
+        self,
+        benchmark: GenerativeBenchmark,
+        headers: list[list[str]],
+        values: list[str | int | float],
+    ) -> None:
+        """
+        Add scheduler state and performance information.
+
+        :param benchmark: Benchmark data to extract scheduler info from
+        :param headers: List of header hierarchies to append to
+        :param values: List of values to append to
+        """
+        self._add_scheduler_state(benchmark, headers, values)
+        self._add_scheduler_metrics(benchmark, headers, values)
+
+    def _add_scheduler_state(
+        self,
+        benchmark: GenerativeBenchmark,
+        headers: list[list[str]],
+        values: list[str | int | float],
+    ) -> None:
+        """
+        Add scheduler state information including request counts and timing.
+
+        :param benchmark: Benchmark data to extract scheduler state from
+        :param headers: List of header hierarchies to append to
+        :param values: List of values to append to
+        """
+        state = benchmark.scheduler_state
+
+        state_fields: list[tuple[str, Any]] = [
+            ("Node ID", state.node_id),
+            ("Num Processes", state.num_processes),
+            ("Created Requests", state.created_requests),
+            ("Processed Requests", state.processed_requests),
+            ("Successful Requests", state.successful_requests),
+            ("Errored Requests", state.errored_requests),
+            ("Cancelled Requests", state.cancelled_requests),
+        ]
+
+        for field_name, value in state_fields:
+            self._add_field(headers, values, "Scheduler State", field_name, value)
+
+        if state.end_queuing_time:
+            self._add_field(
+                headers,
+                values,
+                "Scheduler State",
+                "End Queuing Time",
+                safe_format_timestamp(state.end_queuing_time, TIMESTAMP_FORMAT),
+            )
+            end_queuing_constraints_dict = {
+                key: constraint.model_dump()
+                for key, constraint in state.end_queuing_constraints.items()
+            }
+            self._add_field(
+                headers,
+                values,
+                "Scheduler State",
+                "End Queuing Constraints",
+                json.dumps(end_queuing_constraints_dict),
+            )
+
+        if state.end_processing_time:
+            self._add_field(
+                headers,
+                values,
+                "Scheduler State",
+                "End Processing Time",
+                safe_format_timestamp(state.end_processing_time, TIMESTAMP_FORMAT),
+            )
+            end_processing_constraints_dict = {
+                key: constraint.model_dump()
+                for key, constraint in state.end_processing_constraints.items()
+            }
+            self._add_field(
+                headers,
+                values,
+                "Scheduler State",
+                "End Processing Constraints",
+                json.dumps(end_processing_constraints_dict),
+            )
+
+    def _add_scheduler_metrics(
+        self,
+        benchmark: GenerativeBenchmark,
+        headers: list[list[str]],
+        values: list[str | int | float],
+    ) -> None:
+        """
+        Add scheduler performance metrics including delays and processing times.
+
+        :param benchmark: Benchmark data to extract scheduler metrics from
+        :param headers: List of header hierarchies to append to
+        :param values: List of values to append to
+        """
+        metrics = benchmark.scheduler_metrics
+
+        requests_made_fields: list[tuple[str, int]] = [
+            ("Requests Made Successful", metrics.requests_made.successful),
+            ("Requests Made Incomplete", metrics.requests_made.incomplete),
+            ("Requests Made Errored", metrics.requests_made.errored),
+            ("Requests Made Total", metrics.requests_made.total),
+        ]
+        for field_name, value in requests_made_fields:
+            self._add_field(headers, values, "Scheduler Metrics", field_name, value)
+
+        timing_metrics: list[tuple[str, float]] = [
+            ("Queued Time Avg", metrics.queued_time_avg),
+            ("Resolve Start Delay Avg", metrics.resolve_start_delay_avg),
+            (
+                "Resolve Targeted Start Delay Avg",
+                metrics.resolve_targeted_start_delay_avg,
+            ),
+            ("Request Start Delay Avg", metrics.request_start_delay_avg),
+            (
+                "Request Targeted Start Delay Avg",
+                metrics.request_targeted_start_delay_avg,
+            ),
+            ("Request Time Avg", metrics.request_time_avg),
+            ("Resolve End Delay Avg", metrics.resolve_end_delay_avg),
+            ("Resolve Time Avg", metrics.resolve_time_avg),
+            ("Finalized Delay Avg", metrics.finalized_delay_avg),
+            ("Processed Delay Avg", metrics.processed_delay_avg),
+        ]
+        for field_name, timing in timing_metrics:
+            self._add_field(
+                headers, values, "Scheduler Metrics", field_name, timing, "Sec"
+            )
+
+    def _add_stats_for_metric(
+        self,
+        headers: list[list[str]],
+        values: list[str | int | float],
+        metric: StatusDistributionSummary | DistributionSummary,
+        group: str,
+        units: str,
+    ) -> None:
+        """
+        Add statistical summaries for a metric across all statuses.
+
+        :param headers: List of header hierarchies to append to
+        :param values: List of values to append to
+        :param metric: Distribution summary to extract statistics from
+        :param group: Top-level category for the metric
+        :param units: Units for the metric values
+        """
+        if isinstance(metric, StatusDistributionSummary):
+            for status in ["successful", "incomplete", "errored"]:
+                dist = getattr(metric, status, None)
+                if dist is None or dist.total_sum == 0.0:
+                    continue
+                self._add_distribution_stats(
+                    headers, values, dist, group, units, status
+                )
+        else:
+            self._add_distribution_stats(headers, values, metric, group, units, None)
+
+    def _add_distribution_stats(
+        self,
+        headers: list[list[str]],
+        values: list[str | int | float],
+        dist: DistributionSummary,
+        group: str,
+        units: str,
+        status: str | None,
+    ) -> None:
+        """
+        Add distribution statistics including mean, median, and percentiles.
+
+        :param headers: List of header hierarchies to append to
+        :param values: List of values to append to
+        :param dist: Distribution summary with statistical data
+        :param group: Top-level category for the metric
+        :param units: Units for the metric values
+        :param status: Request status (successful, incomplete, errored) or None
+        """
+        status_prefix = f"{status.capitalize()} " if status else ""
+
+        headers.append([group, f"{status_prefix}{units}", "Mean"])
+        values.append(dist.mean)
+
+        headers.append([group, f"{status_prefix}{units}", "Median"])
+        values.append(dist.median)
+
+        headers.append([group, f"{status_prefix}{units}", "Std Dev"])
+        values.append(dist.std_dev)
+
+        headers.append([group, f"{status_prefix}{units}", "Percentiles"])
+        percentiles_str = (
+            f"[{dist.min}, {dist.percentiles.p001}, {dist.percentiles.p01}, "
+            f"{dist.percentiles.p05}, {dist.percentiles.p10}, {dist.percentiles.p25}, "
+            f"{dist.percentiles.p75}, {dist.percentiles.p90}, {dist.percentiles.p95}, "
+            f"{dist.percentiles.p99}, {dist.max}]"
+        )
+        values.append(percentiles_str)
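
The formatter above builds one CSV column per recorded metric: _add_field attaches a three-level header (group, field name, units) to each value, and _write_multirow_header transposes those per-column lists into stacked header rows. The following is a minimal, self-contained sketch of that layout using only the standard library; it mirrors the pattern in the diff, and the field values ("example-run", 128, 0.42) are illustrative rather than taken from a real report.

    import csv
    import sys

    headers = []  # one [group, field, units] hierarchy per column
    values = []   # one value per column, forming a single benchmark row

    def add_field(group, field_name, value, units=""):
        # Mirrors GenerativeBenchmarkerCSV._add_field: record the column's
        # header hierarchy alongside its value.
        headers.append([group, field_name, units])
        values.append(value)

    add_field("Run Info", "Run ID", "example-run")
    add_field("Request Counts", "Successful", 128)
    add_field("Request Latency", "Successful Sec", 0.42, "Mean")

    writer = csv.writer(sys.stdout)
    # Mirrors _write_multirow_header: one header row per hierarchy level,
    # padding shorter hierarchies with empty cells, then the value row.
    max_rows = max((len(col) for col in headers), default=0)
    for row_idx in range(max_rows):
        writer.writerow([col[row_idx] if row_idx < len(col) else "" for col in headers])
    writer.writerow(values)

Within the package, the class is registered under the "csv" key through the GenerativeBenchmarkerOutput.register("csv") decorator and writes the file when its async finalize(report) method is awaited; how the benchmark entrypoints construct and invoke the registered outputs is outside this hunk.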