guidellm 0.3.1__py3-none-any.whl → 0.6.0a5__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
- guidellm/__init__.py +5 -2
- guidellm/__main__.py +524 -255
- guidellm/backends/__init__.py +33 -0
- guidellm/backends/backend.py +109 -0
- guidellm/backends/openai.py +340 -0
- guidellm/backends/response_handlers.py +428 -0
- guidellm/benchmark/__init__.py +69 -39
- guidellm/benchmark/benchmarker.py +160 -316
- guidellm/benchmark/entrypoints.py +560 -127
- guidellm/benchmark/outputs/__init__.py +24 -0
- guidellm/benchmark/outputs/console.py +633 -0
- guidellm/benchmark/outputs/csv.py +721 -0
- guidellm/benchmark/outputs/html.py +473 -0
- guidellm/benchmark/outputs/output.py +169 -0
- guidellm/benchmark/outputs/serialized.py +69 -0
- guidellm/benchmark/profiles.py +718 -0
- guidellm/benchmark/progress.py +553 -556
- guidellm/benchmark/scenarios/__init__.py +40 -0
- guidellm/benchmark/scenarios/chat.json +6 -0
- guidellm/benchmark/scenarios/rag.json +6 -0
- guidellm/benchmark/schemas/__init__.py +66 -0
- guidellm/benchmark/schemas/base.py +402 -0
- guidellm/benchmark/schemas/generative/__init__.py +55 -0
- guidellm/benchmark/schemas/generative/accumulator.py +841 -0
- guidellm/benchmark/schemas/generative/benchmark.py +163 -0
- guidellm/benchmark/schemas/generative/entrypoints.py +381 -0
- guidellm/benchmark/schemas/generative/metrics.py +927 -0
- guidellm/benchmark/schemas/generative/report.py +158 -0
- guidellm/data/__init__.py +34 -4
- guidellm/data/builders.py +541 -0
- guidellm/data/collators.py +16 -0
- guidellm/data/config.py +120 -0
- guidellm/data/deserializers/__init__.py +49 -0
- guidellm/data/deserializers/deserializer.py +141 -0
- guidellm/data/deserializers/file.py +223 -0
- guidellm/data/deserializers/huggingface.py +94 -0
- guidellm/data/deserializers/memory.py +194 -0
- guidellm/data/deserializers/synthetic.py +246 -0
- guidellm/data/entrypoints.py +52 -0
- guidellm/data/loaders.py +190 -0
- guidellm/data/preprocessors/__init__.py +27 -0
- guidellm/data/preprocessors/formatters.py +410 -0
- guidellm/data/preprocessors/mappers.py +196 -0
- guidellm/data/preprocessors/preprocessor.py +30 -0
- guidellm/data/processor.py +29 -0
- guidellm/data/schemas.py +175 -0
- guidellm/data/utils/__init__.py +6 -0
- guidellm/data/utils/dataset.py +94 -0
- guidellm/extras/__init__.py +4 -0
- guidellm/extras/audio.py +220 -0
- guidellm/extras/vision.py +242 -0
- guidellm/logger.py +2 -2
- guidellm/mock_server/__init__.py +8 -0
- guidellm/mock_server/config.py +84 -0
- guidellm/mock_server/handlers/__init__.py +17 -0
- guidellm/mock_server/handlers/chat_completions.py +280 -0
- guidellm/mock_server/handlers/completions.py +280 -0
- guidellm/mock_server/handlers/tokenizer.py +142 -0
- guidellm/mock_server/models.py +510 -0
- guidellm/mock_server/server.py +238 -0
- guidellm/mock_server/utils.py +302 -0
- guidellm/scheduler/__init__.py +69 -26
- guidellm/scheduler/constraints/__init__.py +49 -0
- guidellm/scheduler/constraints/constraint.py +325 -0
- guidellm/scheduler/constraints/error.py +411 -0
- guidellm/scheduler/constraints/factory.py +182 -0
- guidellm/scheduler/constraints/request.py +312 -0
- guidellm/scheduler/constraints/saturation.py +722 -0
- guidellm/scheduler/environments.py +252 -0
- guidellm/scheduler/scheduler.py +137 -368
- guidellm/scheduler/schemas.py +358 -0
- guidellm/scheduler/strategies.py +617 -0
- guidellm/scheduler/worker.py +413 -419
- guidellm/scheduler/worker_group.py +712 -0
- guidellm/schemas/__init__.py +65 -0
- guidellm/schemas/base.py +417 -0
- guidellm/schemas/info.py +188 -0
- guidellm/schemas/request.py +235 -0
- guidellm/schemas/request_stats.py +349 -0
- guidellm/schemas/response.py +124 -0
- guidellm/schemas/statistics.py +1018 -0
- guidellm/{config.py → settings.py} +31 -24
- guidellm/utils/__init__.py +71 -8
- guidellm/utils/auto_importer.py +98 -0
- guidellm/utils/cli.py +132 -5
- guidellm/utils/console.py +566 -0
- guidellm/utils/encoding.py +778 -0
- guidellm/utils/functions.py +159 -0
- guidellm/utils/hf_datasets.py +1 -2
- guidellm/utils/hf_transformers.py +4 -4
- guidellm/utils/imports.py +9 -0
- guidellm/utils/messaging.py +1118 -0
- guidellm/utils/mixins.py +115 -0
- guidellm/utils/random.py +3 -4
- guidellm/utils/registry.py +220 -0
- guidellm/utils/singleton.py +133 -0
- guidellm/utils/synchronous.py +159 -0
- guidellm/utils/text.py +163 -50
- guidellm/utils/typing.py +41 -0
- guidellm/version.py +2 -2
- guidellm-0.6.0a5.dist-info/METADATA +364 -0
- guidellm-0.6.0a5.dist-info/RECORD +109 -0
- guidellm/backend/__init__.py +0 -23
- guidellm/backend/backend.py +0 -259
- guidellm/backend/openai.py +0 -708
- guidellm/backend/response.py +0 -136
- guidellm/benchmark/aggregator.py +0 -760
- guidellm/benchmark/benchmark.py +0 -837
- guidellm/benchmark/output.py +0 -997
- guidellm/benchmark/profile.py +0 -409
- guidellm/benchmark/scenario.py +0 -104
- guidellm/data/prideandprejudice.txt.gz +0 -0
- guidellm/dataset/__init__.py +0 -22
- guidellm/dataset/creator.py +0 -213
- guidellm/dataset/entrypoints.py +0 -42
- guidellm/dataset/file.py +0 -92
- guidellm/dataset/hf_datasets.py +0 -62
- guidellm/dataset/in_memory.py +0 -132
- guidellm/dataset/synthetic.py +0 -287
- guidellm/objects/__init__.py +0 -18
- guidellm/objects/pydantic.py +0 -89
- guidellm/objects/statistics.py +0 -953
- guidellm/preprocess/__init__.py +0 -3
- guidellm/preprocess/dataset.py +0 -374
- guidellm/presentation/__init__.py +0 -28
- guidellm/presentation/builder.py +0 -27
- guidellm/presentation/data_models.py +0 -232
- guidellm/presentation/injector.py +0 -66
- guidellm/request/__init__.py +0 -18
- guidellm/request/loader.py +0 -284
- guidellm/request/request.py +0 -79
- guidellm/request/types.py +0 -10
- guidellm/scheduler/queues.py +0 -25
- guidellm/scheduler/result.py +0 -155
- guidellm/scheduler/strategy.py +0 -495
- guidellm-0.3.1.dist-info/METADATA +0 -329
- guidellm-0.3.1.dist-info/RECORD +0 -62
- {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/WHEEL +0 -0
- {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/entry_points.txt +0 -0
- {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/licenses/LICENSE +0 -0
- {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/top_level.txt +0 -0
guidellm/benchmark/outputs/__init__.py

@@ -0,0 +1,24 @@
+"""
+Output formatters for benchmark results.
+
+Provides output formatter implementations that transform benchmark reports into
+various file formats including JSON, CSV, HTML, and console display. All formatters
+extend the base GenerativeBenchmarkerOutput interface, enabling dynamic resolution
+and flexible output configuration for benchmark result persistence and analysis.
+"""
+
+from __future__ import annotations
+
+from .console import GenerativeBenchmarkerConsole
+from .csv import GenerativeBenchmarkerCSV
+from .html import GenerativeBenchmarkerHTML
+from .output import GenerativeBenchmarkerOutput
+from .serialized import GenerativeBenchmarkerSerialized
+
+__all__ = [
+    "GenerativeBenchmarkerCSV",
+    "GenerativeBenchmarkerConsole",
+    "GenerativeBenchmarkerHTML",
+    "GenerativeBenchmarkerOutput",
+    "GenerativeBenchmarkerSerialized",
+]
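The docstring above describes formatters that extend `GenerativeBenchmarkerOutput` and can be resolved dynamically. Below is a minimal sketch of driving the console formatter (defined in full in the next hunk) directly. Only the class name and the async `finalize(report)` signature come from this diff; the bare constructor call, the `render` helper, and the `report` variable are assumptions for illustration.

```python
# Hedged sketch: exercise the console formatter exported above. Assumes the
# default constructor is sufficient and that a GenerativeBenchmarksReport
# instance named `report` already exists.
import asyncio

from guidellm.benchmark.outputs import GenerativeBenchmarkerConsole


async def render(report) -> str:
    output = GenerativeBenchmarkerConsole()
    # finalize() prints the run-summary, modality, request-count, latency,
    # and throughput tables, then returns a short status string.
    return await output.finalize(report)


# status = asyncio.run(render(report))
```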
guidellm/benchmark/outputs/console.py

@@ -0,0 +1,633 @@
+"""
+Console output formatter for generative benchmarker results.
+
+This module provides console-based output formatting for benchmark reports, organizing
+metrics into structured tables that display request statistics, latency measurements,
+throughput data, and modality-specific metrics (text, image, video, audio). It uses
+the Console utility to render multi-column tables with proper alignment and formatting
+for terminal display.
+"""
+
+from __future__ import annotations
+
+from collections import defaultdict
+from collections.abc import Sequence
+from dataclasses import dataclass, field
+from typing import Any, Literal, cast
+
+from pydantic import Field
+
+from guidellm.benchmark.outputs.output import GenerativeBenchmarkerOutput
+from guidellm.benchmark.schemas import GenerativeBenchmarksReport
+from guidellm.schemas import DistributionSummary, StatusDistributionSummary
+from guidellm.utils import Console, safe_format_number, safe_format_timestamp
+
+__all__ = ["GenerativeBenchmarkerConsole"]
+
+
+StatTypesAlias = Literal["mean", "median", "p95"]
+
+
+@dataclass
+class ConsoleTableColumn:
+    """
+    Data structure for a single console table column.
+
+    Stores column metadata (group, name, units, type) and accumulated values for
+    rendering formatted table output with proper type-specific formatting and precision.
+
+    :cvar group: Optional group header for related columns
+    :cvar name: Column name displayed in header
+    :cvar units: Optional unit label for numeric values
+    :cvar type_: Data type determining formatting (number, text, timestamp)
+    :cvar precision: Decimal precision for numeric formatting
+    :cvar values: Accumulated values for this column across rows
+    """
+
+    group: str | None = None
+    name: str | None = None
+    units: str | None = None
+    type_: Literal["number", "text", "timestamp"] = "number"
+    precision: int = 1
+    values: list[str | float | int | None] = field(default_factory=list)
+
+
+class ConsoleTableColumnsCollection(dict[str, ConsoleTableColumn]):
+    """
+    Collection manager for console table columns.
+
+    Extends dict to provide specialized methods for adding values and statistics to
+    columns, automatically creating columns as needed and organizing them by composite
+    keys for consistent table rendering.
+    """
+
+    def add_value(
+        self,
+        value: str | float | int | None,
+        group: str | None = None,
+        name: str | None = None,
+        units: str | None = None,
+        type_: Literal["number", "text", "timestamp"] = "number",
+        precision: int = 1,
+    ):
+        """
+        Add a value to a column, creating the column if it doesn't exist.
+
+        :param value: The value to add to the column
+        :param group: Optional group header for the column
+        :param name: Column name for display
+        :param units: Optional unit label
+        :param type_: Data type for formatting
+        :param precision: Decimal precision for numbers
+        """
+        key = f"{group}_{name}_{units}"
+
+        if key not in self:
+            self[key] = ConsoleTableColumn(
+                group=group, name=name, units=units, type_=type_, precision=precision
+            )
+
+        self[key].values.append(value)
+
+    def add_stats(
+        self,
+        stats: StatusDistributionSummary | None,
+        status: Literal["successful", "incomplete", "errored", "total"] = "successful",
+        group: str | None = None,
+        name: str | None = None,
+        precision: int = 1,
+        types: Sequence[StatTypesAlias] = ("median", "p95"),
+    ):
+        """
+        Add statistical summary columns (mean and p95) for a metric.
+
+        Creates paired mean/p95 columns automatically and appends values from the
+        specified status category of the distribution summary.
+
+        :param stats: Distribution summary containing status-specific statistics
+        :param status: Status category to extract statistics from
+        :param group: Optional group header for the columns
+        :param name: Column name for display
+        :param precision: Decimal precision for numbers
+        """
+        key = f"{group}_{name}"
+        status_stats: DistributionSummary | None = (
+            getattr(stats, status) if stats else None
+        )
+
+        for stat_type in types:
+            col_key = f"{key}_{stat_type}"
+            col_name, col_value = self._get_stat_type_name_val(stat_type, status_stats)
+            if col_key not in self:
+                self[col_key] = ConsoleTableColumn(
+                    group=group,
+                    name=name,
+                    units=col_name,
+                    precision=precision,
+                )
+            self[col_key].values.append(col_value)
+
+    def get_table_data(self) -> tuple[list[list[str]], list[list[str]]]:
+        """
+        Convert column collection to formatted table data.
+
+        Transforms stored columns and values into header and value lists suitable for
+        console table rendering, applying type-specific formatting.
+
+        :return: Tuple of (headers, values) where each is a list of column string lists
+        """
+        headers: list[list[str]] = []
+        values: list[list[str]] = []
+
+        for column in self.values():
+            headers.append([column.group or "", column.name or "", column.units or ""])
+            formatted_values: list[str] = []
+            for value in column.values:
+                if column.type_ == "text":
+                    formatted_values.append(str(value))
+                    continue
+
+                if not isinstance(value, float | int) and value is not None:
+                    raise ValueError(
+                        f"Expected numeric value for column '{column.name}', "
+                        f"got: {value}"
+                    )
+
+                if column.type_ == "timestamp":
+                    formatted_values.append(
+                        safe_format_timestamp(cast("float | None", value))
+                    )
+                elif column.type_ == "number":
+                    formatted_values.append(
+                        safe_format_number(
+                            value,
+                            precision=column.precision,
+                        )
+                    )
+                else:
+                    raise ValueError(f"Unsupported column type: {column.type_}")
+            values.append(formatted_values)
+
+        return headers, values
+
+    @classmethod
+    def _get_stat_type_name_val(
+        cls, stat_type: StatTypesAlias, stats: DistributionSummary | None
+    ) -> tuple[str, float | None]:
+        if stat_type == "mean":
+            return "Mean", stats.mean if stats else None
+        elif stat_type == "median":
+            return "Mdn", stats.median if stats else None
+        elif stat_type == "p95":
+            return "p95", stats.percentiles.p95 if stats else None
+        else:
+            raise ValueError(f"Unsupported stat type: {stat_type}")
+
+
+@GenerativeBenchmarkerOutput.register("console")
+class GenerativeBenchmarkerConsole(GenerativeBenchmarkerOutput):
+    """
+    Console output formatter for benchmark reports.
+
+    Renders benchmark results as formatted tables in the terminal, organizing metrics
+    by category (run summary, request counts, latency, throughput, modality-specific)
+    with proper alignment and type-specific formatting for readability.
+    """
+
+    @classmethod
+    def validated_kwargs(cls, *_args, **_kwargs) -> dict[str, Any]:
+        """
+        Validate and return keyword arguments for initialization.
+
+        :return: Empty dict as no additional kwargs are required
+        """
+        return {}
+
+    console: Console = Field(
+        default_factory=Console,
+        description="Console utility for rendering formatted tables",
+    )
+
+    async def finalize(self, report: GenerativeBenchmarksReport) -> str:
+        """
+        Print the complete benchmark report to the console.
+
+        Renders all metric tables including run summary, request counts, latency,
+        throughput, and modality-specific statistics to the console.
+
+        :param report: The completed benchmark report
+        :return: Status message indicating output location
+        """
+        self.print_run_summary_table(report)
+        self.print_text_table(report)
+        self.print_image_table(report)
+        self.print_video_table(report)
+        self.print_audio_table(report)
+        self.print_request_counts_table(report)
+        self.print_request_latency_table(report)
+        self.print_server_throughput_table(report)
+
+        return "printed to console"
+
+    def print_run_summary_table(self, report: GenerativeBenchmarksReport):
+        """
+        Print the run summary table with timing and token information.
+
+        :param report: The benchmark report containing run metadata
+        """
+        columns = ConsoleTableColumnsCollection()
+
+        for benchmark in report.benchmarks:
+            columns.add_value(
+                benchmark.config.strategy.type_,
+                group="Benchmark",
+                name="Strategy",
+                type_="text",
+            )
+            columns.add_value(
+                benchmark.start_time, group="Timings", name="Start", type_="timestamp"
+            )
+            columns.add_value(
+                benchmark.end_time, group="Timings", name="End", type_="timestamp"
+            )
+            columns.add_value(
+                benchmark.duration, group="Timings", name="Dur", units="Sec"
+            )
+            columns.add_value(
+                benchmark.warmup_duration, group="Timings", name="Warm", units="Sec"
+            )
+            columns.add_value(
+                benchmark.cooldown_duration, group="Timings", name="Cool", units="Sec"
+            )
+
+            for token_metrics, group in [
+                (benchmark.metrics.prompt_token_count, "Input Tokens"),
+                (benchmark.metrics.output_token_count, "Output Tokens"),
+            ]:
+                columns.add_value(
+                    token_metrics.successful.total_sum,
+                    group=group,
+                    name="Comp",
+                    units="Tot",
+                )
+                columns.add_value(
+                    token_metrics.incomplete.total_sum,
+                    group=group,
+                    name="Inc",
+                    units="Tot",
+                )
+                columns.add_value(
+                    token_metrics.errored.total_sum,
+                    group=group,
+                    name="Err",
+                    units="Tot",
+                )
+
+        headers, values = columns.get_table_data()
+        self.console.print("\n")
+        self.console.print_table(headers, values, title="Run Summary Info")
+
+    def print_text_table(self, report: GenerativeBenchmarksReport):
+        """
+        Print text-specific metrics table if any text data exists.
+
+        :param report: The benchmark report containing text metrics
+        """
+        self._print_modality_table(
+            report=report,
+            modality="text",
+            title="Text Metrics Statistics (Completed Requests)",
+            metric_groups=[
+                ("tokens", "Tokens"),
+                ("words", "Words"),
+                ("characters", "Characters"),
+            ],
+        )
+
+    def print_image_table(self, report: GenerativeBenchmarksReport):
+        """
+        Print image-specific metrics table if any image data exists.
+
+        :param report: The benchmark report containing image metrics
+        """
+        self._print_modality_table(
+            report=report,
+            modality="image",
+            title="Image Metrics Statistics (Completed Requests)",
+            metric_groups=[
+                ("tokens", "Tokens"),
+                ("images", "Images"),
+                ("pixels", "Pixels"),
+                ("bytes", "Bytes"),
+            ],
+        )
+
+    def print_video_table(self, report: GenerativeBenchmarksReport):
+        """
+        Print video-specific metrics table if any video data exists.
+
+        :param report: The benchmark report containing video metrics
+        """
+        self._print_modality_table(
+            report=report,
+            modality="video",
+            title="Video Metrics Statistics (Completed Requests)",
+            metric_groups=[
+                ("tokens", "Tokens"),
+                ("frames", "Frames"),
+                ("seconds", "Seconds"),
+                ("bytes", "Bytes"),
+            ],
+        )
+
+    def print_audio_table(self, report: GenerativeBenchmarksReport):
+        """
+        Print audio-specific metrics table if any audio data exists.
+
+        :param report: The benchmark report containing audio metrics
+        """
+        self._print_modality_table(
+            report=report,
+            modality="audio",
+            title="Audio Metrics Statistics (Completed Requests)",
+            metric_groups=[
+                ("tokens", "Tokens"),
+                ("samples", "Samples"),
+                ("seconds", "Seconds"),
+                ("bytes", "Bytes"),
+            ],
+        )
+
+    def print_request_counts_table(self, report: GenerativeBenchmarksReport):
+        """
+        Print request token count statistics table.
+
+        :param report: The benchmark report containing request count metrics
+        """
+        columns = ConsoleTableColumnsCollection()
+
+        for benchmark in report.benchmarks:
+            columns.add_value(
+                benchmark.config.strategy.type_,
+                group="Benchmark",
+                name="Strategy",
+                type_="text",
+            )
+            columns.add_stats(
+                benchmark.metrics.prompt_token_count,
+                group="Input Tok",
+                name="Per Req",
+            )
+            columns.add_stats(
+                benchmark.metrics.output_token_count,
+                group="Output Tok",
+                name="Per Req",
+            )
+            columns.add_stats(
+                benchmark.metrics.total_token_count,
+                group="Total Tok",
+                name="Per Req",
+            )
+            columns.add_stats(
+                benchmark.metrics.request_streaming_iterations_count,
+                group="Stream Iter",
+                name="Per Req",
+            )
+            columns.add_stats(
+                benchmark.metrics.output_tokens_per_iteration,
+                group="Output Tok",
+                name="Per Stream Iter",
+            )
+
+        headers, values = columns.get_table_data()
+        self.console.print("\n")
+        self.console.print_table(
+            headers,
+            values,
+            title="Request Token Statistics (Completed Requests)",
+        )
+
+    def print_request_latency_table(self, report: GenerativeBenchmarksReport):
+        """
+        Print request latency metrics table.
+
+        :param report: The benchmark report containing latency metrics
+        """
+        columns = ConsoleTableColumnsCollection()
+
+        for benchmark in report.benchmarks:
+            columns.add_value(
+                benchmark.config.strategy.type_,
+                group="Benchmark",
+                name="Strategy",
+                type_="text",
+            )
+            columns.add_stats(
+                benchmark.metrics.request_latency,
+                group="Request Latency",
+                name="Sec",
+            )
+            columns.add_stats(
+                benchmark.metrics.time_to_first_token_ms,
+                group="TTFT",
+                name="ms",
+            )
+            columns.add_stats(
+                benchmark.metrics.inter_token_latency_ms,
+                group="ITL",
+                name="ms",
+            )
+            columns.add_stats(
+                benchmark.metrics.time_per_output_token_ms,
+                group="TPOT",
+                name="ms",
+            )
+
+        headers, values = columns.get_table_data()
+        self.console.print("\n")
+        self.console.print_table(
+            headers,
+            values,
+            title="Request Latency Statistics (Completed Requests)",
+        )
+
+    def print_server_throughput_table(self, report: GenerativeBenchmarksReport):
+        """
+        Print server throughput metrics table.
+
+        :param report: The benchmark report containing throughput metrics
+        """
+        columns = ConsoleTableColumnsCollection()
+
+        for benchmark in report.benchmarks:
+            columns.add_value(
+                benchmark.config.strategy.type_,
+                group="Benchmark",
+                name="Strategy",
+                type_="text",
+            )
+            columns.add_stats(
+                benchmark.metrics.requests_per_second,
+                group="Requests",
+                name="Per Sec",
+                types=("median", "mean"),
+            )
+            columns.add_stats(
+                benchmark.metrics.request_concurrency,
+                group="Requests",
+                name="Concurrency",
+                types=("median", "mean"),
+            )
+            columns.add_stats(
+                benchmark.metrics.prompt_tokens_per_second,
+                group="Input Tokens",
+                name="Per Sec",
+                types=("median", "mean"),
+            )
+            columns.add_stats(
+                benchmark.metrics.output_tokens_per_second,
+                group="Output Tokens",
+                name="Per Sec",
+                types=("median", "mean"),
+            )
+            columns.add_stats(
+                benchmark.metrics.tokens_per_second,
+                group="Total Tokens",
+                name="Per Sec",
+                types=("median", "mean"),
+            )
+
+        headers, values = columns.get_table_data()
+        self.console.print("\n")
+        self.console.print_table(headers, values, title="Server Throughput Statistics")
+
+    def _print_modality_table(
+        self,
+        report: GenerativeBenchmarksReport,
+        modality: Literal["text", "image", "video", "audio"],
+        title: str,
+        metric_groups: list[tuple[str, str]],
+    ):
+        columns: dict[str, ConsoleTableColumnsCollection] = defaultdict(
+            ConsoleTableColumnsCollection
+        )
+
+        for benchmark in report.benchmarks:
+            columns["labels"].add_value(
+                benchmark.config.strategy.type_,
+                group="Benchmark",
+                name="Strategy",
+                type_="text",
+            )
+
+            modality_metrics = getattr(benchmark.metrics, modality)
+
+            for metric_attr, display_name in metric_groups:
+                metric_obj = getattr(modality_metrics, metric_attr, None)
+                input_stats: StatusDistributionSummary | None = (
+                    getattr(metric_obj, "input", None) if metric_obj else None
+                )
+                columns[f"{metric_attr}.input"].add_stats(
+                    input_stats,
+                    group=f"Input {display_name}",
+                    name="Per Request",
+                )
+                input_per_second_stats: StatusDistributionSummary | None = (
+                    getattr(metric_obj, "input_per_second", None)
+                    if metric_obj
+                    else None
+                )
+                columns[f"{metric_attr}.input"].add_stats(
+                    input_per_second_stats,
+                    group=f"Input {display_name}",
+                    name="Per Second",
+                    types=("median", "mean"),
+                )
+                output_stats: StatusDistributionSummary | None = (
+                    getattr(metric_obj, "output", None) if metric_obj else None
+                )
+                columns[f"{metric_attr}.output"].add_stats(
+                    output_stats,
+                    group=f"Output {display_name}",
+                    name="Per Request",
+                )
+                output_per_second_stats: StatusDistributionSummary | None = (
+                    getattr(metric_obj, "output_per_second", None)
+                    if metric_obj
+                    else None
+                )
+                columns[f"{metric_attr}.output"].add_stats(
+                    output_per_second_stats,
+                    group=f"Output {display_name}",
+                    name="Per Second",
+                    types=("median", "mean"),
+                )
+
+        self._print_inp_out_tables(
+            title=title,
+            labels=columns["labels"],
+            groups=[
+                (columns[f"{metric_attr}.input"], columns[f"{metric_attr}.output"])
+                for metric_attr, _ in metric_groups
+            ],
+        )
+
+    def _print_inp_out_tables(
+        self,
+        title: str,
+        labels: ConsoleTableColumnsCollection,
+        groups: list[
+            tuple[ConsoleTableColumnsCollection, ConsoleTableColumnsCollection]
+        ],
+    ):
+        input_headers, input_values = [], []
+        output_headers, output_values = [], []
+        input_has_data = False
+        output_has_data = False
+
+        for input_columns, output_columns in groups:
+            # Check if columns have any non-None values
+            type_input_has_data = any(
+                any(value is not None for value in column.values)
+                for column in input_columns.values()
+            )
+            type_output_has_data = any(
+                any(value is not None for value in column.values)
+                for column in output_columns.values()
+            )
+
+            if not (type_input_has_data or type_output_has_data):
+                continue
+
+            input_has_data = input_has_data or type_input_has_data
+            output_has_data = output_has_data or type_output_has_data
+
+            input_type_headers, input_type_columns = input_columns.get_table_data()
+            output_type_headers, output_type_columns = output_columns.get_table_data()
+
+            input_headers.extend(input_type_headers)
+            input_values.extend(input_type_columns)
+            output_headers.extend(output_type_headers)
+            output_values.extend(output_type_columns)
+
+        if not (input_has_data or output_has_data):
+            return
+
+        labels_headers, labels_values = labels.get_table_data()
+        header_cols_groups = []
+        value_cols_groups = []
+
+        if input_has_data:
+            header_cols_groups.append(labels_headers + input_headers)
+            value_cols_groups.append(labels_values + input_values)
+        if output_has_data:
+            header_cols_groups.append(labels_headers + output_headers)
+            value_cols_groups.append(labels_values + output_values)
+
+        if header_cols_groups and value_cols_groups:
+            self.console.print("\n")
+            self.console.print_tables(
+                header_cols_groups=header_cols_groups,
+                value_cols_groups=value_cols_groups,
+                title=title,
+            )
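For orientation, here is a small hypothetical sketch of how the `ConsoleTableColumnsCollection` defined in the hunk above assembles table data. The sample rows and the direct import of the class are illustrative assumptions; only the class, its `add_value`/`get_table_data` methods, and the formatting helpers are taken from this diff.

```python
# Hedged sketch: feed a couple of rows into ConsoleTableColumnsCollection and
# pull the rendered headers/values. The values are invented; numbers and
# timestamps are formatted via safe_format_number / safe_format_timestamp.
from guidellm.benchmark.outputs.console import ConsoleTableColumnsCollection

columns = ConsoleTableColumnsCollection()
for strategy, duration in [("synchronous", 12.34), ("throughput", 45.6)]:
    columns.add_value(strategy, group="Benchmark", name="Strategy", type_="text")
    columns.add_value(duration, group="Timings", name="Dur", units="Sec")

headers, values = columns.get_table_data()
# headers -> [["Benchmark", "Strategy", ""], ["Timings", "Dur", "Sec"]]
# values  -> one formatted string list per column, e.g. ["synchronous", "throughput"]
```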