guidellm 0.3.1__py3-none-any.whl → 0.6.0a5__py3-none-any.whl
This diff compares publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in their respective public registries.
- guidellm/__init__.py +5 -2
- guidellm/__main__.py +524 -255
- guidellm/backends/__init__.py +33 -0
- guidellm/backends/backend.py +109 -0
- guidellm/backends/openai.py +340 -0
- guidellm/backends/response_handlers.py +428 -0
- guidellm/benchmark/__init__.py +69 -39
- guidellm/benchmark/benchmarker.py +160 -316
- guidellm/benchmark/entrypoints.py +560 -127
- guidellm/benchmark/outputs/__init__.py +24 -0
- guidellm/benchmark/outputs/console.py +633 -0
- guidellm/benchmark/outputs/csv.py +721 -0
- guidellm/benchmark/outputs/html.py +473 -0
- guidellm/benchmark/outputs/output.py +169 -0
- guidellm/benchmark/outputs/serialized.py +69 -0
- guidellm/benchmark/profiles.py +718 -0
- guidellm/benchmark/progress.py +553 -556
- guidellm/benchmark/scenarios/__init__.py +40 -0
- guidellm/benchmark/scenarios/chat.json +6 -0
- guidellm/benchmark/scenarios/rag.json +6 -0
- guidellm/benchmark/schemas/__init__.py +66 -0
- guidellm/benchmark/schemas/base.py +402 -0
- guidellm/benchmark/schemas/generative/__init__.py +55 -0
- guidellm/benchmark/schemas/generative/accumulator.py +841 -0
- guidellm/benchmark/schemas/generative/benchmark.py +163 -0
- guidellm/benchmark/schemas/generative/entrypoints.py +381 -0
- guidellm/benchmark/schemas/generative/metrics.py +927 -0
- guidellm/benchmark/schemas/generative/report.py +158 -0
- guidellm/data/__init__.py +34 -4
- guidellm/data/builders.py +541 -0
- guidellm/data/collators.py +16 -0
- guidellm/data/config.py +120 -0
- guidellm/data/deserializers/__init__.py +49 -0
- guidellm/data/deserializers/deserializer.py +141 -0
- guidellm/data/deserializers/file.py +223 -0
- guidellm/data/deserializers/huggingface.py +94 -0
- guidellm/data/deserializers/memory.py +194 -0
- guidellm/data/deserializers/synthetic.py +246 -0
- guidellm/data/entrypoints.py +52 -0
- guidellm/data/loaders.py +190 -0
- guidellm/data/preprocessors/__init__.py +27 -0
- guidellm/data/preprocessors/formatters.py +410 -0
- guidellm/data/preprocessors/mappers.py +196 -0
- guidellm/data/preprocessors/preprocessor.py +30 -0
- guidellm/data/processor.py +29 -0
- guidellm/data/schemas.py +175 -0
- guidellm/data/utils/__init__.py +6 -0
- guidellm/data/utils/dataset.py +94 -0
- guidellm/extras/__init__.py +4 -0
- guidellm/extras/audio.py +220 -0
- guidellm/extras/vision.py +242 -0
- guidellm/logger.py +2 -2
- guidellm/mock_server/__init__.py +8 -0
- guidellm/mock_server/config.py +84 -0
- guidellm/mock_server/handlers/__init__.py +17 -0
- guidellm/mock_server/handlers/chat_completions.py +280 -0
- guidellm/mock_server/handlers/completions.py +280 -0
- guidellm/mock_server/handlers/tokenizer.py +142 -0
- guidellm/mock_server/models.py +510 -0
- guidellm/mock_server/server.py +238 -0
- guidellm/mock_server/utils.py +302 -0
- guidellm/scheduler/__init__.py +69 -26
- guidellm/scheduler/constraints/__init__.py +49 -0
- guidellm/scheduler/constraints/constraint.py +325 -0
- guidellm/scheduler/constraints/error.py +411 -0
- guidellm/scheduler/constraints/factory.py +182 -0
- guidellm/scheduler/constraints/request.py +312 -0
- guidellm/scheduler/constraints/saturation.py +722 -0
- guidellm/scheduler/environments.py +252 -0
- guidellm/scheduler/scheduler.py +137 -368
- guidellm/scheduler/schemas.py +358 -0
- guidellm/scheduler/strategies.py +617 -0
- guidellm/scheduler/worker.py +413 -419
- guidellm/scheduler/worker_group.py +712 -0
- guidellm/schemas/__init__.py +65 -0
- guidellm/schemas/base.py +417 -0
- guidellm/schemas/info.py +188 -0
- guidellm/schemas/request.py +235 -0
- guidellm/schemas/request_stats.py +349 -0
- guidellm/schemas/response.py +124 -0
- guidellm/schemas/statistics.py +1018 -0
- guidellm/{config.py → settings.py} +31 -24
- guidellm/utils/__init__.py +71 -8
- guidellm/utils/auto_importer.py +98 -0
- guidellm/utils/cli.py +132 -5
- guidellm/utils/console.py +566 -0
- guidellm/utils/encoding.py +778 -0
- guidellm/utils/functions.py +159 -0
- guidellm/utils/hf_datasets.py +1 -2
- guidellm/utils/hf_transformers.py +4 -4
- guidellm/utils/imports.py +9 -0
- guidellm/utils/messaging.py +1118 -0
- guidellm/utils/mixins.py +115 -0
- guidellm/utils/random.py +3 -4
- guidellm/utils/registry.py +220 -0
- guidellm/utils/singleton.py +133 -0
- guidellm/utils/synchronous.py +159 -0
- guidellm/utils/text.py +163 -50
- guidellm/utils/typing.py +41 -0
- guidellm/version.py +2 -2
- guidellm-0.6.0a5.dist-info/METADATA +364 -0
- guidellm-0.6.0a5.dist-info/RECORD +109 -0
- guidellm/backend/__init__.py +0 -23
- guidellm/backend/backend.py +0 -259
- guidellm/backend/openai.py +0 -708
- guidellm/backend/response.py +0 -136
- guidellm/benchmark/aggregator.py +0 -760
- guidellm/benchmark/benchmark.py +0 -837
- guidellm/benchmark/output.py +0 -997
- guidellm/benchmark/profile.py +0 -409
- guidellm/benchmark/scenario.py +0 -104
- guidellm/data/prideandprejudice.txt.gz +0 -0
- guidellm/dataset/__init__.py +0 -22
- guidellm/dataset/creator.py +0 -213
- guidellm/dataset/entrypoints.py +0 -42
- guidellm/dataset/file.py +0 -92
- guidellm/dataset/hf_datasets.py +0 -62
- guidellm/dataset/in_memory.py +0 -132
- guidellm/dataset/synthetic.py +0 -287
- guidellm/objects/__init__.py +0 -18
- guidellm/objects/pydantic.py +0 -89
- guidellm/objects/statistics.py +0 -953
- guidellm/preprocess/__init__.py +0 -3
- guidellm/preprocess/dataset.py +0 -374
- guidellm/presentation/__init__.py +0 -28
- guidellm/presentation/builder.py +0 -27
- guidellm/presentation/data_models.py +0 -232
- guidellm/presentation/injector.py +0 -66
- guidellm/request/__init__.py +0 -18
- guidellm/request/loader.py +0 -284
- guidellm/request/request.py +0 -79
- guidellm/request/types.py +0 -10
- guidellm/scheduler/queues.py +0 -25
- guidellm/scheduler/result.py +0 -155
- guidellm/scheduler/strategy.py +0 -495
- guidellm-0.3.1.dist-info/METADATA +0 -329
- guidellm-0.3.1.dist-info/RECORD +0 -62
- {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/WHEEL +0 -0
- {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/entry_points.txt +0 -0
- {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/licenses/LICENSE +0 -0
- {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/top_level.txt +0 -0
guidellm/benchmark/schemas/generative/metrics.py (new file, +927 lines)
@@ -0,0 +1,927 @@
"""
Metrics schemas for generative AI benchmark results and performance analysis.

This module defines comprehensive metric structures for tracking and analyzing
generative AI benchmark performance across multiple dimensions including request
statistics, token metrics, and domain-specific measurements for text, image, video,
and audio generation. It provides statistical summaries with distribution analysis
across successful, incomplete, and errored requests, along with scheduler-level
performance metrics for request processing and queueing behavior.
"""

from __future__ import annotations

from typing import Literal

from pydantic import Field

from guidellm.benchmark.schemas.generative.accumulator import (
    GenerativeBenchmarkAccumulator,
)
from guidellm.scheduler import SchedulerState
from guidellm.schemas import (
    GenerativeRequestStats,
    StandardBaseDict,
    StatusBreakdown,
    StatusDistributionSummary,
)

__all__ = [
    "GenerativeAudioMetricsSummary",
    "GenerativeImageMetricsSummary",
    "GenerativeMetrics",
    "GenerativeMetricsSummary",
    "GenerativeTextMetricsSummary",
    "GenerativeVideoMetricsSummary",
    "SchedulerMetrics",
    "StatusTypes",
    "TimedMetricTypeAlias",
]


TimedMetricTypeAlias = (
    tuple[float, float, int | float | None, int | float | None] | None
)
"""Timed metric tuple containing start_time, end_time, input_value, and output_value."""

StatusTypes = Literal["successful", "incomplete", "errored"]
"""Request status category for metric compilation."""

# Constants for tuple indexing
_TIMED_METRIC_START_TIME_INDEX = 0
_TIMED_METRIC_END_TIME_INDEX = 1
_TIMED_METRIC_INPUT_VALUE_INDEX = 2
_TIMED_METRIC_OUTPUT_VALUE_INDEX = 3


class SchedulerMetrics(StandardBaseDict):
    """
    Scheduler timing and performance statistics.

    Tracks overall benchmark timing, request counts by status, and detailed internal
    scheduler performance metrics including queue times, processing delays, and
    request execution statistics. Used to analyze scheduler efficiency and identify
    bottlenecks in request processing pipelines.
    """

    # Overall timings for the scheduler
    start_time: float = Field(
        description="Unix timestamp when the benchmark run started"
    )
    request_start_time: float = Field(
        description="Unix timestamp when first request was made"
    )
    measure_start_time: float = Field(
        description="Unix timestamp when measurement period started"
    )
    measure_end_time: float = Field(
        description="Unix timestamp when measurement period ended"
    )
    request_end_time: float = Field(
        description="Unix timestamp when last request completed"
    )
    end_time: float = Field(description="Unix timestamp when the benchmark run ended")

    # Request details tracked by the scheduler
    requests_made: StatusBreakdown[int, int, int, int] = Field(
        description="Request counts by status: successful, incomplete, errored, total"
    )

    # Scheduler internal performance timings
    queued_time_avg: float = Field(
        description="Avg time requests spent in the queue (seconds)"
    )
    resolve_start_delay_avg: float = Field(
        description="Avg delay before worker begins resolving req after dequeue (sec)"
    )
    resolve_targeted_start_delay_avg: float = Field(
        description="Avg delay to targeted resolve start time (seconds)"
    )
    request_start_delay_avg: float = Field(
        description="Avg delay before request starts after resolve (seconds)"
    )
    request_targeted_start_delay_avg: float = Field(
        description="Avg delay to targeted request start time (seconds)"
    )
    request_time_avg: float = Field(description="Avg request execution time (seconds)")
    resolve_end_delay_avg: float = Field(
        description="Avg delay after request completes before resolve ends (seconds)"
    )
    resolve_time_avg: float = Field(
        description="Avg total resolve time including request (seconds)"
    )
    finalized_delay_avg: float = Field(
        description="Avg delay from resolve end to request finalization (seconds)"
    )
    processed_delay_avg: float = Field(
        description="Avg delay from finalization to processing completion (seconds)"
    )

    @classmethod
    def compile(
        cls,
        accumulator: GenerativeBenchmarkAccumulator,
        scheduler_state: SchedulerState,
    ) -> SchedulerMetrics:
        """
        Compile scheduler metrics from accumulator and scheduler state.

        :param accumulator: Benchmark accumulator containing timing and metric data
        :param scheduler_state: Scheduler state with execution timing information
        :return: Compiled scheduler metrics with performance statistics
        """
        return SchedulerMetrics(
            # Overall timings for the scheduler
            start_time=scheduler_state.start_time,
            request_start_time=accumulator.timings.finalized_request_start,
            measure_start_time=accumulator.timings.finalized_measure_start,
            measure_end_time=accumulator.timings.finalized_measure_end,
            request_end_time=accumulator.timings.finalized_request_end,
            end_time=scheduler_state.end_time or -1.0,
            # Request details tracked by the scheduler
            requests_made=accumulator.scheduler_metrics.requests_made,
            # Scheduler internal performance timings
            queued_time_avg=accumulator.scheduler_metrics.queued_time.mean or -1.0,
            resolve_start_delay_avg=(
                accumulator.scheduler_metrics.resolve_start_delay.mean or -1.0
            ),
            resolve_targeted_start_delay_avg=(
                accumulator.scheduler_metrics.resolve_targeted_start_delay.mean or -1.0
            ),
            request_start_delay_avg=(
                accumulator.scheduler_metrics.request_start_delay.mean or -1.0
            ),
            request_targeted_start_delay_avg=(
                accumulator.scheduler_metrics.request_targeted_start_delay.mean or -1.0
            ),
            request_time_avg=accumulator.scheduler_metrics.request_time.mean or -1.0,
            resolve_end_delay_avg=(
                accumulator.scheduler_metrics.resolve_end_delay.mean or -1.0
            ),
            resolve_time_avg=accumulator.scheduler_metrics.resolve_time.mean or -1.0,
            finalized_delay_avg=(
                accumulator.scheduler_metrics.finalized_delay.mean or -1.0
            ),
            processed_delay_avg=(
                accumulator.scheduler_metrics.processed_delay.mean or -1.0
            ),
        )


class GenerativeMetricsSummary(StandardBaseDict):
    """
    Statistical summaries for input, output, and total metrics.

    Provides distribution summaries across successful, incomplete, and errored
    requests for absolute values, per-second rates, and concurrency levels.
    """

    input: StatusDistributionSummary | None = Field(
        description="Distribution of input metric values"
    )
    input_per_second: StatusDistributionSummary | None = Field(
        description="Distribution of input metric rates per second"
    )
    input_concurrency: StatusDistributionSummary | None = Field(
        description="Distribution of concurrent input metric values"
    )

    output: StatusDistributionSummary | None = Field(
        description="Distribution of output metric values"
    )
    output_per_second: StatusDistributionSummary | None = Field(
        description="Distribution of output metric rates per second"
    )
    output_concurrency: StatusDistributionSummary | None = Field(
        description="Distribution of concurrent output metric values"
    )

    total: StatusDistributionSummary | None = Field(
        description="Distribution of total metric values (input + output)"
    )
    total_per_second: StatusDistributionSummary | None = Field(
        description="Distribution of total metric rates per second"
    )
    total_concurrency: StatusDistributionSummary | None = Field(
        description="Distribution of concurrent total metric values"
    )

    @classmethod
    def compile(
        cls,
        property_name: str,
        successful: list[GenerativeRequestStats],
        incomplete: list[GenerativeRequestStats],
        errored: list[GenerativeRequestStats],
    ) -> GenerativeMetricsSummary | None:
        """
        Compile metrics summary from request statistics for a specific property.

        :param property_name: Name of the property to extract from request metrics
        :param successful: Successfully completed request statistics
        :param incomplete: Incomplete or cancelled request statistics
        :param errored: Failed request statistics
        :return: Compiled metrics summary or None if no data available
        """
        successful_metrics = cls.extract_property_metrics_for_summary(
            successful, property_name
        )
        incomplete_metrics = cls.extract_property_metrics_for_summary(
            incomplete, property_name
        )
        errored_metrics = cls.extract_property_metrics_for_summary(
            errored, property_name
        )

        return cls.compile_timed_metrics(
            successful=successful_metrics,
            incomplete=incomplete_metrics,
            errored=errored_metrics,
        )

    @classmethod
    def compile_timed_metrics(
        cls,
        successful: list[TimedMetricTypeAlias],
        incomplete: list[TimedMetricTypeAlias],
        errored: list[TimedMetricTypeAlias],
    ) -> GenerativeMetricsSummary | None:
        """
        Compile metrics summary from timed metric tuples.

        :param successful: Timed metrics from successful requests
        :param incomplete: Timed metrics from incomplete requests
        :param errored: Timed metrics from errored requests
        :return: Compiled metrics summary or None if no data available
        """

        def _compile_metric_distributions(
            metrics_by_status: dict[StatusTypes, list[TimedMetricTypeAlias]],
            value_index: int,
        ) -> tuple[
            StatusDistributionSummary | None,
            StatusDistributionSummary | None,
            StatusDistributionSummary | None,
            dict[StatusTypes, list[float]],
            dict[StatusTypes, list[tuple[float, float]]],
            dict[StatusTypes, list[tuple[float, float, float]]],
        ]:
            """Helper to compile value, rate, and concurrency distributions."""
            value_lists: dict[StatusTypes, list[float]] = {
                status: [
                    float(metric[value_index] or 0.0)
                    for metric in metrics
                    if metric is not None
                ]
                for status, metrics in metrics_by_status.items()
            }
            value_dist = StatusDistributionSummary.from_values(
                successful=value_lists["successful"],
                incomplete=value_lists["incomplete"],
                errored=value_lists["errored"],
            )

            if value_dist.total_sum == 0.0:
                return None, None, None, value_lists, {}, {}

            rate_lists: dict[StatusTypes, list[tuple[float, float]]] = {
                status: [
                    (  # type: ignore[misc]
                        metric[_TIMED_METRIC_END_TIME_INDEX],
                        float(metric[value_index] or 0.0),
                    )
                    for metric in metrics
                    if metric is not None
                ]
                for status, metrics in metrics_by_status.items()
            }
            rate_dist = StatusDistributionSummary.rate_distribution_from_timings(
                successful=rate_lists["successful"],
                incomplete=rate_lists["incomplete"],
                errored=rate_lists["errored"],
            )

            concurrency_lists: dict[StatusTypes, list[tuple[float, float, float]]] = {
                status: [
                    (  # type: ignore[misc]
                        metric[_TIMED_METRIC_START_TIME_INDEX],
                        metric[_TIMED_METRIC_END_TIME_INDEX],
                        float(metric[value_index] or 0.0),
                    )
                    for metric in metrics
                    if metric is not None
                ]
                for status, metrics in metrics_by_status.items()
            }
            concurrency_dist = (
                StatusDistributionSummary.concurrency_distribution_from_timings(
                    successful=concurrency_lists["successful"],
                    incomplete=concurrency_lists["incomplete"],
                    errored=concurrency_lists["errored"],
                )
            )

            return (
                value_dist,
                rate_dist,
                concurrency_dist,
                value_lists,
                rate_lists,
                concurrency_lists,
            )

        metrics_by_status: dict[StatusTypes, list[TimedMetricTypeAlias]] = {
            "successful": successful,
            "incomplete": incomplete,
            "errored": errored,
        }

        # Calculate input distributions
        (
            input_value_dist,
            input_rate_dist,
            input_concurrency_dist,
            input_value_lists,
            input_rate_lists,
            input_concurrency_lists,
        ) = _compile_metric_distributions(
            metrics_by_status, _TIMED_METRIC_INPUT_VALUE_INDEX
        )

        # Calculate output distributions
        (
            output_value_dist,
            output_rate_dist,
            output_concurrency_dist,
            output_value_lists,
            output_rate_lists,
            output_concurrency_lists,
        ) = _compile_metric_distributions(
            metrics_by_status, _TIMED_METRIC_OUTPUT_VALUE_INDEX
        )

        # Calculate total distributions if both input and output have data
        if input_value_dist is not None and output_value_dist is not None:
            total_value_dist = StatusDistributionSummary.from_values(
                successful=(
                    input_value_lists["successful"] + output_value_lists["successful"]
                ),
                incomplete=(
                    input_value_lists["incomplete"] + output_value_lists["incomplete"]
                ),
                errored=input_value_lists["errored"] + output_value_lists["errored"],
            )
            total_rate_dist = StatusDistributionSummary.rate_distribution_from_timings(
                successful=(
                    input_rate_lists["successful"] + output_rate_lists["successful"]
                ),
                incomplete=(
                    input_rate_lists["incomplete"] + output_rate_lists["incomplete"]
                ),
                errored=input_rate_lists["errored"] + output_rate_lists["errored"],
            )
            total_concurrency_dist = (
                StatusDistributionSummary.concurrency_distribution_from_timings(
                    successful=(
                        input_concurrency_lists["successful"]
                        + output_concurrency_lists["successful"]
                    ),
                    incomplete=(
                        input_concurrency_lists["incomplete"]
                        + output_concurrency_lists["incomplete"]
                    ),
                    errored=(
                        input_concurrency_lists["errored"]
                        + output_concurrency_lists["errored"]
                    ),
                )
            )
        else:
            total_value_dist = None
            total_rate_dist = None
            total_concurrency_dist = None

        return GenerativeMetricsSummary(
            input=input_value_dist,
            input_per_second=input_rate_dist,
            input_concurrency=input_concurrency_dist,
            output=output_value_dist,
            output_per_second=output_rate_dist,
            output_concurrency=output_concurrency_dist,
            total=total_value_dist,
            total_per_second=total_rate_dist,
            total_concurrency=total_concurrency_dist,
        )

    @classmethod
    def extract_property_metrics_for_summary(
        cls, stats_list: list[GenerativeRequestStats], property_name: str
    ) -> list[TimedMetricTypeAlias]:
        """
        Extract timed metrics for a specific property from request statistics.

        :param stats_list: List of request statistics to extract from
        :param property_name: Name of the property to extract from metrics
        :return: List of tuples containing
            (start_time, end_time, input_value, output_value)
        """
        return [
            (
                stats.request_start_time,
                stats.request_end_time,
                getattr(stats.input_metrics, property_name),
                getattr(stats.output_metrics, property_name),
            )
            for stats in stats_list
            if (
                stats.request_start_time
                and stats.request_end_time
                and (
                    getattr(stats.input_metrics, property_name) is not None
                    or getattr(stats.output_metrics, property_name) is not None
                )
            )
        ]


class GenerativeTextMetricsSummary(StandardBaseDict):
    """
    Text-specific metric summaries for generative benchmarks.

    Tracks token, word, and character-level metrics across input, output, and
    total usage for text generation workloads.
    """

    tokens: GenerativeMetricsSummary | None = Field(
        description="Token count metrics and distributions"
    )
    words: GenerativeMetricsSummary | None = Field(
        description="Word count metrics and distributions"
    )
    characters: GenerativeMetricsSummary | None = Field(
        description="Character count metrics and distributions"
    )

    @classmethod
    def compile(
        cls,
        successful: list[GenerativeRequestStats],
        incomplete: list[GenerativeRequestStats],
        errored: list[GenerativeRequestStats],
    ) -> GenerativeTextMetricsSummary:
        """
        Compile text metrics summary from request statistics.

        :param successful: Successfully completed request statistics
        :param incomplete: Incomplete/cancelled request statistics
        :param errored: Failed request statistics
        :return: Compiled text metrics summary
        """
        return GenerativeTextMetricsSummary(
            tokens=GenerativeMetricsSummary.compile(
                property_name="text_tokens",
                successful=successful,
                incomplete=incomplete,
                errored=errored,
            ),
            words=GenerativeMetricsSummary.compile(
                property_name="text_words",
                successful=successful,
                incomplete=incomplete,
                errored=errored,
            ),
            characters=GenerativeMetricsSummary.compile(
                property_name="text_characters",
                successful=successful,
                incomplete=incomplete,
                errored=errored,
            ),
        )


class GenerativeImageMetricsSummary(StandardBaseDict):
    """
    Image-specific metric summaries for generative benchmarks.

    Tracks token, image count, pixel, and byte-level metrics across input, output,
    and total usage for image generation workloads.
    """

    tokens: GenerativeMetricsSummary | None = Field(
        description="Image token count metrics and distributions"
    )
    images: GenerativeMetricsSummary | None = Field(
        description="Image count metrics and distributions"
    )
    pixels: GenerativeMetricsSummary | None = Field(
        description="Pixel count metrics and distributions"
    )
    bytes: GenerativeMetricsSummary | None = Field(
        description="Byte size metrics and distributions"
    )

    @classmethod
    def compile(
        cls,
        successful: list[GenerativeRequestStats],
        incomplete: list[GenerativeRequestStats],
        errored: list[GenerativeRequestStats],
    ) -> GenerativeImageMetricsSummary:
        """
        Compile image metrics summary from request statistics.

        :param successful: Successfully completed request statistics
        :param incomplete: Incomplete/cancelled request statistics
        :param errored: Failed request statistics
        :return: Compiled image metrics summary
        """
        return GenerativeImageMetricsSummary(
            tokens=GenerativeMetricsSummary.compile(
                property_name="image_tokens",
                successful=successful,
                incomplete=incomplete,
                errored=errored,
            ),
            images=GenerativeMetricsSummary.compile(
                property_name="image_count",
                successful=successful,
                incomplete=incomplete,
                errored=errored,
            ),
            pixels=GenerativeMetricsSummary.compile(
                property_name="image_pixels",
                successful=successful,
                incomplete=incomplete,
                errored=errored,
            ),
            bytes=GenerativeMetricsSummary.compile(
                property_name="image_bytes",
                successful=successful,
                incomplete=incomplete,
                errored=errored,
            ),
        )


class GenerativeVideoMetricsSummary(StandardBaseDict):
    """
    Video-specific metric summaries for generative benchmarks.

    Tracks token, frame count, duration, and byte-level metrics across input,
    output, and total usage for video generation workloads.
    """

    tokens: GenerativeMetricsSummary | None = Field(
        description="Video token count metrics and distributions"
    )
    frames: GenerativeMetricsSummary | None = Field(
        description="Frame count metrics and distributions"
    )
    seconds: GenerativeMetricsSummary | None = Field(
        description="Duration metrics in seconds and distributions"
    )
    bytes: GenerativeMetricsSummary | None = Field(
        description="Byte size metrics and distributions"
    )

    @classmethod
    def compile(
        cls,
        successful: list[GenerativeRequestStats],
        incomplete: list[GenerativeRequestStats],
        errored: list[GenerativeRequestStats],
    ) -> GenerativeVideoMetricsSummary:
        """
        Compile video metrics summary from request statistics.

        :param successful: Successfully completed request statistics
        :param incomplete: Incomplete/cancelled request statistics
        :param errored: Failed request statistics
        :return: Compiled video metrics summary
        """
        return GenerativeVideoMetricsSummary(
            tokens=GenerativeMetricsSummary.compile(
                property_name="video_tokens",
                successful=successful,
                incomplete=incomplete,
                errored=errored,
            ),
            frames=GenerativeMetricsSummary.compile(
                property_name="video_frames",
                successful=successful,
                incomplete=incomplete,
                errored=errored,
            ),
            seconds=GenerativeMetricsSummary.compile(
                property_name="video_seconds",
                successful=successful,
                incomplete=incomplete,
                errored=errored,
            ),
            bytes=GenerativeMetricsSummary.compile(
                property_name="video_bytes",
                successful=successful,
                incomplete=incomplete,
                errored=errored,
            ),
        )


class GenerativeAudioMetricsSummary(StandardBaseDict):
    """
    Audio-specific metric summaries for generative benchmarks.

    Tracks token, sample count, duration, and byte-level metrics across input,
    output, and total usage for audio generation workloads.
    """

    tokens: GenerativeMetricsSummary | None = Field(
        description="Audio token count metrics and distributions"
    )
    samples: GenerativeMetricsSummary | None = Field(
        description="Sample count metrics and distributions"
    )
    seconds: GenerativeMetricsSummary | None = Field(
        description="Duration metrics in seconds and distributions"
    )
    bytes: GenerativeMetricsSummary | None = Field(
        description="Byte size metrics and distributions"
    )

    @classmethod
    def compile(
        cls,
        successful: list[GenerativeRequestStats],
        incomplete: list[GenerativeRequestStats],
        errored: list[GenerativeRequestStats],
    ) -> GenerativeAudioMetricsSummary:
        """
        Compile audio metrics summary from request statistics.

        :param successful: Successfully completed request statistics
        :param incomplete: Incomplete/cancelled request statistics
        :param errored: Failed request statistics
        :return: Compiled audio metrics summary
        """
        return GenerativeAudioMetricsSummary(
            tokens=GenerativeMetricsSummary.compile(
                property_name="audio_tokens",
                successful=successful,
                incomplete=incomplete,
                errored=errored,
            ),
            samples=GenerativeMetricsSummary.compile(
                property_name="audio_samples",
                successful=successful,
                incomplete=incomplete,
                errored=errored,
            ),
            seconds=GenerativeMetricsSummary.compile(
                property_name="audio_seconds",
                successful=successful,
                incomplete=incomplete,
                errored=errored,
            ),
            bytes=GenerativeMetricsSummary.compile(
                property_name="audio_bytes",
                successful=successful,
                incomplete=incomplete,
                errored=errored,
            ),
        )


class GenerativeMetrics(StandardBaseDict):
    """
    Comprehensive metrics for generative AI benchmarks.

    Aggregates request statistics, token metrics, timing distributions, and
    domain-specific measurements across text, image, video, and audio modalities.
    Provides detailed statistical summaries including distribution analysis for
    throughput, latency, concurrency, and resource utilization metrics across
    successful, incomplete, and errored requests.
    """

    # Request stats
    request_totals: StatusBreakdown[int, int, int, int] = Field(
        description="Request counts by status: successful, incomplete, errored, total"
    )
    requests_per_second: StatusDistributionSummary = Field(
        description="Distribution of requests per second across benchmark execution"
    )
    request_concurrency: StatusDistributionSummary = Field(
        description="Distribution of concurrent request counts during execution"
    )
    request_latency: StatusDistributionSummary = Field(
        description="Distribution of request latencies for completed requests"
    )
    request_streaming_iterations_count: StatusDistributionSummary = Field(
        description="Distribution of stream iterations for completed requests"
    )

    # General token stats
    prompt_token_count: StatusDistributionSummary = Field(
        description="Distribution of prompt token counts by request status"
    )
    output_token_count: StatusDistributionSummary = Field(
        description="Distribution of output token counts by request status"
    )
    total_token_count: StatusDistributionSummary = Field(
        description="Distribution of total token counts by request status"
    )
    time_to_first_token_ms: StatusDistributionSummary = Field(
        description="Distribution of first token latencies in milliseconds"
    )
    time_per_output_token_ms: StatusDistributionSummary = Field(
        description="Distribution of average time per output token in milliseconds"
    )
    inter_token_latency_ms: StatusDistributionSummary = Field(
        description="Distribution of inter-token latencies in milliseconds"
    )
    prompt_tokens_per_second: StatusDistributionSummary = Field(
        description="Distribution of prompt token processing rates"
    )
    output_tokens_per_second: StatusDistributionSummary = Field(
        description="Distribution of output token generation rates"
    )
    tokens_per_second: StatusDistributionSummary = Field(
        description="Distribution of total token throughput including prompt and output"
    )
    output_tokens_per_iteration: StatusDistributionSummary = Field(
        description="Distribution of output tokens generated per streaming iteration"
    )
    iter_tokens_per_iteration: StatusDistributionSummary = Field(
        description=(
            "Distribution of output tokens (without first) generated per "
            "streaming iteration"
        )
    )

    # Domain specific stats
    text: GenerativeTextMetricsSummary = Field(
        description="Text-specific metrics for tokens, words, and characters"
    )
    image: GenerativeImageMetricsSummary = Field(
        description="Image-specific metrics for tokens, images, pixels, and bytes"
    )
    video: GenerativeVideoMetricsSummary = Field(
        description="Video-specific metrics for tokens, frames, duration, and bytes"
    )
    audio: GenerativeAudioMetricsSummary = Field(
        description="Audio-specific metrics for tokens, samples, duration, and bytes"
    )

    @classmethod
    def compile(cls, accumulator: GenerativeBenchmarkAccumulator) -> GenerativeMetrics:
        """
        Compile comprehensive generative metrics from benchmark accumulator.

        :param accumulator: Benchmark accumulator with completed request statistics
        :return: Compiled generative metrics with all distributions and summaries
        :raises ValueError: If measure_start and measure_end/request_end are not set
        """
        start_time = accumulator.timings.finalized_measure_start
        end_time = accumulator.timings.finalized_measure_end

        if start_time == -1.0 or end_time == -1.0:
            raise ValueError(
                "Cannot compile GenerativeMetrics: "
                "No measurement start or end times available."
            )

        successful = accumulator.completed.get_within_range(start_time, end_time)
        incomplete = accumulator.incomplete.get_within_range(start_time, end_time)
        errored = accumulator.errored.get_within_range(start_time, end_time)

        return GenerativeMetrics(
            # Request stats
            request_totals=StatusBreakdown(
                successful=len(successful),
                incomplete=len(incomplete),
                errored=len(errored),
                total=(len(successful) + len(incomplete) + len(errored)),
            ),
            requests_per_second=StatusDistributionSummary.rate_distribution_from_timings_function(
                function=lambda req: req.request_end_time,
                successful=successful,
                incomplete=incomplete,
                errored=errored,
                start_time=start_time,
                end_time=end_time,
            ),
            request_concurrency=StatusDistributionSummary.concurrency_distribution_from_timings_function(
                function=(
                    lambda req: (req.request_start_time, req.request_end_time)
                    if req.request_start_time is not None
                    and req.request_end_time is not None
                    else None
                ),
                successful=successful,
                incomplete=incomplete,
                errored=errored,
                start_time=start_time,
                end_time=end_time,
            ),
            request_latency=StatusDistributionSummary.from_values_function(
                function=lambda req: req.request_latency or 0.0,
                successful=successful,
                incomplete=incomplete,
                errored=errored,
            ),
            request_streaming_iterations_count=StatusDistributionSummary.from_values_function(
                function=lambda req: req.info.timings.request_iterations or 0.0,
                successful=successful,
                incomplete=incomplete,
                errored=errored,
            ),
            # General token stats
            prompt_token_count=StatusDistributionSummary.from_values_function(
                function=lambda req: req.prompt_tokens or 0.0,
                successful=successful,
                incomplete=incomplete,
                errored=errored,
            ),
            output_token_count=StatusDistributionSummary.from_values_function(
                function=lambda req: req.output_tokens or 0.0,
                successful=successful,
                incomplete=incomplete,
                errored=errored,
            ),
            total_token_count=StatusDistributionSummary.from_values_function(
                function=lambda req: req.total_tokens or 0.0,
                successful=successful,
                incomplete=incomplete,
                errored=errored,
            ),
            time_to_first_token_ms=StatusDistributionSummary.from_values_function(
                function=lambda req: req.time_to_first_token_ms or 0.0,
                successful=successful,
                incomplete=incomplete,
                errored=errored,
            ),
            time_per_output_token_ms=StatusDistributionSummary.from_values_function(
                function=lambda req: (
                    req.time_per_output_token_ms or 0.0,
                    req.output_tokens or 0.0,
                ),
                successful=successful,
                incomplete=incomplete,
                errored=errored,
            ),
            inter_token_latency_ms=StatusDistributionSummary.from_values_function(
                function=lambda req: (
                    req.inter_token_latency_ms or 0.0,
                    (req.output_tokens or 1.0) - 1.0,
                ),
                successful=successful,
                incomplete=incomplete,
                errored=errored,
            ),
            prompt_tokens_per_second=StatusDistributionSummary.rate_distribution_from_timings_function(
                function=lambda req: req.prompt_tokens_timing,
                successful=successful,
                incomplete=incomplete,
                errored=errored,
            ),
            output_tokens_per_second=StatusDistributionSummary.rate_distribution_from_timings_function(
                function=lambda req: req.output_tokens_timings,
                successful=successful,
                incomplete=incomplete,
                errored=errored,
            ),
            tokens_per_second=StatusDistributionSummary.rate_distribution_from_timings_function(
                function=lambda req: req.total_tokens_timings,
                successful=successful,
                incomplete=incomplete,
                errored=errored,
            ),
            output_tokens_per_iteration=StatusDistributionSummary.from_values_function(
                function=lambda req: [
                    tokens for (_timing, tokens) in req.output_tokens_timings
                ],
                successful=successful,
                incomplete=incomplete,
                errored=errored,
            ),
            iter_tokens_per_iteration=StatusDistributionSummary.from_values_function(
                function=lambda req: [
                    tokens for (_timing, tokens) in req.iter_tokens_timings
                ],
                successful=successful,
                incomplete=incomplete,
                errored=errored,
            ),
            # Domain-specific stats
            text=GenerativeTextMetricsSummary.compile(
                successful=successful, incomplete=incomplete, errored=errored
            ),
            image=GenerativeImageMetricsSummary.compile(
                successful=successful, incomplete=incomplete, errored=errored
            ),
            video=GenerativeVideoMetricsSummary.compile(
                successful=successful, incomplete=incomplete, errored=errored
            ),
            audio=GenerativeAudioMetricsSummary.compile(
                successful=successful, incomplete=incomplete, errored=errored
            ),
        )
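
For readers skimming the new schema module above, the following is a minimal, hypothetical sketch (not part of the package) of how the new GenerativeMetricsSummary.compile_timed_metrics classmethod could be exercised directly with hand-built timed-metric tuples. It assumes guidellm 0.6.0a5 is installed and that the StatusDistributionSummary helpers referenced in the module behave as shown; the sample values below are invented for illustration only.

# Hypothetical usage sketch; the tuple layout (start_time, end_time,
# input_value, output_value) follows TimedMetricTypeAlias defined above.
from guidellm.benchmark.schemas.generative.metrics import GenerativeMetricsSummary

# Two successful requests: 100/200 input units and 40/60 output units,
# each spanning roughly one second (Unix timestamps).
successful = [
    (1000.0, 1001.0, 100, 40),
    (1001.0, 1002.2, 200, 60),
]

summary = GenerativeMetricsSummary.compile_timed_metrics(
    successful=successful,
    incomplete=[],  # cancelled or unfinished requests
    errored=[],     # failed requests
)

if summary is not None:
    # Each populated field is a StatusDistributionSummary broken down by status.
    print(summary.input, summary.output_per_second, summary.total_concurrency)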