guidellm 0.3.1__py3-none-any.whl → 0.6.0a5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- guidellm/__init__.py +5 -2
- guidellm/__main__.py +524 -255
- guidellm/backends/__init__.py +33 -0
- guidellm/backends/backend.py +109 -0
- guidellm/backends/openai.py +340 -0
- guidellm/backends/response_handlers.py +428 -0
- guidellm/benchmark/__init__.py +69 -39
- guidellm/benchmark/benchmarker.py +160 -316
- guidellm/benchmark/entrypoints.py +560 -127
- guidellm/benchmark/outputs/__init__.py +24 -0
- guidellm/benchmark/outputs/console.py +633 -0
- guidellm/benchmark/outputs/csv.py +721 -0
- guidellm/benchmark/outputs/html.py +473 -0
- guidellm/benchmark/outputs/output.py +169 -0
- guidellm/benchmark/outputs/serialized.py +69 -0
- guidellm/benchmark/profiles.py +718 -0
- guidellm/benchmark/progress.py +553 -556
- guidellm/benchmark/scenarios/__init__.py +40 -0
- guidellm/benchmark/scenarios/chat.json +6 -0
- guidellm/benchmark/scenarios/rag.json +6 -0
- guidellm/benchmark/schemas/__init__.py +66 -0
- guidellm/benchmark/schemas/base.py +402 -0
- guidellm/benchmark/schemas/generative/__init__.py +55 -0
- guidellm/benchmark/schemas/generative/accumulator.py +841 -0
- guidellm/benchmark/schemas/generative/benchmark.py +163 -0
- guidellm/benchmark/schemas/generative/entrypoints.py +381 -0
- guidellm/benchmark/schemas/generative/metrics.py +927 -0
- guidellm/benchmark/schemas/generative/report.py +158 -0
- guidellm/data/__init__.py +34 -4
- guidellm/data/builders.py +541 -0
- guidellm/data/collators.py +16 -0
- guidellm/data/config.py +120 -0
- guidellm/data/deserializers/__init__.py +49 -0
- guidellm/data/deserializers/deserializer.py +141 -0
- guidellm/data/deserializers/file.py +223 -0
- guidellm/data/deserializers/huggingface.py +94 -0
- guidellm/data/deserializers/memory.py +194 -0
- guidellm/data/deserializers/synthetic.py +246 -0
- guidellm/data/entrypoints.py +52 -0
- guidellm/data/loaders.py +190 -0
- guidellm/data/preprocessors/__init__.py +27 -0
- guidellm/data/preprocessors/formatters.py +410 -0
- guidellm/data/preprocessors/mappers.py +196 -0
- guidellm/data/preprocessors/preprocessor.py +30 -0
- guidellm/data/processor.py +29 -0
- guidellm/data/schemas.py +175 -0
- guidellm/data/utils/__init__.py +6 -0
- guidellm/data/utils/dataset.py +94 -0
- guidellm/extras/__init__.py +4 -0
- guidellm/extras/audio.py +220 -0
- guidellm/extras/vision.py +242 -0
- guidellm/logger.py +2 -2
- guidellm/mock_server/__init__.py +8 -0
- guidellm/mock_server/config.py +84 -0
- guidellm/mock_server/handlers/__init__.py +17 -0
- guidellm/mock_server/handlers/chat_completions.py +280 -0
- guidellm/mock_server/handlers/completions.py +280 -0
- guidellm/mock_server/handlers/tokenizer.py +142 -0
- guidellm/mock_server/models.py +510 -0
- guidellm/mock_server/server.py +238 -0
- guidellm/mock_server/utils.py +302 -0
- guidellm/scheduler/__init__.py +69 -26
- guidellm/scheduler/constraints/__init__.py +49 -0
- guidellm/scheduler/constraints/constraint.py +325 -0
- guidellm/scheduler/constraints/error.py +411 -0
- guidellm/scheduler/constraints/factory.py +182 -0
- guidellm/scheduler/constraints/request.py +312 -0
- guidellm/scheduler/constraints/saturation.py +722 -0
- guidellm/scheduler/environments.py +252 -0
- guidellm/scheduler/scheduler.py +137 -368
- guidellm/scheduler/schemas.py +358 -0
- guidellm/scheduler/strategies.py +617 -0
- guidellm/scheduler/worker.py +413 -419
- guidellm/scheduler/worker_group.py +712 -0
- guidellm/schemas/__init__.py +65 -0
- guidellm/schemas/base.py +417 -0
- guidellm/schemas/info.py +188 -0
- guidellm/schemas/request.py +235 -0
- guidellm/schemas/request_stats.py +349 -0
- guidellm/schemas/response.py +124 -0
- guidellm/schemas/statistics.py +1018 -0
- guidellm/{config.py → settings.py} +31 -24
- guidellm/utils/__init__.py +71 -8
- guidellm/utils/auto_importer.py +98 -0
- guidellm/utils/cli.py +132 -5
- guidellm/utils/console.py +566 -0
- guidellm/utils/encoding.py +778 -0
- guidellm/utils/functions.py +159 -0
- guidellm/utils/hf_datasets.py +1 -2
- guidellm/utils/hf_transformers.py +4 -4
- guidellm/utils/imports.py +9 -0
- guidellm/utils/messaging.py +1118 -0
- guidellm/utils/mixins.py +115 -0
- guidellm/utils/random.py +3 -4
- guidellm/utils/registry.py +220 -0
- guidellm/utils/singleton.py +133 -0
- guidellm/utils/synchronous.py +159 -0
- guidellm/utils/text.py +163 -50
- guidellm/utils/typing.py +41 -0
- guidellm/version.py +2 -2
- guidellm-0.6.0a5.dist-info/METADATA +364 -0
- guidellm-0.6.0a5.dist-info/RECORD +109 -0
- guidellm/backend/__init__.py +0 -23
- guidellm/backend/backend.py +0 -259
- guidellm/backend/openai.py +0 -708
- guidellm/backend/response.py +0 -136
- guidellm/benchmark/aggregator.py +0 -760
- guidellm/benchmark/benchmark.py +0 -837
- guidellm/benchmark/output.py +0 -997
- guidellm/benchmark/profile.py +0 -409
- guidellm/benchmark/scenario.py +0 -104
- guidellm/data/prideandprejudice.txt.gz +0 -0
- guidellm/dataset/__init__.py +0 -22
- guidellm/dataset/creator.py +0 -213
- guidellm/dataset/entrypoints.py +0 -42
- guidellm/dataset/file.py +0 -92
- guidellm/dataset/hf_datasets.py +0 -62
- guidellm/dataset/in_memory.py +0 -132
- guidellm/dataset/synthetic.py +0 -287
- guidellm/objects/__init__.py +0 -18
- guidellm/objects/pydantic.py +0 -89
- guidellm/objects/statistics.py +0 -953
- guidellm/preprocess/__init__.py +0 -3
- guidellm/preprocess/dataset.py +0 -374
- guidellm/presentation/__init__.py +0 -28
- guidellm/presentation/builder.py +0 -27
- guidellm/presentation/data_models.py +0 -232
- guidellm/presentation/injector.py +0 -66
- guidellm/request/__init__.py +0 -18
- guidellm/request/loader.py +0 -284
- guidellm/request/request.py +0 -79
- guidellm/request/types.py +0 -10
- guidellm/scheduler/queues.py +0 -25
- guidellm/scheduler/result.py +0 -155
- guidellm/scheduler/strategy.py +0 -495
- guidellm-0.3.1.dist-info/METADATA +0 -329
- guidellm-0.3.1.dist-info/RECORD +0 -62
- {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/WHEEL +0 -0
- {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/entry_points.txt +0 -0
- {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/licenses/LICENSE +0 -0
- {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/top_level.txt +0 -0
|
@@ -1,334 +1,178 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
Optional,
|
|
11
|
-
Union,
|
|
12
|
-
)
|
|
1
|
+
"""
|
|
2
|
+
Benchmark execution orchestration and lifecycle management.
|
|
3
|
+
|
|
4
|
+
Provides the core benchmarking engine that coordinates request scheduling,
|
|
5
|
+
data aggregation, and result compilation across execution strategies and
|
|
6
|
+
environments. The Benchmarker manages the complete benchmark lifecycle from
|
|
7
|
+
request submission through result compilation while implementing thread-safe
|
|
8
|
+
singleton operations for consistent state management across concurrent workflows.
|
|
9
|
+
"""
|
|
13
10
|
|
|
14
|
-
from
|
|
15
|
-
from transformers import PreTrainedTokenizerBase # type: ignore # noqa: PGH003
|
|
11
|
+
from __future__ import annotations
|
|
16
12
|
|
|
17
|
-
|
|
18
|
-
from
|
|
19
|
-
|
|
13
|
+
import uuid
|
|
14
|
+
from abc import ABC
|
|
15
|
+
from collections.abc import AsyncIterator, Iterable
|
|
16
|
+
from typing import Generic
|
|
17
|
+
|
|
18
|
+
from guidellm.benchmark.profiles import Profile
|
|
19
|
+
from guidellm.benchmark.progress import BenchmarkerProgress
|
|
20
|
+
from guidellm.benchmark.schemas import (
|
|
21
|
+
BenchmarkAccumulatorT,
|
|
22
|
+
BenchmarkConfig,
|
|
20
23
|
BenchmarkT,
|
|
21
|
-
GenerativeBenchmarkAggregator,
|
|
22
24
|
)
|
|
23
|
-
from guidellm.benchmark.
|
|
24
|
-
from guidellm.
|
|
25
|
-
from guidellm.
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
25
|
+
from guidellm.benchmark.schemas.base import TransientPhaseConfig
|
|
26
|
+
from guidellm.logger import logger
|
|
27
|
+
from guidellm.scheduler import (
|
|
28
|
+
BackendInterface,
|
|
29
|
+
Constraint,
|
|
30
|
+
Environment,
|
|
31
|
+
MultiTurnRequestT,
|
|
30
32
|
RequestT,
|
|
31
33
|
ResponseT,
|
|
32
|
-
)
|
|
33
|
-
from guidellm.scheduler import (
|
|
34
|
-
GenerativeRequestsWorker,
|
|
35
|
-
RequestsWorker,
|
|
36
34
|
Scheduler,
|
|
37
|
-
SchedulerRequestResult,
|
|
38
35
|
SchedulingStrategy,
|
|
39
36
|
)
|
|
37
|
+
from guidellm.utils import ThreadSafeSingletonMixin
|
|
38
|
+
from guidellm.utils.mixins import InfoMixin
|
|
40
39
|
|
|
41
|
-
__all__ = ["Benchmarker"
|
|
40
|
+
__all__ = ["Benchmarker"]
|
|
42
41
|
|
|
43
42
|
|
|
44
|
-
class
|
|
45
|
-
|
|
43
|
+
class Benchmarker(
|
|
44
|
+
Generic[BenchmarkT, RequestT, ResponseT],
|
|
45
|
+
ABC,
|
|
46
|
+
ThreadSafeSingletonMixin,
|
|
46
47
|
):
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
"run_complete",
|
|
50
|
-
"scheduler_start",
|
|
51
|
-
"scheduler_update",
|
|
52
|
-
"scheduler_complete",
|
|
53
|
-
"benchmark_compiled",
|
|
54
|
-
]
|
|
55
|
-
start_time: float
|
|
56
|
-
end_number: int
|
|
57
|
-
profile: Profile
|
|
58
|
-
current_index: int
|
|
59
|
-
current_strategy: Optional[SchedulingStrategy] = None
|
|
60
|
-
current_aggregator: Optional[AggregatorT] = None
|
|
61
|
-
current_benchmark: Optional[BenchmarkT] = None
|
|
62
|
-
current_result: Optional[SchedulerRequestResult[RequestT, ResponseT]] = None
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
class BenchmarkerStrategyLimits(StandardBaseModel):
|
|
66
|
-
requests_loader_size: Optional[int] = Field(
|
|
67
|
-
description="Size of the request loader.",
|
|
68
|
-
)
|
|
69
|
-
max_number_per_strategy: Optional[int] = Field(
|
|
70
|
-
description="Maximum number of requests to process per strategy.",
|
|
71
|
-
ge=0,
|
|
72
|
-
)
|
|
73
|
-
max_duration_per_strategy: Optional[float] = Field(
|
|
74
|
-
description="Maximum duration (in seconds) to process requests per strategy.",
|
|
75
|
-
ge=0,
|
|
76
|
-
)
|
|
77
|
-
warmup_percent_per_strategy: Optional[float] = Field(
|
|
78
|
-
description="Percentage of requests to use for warmup.",
|
|
79
|
-
ge=0,
|
|
80
|
-
le=1,
|
|
81
|
-
)
|
|
82
|
-
cooldown_percent_per_strategy: Optional[float] = Field(
|
|
83
|
-
description="Percentage of requests to use for cooldown.",
|
|
84
|
-
ge=0,
|
|
85
|
-
le=1,
|
|
86
|
-
)
|
|
87
|
-
|
|
88
|
-
@property
|
|
89
|
-
def max_number(self) -> Optional[int]:
|
|
90
|
-
if self.max_number_per_strategy is not None:
|
|
91
|
-
return self.max_number_per_strategy
|
|
92
|
-
|
|
93
|
-
if self.requests_loader_size is not None:
|
|
94
|
-
return self.requests_loader_size
|
|
95
|
-
|
|
96
|
-
return None
|
|
97
|
-
|
|
98
|
-
@property
|
|
99
|
-
def max_duration(self) -> Optional[float]:
|
|
100
|
-
return self.max_duration_per_strategy
|
|
101
|
-
|
|
102
|
-
@property
|
|
103
|
-
def warmup_number(self) -> Optional[int]:
|
|
104
|
-
if self.warmup_percent_per_strategy is None or self.max_number is None:
|
|
105
|
-
return None
|
|
106
|
-
|
|
107
|
-
return int(self.warmup_percent_per_strategy * self.max_number)
|
|
108
|
-
|
|
109
|
-
@property
|
|
110
|
-
def warmup_duration(self) -> Optional[float]:
|
|
111
|
-
if self.warmup_percent_per_strategy is None or self.max_duration is None:
|
|
112
|
-
return None
|
|
113
|
-
|
|
114
|
-
return self.warmup_percent_per_strategy * self.max_duration
|
|
115
|
-
|
|
116
|
-
@property
|
|
117
|
-
def cooldown_number(self) -> Optional[int]:
|
|
118
|
-
if self.cooldown_percent_per_strategy is None or self.max_number is None:
|
|
119
|
-
return None
|
|
120
|
-
|
|
121
|
-
return int(self.cooldown_percent_per_strategy * self.max_number)
|
|
122
|
-
|
|
123
|
-
@property
|
|
124
|
-
def cooldown_duration(self) -> Optional[float]:
|
|
125
|
-
if self.cooldown_percent_per_strategy is None or self.max_duration is None:
|
|
126
|
-
return None
|
|
127
|
-
|
|
128
|
-
return self.cooldown_percent_per_strategy * self.max_duration
|
|
48
|
+
"""
|
|
49
|
+
Orchestrates benchmark execution across scheduling strategies.
|
|
129
50
|
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
request_loader: Iterable[RequestT],
|
|
136
|
-
requests_loader_description: RequestLoaderDescription,
|
|
137
|
-
benchmark_save_extras: Optional[dict[str, Any]] = None,
|
|
138
|
-
):
|
|
139
|
-
self.worker = worker
|
|
140
|
-
self.scheduler: Scheduler[RequestT, ResponseT] = Scheduler(
|
|
141
|
-
worker=worker, request_loader=request_loader
|
|
142
|
-
)
|
|
143
|
-
self.requests_loader_description = requests_loader_description
|
|
144
|
-
self.benchmark_save_extras = benchmark_save_extras
|
|
51
|
+
Coordinates benchmarking runs by managing request scheduling, metric aggregation,
|
|
52
|
+
and result compilation. Implements a thread-safe singleton pattern to ensure
|
|
53
|
+
consistent state management across concurrent operations while supporting multiple
|
|
54
|
+
scheduling strategies and execution environments.
|
|
55
|
+
"""
|
|
145
56
|
|
|
146
57
|
async def run(
|
|
147
58
|
self,
|
|
59
|
+
accumulator_class: type[BenchmarkAccumulatorT],
|
|
60
|
+
benchmark_class: type[BenchmarkT],
|
|
61
|
+
requests: Iterable[RequestT | MultiTurnRequestT[RequestT]],
|
|
62
|
+
backend: BackendInterface[RequestT, ResponseT],
|
|
148
63
|
profile: Profile,
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
)
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
current_index=current_index,
|
|
265
|
-
current_strategy=None,
|
|
266
|
-
current_aggregator=None,
|
|
267
|
-
current_benchmark=None,
|
|
268
|
-
current_result=None,
|
|
269
|
-
)
|
|
270
|
-
|
|
271
|
-
@abstractmethod
|
|
272
|
-
def create_benchmark_aggregator(
|
|
273
|
-
self,
|
|
274
|
-
run_id: str,
|
|
275
|
-
profile: Profile,
|
|
276
|
-
strategy_index: int,
|
|
277
|
-
strategy: SchedulingStrategy,
|
|
278
|
-
limits: BenchmarkerStrategyLimits,
|
|
279
|
-
) -> AggregatorT: ...
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
class GenerativeBenchmarker(
|
|
283
|
-
Benchmarker[
|
|
284
|
-
GenerativeBenchmarkAggregator,
|
|
285
|
-
GenerativeBenchmark,
|
|
286
|
-
GenerationRequest,
|
|
287
|
-
ResponseSummary,
|
|
288
|
-
],
|
|
289
|
-
):
|
|
290
|
-
def __init__(
|
|
291
|
-
self,
|
|
292
|
-
backend: Backend,
|
|
293
|
-
request_loader: Iterable[GenerationRequest],
|
|
294
|
-
request_loader_description: GenerativeRequestLoaderDescription,
|
|
295
|
-
benchmark_save_extras: Optional[dict[str, Any]] = None,
|
|
296
|
-
processor: Optional[Union[str, Path, PreTrainedTokenizerBase]] = None,
|
|
297
|
-
processor_args: Optional[dict[str, Any]] = None,
|
|
298
|
-
):
|
|
299
|
-
super().__init__(
|
|
300
|
-
worker=GenerativeRequestsWorker(backend),
|
|
301
|
-
request_loader=request_loader,
|
|
302
|
-
requests_loader_description=request_loader_description,
|
|
303
|
-
benchmark_save_extras=benchmark_save_extras,
|
|
304
|
-
)
|
|
305
|
-
self.processor = processor
|
|
306
|
-
self.processor_args = processor_args
|
|
307
|
-
|
|
308
|
-
def create_benchmark_aggregator(
|
|
309
|
-
self,
|
|
310
|
-
run_id: str,
|
|
311
|
-
profile: Profile,
|
|
312
|
-
strategy_index: int,
|
|
313
|
-
strategy: SchedulingStrategy,
|
|
314
|
-
limits: BenchmarkerStrategyLimits,
|
|
315
|
-
) -> GenerativeBenchmarkAggregator:
|
|
316
|
-
return GenerativeBenchmarkAggregator(
|
|
317
|
-
run_id=run_id,
|
|
318
|
-
args=BenchmarkArgs(
|
|
319
|
-
profile=profile,
|
|
320
|
-
strategy_index=strategy_index,
|
|
321
|
-
strategy=strategy,
|
|
322
|
-
max_number=limits.max_number,
|
|
323
|
-
max_duration=limits.max_duration,
|
|
324
|
-
warmup_number=limits.warmup_number,
|
|
325
|
-
warmup_duration=limits.warmup_duration,
|
|
326
|
-
cooldown_number=limits.cooldown_number,
|
|
327
|
-
cooldown_duration=limits.cooldown_duration,
|
|
328
|
-
),
|
|
329
|
-
worker_description=self.worker.description, # type: ignore[arg-type]
|
|
330
|
-
request_loader_description=self.requests_loader_description, # type: ignore[arg-type]
|
|
331
|
-
extras=self.benchmark_save_extras or {},
|
|
332
|
-
processor=self.processor,
|
|
333
|
-
processor_args=self.processor_args,
|
|
334
|
-
)
|
|
64
|
+
environment: Environment,
|
|
65
|
+
warmup: TransientPhaseConfig,
|
|
66
|
+
cooldown: TransientPhaseConfig,
|
|
67
|
+
sample_requests: int | None = 20,
|
|
68
|
+
prefer_response_metrics: bool = True,
|
|
69
|
+
progress: (
|
|
70
|
+
BenchmarkerProgress[BenchmarkAccumulatorT, BenchmarkT] | None
|
|
71
|
+
) = None,
|
|
72
|
+
) -> AsyncIterator[BenchmarkT]:
|
|
73
|
+
"""
|
|
74
|
+
Execute benchmark runs across scheduling strategies in the profile.
|
|
75
|
+
|
|
76
|
+
:param accumulator_class: Class for accumulating metrics during execution
|
|
77
|
+
:param benchmark_class: Class for constructing final benchmark results
|
|
78
|
+
:param requests: Request datasets to process across strategies
|
|
79
|
+
:param backend: Backend interface for executing requests
|
|
80
|
+
:param profile: Profile defining scheduling strategies and constraints
|
|
81
|
+
:param environment: Environment for execution coordination
|
|
82
|
+
:param warmup: Warmup phase configuration before benchmarking
|
|
83
|
+
:param cooldown: Cooldown phase configuration after benchmarking
|
|
84
|
+
:param sample_requests: Number of requests to sample for estimation,
|
|
85
|
+
defaults to 20
|
|
86
|
+
:param prefer_response_metrics: Whether to prefer response metrics over
|
|
87
|
+
request metrics, defaults to True
|
|
88
|
+
:param progress: Optional tracker for benchmark lifecycle events
|
|
89
|
+
:yield: Compiled benchmark result for each strategy execution
|
|
90
|
+
:raises Exception: If benchmark execution or compilation fails
|
|
91
|
+
"""
|
|
92
|
+
with self.thread_lock:
|
|
93
|
+
if progress:
|
|
94
|
+
await progress.on_initialize(profile)
|
|
95
|
+
|
|
96
|
+
run_id = str(uuid.uuid4())
|
|
97
|
+
strategies_generator = profile.strategies_generator()
|
|
98
|
+
strategy: SchedulingStrategy | None
|
|
99
|
+
constraints: dict[str, Constraint] | None
|
|
100
|
+
strategy, constraints = next(strategies_generator)
|
|
101
|
+
|
|
102
|
+
while strategy is not None:
|
|
103
|
+
if progress:
|
|
104
|
+
await progress.on_benchmark_start(strategy)
|
|
105
|
+
|
|
106
|
+
config = BenchmarkConfig(
|
|
107
|
+
run_id=run_id,
|
|
108
|
+
run_index=len(profile.completed_strategies),
|
|
109
|
+
strategy=strategy,
|
|
110
|
+
constraints=(
|
|
111
|
+
{
|
|
112
|
+
key: InfoMixin.extract_from_obj(val)
|
|
113
|
+
for key, val in constraints.items()
|
|
114
|
+
}
|
|
115
|
+
if isinstance(constraints, dict)
|
|
116
|
+
else {"constraint": InfoMixin.extract_from_obj(constraints)}
|
|
117
|
+
if constraints
|
|
118
|
+
else {}
|
|
119
|
+
),
|
|
120
|
+
sample_requests=sample_requests,
|
|
121
|
+
warmup=warmup,
|
|
122
|
+
cooldown=cooldown,
|
|
123
|
+
prefer_response_metrics=prefer_response_metrics,
|
|
124
|
+
profile=profile,
|
|
125
|
+
requests=InfoMixin.extract_from_obj(requests),
|
|
126
|
+
backend=InfoMixin.extract_from_obj(backend),
|
|
127
|
+
environment=InfoMixin.extract_from_obj(environment),
|
|
128
|
+
)
|
|
129
|
+
accumulator = accumulator_class(config=config)
|
|
130
|
+
scheduler_state = None
|
|
131
|
+
scheduler: Scheduler[RequestT, ResponseT] = Scheduler()
|
|
132
|
+
|
|
133
|
+
async for (
|
|
134
|
+
response,
|
|
135
|
+
request,
|
|
136
|
+
request_info,
|
|
137
|
+
scheduler_state,
|
|
138
|
+
) in scheduler.run(
|
|
139
|
+
requests=requests,
|
|
140
|
+
backend=backend,
|
|
141
|
+
strategy=strategy,
|
|
142
|
+
env=environment,
|
|
143
|
+
**constraints or {},
|
|
144
|
+
):
|
|
145
|
+
try:
|
|
146
|
+
accumulator.update_estimate(
|
|
147
|
+
response,
|
|
148
|
+
request,
|
|
149
|
+
request_info,
|
|
150
|
+
scheduler_state,
|
|
151
|
+
)
|
|
152
|
+
if progress:
|
|
153
|
+
await progress.on_benchmark_update(
|
|
154
|
+
accumulator, scheduler_state
|
|
155
|
+
)
|
|
156
|
+
except Exception as err: # noqa: BLE001
|
|
157
|
+
logger.error(
|
|
158
|
+
f"Error updating benchmark estimate/progress: {err}"
|
|
159
|
+
)
|
|
160
|
+
|
|
161
|
+
benchmark = benchmark_class.compile(
|
|
162
|
+
accumulator=accumulator,
|
|
163
|
+
scheduler_state=scheduler_state,
|
|
164
|
+
)
|
|
165
|
+
|
|
166
|
+
if progress:
|
|
167
|
+
await progress.on_benchmark_complete(benchmark)
|
|
168
|
+
|
|
169
|
+
yield benchmark
|
|
170
|
+
|
|
171
|
+
try:
|
|
172
|
+
strategy, constraints = strategies_generator.send(benchmark)
|
|
173
|
+
except StopIteration:
|
|
174
|
+
strategy = None
|
|
175
|
+
constraints = None
|
|
176
|
+
|
|
177
|
+
if progress:
|
|
178
|
+
await progress.on_finalize()
|