guidellm 0.4.0a21__py3-none-any.whl → 0.4.0a169__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of guidellm might be problematic. Click here for more details.
- guidellm/__init__.py +5 -2
- guidellm/__main__.py +452 -252
- guidellm/backends/__init__.py +33 -0
- guidellm/backends/backend.py +110 -0
- guidellm/backends/openai.py +355 -0
- guidellm/backends/response_handlers.py +455 -0
- guidellm/benchmark/__init__.py +53 -39
- guidellm/benchmark/benchmarker.py +150 -317
- guidellm/benchmark/entrypoints.py +467 -128
- guidellm/benchmark/output.py +519 -771
- guidellm/benchmark/profile.py +580 -280
- guidellm/benchmark/progress.py +568 -549
- guidellm/benchmark/scenarios/__init__.py +40 -0
- guidellm/benchmark/scenarios/chat.json +6 -0
- guidellm/benchmark/scenarios/rag.json +6 -0
- guidellm/benchmark/schemas.py +2086 -0
- guidellm/data/__init__.py +28 -4
- guidellm/data/collators.py +16 -0
- guidellm/data/deserializers/__init__.py +53 -0
- guidellm/data/deserializers/deserializer.py +144 -0
- guidellm/data/deserializers/file.py +222 -0
- guidellm/data/deserializers/huggingface.py +94 -0
- guidellm/data/deserializers/memory.py +194 -0
- guidellm/data/deserializers/synthetic.py +348 -0
- guidellm/data/loaders.py +149 -0
- guidellm/data/preprocessors/__init__.py +25 -0
- guidellm/data/preprocessors/formatters.py +404 -0
- guidellm/data/preprocessors/mappers.py +198 -0
- guidellm/data/preprocessors/preprocessor.py +31 -0
- guidellm/data/processor.py +31 -0
- guidellm/data/schemas.py +13 -0
- guidellm/data/utils/__init__.py +6 -0
- guidellm/data/utils/dataset.py +94 -0
- guidellm/extras/__init__.py +4 -0
- guidellm/extras/audio.py +215 -0
- guidellm/extras/vision.py +242 -0
- guidellm/logger.py +2 -2
- guidellm/mock_server/__init__.py +8 -0
- guidellm/mock_server/config.py +84 -0
- guidellm/mock_server/handlers/__init__.py +17 -0
- guidellm/mock_server/handlers/chat_completions.py +280 -0
- guidellm/mock_server/handlers/completions.py +280 -0
- guidellm/mock_server/handlers/tokenizer.py +142 -0
- guidellm/mock_server/models.py +510 -0
- guidellm/mock_server/server.py +168 -0
- guidellm/mock_server/utils.py +302 -0
- guidellm/preprocess/dataset.py +23 -26
- guidellm/presentation/builder.py +2 -2
- guidellm/presentation/data_models.py +25 -21
- guidellm/presentation/injector.py +2 -3
- guidellm/scheduler/__init__.py +65 -26
- guidellm/scheduler/constraints.py +1035 -0
- guidellm/scheduler/environments.py +252 -0
- guidellm/scheduler/scheduler.py +140 -368
- guidellm/scheduler/schemas.py +272 -0
- guidellm/scheduler/strategies.py +519 -0
- guidellm/scheduler/worker.py +391 -420
- guidellm/scheduler/worker_group.py +707 -0
- guidellm/schemas/__init__.py +31 -0
- guidellm/schemas/info.py +159 -0
- guidellm/schemas/request.py +226 -0
- guidellm/schemas/response.py +119 -0
- guidellm/schemas/stats.py +228 -0
- guidellm/{config.py → settings.py} +32 -21
- guidellm/utils/__init__.py +95 -8
- guidellm/utils/auto_importer.py +98 -0
- guidellm/utils/cli.py +71 -2
- guidellm/utils/console.py +183 -0
- guidellm/utils/encoding.py +778 -0
- guidellm/utils/functions.py +134 -0
- guidellm/utils/hf_datasets.py +1 -2
- guidellm/utils/hf_transformers.py +4 -4
- guidellm/utils/imports.py +9 -0
- guidellm/utils/messaging.py +1118 -0
- guidellm/utils/mixins.py +115 -0
- guidellm/utils/pydantic_utils.py +411 -0
- guidellm/utils/random.py +3 -4
- guidellm/utils/registry.py +220 -0
- guidellm/utils/singleton.py +133 -0
- guidellm/{objects → utils}/statistics.py +341 -247
- guidellm/utils/synchronous.py +159 -0
- guidellm/utils/text.py +163 -50
- guidellm/utils/typing.py +41 -0
- guidellm/version.py +1 -1
- {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a169.dist-info}/METADATA +33 -10
- guidellm-0.4.0a169.dist-info/RECORD +95 -0
- guidellm/backend/__init__.py +0 -23
- guidellm/backend/backend.py +0 -259
- guidellm/backend/openai.py +0 -705
- guidellm/backend/response.py +0 -136
- guidellm/benchmark/aggregator.py +0 -760
- guidellm/benchmark/benchmark.py +0 -837
- guidellm/benchmark/scenario.py +0 -104
- guidellm/data/prideandprejudice.txt.gz +0 -0
- guidellm/dataset/__init__.py +0 -22
- guidellm/dataset/creator.py +0 -213
- guidellm/dataset/entrypoints.py +0 -42
- guidellm/dataset/file.py +0 -92
- guidellm/dataset/hf_datasets.py +0 -62
- guidellm/dataset/in_memory.py +0 -132
- guidellm/dataset/synthetic.py +0 -287
- guidellm/objects/__init__.py +0 -18
- guidellm/objects/pydantic.py +0 -89
- guidellm/request/__init__.py +0 -18
- guidellm/request/loader.py +0 -284
- guidellm/request/request.py +0 -79
- guidellm/request/types.py +0 -10
- guidellm/scheduler/queues.py +0 -25
- guidellm/scheduler/result.py +0 -155
- guidellm/scheduler/strategy.py +0 -495
- guidellm-0.4.0a21.dist-info/RECORD +0 -62
- {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a169.dist-info}/WHEEL +0 -0
- {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a169.dist-info}/entry_points.txt +0 -0
- {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a169.dist-info}/licenses/LICENSE +0 -0
- {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a169.dist-info}/top_level.txt +0 -0
|
@@ -1,334 +1,167 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
Optional,
|
|
11
|
-
Union,
|
|
12
|
-
)
|
|
1
|
+
"""
|
|
2
|
+
Benchmark execution orchestration and lifecycle management.
|
|
3
|
+
|
|
4
|
+
Provides the core benchmarking engine that coordinates request scheduling,
|
|
5
|
+
data aggregation, and result compilation across different execution strategies
|
|
6
|
+
and environments. The Benchmarker acts as the primary workflow coordinator,
|
|
7
|
+
managing the complete benchmark lifecycle from request submission through
|
|
8
|
+
result compilation while supporting thread-safe singleton operations.
|
|
9
|
+
"""
|
|
13
10
|
|
|
14
|
-
from
|
|
15
|
-
from transformers import PreTrainedTokenizerBase # type: ignore # noqa: PGH003
|
|
11
|
+
from __future__ import annotations
|
|
16
12
|
|
|
17
|
-
|
|
18
|
-
from
|
|
19
|
-
|
|
13
|
+
import uuid
|
|
14
|
+
from abc import ABC
|
|
15
|
+
from collections.abc import AsyncIterator, Iterable
|
|
16
|
+
from typing import Generic
|
|
17
|
+
|
|
18
|
+
from guidellm.benchmark.profile import Profile
|
|
19
|
+
from guidellm.benchmark.progress import BenchmarkerProgress
|
|
20
|
+
from guidellm.benchmark.schemas import (
|
|
21
|
+
BenchmarkerArgs,
|
|
20
22
|
BenchmarkT,
|
|
21
|
-
|
|
23
|
+
EstimatedBenchmarkState,
|
|
22
24
|
)
|
|
23
|
-
from guidellm.
|
|
24
|
-
from guidellm.
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
GenerationRequest,
|
|
28
|
-
GenerativeRequestLoaderDescription,
|
|
29
|
-
RequestLoaderDescription,
|
|
25
|
+
from guidellm.logger import logger
|
|
26
|
+
from guidellm.scheduler import (
|
|
27
|
+
BackendInterface,
|
|
28
|
+
Environment,
|
|
30
29
|
RequestT,
|
|
31
30
|
ResponseT,
|
|
32
|
-
)
|
|
33
|
-
from guidellm.scheduler import (
|
|
34
|
-
GenerativeRequestsWorker,
|
|
35
|
-
RequestsWorker,
|
|
36
31
|
Scheduler,
|
|
37
|
-
SchedulerRequestResult,
|
|
38
|
-
SchedulingStrategy,
|
|
39
32
|
)
|
|
33
|
+
from guidellm.utils import ThreadSafeSingletonMixin
|
|
40
34
|
|
|
41
|
-
__all__ = ["Benchmarker"
|
|
35
|
+
__all__ = ["Benchmarker"]
|
|
42
36
|
|
|
43
37
|
|
|
44
|
-
class
|
|
45
|
-
|
|
38
|
+
class Benchmarker(
|
|
39
|
+
Generic[BenchmarkT, RequestT, ResponseT],
|
|
40
|
+
ABC,
|
|
41
|
+
ThreadSafeSingletonMixin,
|
|
46
42
|
):
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
"run_complete",
|
|
50
|
-
"scheduler_start",
|
|
51
|
-
"scheduler_update",
|
|
52
|
-
"scheduler_complete",
|
|
53
|
-
"benchmark_compiled",
|
|
54
|
-
]
|
|
55
|
-
start_time: float
|
|
56
|
-
end_number: int
|
|
57
|
-
profile: Profile
|
|
58
|
-
current_index: int
|
|
59
|
-
current_strategy: Optional[SchedulingStrategy] = None
|
|
60
|
-
current_aggregator: Optional[AggregatorT] = None
|
|
61
|
-
current_benchmark: Optional[BenchmarkT] = None
|
|
62
|
-
current_result: Optional[SchedulerRequestResult[RequestT, ResponseT]] = None
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
class BenchmarkerStrategyLimits(StandardBaseModel):
|
|
66
|
-
requests_loader_size: Optional[int] = Field(
|
|
67
|
-
description="Size of the request loader.",
|
|
68
|
-
)
|
|
69
|
-
max_number_per_strategy: Optional[int] = Field(
|
|
70
|
-
description="Maximum number of requests to process per strategy.",
|
|
71
|
-
ge=0,
|
|
72
|
-
)
|
|
73
|
-
max_duration_per_strategy: Optional[float] = Field(
|
|
74
|
-
description="Maximum duration (in seconds) to process requests per strategy.",
|
|
75
|
-
ge=0,
|
|
76
|
-
)
|
|
77
|
-
warmup_percent_per_strategy: Optional[float] = Field(
|
|
78
|
-
description="Percentage of requests to use for warmup.",
|
|
79
|
-
ge=0,
|
|
80
|
-
le=1,
|
|
81
|
-
)
|
|
82
|
-
cooldown_percent_per_strategy: Optional[float] = Field(
|
|
83
|
-
description="Percentage of requests to use for cooldown.",
|
|
84
|
-
ge=0,
|
|
85
|
-
le=1,
|
|
86
|
-
)
|
|
87
|
-
|
|
88
|
-
@property
|
|
89
|
-
def max_number(self) -> Optional[int]:
|
|
90
|
-
if self.max_number_per_strategy is not None:
|
|
91
|
-
return self.max_number_per_strategy
|
|
92
|
-
|
|
93
|
-
if self.requests_loader_size is not None:
|
|
94
|
-
return self.requests_loader_size
|
|
95
|
-
|
|
96
|
-
return None
|
|
97
|
-
|
|
98
|
-
@property
|
|
99
|
-
def max_duration(self) -> Optional[float]:
|
|
100
|
-
return self.max_duration_per_strategy
|
|
101
|
-
|
|
102
|
-
@property
|
|
103
|
-
def warmup_number(self) -> Optional[int]:
|
|
104
|
-
if self.warmup_percent_per_strategy is None or self.max_number is None:
|
|
105
|
-
return None
|
|
106
|
-
|
|
107
|
-
return int(self.warmup_percent_per_strategy * self.max_number)
|
|
108
|
-
|
|
109
|
-
@property
|
|
110
|
-
def warmup_duration(self) -> Optional[float]:
|
|
111
|
-
if self.warmup_percent_per_strategy is None or self.max_duration is None:
|
|
112
|
-
return None
|
|
113
|
-
|
|
114
|
-
return self.warmup_percent_per_strategy * self.max_duration
|
|
115
|
-
|
|
116
|
-
@property
|
|
117
|
-
def cooldown_number(self) -> Optional[int]:
|
|
118
|
-
if self.cooldown_percent_per_strategy is None or self.max_number is None:
|
|
119
|
-
return None
|
|
120
|
-
|
|
121
|
-
return int(self.cooldown_percent_per_strategy * self.max_number)
|
|
43
|
+
"""
|
|
44
|
+
Abstract benchmark orchestrator for request processing workflows.
|
|
122
45
|
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
class Benchmarker(Generic[AggregatorT, BenchmarkT, RequestT, ResponseT], ABC):
|
|
132
|
-
def __init__(
|
|
133
|
-
self,
|
|
134
|
-
worker: RequestsWorker[RequestT, ResponseT],
|
|
135
|
-
request_loader: Iterable[RequestT],
|
|
136
|
-
requests_loader_description: RequestLoaderDescription,
|
|
137
|
-
benchmark_save_extras: Optional[dict[str, Any]] = None,
|
|
138
|
-
):
|
|
139
|
-
self.worker = worker
|
|
140
|
-
self.scheduler: Scheduler[RequestT, ResponseT] = Scheduler(
|
|
141
|
-
worker=worker, request_loader=request_loader
|
|
142
|
-
)
|
|
143
|
-
self.requests_loader_description = requests_loader_description
|
|
144
|
-
self.benchmark_save_extras = benchmark_save_extras
|
|
46
|
+
Coordinates execution of benchmarking runs across different scheduling
|
|
47
|
+
strategies, aggregating metrics and compiling results. Manages the complete
|
|
48
|
+
benchmark lifecycle from request submission through result compilation while
|
|
49
|
+
implementing thread-safe singleton pattern to ensure consistent state across
|
|
50
|
+
concurrent operations.
|
|
51
|
+
"""
|
|
145
52
|
|
|
146
53
|
async def run(
|
|
147
54
|
self,
|
|
55
|
+
benchmark_class: type[BenchmarkT],
|
|
56
|
+
requests: Iterable[RequestT | Iterable[RequestT | tuple[RequestT, float]]],
|
|
57
|
+
backend: BackendInterface[RequestT, ResponseT],
|
|
148
58
|
profile: Profile,
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
yield BenchmarkerResult(
|
|
260
|
-
type_="run_complete",
|
|
261
|
-
start_time=start_time,
|
|
262
|
-
end_number=end_number,
|
|
263
|
-
profile=profile,
|
|
264
|
-
current_index=current_index,
|
|
265
|
-
current_strategy=None,
|
|
266
|
-
current_aggregator=None,
|
|
267
|
-
current_benchmark=None,
|
|
268
|
-
current_result=None,
|
|
269
|
-
)
|
|
270
|
-
|
|
271
|
-
@abstractmethod
|
|
272
|
-
def create_benchmark_aggregator(
|
|
273
|
-
self,
|
|
274
|
-
run_id: str,
|
|
275
|
-
profile: Profile,
|
|
276
|
-
strategy_index: int,
|
|
277
|
-
strategy: SchedulingStrategy,
|
|
278
|
-
limits: BenchmarkerStrategyLimits,
|
|
279
|
-
) -> AggregatorT: ...
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
class GenerativeBenchmarker(
|
|
283
|
-
Benchmarker[
|
|
284
|
-
GenerativeBenchmarkAggregator,
|
|
285
|
-
GenerativeBenchmark,
|
|
286
|
-
GenerationRequest,
|
|
287
|
-
ResponseSummary,
|
|
288
|
-
],
|
|
289
|
-
):
|
|
290
|
-
def __init__(
|
|
291
|
-
self,
|
|
292
|
-
backend: Backend,
|
|
293
|
-
request_loader: Iterable[GenerationRequest],
|
|
294
|
-
request_loader_description: GenerativeRequestLoaderDescription,
|
|
295
|
-
benchmark_save_extras: Optional[dict[str, Any]] = None,
|
|
296
|
-
processor: Optional[Union[str, Path, PreTrainedTokenizerBase]] = None,
|
|
297
|
-
processor_args: Optional[dict[str, Any]] = None,
|
|
298
|
-
):
|
|
299
|
-
super().__init__(
|
|
300
|
-
worker=GenerativeRequestsWorker(backend),
|
|
301
|
-
request_loader=request_loader,
|
|
302
|
-
requests_loader_description=request_loader_description,
|
|
303
|
-
benchmark_save_extras=benchmark_save_extras,
|
|
304
|
-
)
|
|
305
|
-
self.processor = processor
|
|
306
|
-
self.processor_args = processor_args
|
|
307
|
-
|
|
308
|
-
def create_benchmark_aggregator(
|
|
309
|
-
self,
|
|
310
|
-
run_id: str,
|
|
311
|
-
profile: Profile,
|
|
312
|
-
strategy_index: int,
|
|
313
|
-
strategy: SchedulingStrategy,
|
|
314
|
-
limits: BenchmarkerStrategyLimits,
|
|
315
|
-
) -> GenerativeBenchmarkAggregator:
|
|
316
|
-
return GenerativeBenchmarkAggregator(
|
|
317
|
-
run_id=run_id,
|
|
318
|
-
args=BenchmarkArgs(
|
|
319
|
-
profile=profile,
|
|
320
|
-
strategy_index=strategy_index,
|
|
321
|
-
strategy=strategy,
|
|
322
|
-
max_number=limits.max_number,
|
|
323
|
-
max_duration=limits.max_duration,
|
|
324
|
-
warmup_number=limits.warmup_number,
|
|
325
|
-
warmup_duration=limits.warmup_duration,
|
|
326
|
-
cooldown_number=limits.cooldown_number,
|
|
327
|
-
cooldown_duration=limits.cooldown_duration,
|
|
328
|
-
),
|
|
329
|
-
worker_description=self.worker.description, # type: ignore[arg-type]
|
|
330
|
-
request_loader_description=self.requests_loader_description, # type: ignore[arg-type]
|
|
331
|
-
extras=self.benchmark_save_extras or {},
|
|
332
|
-
processor=self.processor,
|
|
333
|
-
processor_args=self.processor_args,
|
|
334
|
-
)
|
|
59
|
+
environment: Environment,
|
|
60
|
+
data: list[Any],
|
|
61
|
+
progress: BenchmarkerProgress[BenchmarkT] | None = None,
|
|
62
|
+
sample_requests: int | None = 20,
|
|
63
|
+
warmup: float | None = None,
|
|
64
|
+
cooldown: float | None = None,
|
|
65
|
+
prefer_response_metrics: bool = True,
|
|
66
|
+
) -> AsyncIterator[BenchmarkT]:
|
|
67
|
+
"""
|
|
68
|
+
Execute benchmark runs across multiple scheduling strategies.
|
|
69
|
+
|
|
70
|
+
Orchestrates the complete benchmark workflow by iterating through scheduling
|
|
71
|
+
strategies from the profile, executing requests through the scheduler,
|
|
72
|
+
aggregating metrics, and compiling final benchmark results.
|
|
73
|
+
|
|
74
|
+
:param benchmark_class: Class for constructing final benchmark objects
|
|
75
|
+
:param requests: Request datasets for processing across strategies
|
|
76
|
+
:param backend: Backend interface for request processing
|
|
77
|
+
:param profile: Benchmark profile defining strategies and constraints
|
|
78
|
+
:param environment: Execution environment for coordination
|
|
79
|
+
:param progress: Optional progress tracker for benchmark lifecycle events
|
|
80
|
+
:param sample_requests: Number of sample requests to use for estimation
|
|
81
|
+
:param warmup: Optional warmup duration in seconds before benchmarking
|
|
82
|
+
:param cooldown: Optional cooldown duration in seconds after benchmarking
|
|
83
|
+
:param prefer_response_metrics: Whether to prefer response-based metrics over
|
|
84
|
+
request-based metrics
|
|
85
|
+
:yield: Compiled benchmark results for each strategy execution
|
|
86
|
+
:raises Exception: If benchmark execution or compilation fails
|
|
87
|
+
"""
|
|
88
|
+
with self.thread_lock:
|
|
89
|
+
if progress:
|
|
90
|
+
await progress.on_initialize(profile)
|
|
91
|
+
|
|
92
|
+
run_id = str(uuid.uuid4())
|
|
93
|
+
strategies_generator = profile.strategies_generator()
|
|
94
|
+
strategy, constraints = next(strategies_generator)
|
|
95
|
+
|
|
96
|
+
while strategy is not None:
|
|
97
|
+
if progress:
|
|
98
|
+
await progress.on_benchmark_start(strategy)
|
|
99
|
+
|
|
100
|
+
args = BenchmarkerArgs(
|
|
101
|
+
run_id=run_id,
|
|
102
|
+
run_index=len(profile.completed_strategies),
|
|
103
|
+
sample_requests=sample_requests,
|
|
104
|
+
warmup=warmup,
|
|
105
|
+
cooldown=cooldown,
|
|
106
|
+
prefer_response_metrics=prefer_response_metrics,
|
|
107
|
+
)
|
|
108
|
+
estimated_state = EstimatedBenchmarkState()
|
|
109
|
+
scheduler_state = None
|
|
110
|
+
scheduler: Scheduler[RequestT, ResponseT] = Scheduler()
|
|
111
|
+
|
|
112
|
+
async for (
|
|
113
|
+
response,
|
|
114
|
+
request,
|
|
115
|
+
request_info,
|
|
116
|
+
scheduler_state,
|
|
117
|
+
) in scheduler.run(
|
|
118
|
+
requests=requests,
|
|
119
|
+
backend=backend,
|
|
120
|
+
strategy=strategy,
|
|
121
|
+
startup_duration=warmup if warmup and warmup >= 1 else 0.0,
|
|
122
|
+
env=environment,
|
|
123
|
+
**constraints or {},
|
|
124
|
+
):
|
|
125
|
+
try:
|
|
126
|
+
benchmark_class.update_estimate(
|
|
127
|
+
args,
|
|
128
|
+
estimated_state,
|
|
129
|
+
response,
|
|
130
|
+
request,
|
|
131
|
+
request_info,
|
|
132
|
+
scheduler_state,
|
|
133
|
+
)
|
|
134
|
+
if progress:
|
|
135
|
+
await progress.on_benchmark_update(
|
|
136
|
+
estimated_state, scheduler_state
|
|
137
|
+
)
|
|
138
|
+
except Exception as err: # noqa: BLE001
|
|
139
|
+
logger.error(
|
|
140
|
+
f"Error updating benchmark estimate/progress: {err}"
|
|
141
|
+
)
|
|
142
|
+
|
|
143
|
+
benchmark = benchmark_class.compile(
|
|
144
|
+
args=args,
|
|
145
|
+
estimated_state=estimated_state,
|
|
146
|
+
scheduler_state=scheduler_state,
|
|
147
|
+
profile=profile,
|
|
148
|
+
requests=requests,
|
|
149
|
+
backend=backend,
|
|
150
|
+
environment=environment,
|
|
151
|
+
strategy=strategy,
|
|
152
|
+
constraints=constraints,
|
|
153
|
+
data=data,
|
|
154
|
+
)
|
|
155
|
+
if progress:
|
|
156
|
+
await progress.on_benchmark_complete(benchmark)
|
|
157
|
+
|
|
158
|
+
yield benchmark
|
|
159
|
+
|
|
160
|
+
try:
|
|
161
|
+
strategy, constraints = strategies_generator.send(benchmark)
|
|
162
|
+
except StopIteration:
|
|
163
|
+
strategy = None
|
|
164
|
+
constraints = None
|
|
165
|
+
|
|
166
|
+
if progress:
|
|
167
|
+
await progress.on_finalize()
|