guidellm 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of guidellm might be problematic; see the registry's advisory page for more details.
- guidellm/__init__.py +38 -6
- guidellm/__main__.py +294 -0
- guidellm/backend/__init__.py +19 -6
- guidellm/backend/backend.py +238 -0
- guidellm/backend/openai.py +532 -122
- guidellm/backend/response.py +132 -0
- guidellm/benchmark/__init__.py +73 -0
- guidellm/benchmark/aggregator.py +760 -0
- guidellm/benchmark/benchmark.py +838 -0
- guidellm/benchmark/benchmarker.py +334 -0
- guidellm/benchmark/entrypoints.py +141 -0
- guidellm/benchmark/output.py +946 -0
- guidellm/benchmark/profile.py +409 -0
- guidellm/benchmark/progress.py +720 -0
- guidellm/config.py +34 -56
- guidellm/data/__init__.py +4 -0
- guidellm/data/prideandprejudice.txt.gz +0 -0
- guidellm/dataset/__init__.py +22 -0
- guidellm/dataset/creator.py +213 -0
- guidellm/dataset/entrypoints.py +42 -0
- guidellm/dataset/file.py +90 -0
- guidellm/dataset/hf_datasets.py +62 -0
- guidellm/dataset/in_memory.py +132 -0
- guidellm/dataset/synthetic.py +262 -0
- guidellm/objects/__init__.py +18 -0
- guidellm/objects/pydantic.py +60 -0
- guidellm/objects/statistics.py +947 -0
- guidellm/request/__init__.py +12 -10
- guidellm/request/loader.py +281 -0
- guidellm/request/request.py +79 -0
- guidellm/scheduler/__init__.py +51 -3
- guidellm/scheduler/result.py +137 -0
- guidellm/scheduler/scheduler.py +382 -0
- guidellm/scheduler/strategy.py +493 -0
- guidellm/scheduler/types.py +7 -0
- guidellm/scheduler/worker.py +511 -0
- guidellm/utils/__init__.py +16 -29
- guidellm/utils/colors.py +8 -0
- guidellm/utils/hf_transformers.py +35 -0
- guidellm/utils/random.py +43 -0
- guidellm/utils/text.py +118 -357
- {guidellm-0.1.0.dist-info → guidellm-0.2.0.dist-info}/METADATA +96 -79
- guidellm-0.2.0.dist-info/RECORD +48 -0
- {guidellm-0.1.0.dist-info → guidellm-0.2.0.dist-info}/WHEEL +1 -1
- guidellm-0.2.0.dist-info/entry_points.txt +2 -0
- guidellm/backend/base.py +0 -320
- guidellm/core/__init__.py +0 -24
- guidellm/core/distribution.py +0 -190
- guidellm/core/report.py +0 -321
- guidellm/core/request.py +0 -44
- guidellm/core/result.py +0 -545
- guidellm/core/serializable.py +0 -169
- guidellm/executor/__init__.py +0 -10
- guidellm/executor/base.py +0 -213
- guidellm/executor/profile_generator.py +0 -343
- guidellm/main.py +0 -336
- guidellm/request/base.py +0 -194
- guidellm/request/emulated.py +0 -391
- guidellm/request/file.py +0 -76
- guidellm/request/transformers.py +0 -100
- guidellm/scheduler/base.py +0 -374
- guidellm/scheduler/load_generator.py +0 -196
- guidellm/utils/injector.py +0 -70
- guidellm/utils/progress.py +0 -196
- guidellm/utils/transformers.py +0 -151
- guidellm-0.1.0.dist-info/RECORD +0 -35
- guidellm-0.1.0.dist-info/entry_points.txt +0 -3
- {guidellm-0.1.0.dist-info → guidellm-0.2.0.dist-info/licenses}/LICENSE +0 -0
- {guidellm-0.1.0.dist-info → guidellm-0.2.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,720 @@
|
|
|
1
|
+
import math
|
|
2
|
+
import time
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from datetime import datetime
|
|
5
|
+
from typing import Generic, Optional, TypeVar, Union
|
|
6
|
+
|
|
7
|
+
from rich.console import Group
|
|
8
|
+
from rich.live import Live
|
|
9
|
+
from rich.panel import Panel
|
|
10
|
+
from rich.progress import (
|
|
11
|
+
BarColumn,
|
|
12
|
+
Progress,
|
|
13
|
+
ProgressColumn,
|
|
14
|
+
SpinnerColumn,
|
|
15
|
+
TaskID,
|
|
16
|
+
TaskProgressColumn,
|
|
17
|
+
TextColumn,
|
|
18
|
+
TimeElapsedColumn,
|
|
19
|
+
TimeRemainingColumn,
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
from guidellm.benchmark.aggregator import (
|
|
23
|
+
BenchmarkAggregator,
|
|
24
|
+
GenerativeBenchmarkAggregator,
|
|
25
|
+
)
|
|
26
|
+
from guidellm.benchmark.benchmark import Benchmark, GenerativeBenchmark
|
|
27
|
+
from guidellm.benchmark.benchmarker import BenchmarkerResult
|
|
28
|
+
from guidellm.scheduler import (
|
|
29
|
+
SchedulingStrategy,
|
|
30
|
+
StrategyType,
|
|
31
|
+
strategy_display_str,
|
|
32
|
+
)
|
|
33
|
+
from guidellm.utils import Colors
|
|
34
|
+
|
|
35
|
+
# Public API of this module: per-task progress state containers and the
# rich-based display drivers that render them.
__all__ = [
    "BenchmarkerTaskProgressState",
    "BenchmarkerProgressDisplay",
    "GenerativeTextBenchmarkerTaskProgressState",
    "GenerativeTextBenchmarkerProgressDisplay",
]
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
@dataclass
class BenchmarkerTaskProgressState:
    """Per-benchmark progress state rendered as one task row in the display.

    Tracks lifecycle flags (started / compiling / ended), request counters,
    and scheduler overhead timings, and exposes rich-markup strings consumed
    by the progress columns. Progress is normalized to a 0-1000 scale so that
    number-based and duration-based limits share a single bar.
    """

    # Whether the scheduler-stats line is included in `fields`.
    display_scheduler_stats: bool

    task_id: TaskID
    strategy: Union[StrategyType, SchedulingStrategy]
    started: bool = False
    compiling: bool = False
    ended: bool = False

    # Unix timestamp when the benchmark started; None until known.
    start_time: Optional[float] = None
    # Request-count / wall-clock limits; both None means progress is unknown.
    max_number: Optional[float] = None
    max_duration: Optional[float] = None
    in_warmup: bool = False
    in_cooldown: bool = False

    # Rolling request metrics pushed in by the display's update handlers.
    requests_rate: float = 0
    request_latency: float = 0
    requests_processing: float = 0
    requests_successful: float = 0
    requests_incomplete: float = 0
    requests_errored: float = 0

    # Scheduler/backend overhead timings (milliseconds).
    worker_overheads_time_ms: float = 0.0
    backend_overheads_time_ms: float = 0.0
    requests_sleep_time_ms: float = 0.0
    requests_targeted_start_time_delay_ms: float = 0.0

    @property
    def description(self) -> str:
        """Human-readable label for the scheduling strategy."""
        return strategy_display_str(self.strategy)

    @property
    def total(self) -> Optional[float]:
        """Progress total (always 1000) or None when no limit is configured."""
        if self.max_number is None and self.max_duration is None:
            return None

        return 1000

    @property
    def completed(self) -> int:
        """Current progress on the 0-1000 scale.

        Uses whichever of the number/duration limits is furthest along,
        clamps at 1000, and reports 1000 once the benchmark has ended.
        """
        if self.ended:
            return 1000

        if self.max_number is None and self.max_duration is None:
            return 0

        number = self.requests_successful + self.requests_errored
        # -inf marks a limit that is not configured so max() ignores it.
        number_percent = (
            number / float(self.max_number) * 1000 if self.max_number else -math.inf
        )
        duration_percent = (
            (time.time() - self.start_time) / self.max_duration * 1000
            if self.max_duration and self.start_time
            else -math.inf
        )

        return min(int(max(number_percent, duration_percent)), 1000)

    @property
    def fields(self) -> dict[str, str]:
        """Field values injected into the rich task for column templates."""
        fields = {
            "start_time": self.formatted_start_time,
            "progress_status": self.formatted_progress_status,
            "requests_summary": self.formatted_requests_summary,
        }

        if self.display_scheduler_stats:
            fields["scheduler_stats"] = self.formatted_scheduler_stats

        return fields

    @property
    def formatted_start_time(self) -> str:
        """Local HH:MM:SS start time, or a placeholder before start."""
        if self.start_time is None:
            return "--:--:--"

        return datetime.fromtimestamp(self.start_time).strftime("%H:%M:%S")

    @property
    def formatted_progress_status(self) -> str:
        """Colored, fixed-width status word for the current lifecycle phase."""
        if self.ended:
            status = "complete"
            color = Colors.SUCCESS
        elif self.compiling:
            status = "compiling"
            color = Colors.PROGRESS
        elif self.started and self.in_warmup:
            status = "warmup"
            color = Colors.PROGRESS
        elif self.started and self.in_cooldown:
            status = "cooldown"
            color = Colors.PROGRESS
        elif self.started:
            status = "running"
            color = Colors.PROGRESS
        else:
            status = "pending"
            color = Colors.INFO

        return f"[{color}]{status.ljust(8)}[/{color}]"

    @property
    def formatted_requests_summary(self) -> str:
        """One-line summary of request rate, latency, concurrency, and counts."""
        if not self.started:
            return " "

        segments = (
            BenchmarkerTaskProgressState.format_progress_display(
                value=self.requests_rate,
                label="req/s",
                total_characters=12,
                digits_places=4,
                decimal_places=1,
            ),
            BenchmarkerTaskProgressState.format_progress_display(
                value=self.request_latency,
                label="Lat",
                units="s",
                total_characters=12,
                digits_places=4,
                decimal_places=2,
            ),
            BenchmarkerTaskProgressState.format_progress_display(
                value=self.requests_processing,
                label="Conc",
                total_characters=12,
                digits_places=4,
                decimal_places=1,
            ),
            BenchmarkerTaskProgressState.format_progress_display(
                value=self.requests_successful,
                label="Comp",
                total_characters=12,
                digits_places=5,
                decimal_places=0,
            ),
            BenchmarkerTaskProgressState.format_progress_display(
                value=self.requests_incomplete,
                label="Inc",
                total_characters=12,
                digits_places=5,
                decimal_places=0,
            ),
            BenchmarkerTaskProgressState.format_progress_display(
                value=self.requests_errored,
                label="Err",
                total_characters=12,
                digits_places=5,
                decimal_places=0,
            ),
        )

        return f"[{Colors.INFO}]Req:[/{Colors.INFO}] " + ", ".join(segments)

    @property
    def formatted_scheduler_stats(self) -> str:
        """One-line summary of scheduler/backend overhead timings."""
        if not self.started:
            return " "

        segments = (
            BenchmarkerTaskProgressState.format_progress_display(
                value=self.worker_overheads_time_ms,
                label="Work OH",
                units="ms",
                total_characters=18,
                digits_places=3,
                decimal_places=1,
            ),
            BenchmarkerTaskProgressState.format_progress_display(
                value=self.backend_overheads_time_ms,
                label="Back OH",
                units="ms",
                total_characters=18,
                digits_places=3,
                decimal_places=1,
            ),
            BenchmarkerTaskProgressState.format_progress_display(
                value=self.requests_sleep_time_ms,
                label="Req Sleep",
                units="ms",
                total_characters=18,
                digits_places=5,
                decimal_places=0,
            ),
            BenchmarkerTaskProgressState.format_progress_display(
                value=self.requests_targeted_start_time_delay_ms,
                label="Start Del",
                units="ms",
                total_characters=18,
                digits_places=5,
                decimal_places=0,
            ),
        )

        return f"[{Colors.INFO}]Sys:[/{Colors.INFO}] " + ", ".join(segments)

    @staticmethod
    def format_progress_display(
        value: float,
        label: str,
        units: str = "",
        total_characters: Optional[int] = None,
        digits_places: Optional[int] = None,
        decimal_places: Optional[int] = None,
    ) -> str:
        """Format ``value`` with a colored label, optionally right-justified.

        ``digits_places`` sets the minimum numeric field width and
        ``decimal_places`` the precision; ``total_characters`` right-pads the
        final string, compensating for the rich markup around the label that
        does not render as visible characters.
        """
        if decimal_places is None and digits_places is None:
            # BUGFIX: the format spec must sit inside the braces; the original
            # f"{value}:.0f" appended a literal ":.0f" after the number.
            formatted_number = f"{value:.0f}"
        elif digits_places is None:
            formatted_number = f"{value:.{decimal_places}f}"
        elif decimal_places is None:
            formatted_number = f"{value:>{digits_places}f}"
        else:
            formatted_number = f"{value:>{digits_places}.{decimal_places}f}"

        result = f"{formatted_number}{units} [{Colors.INFO}]{label}[/{Colors.INFO}]"

        if total_characters is not None:
            # Account for the invisible "[color]...[/color]" markup characters.
            total_characters += len(Colors.INFO) * 2 + 5

            if len(result) < total_characters:
                result = result.rjust(total_characters)

        return result
|
|
273
|
+
|
|
274
|
+
|
|
275
|
+
class GenerativeTextBenchmarkerTaskProgressState(BenchmarkerTaskProgressState):
    """Task progress state extended with generative-text token metrics."""

    # Token metrics pushed in by the generative display's update handlers.
    output_tokens: float = 0
    prompt_tokens: float = 0
    output_tokens_rate: float = 0
    total_tokens_rate: float = 0
    tokens_ttft: float = 0
    tokens_itl: float = 0

    @property
    def fields(self) -> dict[str, str]:
        """Base display fields plus the token summary line."""
        merged = super().fields
        merged["tokens_summary"] = self.formatted_tokens_summary
        return merged

    @property
    def formatted_tokens_summary(self) -> str:
        """One-line summary of token rates, TTFT/ITL, and token counts."""
        if not self.started:
            return " "

        fmt = BenchmarkerTaskProgressState.format_progress_display
        segments = [
            fmt(
                value=self.output_tokens_rate,
                label="gen/s",
                total_characters=12,
                digits_places=4,
                decimal_places=1,
            ),
            fmt(
                value=self.total_tokens_rate,
                label="tot/s",
                total_characters=12,
                digits_places=4,
                decimal_places=1,
            ),
            fmt(
                value=self.tokens_ttft,
                label="TTFT",
                units="ms",
                total_characters=12,
                digits_places=3,
                decimal_places=1,
            ),
            fmt(
                value=self.tokens_itl,
                label="ITL",
                units="ms",
                total_characters=12,
                digits_places=3,
                decimal_places=1,
            ),
            fmt(
                value=self.prompt_tokens,
                label="Prompt",
                total_characters=12,
                digits_places=4,
                decimal_places=0,
            ),
            fmt(
                value=self.output_tokens,
                label="Gen",
                total_characters=12,
                digits_places=4,
                decimal_places=0,
            ),
        ]

        return f"[{Colors.INFO}]Tok:[/{Colors.INFO}] " + ", ".join(segments)
|
|
346
|
+
|
|
347
|
+
|
|
348
|
+
# Type variable bound to BenchmarkerTaskProgressState so display subclasses
# can declare the concrete per-task state type they create and update.
BTPS = TypeVar("BTPS", bound=BenchmarkerTaskProgressState)
|
|
349
|
+
|
|
350
|
+
|
|
351
|
+
class BenchmarkerProgressDisplay(Generic[BTPS]):
    """Live terminal display for a benchmarker run, built on rich.

    Renders one progress row per benchmark (inside a panel) plus an overall
    run bar, and is driven by calling :meth:`update` with each
    ``BenchmarkerResult`` as it arrives. Lifecycle is enforced strictly:
    ``run_start`` must come first, ``run_complete`` last, and every other
    result type is routed to a per-benchmark handler in between.
    """

    def __init__(self, display_scheduler_stats: bool):
        """Build the rich widgets; nothing is shown until ``run_start``.

        :param display_scheduler_stats: when True, each task row also shows
            a scheduler-overhead line (see create_task_progress_columns).
        """
        self.display_scheduler_stats = display_scheduler_stats
        self.started = False
        # One row per benchmark, columns defined by the (overridable) factory.
        self.benchmarker_tasks_progress = Progress(*self.create_task_progress_columns())
        self.benchmarker_tasks_panel = Panel(
            self.benchmarker_tasks_progress,
            title="Benchmarks",
            title_align="left",
            expand=True,
        )
        # Overall run bar: each benchmark contributes 1000 progress units.
        self.benchmarker_progress = Progress(
            TextColumn("Generating...", style=f"italic {Colors.PROGRESS}"),
            BarColumn(
                bar_width=None,
                complete_style=Colors.PROGRESS,
                finished_style=Colors.SUCCESS,
            ),
            TextColumn(
                "({task.fields[completed_benchmarks]}/{task.fields[total_benchmarks]})",
                style=Colors.PROGRESS,
            ),
            TextColumn("["),
            TimeElapsedColumn(),
            TextColumn("<"),
            TimeRemainingColumn(),
            TextColumn("]"),
        )
        # Live container that redraws the panel + overall bar in place.
        self.benchmarker_live = Live(
            Group(
                self.benchmarker_tasks_panel,
                self.benchmarker_progress,
            ),
            redirect_stdout=True,
            redirect_stderr=True,
        )
        # Task currently receiving scheduler updates (None between benchmarks).
        self.active_task: Optional[TaskID] = None
        # Per-benchmark progress state, index-aligned with result.current_index.
        self.benchmarker_tasks: list[BTPS] = []
        # TaskID of the overall run bar; set in handle_start.
        self.progress_task: Optional[TaskID] = None

    def update(self, result: BenchmarkerResult):
        """Dispatch a benchmarker result to the appropriate handler.

        :raises RuntimeError: on out-of-order lifecycle events (double start,
            update/complete before start).
        """
        if result.type_ == "run_start":
            if self.started:
                raise RuntimeError("Progress display already started.")

            self.handle_start(result)
            self.started = True
        elif result.type_ == "run_complete":
            if not self.started:
                raise RuntimeError("Progress display not started.")

            self.handle_end(result)
            self.started = False
        else:
            if not self.started:
                raise RuntimeError("Progress display not started.")

            self.handle_update(result)

    def handle_start(self, result: BenchmarkerResult):
        """Start the live display and create one row per planned strategy."""
        self.benchmarker_live.start()

        for index, strategy_type in enumerate(result.profile.strategy_types):
            # Rows start hidden/un-started; made visible once state exists.
            task_id = self.benchmarker_tasks_progress.add_task(
                description=strategy_type,
                start=False,
                total=None,
                completed=0,
                visible=False,
            )
            task_progress_state = self.create_task_progress_state(
                task_id=task_id,
                index=index,
                strategy_type=strategy_type,
                result=result,
            )
            self.benchmarker_tasks.append(task_progress_state)
            self.benchmarker_tasks_progress.update(
                task_id,
                description=task_progress_state.description,
                visible=True,
                **task_progress_state.fields,  # type: ignore[arg-type]
            )

        # Overall bar: 1000 units per benchmark.
        self.progress_task = self.benchmarker_progress.add_task(
            "",
            total=len(self.benchmarker_tasks) * 1000,
            completed_benchmarks=0,
            total_benchmarks=len(self.benchmarker_tasks),
        )

    def handle_update(self, result: BenchmarkerResult):
        """Route a per-benchmark result, then refresh both progress widgets."""
        current_state: BTPS = self.benchmarker_tasks[result.current_index]

        if result.type_ == "scheduler_start":
            self.handle_update_scheduler_start(current_state, result)
            self.active_task = current_state.task_id
        elif result.type_ == "scheduler_update":
            self.handle_update_scheduler_update(current_state, result)
        elif result.type_ == "scheduler_complete":
            self.handle_update_scheduler_complete(current_state, result)
        elif result.type_ == "benchmark_compiled":
            self.handle_update_benchmark_compiled(current_state, result)
        else:
            raise ValueError(f"Unknown result type: {result.type_}")

        if self.progress_task is None:
            raise RuntimeError("Progress task not set.")

        self.benchmarker_tasks_progress.update(
            current_state.task_id,
            description=current_state.description,
            completed=current_state.completed,
            total=current_state.total,
            **current_state.fields,  # type: ignore[arg-type]
        )
        # Overall bar = full credit for finished benchmarks + current partial.
        self.benchmarker_progress.update(
            self.progress_task,
            completed=(result.current_index * 1000) + current_state.completed,
            total=1000 * len(self.benchmarker_tasks),
            completed_benchmarks=(
                result.current_index + (1 if current_state.ended else 0)
            ),
            total_benchmarks=len(self.benchmarker_tasks),
        )

        if current_state.ended:
            # Freeze the row's elapsed clock and release the active slot.
            self.benchmarker_tasks_progress.stop_task(current_state.task_id)
            self.active_task = None

    def handle_update_scheduler_start(
        self, progress_state: BTPS, result: BenchmarkerResult
    ):
        """Record strategy, start time, and limits when a benchmark begins."""
        if self.active_task is not None:
            raise RuntimeError("Active task already set.")

        progress_state.strategy = result.current_strategy  # type: ignore[assignment]
        progress_state.started = True
        current_aggregator: BenchmarkAggregator = result.current_aggregator  # type: ignore[assignment]
        progress_state.start_time = (
            current_aggregator.requests_stats.totals.total.start_time
        )
        progress_state.max_number = current_aggregator.args.max_number
        progress_state.max_duration = current_aggregator.args.max_duration

    def handle_update_scheduler_update(
        self, progress_state: BTPS, result: BenchmarkerResult
    ):
        """Copy the aggregator's rolling request metrics into the row state."""
        if self.active_task is None:
            raise RuntimeError("Active task not set.")

        if self.active_task != progress_state.task_id:
            raise RuntimeError("Active task does not match current task.")

        current_aggregator: BenchmarkAggregator = result.current_aggregator  # type: ignore[assignment]
        progress_state.in_warmup = current_aggregator.in_warmup
        progress_state.in_cooldown = current_aggregator.in_cooldown
        progress_state.requests_rate = (
            current_aggregator.requests_stats.totals.successful.rate
        )
        progress_state.request_latency = (
            current_aggregator.requests_stats.request_time.mean
        )
        progress_state.requests_processing = (
            current_aggregator.scheduler_stats.processing_requests.last
        )
        progress_state.requests_successful = (
            current_aggregator.requests_stats.totals.successful.total
        )
        progress_state.requests_incomplete = (
            current_aggregator.requests_stats.totals.incomplete.total
        )
        progress_state.requests_errored = (
            current_aggregator.requests_stats.totals.errored.total
        )
        # Worker overhead = scheduling delay + worker start delay.
        progress_state.worker_overheads_time_ms = (
            current_aggregator.requests_stats.scheduled_time_delay.mean_ms
            + current_aggregator.requests_stats.worker_start_delay.mean_ms
        )
        progress_state.backend_overheads_time_ms = (
            current_aggregator.requests_stats.request_time_delay.mean_ms
        )
        progress_state.requests_sleep_time_ms = (
            current_aggregator.requests_stats.scheduled_time_sleep.mean_ms
        )
        progress_state.requests_targeted_start_time_delay_ms = (
            current_aggregator.requests_stats.request_start_time_targeted_delay.mean_ms
        )

    def handle_update_scheduler_complete(
        self,
        progress_state: BTPS,
        result: BenchmarkerResult,  # noqa: ARG002
    ):
        """Move the row into the 'compiling' phase once scheduling ends."""
        if self.active_task is None:
            raise RuntimeError("Active task not set.")

        if self.active_task != progress_state.task_id:
            raise RuntimeError("Active task does not match current task.")

        progress_state.in_warmup = False
        progress_state.in_cooldown = False
        progress_state.compiling = True

    def handle_update_benchmark_compiled(
        self, progress_state: BTPS, result: BenchmarkerResult
    ):
        """Finalize the row with the compiled benchmark's summary metrics."""
        if self.active_task is None:
            raise RuntimeError("Active task not set.")

        if self.active_task != progress_state.task_id:
            raise RuntimeError("Active task does not match current task.")

        current_benchmark: Benchmark = result.current_benchmark  # type: ignore[assignment]
        progress_state.compiling = False
        progress_state.ended = True
        progress_state.requests_rate = (
            current_benchmark.metrics.requests_per_second.successful.mean
        )
        progress_state.requests_processing = (
            current_benchmark.metrics.request_concurrency.successful.mean
        )

    def handle_end(self, result: BenchmarkerResult):  # noqa: ARG002
        """Force the overall bar to 100%, stop the live display, and reset."""
        if self.progress_task is None:
            raise RuntimeError("Progress task not set.")

        self.benchmarker_progress.update(
            self.progress_task,
            completed=len(self.benchmarker_tasks) * 1000,
            total=len(self.benchmarker_tasks) * 1000,
            completed_benchmarks=len(self.benchmarker_tasks),
            total_benchmarks=len(self.benchmarker_tasks),
        )
        self.benchmarker_progress.stop_task(self.progress_task)
        self.benchmarker_live.stop()
        self.active_task = None
        self.benchmarker_tasks = []
        self.progress_task = None

    def create_task_progress_columns(self) -> list[ProgressColumn]:
        """Build the per-benchmark row columns; overridable by subclasses.

        The final column is the summary text; subclasses replace it to add
        extra summary lines (see the generative-text subclass below).
        """
        columns = [
            TextColumn("[{task.fields[start_time]}]"),
            SpinnerColumn(style=Colors.PROGRESS),
            TaskProgressColumn(style=Colors.PROGRESS),
            TextColumn("{task.description}"),
            TextColumn("({task.fields[progress_status]})"),
            TextColumn(" "),
        ]

        if not self.display_scheduler_stats:
            columns += [
                TextColumn("{task.fields[requests_summary]}\n"),
            ]
        else:
            columns += [
                TextColumn(
                    "{task.fields[requests_summary]}\n{task.fields[scheduler_stats]}\n"
                ),
            ]

        return columns

    def create_task_progress_state(
        self,
        task_id: TaskID,
        index: int,  # noqa: ARG002
        strategy_type: StrategyType,
        result: BenchmarkerResult,  # noqa: ARG002
    ) -> BTPS:
        """Factory for a row's state object; overridable by subclasses."""
        return BenchmarkerTaskProgressState(  # type: ignore[return-value]
            display_scheduler_stats=self.display_scheduler_stats,
            task_id=task_id,
            strategy=strategy_type,
        )
|
|
626
|
+
|
|
627
|
+
|
|
628
|
+
class GenerativeTextBenchmarkerProgressDisplay(
    BenchmarkerProgressDisplay[GenerativeTextBenchmarkerTaskProgressState]
):
    """Benchmarker display specialized for generative text benchmarks.

    Extends the base display with a token-metrics summary line (gen/s, tot/s,
    TTFT, ITL, prompt/output token counts) per benchmark row.
    """

    def handle_update_scheduler_update(
        self,
        progress_state: GenerativeTextBenchmarkerTaskProgressState,
        result: BenchmarkerResult,
    ):
        """Copy base request metrics, then the rolling token metrics."""
        super().handle_update_scheduler_update(progress_state, result)
        current_aggregator: GenerativeBenchmarkAggregator = result.current_aggregator  # type: ignore[assignment]
        progress_state.output_tokens = (
            current_aggregator.requests_stats.output_tokens.mean
        )
        progress_state.prompt_tokens = (
            current_aggregator.requests_stats.prompt_tokens.mean
        )
        progress_state.output_tokens_rate = (
            current_aggregator.requests_stats.output_tokens.rate
        )
        progress_state.total_tokens_rate = (
            current_aggregator.requests_stats.total_tokens.rate
        )
        progress_state.tokens_ttft = (
            current_aggregator.requests_stats.time_to_first_token.mean_ms
        )
        progress_state.tokens_itl = (
            current_aggregator.requests_stats.inter_token_latency.mean_ms
        )

    def handle_update_benchmark_compiled(
        self,
        progress_state: GenerativeTextBenchmarkerTaskProgressState,
        result: BenchmarkerResult,
    ):
        """Finalize the row with the compiled benchmark's full token metrics.

        Overwrites the rolling aggregator values with the compiled benchmark's
        final means so the completed row shows settled numbers.
        """
        super().handle_update_benchmark_compiled(progress_state, result)

        current_benchmark: GenerativeBenchmark = result.current_benchmark  # type: ignore[assignment]
        progress_state.request_latency = (
            current_benchmark.metrics.request_latency.successful.mean
        )
        progress_state.requests_successful = current_benchmark.request_totals.successful
        progress_state.requests_errored = current_benchmark.request_totals.errored
        progress_state.requests_incomplete = current_benchmark.request_totals.incomplete
        progress_state.output_tokens = (
            current_benchmark.metrics.output_token_count.successful.mean
        )
        progress_state.prompt_tokens = (
            current_benchmark.metrics.prompt_token_count.successful.mean
        )
        progress_state.output_tokens_rate = (
            current_benchmark.metrics.output_tokens_per_second.successful.mean
        )
        progress_state.total_tokens_rate = (
            current_benchmark.metrics.tokens_per_second.successful.mean
        )
        progress_state.tokens_ttft = (
            current_benchmark.metrics.time_to_first_token_ms.successful.mean
        )
        progress_state.tokens_itl = (
            current_benchmark.metrics.inter_token_latency_ms.successful.mean
        )

    def create_task_progress_state(
        self,
        task_id: TaskID,
        index: int,  # noqa: ARG002
        strategy_type: StrategyType,
        result: BenchmarkerResult,  # noqa: ARG002
    ) -> GenerativeTextBenchmarkerTaskProgressState:
        """Factory returning the generative-text variant of the row state."""
        return GenerativeTextBenchmarkerTaskProgressState(
            display_scheduler_stats=self.display_scheduler_stats,
            task_id=task_id,
            strategy=strategy_type,
        )

    def create_task_progress_columns(self) -> list[ProgressColumn]:
        """Base columns with the summary column replaced to add token stats."""
        columns = super().create_task_progress_columns()
        columns = columns[:-1]  # remove the last display info column

        if not self.display_scheduler_stats:
            columns += [
                TextColumn(
                    "{task.fields[requests_summary]}\n{task.fields[tokens_summary]}",
                ),
            ]
        else:
            columns += [
                TextColumn(
                    "{task.fields[requests_summary]}\n{task.fields[tokens_summary]}\n{task.fields[scheduler_stats]}",
                ),
            ]

        return columns
|