guidellm 0.4.0a18__py3-none-any.whl → 0.4.0a155__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of guidellm might be problematic. Click here for more details.
- guidellm/__init__.py +5 -2
- guidellm/__main__.py +451 -252
- guidellm/backends/__init__.py +33 -0
- guidellm/backends/backend.py +110 -0
- guidellm/backends/openai.py +355 -0
- guidellm/backends/response_handlers.py +455 -0
- guidellm/benchmark/__init__.py +53 -39
- guidellm/benchmark/benchmarker.py +148 -317
- guidellm/benchmark/entrypoints.py +466 -128
- guidellm/benchmark/output.py +517 -771
- guidellm/benchmark/profile.py +580 -280
- guidellm/benchmark/progress.py +568 -549
- guidellm/benchmark/scenarios/__init__.py +40 -0
- guidellm/benchmark/scenarios/chat.json +6 -0
- guidellm/benchmark/scenarios/rag.json +6 -0
- guidellm/benchmark/schemas.py +2085 -0
- guidellm/data/__init__.py +28 -4
- guidellm/data/collators.py +16 -0
- guidellm/data/deserializers/__init__.py +53 -0
- guidellm/data/deserializers/deserializer.py +109 -0
- guidellm/data/deserializers/file.py +222 -0
- guidellm/data/deserializers/huggingface.py +94 -0
- guidellm/data/deserializers/memory.py +192 -0
- guidellm/data/deserializers/synthetic.py +346 -0
- guidellm/data/loaders.py +145 -0
- guidellm/data/preprocessors/__init__.py +25 -0
- guidellm/data/preprocessors/formatters.py +412 -0
- guidellm/data/preprocessors/mappers.py +198 -0
- guidellm/data/preprocessors/preprocessor.py +29 -0
- guidellm/data/processor.py +30 -0
- guidellm/data/schemas.py +13 -0
- guidellm/data/utils/__init__.py +10 -0
- guidellm/data/utils/dataset.py +94 -0
- guidellm/data/utils/functions.py +18 -0
- guidellm/extras/__init__.py +4 -0
- guidellm/extras/audio.py +215 -0
- guidellm/extras/vision.py +242 -0
- guidellm/logger.py +2 -2
- guidellm/mock_server/__init__.py +8 -0
- guidellm/mock_server/config.py +84 -0
- guidellm/mock_server/handlers/__init__.py +17 -0
- guidellm/mock_server/handlers/chat_completions.py +280 -0
- guidellm/mock_server/handlers/completions.py +280 -0
- guidellm/mock_server/handlers/tokenizer.py +142 -0
- guidellm/mock_server/models.py +510 -0
- guidellm/mock_server/server.py +168 -0
- guidellm/mock_server/utils.py +302 -0
- guidellm/preprocess/dataset.py +23 -26
- guidellm/presentation/builder.py +2 -2
- guidellm/presentation/data_models.py +25 -21
- guidellm/presentation/injector.py +2 -3
- guidellm/scheduler/__init__.py +65 -26
- guidellm/scheduler/constraints.py +1035 -0
- guidellm/scheduler/environments.py +252 -0
- guidellm/scheduler/scheduler.py +140 -368
- guidellm/scheduler/schemas.py +272 -0
- guidellm/scheduler/strategies.py +519 -0
- guidellm/scheduler/worker.py +391 -420
- guidellm/scheduler/worker_group.py +707 -0
- guidellm/schemas/__init__.py +31 -0
- guidellm/schemas/info.py +159 -0
- guidellm/schemas/request.py +216 -0
- guidellm/schemas/response.py +119 -0
- guidellm/schemas/stats.py +228 -0
- guidellm/{config.py → settings.py} +32 -21
- guidellm/utils/__init__.py +95 -8
- guidellm/utils/auto_importer.py +98 -0
- guidellm/utils/cli.py +46 -2
- guidellm/utils/console.py +183 -0
- guidellm/utils/encoding.py +778 -0
- guidellm/utils/functions.py +134 -0
- guidellm/utils/hf_datasets.py +1 -2
- guidellm/utils/hf_transformers.py +4 -4
- guidellm/utils/imports.py +9 -0
- guidellm/utils/messaging.py +1118 -0
- guidellm/utils/mixins.py +115 -0
- guidellm/utils/pydantic_utils.py +411 -0
- guidellm/utils/random.py +3 -4
- guidellm/utils/registry.py +220 -0
- guidellm/utils/singleton.py +133 -0
- guidellm/{objects → utils}/statistics.py +341 -247
- guidellm/utils/synchronous.py +159 -0
- guidellm/utils/text.py +163 -50
- guidellm/utils/typing.py +41 -0
- guidellm/version.py +1 -1
- {guidellm-0.4.0a18.dist-info → guidellm-0.4.0a155.dist-info}/METADATA +33 -10
- guidellm-0.4.0a155.dist-info/RECORD +96 -0
- guidellm/backend/__init__.py +0 -23
- guidellm/backend/backend.py +0 -259
- guidellm/backend/openai.py +0 -705
- guidellm/backend/response.py +0 -136
- guidellm/benchmark/aggregator.py +0 -760
- guidellm/benchmark/benchmark.py +0 -837
- guidellm/benchmark/scenario.py +0 -104
- guidellm/data/prideandprejudice.txt.gz +0 -0
- guidellm/dataset/__init__.py +0 -22
- guidellm/dataset/creator.py +0 -213
- guidellm/dataset/entrypoints.py +0 -42
- guidellm/dataset/file.py +0 -92
- guidellm/dataset/hf_datasets.py +0 -62
- guidellm/dataset/in_memory.py +0 -132
- guidellm/dataset/synthetic.py +0 -287
- guidellm/objects/__init__.py +0 -18
- guidellm/objects/pydantic.py +0 -89
- guidellm/request/__init__.py +0 -18
- guidellm/request/loader.py +0 -284
- guidellm/request/request.py +0 -79
- guidellm/request/types.py +0 -10
- guidellm/scheduler/queues.py +0 -25
- guidellm/scheduler/result.py +0 -155
- guidellm/scheduler/strategy.py +0 -495
- guidellm-0.4.0a18.dist-info/RECORD +0 -62
- {guidellm-0.4.0a18.dist-info → guidellm-0.4.0a155.dist-info}/WHEEL +0 -0
- {guidellm-0.4.0a18.dist-info → guidellm-0.4.0a155.dist-info}/entry_points.txt +0 -0
- {guidellm-0.4.0a18.dist-info → guidellm-0.4.0a155.dist-info}/licenses/LICENSE +0 -0
- {guidellm-0.4.0a18.dist-info → guidellm-0.4.0a155.dist-info}/top_level.txt +0 -0
guidellm/benchmark/progress.py
CHANGED
|
@@ -1,8 +1,25 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
1
|
+
"""
|
|
2
|
+
Benchmark progress tracking and console display abstractions.
|
|
3
|
+
|
|
4
|
+
Provides progress tracking interfaces and implementations for monitoring benchmark
|
|
5
|
+
execution, displaying real-time statistics, and managing UI updates during
|
|
6
|
+
generative benchmarking operations.
|
|
7
|
+
|
|
8
|
+
Classes:
|
|
9
|
+
BenchmarkerProgress: Abstract base for benchmark progress tracking.
|
|
10
|
+
BenchmarkerProgressGroup: Composite progress handler for multiple instances.
|
|
11
|
+
GenerativeConsoleBenchmarkerProgress: Console-based progress display.
|
|
12
|
+
|
|
13
|
+
Type Variables:
|
|
14
|
+
BenchmarkT: Generic benchmark object type.
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
from abc import ABC, abstractmethod
|
|
3
20
|
from dataclasses import dataclass
|
|
4
21
|
from datetime import datetime
|
|
5
|
-
from typing import
|
|
22
|
+
from typing import Any, Generic, Literal
|
|
6
23
|
|
|
7
24
|
from rich.console import Group
|
|
8
25
|
from rich.live import Live
|
|
@@ -10,7 +27,6 @@ from rich.panel import Panel
|
|
|
10
27
|
from rich.progress import (
|
|
11
28
|
BarColumn,
|
|
12
29
|
Progress,
|
|
13
|
-
ProgressColumn,
|
|
14
30
|
SpinnerColumn,
|
|
15
31
|
TaskID,
|
|
16
32
|
TaskProgressColumn,
|
|
@@ -19,145 +35,392 @@ from rich.progress import (
|
|
|
19
35
|
TimeRemainingColumn,
|
|
20
36
|
)
|
|
21
37
|
|
|
22
|
-
from guidellm.benchmark.
|
|
23
|
-
|
|
24
|
-
|
|
38
|
+
from guidellm.benchmark.profile import Profile
|
|
39
|
+
from guidellm.benchmark.schemas import (
|
|
40
|
+
BenchmarkT,
|
|
41
|
+
EstimatedBenchmarkState,
|
|
42
|
+
GenerativeBenchmark,
|
|
25
43
|
)
|
|
26
|
-
from guidellm.
|
|
27
|
-
from guidellm.
|
|
28
|
-
from guidellm.scheduler import (
|
|
29
|
-
SchedulingStrategy,
|
|
30
|
-
StrategyType,
|
|
31
|
-
strategy_display_str,
|
|
32
|
-
)
|
|
33
|
-
from guidellm.utils import Colors
|
|
44
|
+
from guidellm.scheduler import SchedulerState, SchedulingStrategy, StrategyType
|
|
45
|
+
from guidellm.utils import Colors, format_value_display
|
|
34
46
|
|
|
35
|
-
__all__ = [
|
|
36
|
-
"BenchmarkerProgressDisplay",
|
|
37
|
-
"BenchmarkerTaskProgressState",
|
|
38
|
-
"GenerativeTextBenchmarkerProgressDisplay",
|
|
39
|
-
"GenerativeTextBenchmarkerTaskProgressState",
|
|
40
|
-
]
|
|
47
|
+
__all__ = ["BenchmarkerProgress", "GenerativeConsoleBenchmarkerProgress"]
|
|
41
48
|
|
|
42
49
|
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
50
|
+
class BenchmarkerProgress(Generic[BenchmarkT], ABC):
|
|
51
|
+
"""
|
|
52
|
+
Abstract base class for tracking and displaying benchmark progress.
|
|
53
|
+
|
|
54
|
+
Provides lifecycle hooks for monitoring benchmark execution stages including
|
|
55
|
+
initialization, start, updates, completion, and finalization. Supports
|
|
56
|
+
enable/disable functionality for conditional progress tracking.
|
|
57
|
+
"""
|
|
58
|
+
|
|
59
|
+
def __init__(self):
|
|
60
|
+
"""
|
|
61
|
+
Initialize progress tracker.
|
|
62
|
+
|
|
63
|
+
:param enabled: Whether to enable progress tracking and display.
|
|
64
|
+
"""
|
|
65
|
+
self.profile: Profile = None
|
|
66
|
+
self.current_strategy: SchedulingStrategy = None
|
|
67
|
+
|
|
68
|
+
@abstractmethod
|
|
69
|
+
async def on_initialize(self, profile: Profile):
|
|
70
|
+
"""
|
|
71
|
+
Initialize progress tracking for benchmark profile.
|
|
72
|
+
|
|
73
|
+
:param profile: Benchmark profile configuration.
|
|
74
|
+
"""
|
|
75
|
+
|
|
76
|
+
@abstractmethod
|
|
77
|
+
async def on_benchmark_start(self, strategy: SchedulingStrategy):
|
|
78
|
+
"""
|
|
79
|
+
Handle start of new benchmark strategy execution.
|
|
80
|
+
|
|
81
|
+
:param strategy: Scheduling strategy being executed.
|
|
82
|
+
"""
|
|
83
|
+
|
|
84
|
+
@abstractmethod
|
|
85
|
+
async def on_benchmark_update(
|
|
86
|
+
self, estimated_state: EstimatedBenchmarkState, scheduler_state: SchedulerState
|
|
87
|
+
):
|
|
88
|
+
"""
|
|
89
|
+
Handle benchmark execution progress update.
|
|
65
90
|
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
91
|
+
:param estimated_state: Current benchmark metrics and statistics.
|
|
92
|
+
:param scheduler_state: Current scheduler execution state.
|
|
93
|
+
"""
|
|
94
|
+
|
|
95
|
+
@abstractmethod
|
|
96
|
+
async def on_benchmark_complete(self, benchmark: BenchmarkT):
|
|
97
|
+
"""
|
|
98
|
+
Handle completion of benchmark strategy execution.
|
|
99
|
+
|
|
100
|
+
:param benchmark: Completed benchmark results.
|
|
101
|
+
"""
|
|
102
|
+
|
|
103
|
+
@abstractmethod
|
|
104
|
+
async def on_finalize(self):
|
|
105
|
+
"""Finalize progress tracking and cleanup resources."""
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
class GenerativeConsoleBenchmarkerProgress(
|
|
109
|
+
BenchmarkerProgress[GenerativeBenchmark], Live
|
|
110
|
+
):
|
|
111
|
+
"""
|
|
112
|
+
Console-based progress display for generative benchmarks.
|
|
113
|
+
|
|
114
|
+
Provides real-time visual progress tracking using Rich library components,
|
|
115
|
+
displaying benchmark execution statistics, timing information, and progress
|
|
116
|
+
bars in a structured console interface.
|
|
117
|
+
"""
|
|
118
|
+
|
|
119
|
+
def __init__(self, display_scheduler_stats: bool = False):
|
|
120
|
+
"""
|
|
121
|
+
Initialize console progress display.
|
|
122
|
+
|
|
123
|
+
:param enabled: Whether to enable progress tracking and display.
|
|
124
|
+
:param display_scheduler_stats: Whether to display scheduler statistics.
|
|
125
|
+
"""
|
|
126
|
+
BenchmarkerProgress.__init__(self)
|
|
127
|
+
Live.__init__(
|
|
128
|
+
self,
|
|
129
|
+
refresh_per_second=4,
|
|
130
|
+
auto_refresh=True,
|
|
131
|
+
redirect_stdout=True,
|
|
132
|
+
redirect_stderr=True,
|
|
133
|
+
)
|
|
134
|
+
self.display_scheduler_stats: bool = display_scheduler_stats
|
|
135
|
+
self.run_progress: Progress = None
|
|
136
|
+
self.run_progress_task: TaskID = None
|
|
137
|
+
self.tasks_progress: _GenerativeProgressTasks = None
|
|
138
|
+
|
|
139
|
+
async def on_initialize(self, profile: Profile):
|
|
140
|
+
"""
|
|
141
|
+
Initialize console display components and start rendering.
|
|
142
|
+
|
|
143
|
+
:param profile: Benchmark profile configuration.
|
|
144
|
+
"""
|
|
145
|
+
self.tasks_progress = _GenerativeProgressTasks(
|
|
146
|
+
profile=profile, display_scheduler_stats=self.display_scheduler_stats
|
|
147
|
+
)
|
|
148
|
+
self.run_progress = Progress(
|
|
149
|
+
TextColumn("Generating...", style=f"italic {Colors.progress}"),
|
|
150
|
+
BarColumn(
|
|
151
|
+
bar_width=None,
|
|
152
|
+
complete_style=Colors.progress,
|
|
153
|
+
finished_style=Colors.success,
|
|
154
|
+
),
|
|
155
|
+
TextColumn(
|
|
156
|
+
"({task.fields[completed_benchmarks]}/{task.fields[total_benchmarks]})",
|
|
157
|
+
style=Colors.progress,
|
|
158
|
+
),
|
|
159
|
+
TextColumn("["),
|
|
160
|
+
TimeElapsedColumn(),
|
|
161
|
+
TextColumn("<"),
|
|
162
|
+
TimeRemainingColumn(),
|
|
163
|
+
TextColumn("]"),
|
|
164
|
+
)
|
|
165
|
+
self.run_progress_task = self.run_progress.add_task("")
|
|
166
|
+
self._sync_run_progress()
|
|
167
|
+
self.update(
|
|
168
|
+
Group(
|
|
169
|
+
Panel(
|
|
170
|
+
self.tasks_progress,
|
|
171
|
+
title="Benchmarks",
|
|
172
|
+
title_align="left",
|
|
173
|
+
expand=True,
|
|
174
|
+
),
|
|
175
|
+
self.run_progress,
|
|
176
|
+
)
|
|
177
|
+
)
|
|
178
|
+
self.start()
|
|
179
|
+
|
|
180
|
+
async def on_benchmark_start(self, strategy: SchedulingStrategy):
|
|
181
|
+
"""
|
|
182
|
+
Update display for new benchmark strategy start.
|
|
183
|
+
|
|
184
|
+
:param strategy: Scheduling strategy being executed.
|
|
185
|
+
"""
|
|
186
|
+
self.tasks_progress.start_benchmark(strategy)
|
|
187
|
+
self._sync_run_progress()
|
|
188
|
+
|
|
189
|
+
async def on_benchmark_update(
|
|
190
|
+
self,
|
|
191
|
+
aggregator_update: EstimatedBenchmarkState | None,
|
|
192
|
+
scheduler_state: SchedulerState,
|
|
193
|
+
):
|
|
194
|
+
"""
|
|
195
|
+
Update display with current benchmark progress.
|
|
196
|
+
|
|
197
|
+
:param aggregator_update: Current benchmark metrics and statistics.
|
|
198
|
+
:param scheduler_state: Current scheduler execution state.
|
|
199
|
+
"""
|
|
200
|
+
self.tasks_progress.update_benchmark(aggregator_update, scheduler_state)
|
|
201
|
+
self._sync_run_progress()
|
|
202
|
+
|
|
203
|
+
async def on_benchmark_complete(self, benchmark: GenerativeBenchmark):
|
|
204
|
+
"""
|
|
205
|
+
Update display for completed benchmark.
|
|
206
|
+
|
|
207
|
+
:param benchmark: Completed benchmark results.
|
|
208
|
+
"""
|
|
209
|
+
self.tasks_progress.complete_benchmark(benchmark)
|
|
210
|
+
self._sync_run_progress()
|
|
211
|
+
|
|
212
|
+
async def on_finalize(self):
|
|
213
|
+
"""Stop display rendering and cleanup resources."""
|
|
214
|
+
self.tasks_progress.finalize()
|
|
215
|
+
self._sync_run_progress()
|
|
216
|
+
self.run_progress.stop_task(self.run_progress_task)
|
|
217
|
+
self.stop()
|
|
218
|
+
self.run_progress = None
|
|
219
|
+
self.run_progress_task = None
|
|
220
|
+
self.tasks_progress = None
|
|
221
|
+
|
|
222
|
+
def _sync_run_progress(self):
|
|
223
|
+
"""Synchronize overall progress display with task progress."""
|
|
224
|
+
self.run_progress.update(
|
|
225
|
+
self.run_progress_task,
|
|
226
|
+
total=self.tasks_progress.steps_total,
|
|
227
|
+
completed=self.tasks_progress.steps_progress,
|
|
228
|
+
completed_benchmarks=self.tasks_progress.tasks_progress,
|
|
229
|
+
total_benchmarks=self.tasks_progress.tasks_total,
|
|
230
|
+
)
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
# Scaling factor for progress calculations to provide granular progress updates
|
|
234
|
+
_PROGRESS_SCALE = 1000
|
|
235
|
+
|
|
236
|
+
|
|
237
|
+
class _GenerativeProgressTasks(Progress):
|
|
238
|
+
def __init__(self, profile: Profile, display_scheduler_stats: bool):
|
|
239
|
+
self.profile: Profile = profile
|
|
240
|
+
self.display_scheduler_stats: bool = display_scheduler_stats
|
|
241
|
+
self.benchmark_task_states: list[_GenerativeProgressTaskState] = []
|
|
242
|
+
self.current_index: int = -1
|
|
243
|
+
|
|
244
|
+
summary_text = "{task.fields[requests_summary]}\n{task.fields[tokens_summary]}"
|
|
245
|
+
if self.display_scheduler_stats:
|
|
246
|
+
summary_text += "\n{task.fields[scheduler_stats]}"
|
|
247
|
+
super().__init__(
|
|
248
|
+
TextColumn("[{task.fields[start_time]}]"),
|
|
249
|
+
SpinnerColumn(style=Colors.progress),
|
|
250
|
+
TaskProgressColumn(style=Colors.progress),
|
|
251
|
+
TextColumn("{task.description}"),
|
|
252
|
+
TextColumn("({task.fields[progress_status]})"),
|
|
253
|
+
TextColumn(" "),
|
|
254
|
+
TextColumn(summary_text),
|
|
255
|
+
)
|
|
256
|
+
|
|
257
|
+
for strategy_type in profile.strategy_types:
|
|
258
|
+
task_state = _GenerativeProgressTaskState(
|
|
259
|
+
strategy_type=strategy_type,
|
|
260
|
+
)
|
|
261
|
+
task_id = self.add_task(**task_state.current)
|
|
262
|
+
task_state.task_id = task_id
|
|
263
|
+
self.benchmark_task_states.append(task_state)
|
|
70
264
|
|
|
71
265
|
@property
|
|
72
|
-
def
|
|
73
|
-
return
|
|
266
|
+
def tasks_total(self) -> int:
|
|
267
|
+
return len(self.benchmark_task_states)
|
|
74
268
|
|
|
75
269
|
@property
|
|
76
|
-
def
|
|
77
|
-
|
|
78
|
-
return None
|
|
270
|
+
def tasks_progress(self) -> int:
|
|
271
|
+
return self.current_index + 1
|
|
79
272
|
|
|
80
|
-
|
|
273
|
+
@property
|
|
274
|
+
def steps_total(self) -> int:
|
|
275
|
+
return _PROGRESS_SCALE * len(self.benchmark_task_states)
|
|
81
276
|
|
|
82
277
|
@property
|
|
83
|
-
def
|
|
84
|
-
|
|
85
|
-
|
|
278
|
+
def steps_progress(self) -> int:
|
|
279
|
+
progress_current_task = (
|
|
280
|
+
self.benchmark_task_states[self.current_index].progress
|
|
281
|
+
if self.current_index < len(self.benchmark_task_states)
|
|
282
|
+
else 0
|
|
283
|
+
)
|
|
284
|
+
progress_total = self.current_index + (progress_current_task or 0)
|
|
86
285
|
|
|
87
|
-
|
|
88
|
-
|
|
286
|
+
return progress_total * _PROGRESS_SCALE
|
|
287
|
+
|
|
288
|
+
def start_benchmark(self, strategy: SchedulingStrategy):
|
|
289
|
+
self.current_index += 1
|
|
290
|
+
if self.current_index >= len(self.benchmark_task_states):
|
|
291
|
+
# New task past initially estimated, append it to the end
|
|
292
|
+
task_state = _GenerativeProgressTaskState(strategy_type=strategy.type_)
|
|
293
|
+
task_id = self.add_task(**task_state.current)
|
|
294
|
+
task_state.task_id = task_id
|
|
295
|
+
self.benchmark_task_states.append(task_state)
|
|
89
296
|
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
297
|
+
self.benchmark_task_states[self.current_index].start(strategy)
|
|
298
|
+
self.update(
|
|
299
|
+
self.benchmark_task_states[self.current_index].task_id,
|
|
300
|
+
start=True,
|
|
301
|
+
**self.benchmark_task_states[self.current_index].current,
|
|
93
302
|
)
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
303
|
+
|
|
304
|
+
def update_benchmark(
|
|
305
|
+
self,
|
|
306
|
+
aggregator_update: EstimatedBenchmarkState,
|
|
307
|
+
scheduler_state: SchedulerState,
|
|
308
|
+
):
|
|
309
|
+
self.benchmark_task_states[self.current_index].update(
|
|
310
|
+
aggregator_update, scheduler_state
|
|
311
|
+
)
|
|
312
|
+
self.update(
|
|
313
|
+
self.benchmark_task_states[self.current_index].task_id,
|
|
314
|
+
**self.benchmark_task_states[self.current_index].current,
|
|
315
|
+
)
|
|
316
|
+
|
|
317
|
+
def complete_benchmark(self, benchmark: GenerativeBenchmark):
|
|
318
|
+
self.benchmark_task_states[self.current_index].complete(benchmark)
|
|
319
|
+
self.update(
|
|
320
|
+
self.benchmark_task_states[self.current_index].task_id,
|
|
321
|
+
**self.benchmark_task_states[self.current_index].current,
|
|
98
322
|
)
|
|
99
323
|
|
|
100
|
-
|
|
324
|
+
def finalize(self):
|
|
325
|
+
self.stop()
|
|
326
|
+
|
|
327
|
+
|
|
328
|
+
@dataclass
|
|
329
|
+
class _GenerativeProgressTaskState:
|
|
330
|
+
strategy_type: StrategyType
|
|
331
|
+
task_id: TaskID = None
|
|
332
|
+
strategy: SchedulingStrategy | None = None
|
|
333
|
+
benchmark_status: Literal[
|
|
334
|
+
"pending", "in_warmup", "in_progress", "in_cooldown", "completed"
|
|
335
|
+
] = "pending"
|
|
336
|
+
progress: float | None = None
|
|
337
|
+
start_time: float = -1.0
|
|
338
|
+
successful_requests: int = 0
|
|
339
|
+
cancelled_requests: int = 0
|
|
340
|
+
errored_requests: int = 0
|
|
341
|
+
request_concurrency: int = 0
|
|
342
|
+
requests_per_second: float = 0
|
|
343
|
+
request_latency: float = 0
|
|
344
|
+
output_tokens: int = 0
|
|
345
|
+
output_tokens_rate: float = 0
|
|
346
|
+
prompt_tokens: int = 0
|
|
347
|
+
total_tokens_rate: float = 0
|
|
348
|
+
time_to_first_token: float = 0
|
|
349
|
+
inter_token_latency: float = 0
|
|
350
|
+
queued_time: float = 0
|
|
351
|
+
request_targeted_start_delay: float = 0
|
|
352
|
+
scheduler_overheads_time: float = 0
|
|
101
353
|
|
|
102
354
|
@property
|
|
103
|
-
def
|
|
104
|
-
|
|
355
|
+
def current(self) -> dict[str, Any]:
|
|
356
|
+
return {
|
|
105
357
|
"start_time": self.formatted_start_time,
|
|
358
|
+
"description": str(self.strategy or self.strategy_type),
|
|
106
359
|
"progress_status": self.formatted_progress_status,
|
|
107
360
|
"requests_summary": self.formatted_requests_summary,
|
|
361
|
+
"tokens_summary": self.formatted_tokens_summary,
|
|
362
|
+
"scheduler_stats": self.formatted_scheduler_stats,
|
|
363
|
+
"completed": self.completed,
|
|
364
|
+
"total": self.total,
|
|
108
365
|
}
|
|
109
366
|
|
|
110
|
-
|
|
111
|
-
|
|
367
|
+
@property
|
|
368
|
+
def completed(self) -> float:
|
|
369
|
+
if self.benchmark_status == "pending":
|
|
370
|
+
return 0
|
|
371
|
+
|
|
372
|
+
if self.benchmark_status == "completed":
|
|
373
|
+
return _PROGRESS_SCALE
|
|
112
374
|
|
|
113
|
-
return
|
|
375
|
+
return self.progress * _PROGRESS_SCALE if self.progress is not None else None
|
|
376
|
+
|
|
377
|
+
@property
|
|
378
|
+
def total(self) -> float:
|
|
379
|
+
return _PROGRESS_SCALE
|
|
114
380
|
|
|
115
381
|
@property
|
|
116
382
|
def formatted_start_time(self) -> str:
|
|
117
|
-
if self.start_time
|
|
383
|
+
if self.start_time < 0.0:
|
|
118
384
|
return "--:--:--"
|
|
119
385
|
|
|
120
386
|
return datetime.fromtimestamp(self.start_time).strftime("%H:%M:%S")
|
|
121
387
|
|
|
122
388
|
@property
|
|
123
389
|
def formatted_progress_status(self) -> str:
|
|
124
|
-
if self.
|
|
125
|
-
status = "complete"
|
|
126
|
-
color = Colors.SUCCESS
|
|
127
|
-
elif self.compiling:
|
|
128
|
-
status = "compiling"
|
|
129
|
-
color = Colors.PROGRESS
|
|
130
|
-
elif self.started and self.in_warmup:
|
|
390
|
+
if self.benchmark_status == "in_warmup":
|
|
131
391
|
status = "warmup"
|
|
132
|
-
color = Colors.
|
|
133
|
-
elif self.
|
|
134
|
-
status = "cooldown"
|
|
135
|
-
color = Colors.PROGRESS
|
|
136
|
-
elif self.started:
|
|
392
|
+
color = Colors.progress
|
|
393
|
+
elif self.benchmark_status == "in_progress":
|
|
137
394
|
status = "running"
|
|
138
|
-
color = Colors.
|
|
395
|
+
color = Colors.progress
|
|
396
|
+
elif self.benchmark_status == "in_cooldown":
|
|
397
|
+
status = "cooldown"
|
|
398
|
+
color = Colors.progress
|
|
399
|
+
elif self.benchmark_status == "completed":
|
|
400
|
+
status = "complete"
|
|
401
|
+
color = Colors.success
|
|
139
402
|
else:
|
|
140
403
|
status = "pending"
|
|
141
|
-
color = Colors.
|
|
404
|
+
color = Colors.info
|
|
142
405
|
|
|
143
406
|
return f"[{color}]{status.ljust(8)}[/{color}]"
|
|
144
407
|
|
|
145
408
|
@property
|
|
146
409
|
def formatted_requests_summary(self) -> str:
|
|
147
|
-
if
|
|
410
|
+
if self.benchmark_status == "pending":
|
|
148
411
|
return " "
|
|
149
412
|
|
|
150
413
|
return (
|
|
151
|
-
f"[{Colors.
|
|
152
|
-
+
|
|
153
|
-
value=self.
|
|
414
|
+
f"[{Colors.info}]Req:[/{Colors.info}] "
|
|
415
|
+
+ format_value_display(
|
|
416
|
+
value=self.requests_per_second,
|
|
154
417
|
label="req/s",
|
|
155
418
|
total_characters=12,
|
|
156
419
|
digits_places=4,
|
|
157
420
|
decimal_places=1,
|
|
158
421
|
)
|
|
159
422
|
+ ", "
|
|
160
|
-
+
|
|
423
|
+
+ format_value_display(
|
|
161
424
|
value=self.request_latency,
|
|
162
425
|
label="Lat",
|
|
163
426
|
units="s",
|
|
@@ -166,32 +429,32 @@ class BenchmarkerTaskProgressState:
|
|
|
166
429
|
decimal_places=2,
|
|
167
430
|
)
|
|
168
431
|
+ ", "
|
|
169
|
-
+
|
|
170
|
-
value=self.
|
|
432
|
+
+ format_value_display(
|
|
433
|
+
value=self.request_concurrency,
|
|
171
434
|
label="Conc",
|
|
172
435
|
total_characters=12,
|
|
173
436
|
digits_places=4,
|
|
174
437
|
decimal_places=1,
|
|
175
438
|
)
|
|
176
439
|
+ ", "
|
|
177
|
-
+
|
|
178
|
-
value=self.
|
|
440
|
+
+ format_value_display(
|
|
441
|
+
value=self.successful_requests,
|
|
179
442
|
label="Comp",
|
|
180
443
|
total_characters=12,
|
|
181
444
|
digits_places=5,
|
|
182
445
|
decimal_places=0,
|
|
183
446
|
)
|
|
184
447
|
+ ", "
|
|
185
|
-
+
|
|
186
|
-
value=self.
|
|
448
|
+
+ format_value_display(
|
|
449
|
+
value=self.cancelled_requests,
|
|
187
450
|
label="Inc",
|
|
188
451
|
total_characters=12,
|
|
189
452
|
digits_places=5,
|
|
190
453
|
decimal_places=0,
|
|
191
454
|
)
|
|
192
455
|
+ ", "
|
|
193
|
-
+
|
|
194
|
-
value=self.
|
|
456
|
+
+ format_value_display(
|
|
457
|
+
value=self.errored_requests,
|
|
195
458
|
label="Err",
|
|
196
459
|
total_characters=12,
|
|
197
460
|
digits_places=5,
|
|
@@ -199,101 +462,14 @@ class BenchmarkerTaskProgressState:
|
|
|
199
462
|
)
|
|
200
463
|
)
|
|
201
464
|
|
|
202
|
-
@property
|
|
203
|
-
def formatted_scheduler_stats(self) -> str:
|
|
204
|
-
if not self.started:
|
|
205
|
-
return " "
|
|
206
|
-
|
|
207
|
-
return (
|
|
208
|
-
f"[{Colors.INFO}]Sys:[/{Colors.INFO}] "
|
|
209
|
-
+ BenchmarkerTaskProgressState.format_progress_display(
|
|
210
|
-
value=self.worker_overheads_time_ms,
|
|
211
|
-
label="Work OH",
|
|
212
|
-
units="ms",
|
|
213
|
-
total_characters=18,
|
|
214
|
-
digits_places=3,
|
|
215
|
-
decimal_places=1,
|
|
216
|
-
)
|
|
217
|
-
+ ", "
|
|
218
|
-
+ BenchmarkerTaskProgressState.format_progress_display(
|
|
219
|
-
value=self.backend_overheads_time_ms,
|
|
220
|
-
label="Back OH",
|
|
221
|
-
units="ms",
|
|
222
|
-
total_characters=18,
|
|
223
|
-
digits_places=3,
|
|
224
|
-
decimal_places=1,
|
|
225
|
-
)
|
|
226
|
-
+ ", "
|
|
227
|
-
+ BenchmarkerTaskProgressState.format_progress_display(
|
|
228
|
-
value=self.requests_sleep_time_ms,
|
|
229
|
-
label="Req Sleep",
|
|
230
|
-
units="ms",
|
|
231
|
-
total_characters=18,
|
|
232
|
-
digits_places=5,
|
|
233
|
-
decimal_places=0,
|
|
234
|
-
)
|
|
235
|
-
+ ", "
|
|
236
|
-
+ BenchmarkerTaskProgressState.format_progress_display(
|
|
237
|
-
value=self.requests_targeted_start_time_delay_ms,
|
|
238
|
-
label="Start Del",
|
|
239
|
-
units="ms",
|
|
240
|
-
total_characters=18,
|
|
241
|
-
digits_places=5,
|
|
242
|
-
decimal_places=0,
|
|
243
|
-
)
|
|
244
|
-
)
|
|
245
|
-
|
|
246
|
-
@staticmethod
|
|
247
|
-
def format_progress_display(
|
|
248
|
-
value: float,
|
|
249
|
-
label: str,
|
|
250
|
-
units: str = "",
|
|
251
|
-
total_characters: Optional[int] = None,
|
|
252
|
-
digits_places: Optional[int] = None,
|
|
253
|
-
decimal_places: Optional[int] = None,
|
|
254
|
-
) -> str:
|
|
255
|
-
if decimal_places is None and digits_places is None:
|
|
256
|
-
formatted_number = f"{value}:.0f"
|
|
257
|
-
elif digits_places is None:
|
|
258
|
-
formatted_number = f"{value:.{decimal_places}f}"
|
|
259
|
-
elif decimal_places is None:
|
|
260
|
-
formatted_number = f"{value:>{digits_places}f}"
|
|
261
|
-
else:
|
|
262
|
-
formatted_number = f"{value:>{digits_places}.{decimal_places}f}"
|
|
263
|
-
|
|
264
|
-
result = f"{formatted_number}{units} [{Colors.INFO}]{label}[/{Colors.INFO}]"
|
|
265
|
-
|
|
266
|
-
if total_characters is not None:
|
|
267
|
-
total_characters += len(Colors.INFO) * 2 + 5
|
|
268
|
-
|
|
269
|
-
if len(result) < total_characters:
|
|
270
|
-
result = result.rjust(total_characters)
|
|
271
|
-
|
|
272
|
-
return result
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
class GenerativeTextBenchmarkerTaskProgressState(BenchmarkerTaskProgressState):
|
|
276
|
-
output_tokens: float = 0
|
|
277
|
-
prompt_tokens: float = 0
|
|
278
|
-
output_tokens_rate: float = 0
|
|
279
|
-
total_tokens_rate: float = 0
|
|
280
|
-
tokens_ttft: float = 0
|
|
281
|
-
tokens_itl: float = 0
|
|
282
|
-
|
|
283
|
-
@property
|
|
284
|
-
def fields(self) -> dict[str, str]:
|
|
285
|
-
fields = super().fields
|
|
286
|
-
fields["tokens_summary"] = self.formatted_tokens_summary
|
|
287
|
-
return fields
|
|
288
|
-
|
|
289
465
|
@property
|
|
290
466
|
def formatted_tokens_summary(self) -> str:
|
|
291
|
-
if
|
|
467
|
+
if self.benchmark_status == "pending":
|
|
292
468
|
return " "
|
|
293
469
|
|
|
294
470
|
return (
|
|
295
|
-
f"[{Colors.
|
|
296
|
-
+
|
|
471
|
+
f"[{Colors.info}]Tok:[/{Colors.info}] "
|
|
472
|
+
+ format_value_display(
|
|
297
473
|
value=self.output_tokens_rate,
|
|
298
474
|
label="gen/s",
|
|
299
475
|
total_characters=12,
|
|
@@ -301,7 +477,7 @@ class GenerativeTextBenchmarkerTaskProgressState(BenchmarkerTaskProgressState):
|
|
|
301
477
|
decimal_places=1,
|
|
302
478
|
)
|
|
303
479
|
+ ", "
|
|
304
|
-
+
|
|
480
|
+
+ format_value_display(
|
|
305
481
|
value=self.total_tokens_rate,
|
|
306
482
|
label="tot/s",
|
|
307
483
|
total_characters=12,
|
|
@@ -309,8 +485,8 @@ class GenerativeTextBenchmarkerTaskProgressState(BenchmarkerTaskProgressState):
|
|
|
309
485
|
decimal_places=1,
|
|
310
486
|
)
|
|
311
487
|
+ ", "
|
|
312
|
-
+
|
|
313
|
-
value=self.
|
|
488
|
+
+ format_value_display(
|
|
489
|
+
value=self.time_to_first_token,
|
|
314
490
|
label="TTFT",
|
|
315
491
|
units="ms",
|
|
316
492
|
total_characters=12,
|
|
@@ -318,8 +494,8 @@ class GenerativeTextBenchmarkerTaskProgressState(BenchmarkerTaskProgressState):
|
|
|
318
494
|
decimal_places=1,
|
|
319
495
|
)
|
|
320
496
|
+ ", "
|
|
321
|
-
+
|
|
322
|
-
value=self.
|
|
497
|
+
+ format_value_display(
|
|
498
|
+
value=self.inter_token_latency,
|
|
323
499
|
label="ITL",
|
|
324
500
|
units="ms",
|
|
325
501
|
total_characters=12,
|
|
@@ -327,7 +503,7 @@ class GenerativeTextBenchmarkerTaskProgressState(BenchmarkerTaskProgressState):
|
|
|
327
503
|
decimal_places=1,
|
|
328
504
|
)
|
|
329
505
|
+ ", "
|
|
330
|
-
+
|
|
506
|
+
+ format_value_display(
|
|
331
507
|
value=self.prompt_tokens,
|
|
332
508
|
label="Prompt",
|
|
333
509
|
total_characters=12,
|
|
@@ -335,7 +511,7 @@ class GenerativeTextBenchmarkerTaskProgressState(BenchmarkerTaskProgressState):
|
|
|
335
511
|
decimal_places=0,
|
|
336
512
|
)
|
|
337
513
|
+ ", "
|
|
338
|
-
+
|
|
514
|
+
+ format_value_display(
|
|
339
515
|
value=self.output_tokens,
|
|
340
516
|
label="Gen",
|
|
341
517
|
total_characters=12,
|
|
@@ -344,377 +520,220 @@ class GenerativeTextBenchmarkerTaskProgressState(BenchmarkerTaskProgressState):
|
|
|
344
520
|
)
|
|
345
521
|
)
|
|
346
522
|
|
|
523
|
+
@property
|
|
524
|
+
def formatted_scheduler_stats(self) -> str:
|
|
525
|
+
if self.benchmark_status == "pending":
|
|
526
|
+
return " "
|
|
347
527
|
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
self.benchmarker_tasks_progress,
|
|
358
|
-
title="Benchmarks",
|
|
359
|
-
title_align="left",
|
|
360
|
-
expand=True,
|
|
361
|
-
)
|
|
362
|
-
self.benchmarker_progress = Progress(
|
|
363
|
-
TextColumn("Generating...", style=f"italic {Colors.PROGRESS}"),
|
|
364
|
-
BarColumn(
|
|
365
|
-
bar_width=None,
|
|
366
|
-
complete_style=Colors.PROGRESS,
|
|
367
|
-
finished_style=Colors.SUCCESS,
|
|
368
|
-
),
|
|
369
|
-
TextColumn(
|
|
370
|
-
"({task.fields[completed_benchmarks]}/{task.fields[total_benchmarks]})",
|
|
371
|
-
style=Colors.PROGRESS,
|
|
372
|
-
),
|
|
373
|
-
TextColumn("["),
|
|
374
|
-
TimeElapsedColumn(),
|
|
375
|
-
TextColumn("<"),
|
|
376
|
-
TimeRemainingColumn(),
|
|
377
|
-
TextColumn("]"),
|
|
378
|
-
)
|
|
379
|
-
self.benchmarker_live = Live(
|
|
380
|
-
Group(
|
|
381
|
-
self.benchmarker_tasks_panel,
|
|
382
|
-
self.benchmarker_progress,
|
|
383
|
-
),
|
|
384
|
-
redirect_stdout=True,
|
|
385
|
-
redirect_stderr=True,
|
|
386
|
-
)
|
|
387
|
-
self.active_task: Optional[TaskID] = None
|
|
388
|
-
self.benchmarker_tasks: list[BTPS] = []
|
|
389
|
-
self.progress_task: Optional[TaskID] = None
|
|
390
|
-
|
|
391
|
-
def update(self, result: BenchmarkerResult):
|
|
392
|
-
if result.type_ == "run_start":
|
|
393
|
-
if self.started:
|
|
394
|
-
raise RuntimeError("Progress display already started.")
|
|
395
|
-
|
|
396
|
-
self.handle_start(result)
|
|
397
|
-
self.started = True
|
|
398
|
-
elif result.type_ == "run_complete":
|
|
399
|
-
if not self.started:
|
|
400
|
-
raise RuntimeError("Progress display not started.")
|
|
401
|
-
|
|
402
|
-
self.handle_end(result)
|
|
403
|
-
self.started = False
|
|
404
|
-
else:
|
|
405
|
-
if not self.started:
|
|
406
|
-
raise RuntimeError("Progress display not started.")
|
|
407
|
-
|
|
408
|
-
self.handle_update(result)
|
|
409
|
-
|
|
410
|
-
def handle_start(self, result: BenchmarkerResult):
|
|
411
|
-
self.benchmarker_live.start()
|
|
412
|
-
|
|
413
|
-
for index, strategy_type in enumerate(result.profile.strategy_types):
|
|
414
|
-
task_id = self.benchmarker_tasks_progress.add_task(
|
|
415
|
-
description=strategy_type,
|
|
416
|
-
start=False,
|
|
417
|
-
total=None,
|
|
418
|
-
completed=0,
|
|
419
|
-
visible=False,
|
|
528
|
+
return (
|
|
529
|
+
f"[{Colors.info}]Sys:[/{Colors.info}] , "
|
|
530
|
+
+ format_value_display(
|
|
531
|
+
value=self.request_targeted_start_delay,
|
|
532
|
+
label="Start Del",
|
|
533
|
+
units="ms",
|
|
534
|
+
total_characters=18,
|
|
535
|
+
digits_places=5,
|
|
536
|
+
decimal_places=0,
|
|
420
537
|
)
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
538
|
+
+ format_value_display(
|
|
539
|
+
value=self.scheduler_overheads_time,
|
|
540
|
+
label="Sched OH",
|
|
541
|
+
units="ms",
|
|
542
|
+
total_characters=18,
|
|
543
|
+
digits_places=3,
|
|
544
|
+
decimal_places=1,
|
|
426
545
|
)
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
546
|
+
+ ", "
|
|
547
|
+
+ format_value_display(
|
|
548
|
+
value=self.queued_time,
|
|
549
|
+
label="Queued",
|
|
550
|
+
units="ms",
|
|
551
|
+
total_characters=18,
|
|
552
|
+
digits_places=5,
|
|
553
|
+
decimal_places=0,
|
|
433
554
|
)
|
|
434
|
-
|
|
435
|
-
self.progress_task = self.benchmarker_progress.add_task(
|
|
436
|
-
"",
|
|
437
|
-
total=len(self.benchmarker_tasks) * 1000,
|
|
438
|
-
completed_benchmarks=0,
|
|
439
|
-
total_benchmarks=len(self.benchmarker_tasks),
|
|
440
|
-
)
|
|
441
|
-
|
|
442
|
-
def handle_update(self, result: BenchmarkerResult):
|
|
443
|
-
current_state: BTPS = self.benchmarker_tasks[result.current_index]
|
|
444
|
-
|
|
445
|
-
if result.type_ == "scheduler_start":
|
|
446
|
-
self.handle_update_scheduler_start(current_state, result)
|
|
447
|
-
self.active_task = current_state.task_id
|
|
448
|
-
elif result.type_ == "scheduler_update":
|
|
449
|
-
self.handle_update_scheduler_update(current_state, result)
|
|
450
|
-
elif result.type_ == "scheduler_complete":
|
|
451
|
-
self.handle_update_scheduler_complete(current_state, result)
|
|
452
|
-
elif result.type_ == "benchmark_compiled":
|
|
453
|
-
self.handle_update_benchmark_compiled(current_state, result)
|
|
454
|
-
else:
|
|
455
|
-
raise ValueError(f"Unknown result type: {result.type_}")
|
|
456
|
-
|
|
457
|
-
if self.progress_task is None:
|
|
458
|
-
raise RuntimeError("Progress task not set.")
|
|
459
|
-
|
|
460
|
-
self.benchmarker_tasks_progress.update(
|
|
461
|
-
current_state.task_id,
|
|
462
|
-
description=current_state.description,
|
|
463
|
-
completed=current_state.completed,
|
|
464
|
-
total=current_state.total,
|
|
465
|
-
**current_state.fields, # type: ignore[arg-type]
|
|
466
|
-
)
|
|
467
|
-
self.benchmarker_progress.update(
|
|
468
|
-
self.progress_task,
|
|
469
|
-
completed=(result.current_index * 1000) + current_state.completed,
|
|
470
|
-
total=1000 * len(self.benchmarker_tasks),
|
|
471
|
-
completed_benchmarks=(
|
|
472
|
-
result.current_index + (1 if current_state.ended else 0)
|
|
473
|
-
),
|
|
474
|
-
total_benchmarks=len(self.benchmarker_tasks),
|
|
475
|
-
)
|
|
476
|
-
|
|
477
|
-
if current_state.ended:
|
|
478
|
-
self.benchmarker_tasks_progress.stop_task(current_state.task_id)
|
|
479
|
-
self.active_task = None
|
|
480
|
-
|
|
481
|
-
def handle_update_scheduler_start(
|
|
482
|
-
self, progress_state: BTPS, result: BenchmarkerResult
|
|
483
|
-
):
|
|
484
|
-
if self.active_task is not None:
|
|
485
|
-
raise RuntimeError("Active task already set.")
|
|
486
|
-
|
|
487
|
-
progress_state.strategy = result.current_strategy # type: ignore[assignment]
|
|
488
|
-
progress_state.started = True
|
|
489
|
-
current_aggregator: BenchmarkAggregator = result.current_aggregator # type: ignore[assignment]
|
|
490
|
-
progress_state.start_time = (
|
|
491
|
-
current_aggregator.requests_stats.totals.total.start_time
|
|
492
555
|
)
|
|
493
|
-
progress_state.max_number = current_aggregator.args.max_number
|
|
494
|
-
progress_state.max_duration = current_aggregator.args.max_duration
|
|
495
|
-
|
|
496
|
-
def handle_update_scheduler_update(
|
|
497
|
-
self, progress_state: BTPS, result: BenchmarkerResult
|
|
498
|
-
):
|
|
499
|
-
if self.active_task is None:
|
|
500
|
-
raise RuntimeError("Active task not set.")
|
|
501
556
|
|
|
502
|
-
|
|
503
|
-
|
|
557
|
+
def start(self, strategy: SchedulingStrategy):
|
|
558
|
+
self.strategy = strategy
|
|
559
|
+
self.strategy_type = strategy.type_
|
|
504
560
|
|
|
505
|
-
|
|
506
|
-
progress_state.in_warmup = current_aggregator.in_warmup
|
|
507
|
-
progress_state.in_cooldown = current_aggregator.in_cooldown
|
|
508
|
-
progress_state.requests_rate = (
|
|
509
|
-
current_aggregator.requests_stats.totals.successful.rate
|
|
510
|
-
)
|
|
511
|
-
progress_state.request_latency = (
|
|
512
|
-
current_aggregator.requests_stats.request_time.mean
|
|
513
|
-
)
|
|
514
|
-
progress_state.requests_processing = (
|
|
515
|
-
current_aggregator.scheduler_stats.processing_requests.last
|
|
516
|
-
)
|
|
517
|
-
progress_state.requests_successful = (
|
|
518
|
-
current_aggregator.requests_stats.totals.successful.total
|
|
519
|
-
)
|
|
520
|
-
progress_state.requests_incomplete = (
|
|
521
|
-
current_aggregator.requests_stats.totals.incomplete.total
|
|
522
|
-
)
|
|
523
|
-
progress_state.requests_errored = (
|
|
524
|
-
current_aggregator.requests_stats.totals.errored.total
|
|
525
|
-
)
|
|
526
|
-
progress_state.worker_overheads_time_ms = (
|
|
527
|
-
current_aggregator.requests_stats.scheduled_time_delay.mean_ms
|
|
528
|
-
+ current_aggregator.requests_stats.worker_start_delay.mean_ms
|
|
529
|
-
)
|
|
530
|
-
progress_state.backend_overheads_time_ms = (
|
|
531
|
-
current_aggregator.requests_stats.request_time_delay.mean_ms
|
|
532
|
-
)
|
|
533
|
-
progress_state.requests_sleep_time_ms = (
|
|
534
|
-
current_aggregator.requests_stats.scheduled_time_sleep.mean_ms
|
|
535
|
-
)
|
|
536
|
-
progress_state.requests_targeted_start_time_delay_ms = (
|
|
537
|
-
current_aggregator.requests_stats.request_start_time_targeted_delay.mean_ms
|
|
538
|
-
)
|
|
539
|
-
|
|
540
|
-
def handle_update_scheduler_complete(
|
|
561
|
+
def update(
|
|
541
562
|
self,
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
):
|
|
545
|
-
if self.active_task is None:
|
|
546
|
-
raise RuntimeError("Active task not set.")
|
|
547
|
-
|
|
548
|
-
if self.active_task != progress_state.task_id:
|
|
549
|
-
raise RuntimeError("Active task does not match current task.")
|
|
550
|
-
|
|
551
|
-
progress_state.in_warmup = False
|
|
552
|
-
progress_state.in_cooldown = False
|
|
553
|
-
progress_state.compiling = True
|
|
554
|
-
|
|
555
|
-
def handle_update_benchmark_compiled(
|
|
556
|
-
self, progress_state: BTPS, result: BenchmarkerResult
|
|
563
|
+
estimated_state: EstimatedBenchmarkState,
|
|
564
|
+
scheduler_state: SchedulerState,
|
|
557
565
|
):
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
566
|
+
self.progress = (
|
|
567
|
+
(1.0 - scheduler_state.remaining_fraction)
|
|
568
|
+
if scheduler_state.remaining_fraction is not None
|
|
569
|
+
else 0.0
|
|
570
|
+
)
|
|
571
|
+
self._update_processing_states(
|
|
572
|
+
benchmark_status=estimated_state.get_metric(
|
|
573
|
+
group=EstimatedBenchmarkState.benchmark_state_group,
|
|
574
|
+
key="status",
|
|
575
|
+
default=None,
|
|
576
|
+
),
|
|
577
|
+
start_time=scheduler_state.start_time,
|
|
578
|
+
successful_requests=scheduler_state.successful_requests,
|
|
579
|
+
cancelled_requests=scheduler_state.cancelled_requests,
|
|
580
|
+
errored_requests=scheduler_state.errored_requests,
|
|
581
|
+
)
|
|
582
|
+
self._update_request_stats(
|
|
583
|
+
request_concurrency=estimated_state.get_metric(
|
|
584
|
+
group=EstimatedBenchmarkState.benchmark_metrics_group,
|
|
585
|
+
key="concurrency_requests",
|
|
586
|
+
),
|
|
587
|
+
requests_per_second=estimated_state.get_metric(
|
|
588
|
+
group=EstimatedBenchmarkState.benchmark_metrics_group,
|
|
589
|
+
key="completed_requests_per_second",
|
|
590
|
+
),
|
|
591
|
+
request_latency=estimated_state.get_metric(
|
|
592
|
+
group=EstimatedBenchmarkState.benchmark_metrics_group,
|
|
593
|
+
key="completed_request_latency",
|
|
594
|
+
),
|
|
572
595
|
)
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
596
|
+
self._update_token_stats(
|
|
597
|
+
output_tokens=estimated_state.get_metric(
|
|
598
|
+
group=EstimatedBenchmarkState.benchmark_metrics_group,
|
|
599
|
+
key="completed_output_tokens_total",
|
|
600
|
+
),
|
|
601
|
+
output_tokens_rate=estimated_state.get_metric(
|
|
602
|
+
group=EstimatedBenchmarkState.benchmark_metrics_group,
|
|
603
|
+
key="completed_output_tokens",
|
|
604
|
+
),
|
|
605
|
+
prompt_tokens=estimated_state.get_metric(
|
|
606
|
+
group=EstimatedBenchmarkState.benchmark_metrics_group,
|
|
607
|
+
key="completed_input_tokens_total",
|
|
608
|
+
),
|
|
609
|
+
total_tokens_rate=estimated_state.get_metric(
|
|
610
|
+
group=EstimatedBenchmarkState.benchmark_metrics_group,
|
|
611
|
+
key="completed_total_tokens",
|
|
612
|
+
),
|
|
613
|
+
time_to_first_token=estimated_state.get_metric(
|
|
614
|
+
group=EstimatedBenchmarkState.benchmark_metrics_group,
|
|
615
|
+
key="completed_time_to_first_token",
|
|
616
|
+
),
|
|
617
|
+
inter_token_latency=estimated_state.get_metric(
|
|
618
|
+
group=EstimatedBenchmarkState.benchmark_metrics_group,
|
|
619
|
+
key="completed_inter_token_latency",
|
|
620
|
+
),
|
|
584
621
|
)
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
def create_task_progress_columns(self) -> list[ProgressColumn]:
|
|
592
|
-
columns = [
|
|
593
|
-
TextColumn("[{task.fields[start_time]}]"),
|
|
594
|
-
SpinnerColumn(style=Colors.PROGRESS),
|
|
595
|
-
TaskProgressColumn(style=Colors.PROGRESS),
|
|
596
|
-
TextColumn("{task.description}"),
|
|
597
|
-
TextColumn("({task.fields[progress_status]})"),
|
|
598
|
-
TextColumn(" "),
|
|
599
|
-
]
|
|
600
|
-
|
|
601
|
-
if not self.display_scheduler_stats:
|
|
602
|
-
columns += [
|
|
603
|
-
TextColumn("{task.fields[requests_summary]}\n"),
|
|
604
|
-
]
|
|
605
|
-
else:
|
|
606
|
-
columns += [
|
|
607
|
-
TextColumn(
|
|
608
|
-
"{task.fields[requests_summary]}\n{task.fields[scheduler_stats]}\n"
|
|
622
|
+
if estimated_state.get("updated_scheduler_stats"):
|
|
623
|
+
self._update_system_stats(
|
|
624
|
+
request_targeted_start_delay=estimated_state.get_metric(
|
|
625
|
+
group=EstimatedBenchmarkState.scheduler_state_group,
|
|
626
|
+
key="request_targeted_start_delay",
|
|
609
627
|
),
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
628
|
+
queued_time=estimated_state.get_metric(
|
|
629
|
+
group=EstimatedBenchmarkState.scheduler_state_group,
|
|
630
|
+
key="queued_time",
|
|
631
|
+
),
|
|
632
|
+
scheduler_overheads_time=0.0, # Need to add up metrics here
|
|
633
|
+
)
|
|
613
634
|
|
|
614
|
-
def
|
|
615
|
-
self
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
635
|
+
def complete(self, benchmark: GenerativeBenchmark):
|
|
636
|
+
self._update_processing_states(
|
|
637
|
+
benchmark_status="completed",
|
|
638
|
+
start_time=benchmark.start_time,
|
|
639
|
+
successful_requests=benchmark.request_totals.successful,
|
|
640
|
+
cancelled_requests=benchmark.request_totals.incomplete,
|
|
641
|
+
errored_requests=benchmark.request_totals.errored,
|
|
642
|
+
)
|
|
643
|
+
self._update_request_stats(
|
|
644
|
+
request_concurrency=benchmark.metrics.request_concurrency.successful.mean,
|
|
645
|
+
requests_per_second=benchmark.metrics.requests_per_second.successful.mean,
|
|
646
|
+
request_latency=benchmark.metrics.request_latency.successful.mean,
|
|
647
|
+
)
|
|
648
|
+
self._update_token_stats(
|
|
649
|
+
output_tokens=benchmark.metrics.output_token_count.successful.mean,
|
|
650
|
+
output_tokens_rate=benchmark.metrics.output_tokens_per_second.successful.mean,
|
|
651
|
+
prompt_tokens=benchmark.metrics.prompt_token_count.successful.mean,
|
|
652
|
+
total_tokens_rate=benchmark.metrics.tokens_per_second.successful.mean,
|
|
653
|
+
time_to_first_token=(
|
|
654
|
+
benchmark.metrics.time_to_first_token_ms.successful.mean
|
|
655
|
+
),
|
|
656
|
+
inter_token_latency=(
|
|
657
|
+
benchmark.metrics.inter_token_latency_ms.successful.mean
|
|
658
|
+
),
|
|
659
|
+
converted=True,
|
|
625
660
|
)
|
|
626
661
|
|
|
627
|
-
|
|
628
|
-
class GenerativeTextBenchmarkerProgressDisplay(
|
|
629
|
-
BenchmarkerProgressDisplay[GenerativeTextBenchmarkerTaskProgressState]
|
|
630
|
-
):
|
|
631
|
-
def handle_update_scheduler_update(
|
|
662
|
+
def _update_processing_states(
|
|
632
663
|
self,
|
|
633
|
-
|
|
634
|
-
|
|
664
|
+
benchmark_status: Literal[
|
|
665
|
+
"pending", "in_warmup", "in_progress", "in_cooldown", "completed"
|
|
666
|
+
],
|
|
667
|
+
start_time: float | None = None,
|
|
668
|
+
successful_requests: int | None = None,
|
|
669
|
+
cancelled_requests: int | None = None,
|
|
670
|
+
errored_requests: int | None = None,
|
|
635
671
|
):
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
current_aggregator.requests_stats.total_tokens.rate
|
|
649
|
-
)
|
|
650
|
-
progress_state.tokens_ttft = (
|
|
651
|
-
current_aggregator.requests_stats.time_to_first_token.mean_ms
|
|
652
|
-
)
|
|
653
|
-
progress_state.tokens_itl = (
|
|
654
|
-
current_aggregator.requests_stats.inter_token_latency.mean_ms
|
|
655
|
-
)
|
|
656
|
-
|
|
657
|
-
def handle_update_benchmark_compiled(
|
|
672
|
+
if benchmark_status is not None:
|
|
673
|
+
self.benchmark_status = benchmark_status
|
|
674
|
+
if start_time is not None:
|
|
675
|
+
self.start_time = start_time
|
|
676
|
+
if successful_requests is not None:
|
|
677
|
+
self.successful_requests = successful_requests
|
|
678
|
+
if cancelled_requests is not None:
|
|
679
|
+
self.cancelled_requests = cancelled_requests
|
|
680
|
+
if errored_requests is not None:
|
|
681
|
+
self.errored_requests = errored_requests
|
|
682
|
+
|
|
683
|
+
def _update_request_stats(
|
|
658
684
|
self,
|
|
659
|
-
|
|
660
|
-
|
|
685
|
+
request_concurrency: int | None = None,
|
|
686
|
+
requests_per_second: float | None = None,
|
|
687
|
+
request_latency: float | None = None,
|
|
661
688
|
):
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
progress_state.requests_incomplete = current_benchmark.request_totals.incomplete
|
|
671
|
-
progress_state.output_tokens = (
|
|
672
|
-
current_benchmark.metrics.output_token_count.successful.mean
|
|
673
|
-
)
|
|
674
|
-
progress_state.prompt_tokens = (
|
|
675
|
-
current_benchmark.metrics.prompt_token_count.successful.mean
|
|
676
|
-
)
|
|
677
|
-
progress_state.output_tokens_rate = (
|
|
678
|
-
current_benchmark.metrics.output_tokens_per_second.successful.mean
|
|
679
|
-
)
|
|
680
|
-
progress_state.total_tokens_rate = (
|
|
681
|
-
current_benchmark.metrics.tokens_per_second.successful.mean
|
|
682
|
-
)
|
|
683
|
-
progress_state.tokens_ttft = (
|
|
684
|
-
current_benchmark.metrics.time_to_first_token_ms.successful.mean
|
|
685
|
-
)
|
|
686
|
-
progress_state.tokens_itl = (
|
|
687
|
-
current_benchmark.metrics.inter_token_latency_ms.successful.mean
|
|
688
|
-
)
|
|
689
|
-
|
|
690
|
-
def create_task_progress_state(
|
|
689
|
+
if request_concurrency is not None:
|
|
690
|
+
self.request_concurrency = request_concurrency
|
|
691
|
+
if requests_per_second is not None:
|
|
692
|
+
self.requests_per_second = requests_per_second
|
|
693
|
+
if request_latency is not None:
|
|
694
|
+
self.request_latency = request_latency
|
|
695
|
+
|
|
696
|
+
def _update_token_stats(
|
|
691
697
|
self,
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
"{task.fields[requests_summary]}\n{task.fields[tokens_summary]}\n{task.fields[scheduler_stats]}",
|
|
717
|
-
),
|
|
718
|
-
]
|
|
698
|
+
output_tokens: int | None = None,
|
|
699
|
+
output_tokens_rate: float | None = None,
|
|
700
|
+
prompt_tokens: int | None = None,
|
|
701
|
+
total_tokens_rate: float | None = None,
|
|
702
|
+
time_to_first_token: float | None = None,
|
|
703
|
+
inter_token_latency: float | None = None,
|
|
704
|
+
converted: bool = False,
|
|
705
|
+
):
|
|
706
|
+
if output_tokens is not None:
|
|
707
|
+
self.output_tokens = output_tokens
|
|
708
|
+
if output_tokens_rate is not None:
|
|
709
|
+
self.output_tokens_rate = output_tokens_rate
|
|
710
|
+
if prompt_tokens is not None:
|
|
711
|
+
self.prompt_tokens = prompt_tokens
|
|
712
|
+
if total_tokens_rate is not None:
|
|
713
|
+
self.total_tokens_rate = total_tokens_rate
|
|
714
|
+
if time_to_first_token is not None:
|
|
715
|
+
self.time_to_first_token = time_to_first_token * (
|
|
716
|
+
1000 if not converted else 1
|
|
717
|
+
)
|
|
718
|
+
if inter_token_latency is not None:
|
|
719
|
+
self.inter_token_latency = inter_token_latency * (
|
|
720
|
+
1000 if not converted else 1
|
|
721
|
+
)
|
|
719
722
|
|
|
720
|
-
|
|
723
|
+
def _update_system_stats(
|
|
724
|
+
self,
|
|
725
|
+
request_targeted_start_delay: float | None = None,
|
|
726
|
+
queued_time: float | None = None,
|
|
727
|
+
scheduler_overheads_time: float | None = None,
|
|
728
|
+
converted: bool = False,
|
|
729
|
+
):
|
|
730
|
+
if request_targeted_start_delay is not None:
|
|
731
|
+
self.request_targeted_start_delay = request_targeted_start_delay * (
|
|
732
|
+
1000 if not converted else 1
|
|
733
|
+
)
|
|
734
|
+
if queued_time is not None:
|
|
735
|
+
self.queued_time = queued_time * (1000 if not converted else 1)
|
|
736
|
+
if scheduler_overheads_time is not None:
|
|
737
|
+
self.scheduler_overheads_time = scheduler_overheads_time * (
|
|
738
|
+
1000 if not converted else 1
|
|
739
|
+
)
|