guidellm 0.3.1__py3-none-any.whl → 0.6.0a5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- guidellm/__init__.py +5 -2
- guidellm/__main__.py +524 -255
- guidellm/backends/__init__.py +33 -0
- guidellm/backends/backend.py +109 -0
- guidellm/backends/openai.py +340 -0
- guidellm/backends/response_handlers.py +428 -0
- guidellm/benchmark/__init__.py +69 -39
- guidellm/benchmark/benchmarker.py +160 -316
- guidellm/benchmark/entrypoints.py +560 -127
- guidellm/benchmark/outputs/__init__.py +24 -0
- guidellm/benchmark/outputs/console.py +633 -0
- guidellm/benchmark/outputs/csv.py +721 -0
- guidellm/benchmark/outputs/html.py +473 -0
- guidellm/benchmark/outputs/output.py +169 -0
- guidellm/benchmark/outputs/serialized.py +69 -0
- guidellm/benchmark/profiles.py +718 -0
- guidellm/benchmark/progress.py +553 -556
- guidellm/benchmark/scenarios/__init__.py +40 -0
- guidellm/benchmark/scenarios/chat.json +6 -0
- guidellm/benchmark/scenarios/rag.json +6 -0
- guidellm/benchmark/schemas/__init__.py +66 -0
- guidellm/benchmark/schemas/base.py +402 -0
- guidellm/benchmark/schemas/generative/__init__.py +55 -0
- guidellm/benchmark/schemas/generative/accumulator.py +841 -0
- guidellm/benchmark/schemas/generative/benchmark.py +163 -0
- guidellm/benchmark/schemas/generative/entrypoints.py +381 -0
- guidellm/benchmark/schemas/generative/metrics.py +927 -0
- guidellm/benchmark/schemas/generative/report.py +158 -0
- guidellm/data/__init__.py +34 -4
- guidellm/data/builders.py +541 -0
- guidellm/data/collators.py +16 -0
- guidellm/data/config.py +120 -0
- guidellm/data/deserializers/__init__.py +49 -0
- guidellm/data/deserializers/deserializer.py +141 -0
- guidellm/data/deserializers/file.py +223 -0
- guidellm/data/deserializers/huggingface.py +94 -0
- guidellm/data/deserializers/memory.py +194 -0
- guidellm/data/deserializers/synthetic.py +246 -0
- guidellm/data/entrypoints.py +52 -0
- guidellm/data/loaders.py +190 -0
- guidellm/data/preprocessors/__init__.py +27 -0
- guidellm/data/preprocessors/formatters.py +410 -0
- guidellm/data/preprocessors/mappers.py +196 -0
- guidellm/data/preprocessors/preprocessor.py +30 -0
- guidellm/data/processor.py +29 -0
- guidellm/data/schemas.py +175 -0
- guidellm/data/utils/__init__.py +6 -0
- guidellm/data/utils/dataset.py +94 -0
- guidellm/extras/__init__.py +4 -0
- guidellm/extras/audio.py +220 -0
- guidellm/extras/vision.py +242 -0
- guidellm/logger.py +2 -2
- guidellm/mock_server/__init__.py +8 -0
- guidellm/mock_server/config.py +84 -0
- guidellm/mock_server/handlers/__init__.py +17 -0
- guidellm/mock_server/handlers/chat_completions.py +280 -0
- guidellm/mock_server/handlers/completions.py +280 -0
- guidellm/mock_server/handlers/tokenizer.py +142 -0
- guidellm/mock_server/models.py +510 -0
- guidellm/mock_server/server.py +238 -0
- guidellm/mock_server/utils.py +302 -0
- guidellm/scheduler/__init__.py +69 -26
- guidellm/scheduler/constraints/__init__.py +49 -0
- guidellm/scheduler/constraints/constraint.py +325 -0
- guidellm/scheduler/constraints/error.py +411 -0
- guidellm/scheduler/constraints/factory.py +182 -0
- guidellm/scheduler/constraints/request.py +312 -0
- guidellm/scheduler/constraints/saturation.py +722 -0
- guidellm/scheduler/environments.py +252 -0
- guidellm/scheduler/scheduler.py +137 -368
- guidellm/scheduler/schemas.py +358 -0
- guidellm/scheduler/strategies.py +617 -0
- guidellm/scheduler/worker.py +413 -419
- guidellm/scheduler/worker_group.py +712 -0
- guidellm/schemas/__init__.py +65 -0
- guidellm/schemas/base.py +417 -0
- guidellm/schemas/info.py +188 -0
- guidellm/schemas/request.py +235 -0
- guidellm/schemas/request_stats.py +349 -0
- guidellm/schemas/response.py +124 -0
- guidellm/schemas/statistics.py +1018 -0
- guidellm/{config.py → settings.py} +31 -24
- guidellm/utils/__init__.py +71 -8
- guidellm/utils/auto_importer.py +98 -0
- guidellm/utils/cli.py +132 -5
- guidellm/utils/console.py +566 -0
- guidellm/utils/encoding.py +778 -0
- guidellm/utils/functions.py +159 -0
- guidellm/utils/hf_datasets.py +1 -2
- guidellm/utils/hf_transformers.py +4 -4
- guidellm/utils/imports.py +9 -0
- guidellm/utils/messaging.py +1118 -0
- guidellm/utils/mixins.py +115 -0
- guidellm/utils/random.py +3 -4
- guidellm/utils/registry.py +220 -0
- guidellm/utils/singleton.py +133 -0
- guidellm/utils/synchronous.py +159 -0
- guidellm/utils/text.py +163 -50
- guidellm/utils/typing.py +41 -0
- guidellm/version.py +2 -2
- guidellm-0.6.0a5.dist-info/METADATA +364 -0
- guidellm-0.6.0a5.dist-info/RECORD +109 -0
- guidellm/backend/__init__.py +0 -23
- guidellm/backend/backend.py +0 -259
- guidellm/backend/openai.py +0 -708
- guidellm/backend/response.py +0 -136
- guidellm/benchmark/aggregator.py +0 -760
- guidellm/benchmark/benchmark.py +0 -837
- guidellm/benchmark/output.py +0 -997
- guidellm/benchmark/profile.py +0 -409
- guidellm/benchmark/scenario.py +0 -104
- guidellm/data/prideandprejudice.txt.gz +0 -0
- guidellm/dataset/__init__.py +0 -22
- guidellm/dataset/creator.py +0 -213
- guidellm/dataset/entrypoints.py +0 -42
- guidellm/dataset/file.py +0 -92
- guidellm/dataset/hf_datasets.py +0 -62
- guidellm/dataset/in_memory.py +0 -132
- guidellm/dataset/synthetic.py +0 -287
- guidellm/objects/__init__.py +0 -18
- guidellm/objects/pydantic.py +0 -89
- guidellm/objects/statistics.py +0 -953
- guidellm/preprocess/__init__.py +0 -3
- guidellm/preprocess/dataset.py +0 -374
- guidellm/presentation/__init__.py +0 -28
- guidellm/presentation/builder.py +0 -27
- guidellm/presentation/data_models.py +0 -232
- guidellm/presentation/injector.py +0 -66
- guidellm/request/__init__.py +0 -18
- guidellm/request/loader.py +0 -284
- guidellm/request/request.py +0 -79
- guidellm/request/types.py +0 -10
- guidellm/scheduler/queues.py +0 -25
- guidellm/scheduler/result.py +0 -155
- guidellm/scheduler/strategy.py +0 -495
- guidellm-0.3.1.dist-info/METADATA +0 -329
- guidellm-0.3.1.dist-info/RECORD +0 -62
- {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/WHEEL +0 -0
- {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/entry_points.txt +0 -0
- {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/licenses/LICENSE +0 -0
- {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/top_level.txt +0 -0
guidellm/benchmark/progress.py
CHANGED
|
@@ -1,8 +1,18 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
1
|
+
"""
|
|
2
|
+
Progress tracking and console display for benchmark execution monitoring.
|
|
3
|
+
|
|
4
|
+
Provides abstract interfaces and concrete implementations for tracking benchmark
|
|
5
|
+
progress during execution. The module enables real-time display of benchmark
|
|
6
|
+
statistics, metrics, and execution state through console-based UI components.
|
|
7
|
+
Primary use cases include monitoring generative benchmark runs with detailed
|
|
8
|
+
request/token statistics and scheduler state updates.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
from abc import ABC, abstractmethod
|
|
3
14
|
from dataclasses import dataclass
|
|
4
|
-
from
|
|
5
|
-
from typing import Generic, Optional, TypeVar, Union
|
|
15
|
+
from typing import Any, Generic, Literal
|
|
6
16
|
|
|
7
17
|
from rich.console import Group
|
|
8
18
|
from rich.live import Live
|
|
@@ -10,7 +20,6 @@ from rich.panel import Panel
|
|
|
10
20
|
from rich.progress import (
|
|
11
21
|
BarColumn,
|
|
12
22
|
Progress,
|
|
13
|
-
ProgressColumn,
|
|
14
23
|
SpinnerColumn,
|
|
15
24
|
TaskID,
|
|
16
25
|
TaskProgressColumn,
|
|
@@ -19,145 +28,405 @@ from rich.progress import (
|
|
|
19
28
|
TimeRemainingColumn,
|
|
20
29
|
)
|
|
21
30
|
|
|
22
|
-
from guidellm.benchmark.
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
from guidellm.scheduler import (
|
|
29
|
-
SchedulingStrategy,
|
|
30
|
-
StrategyType,
|
|
31
|
-
strategy_display_str,
|
|
31
|
+
from guidellm.benchmark.profiles import Profile
|
|
32
|
+
from guidellm.benchmark.schemas import (
|
|
33
|
+
BenchmarkAccumulatorT,
|
|
34
|
+
BenchmarkT,
|
|
35
|
+
GenerativeBenchmark,
|
|
36
|
+
GenerativeBenchmarkAccumulator,
|
|
32
37
|
)
|
|
33
|
-
from guidellm.
|
|
38
|
+
from guidellm.scheduler import SchedulerState, SchedulingStrategy
|
|
39
|
+
from guidellm.utils import Colors, format_value_display, safe_format_timestamp
|
|
34
40
|
|
|
35
|
-
__all__ = [
|
|
36
|
-
"BenchmarkerProgressDisplay",
|
|
37
|
-
"BenchmarkerTaskProgressState",
|
|
38
|
-
"GenerativeTextBenchmarkerProgressDisplay",
|
|
39
|
-
"GenerativeTextBenchmarkerTaskProgressState",
|
|
40
|
-
]
|
|
41
|
+
__all__ = ["BenchmarkerProgress", "GenerativeConsoleBenchmarkerProgress"]
|
|
41
42
|
|
|
42
43
|
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
44
|
+
class BenchmarkerProgress(Generic[BenchmarkAccumulatorT, BenchmarkT], ABC):
|
|
45
|
+
"""
|
|
46
|
+
Abstract interface for tracking and displaying benchmark execution progress.
|
|
47
|
+
|
|
48
|
+
Provides lifecycle hooks for monitoring benchmark stages including initialization,
|
|
49
|
+
execution start, progress updates, completion, and finalization. Implementations
|
|
50
|
+
handle display updates, progress tracking, and resource management for benchmark
|
|
51
|
+
monitoring.
|
|
52
|
+
"""
|
|
53
|
+
|
|
54
|
+
def __init__(self):
|
|
55
|
+
"""Initialize progress tracker with default state."""
|
|
56
|
+
self.profile: Profile | None = None
|
|
57
|
+
self.current_strategy: SchedulingStrategy | None = None
|
|
58
|
+
|
|
59
|
+
@abstractmethod
|
|
60
|
+
async def on_initialize(self, profile: Profile):
|
|
61
|
+
"""
|
|
62
|
+
Initialize progress tracking for the given benchmark profile.
|
|
63
|
+
|
|
64
|
+
:param profile: Benchmark profile configuration defining execution parameters
|
|
65
|
+
"""
|
|
66
|
+
|
|
67
|
+
@abstractmethod
|
|
68
|
+
async def on_benchmark_start(self, strategy: SchedulingStrategy):
|
|
69
|
+
"""
|
|
70
|
+
Handle benchmark strategy execution start event.
|
|
71
|
+
|
|
72
|
+
:param strategy: Scheduling strategy configuration being executed
|
|
73
|
+
"""
|
|
74
|
+
|
|
75
|
+
@abstractmethod
|
|
76
|
+
async def on_benchmark_update(
|
|
77
|
+
self, accumulator: BenchmarkAccumulatorT, scheduler_state: SchedulerState
|
|
78
|
+
):
|
|
79
|
+
"""
|
|
80
|
+
Handle benchmark execution progress update with current metrics.
|
|
81
|
+
|
|
82
|
+
:param accumulator: Current accumulated benchmark metrics and statistics
|
|
83
|
+
:param scheduler_state: Current scheduler execution state and counters
|
|
84
|
+
"""
|
|
85
|
+
|
|
86
|
+
@abstractmethod
|
|
87
|
+
async def on_benchmark_complete(self, benchmark: BenchmarkT):
|
|
88
|
+
"""
|
|
89
|
+
Handle benchmark strategy execution completion event.
|
|
90
|
+
|
|
91
|
+
:param benchmark: Completed benchmark results with final metrics
|
|
92
|
+
"""
|
|
93
|
+
|
|
94
|
+
@abstractmethod
|
|
95
|
+
async def on_finalize(self):
|
|
96
|
+
"""Finalize progress tracking and release associated resources."""
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
class GenerativeConsoleBenchmarkerProgress(
|
|
100
|
+
BenchmarkerProgress[GenerativeBenchmarkAccumulator, GenerativeBenchmark], Live
|
|
101
|
+
):
|
|
102
|
+
"""
|
|
103
|
+
Console-based real-time progress display for generative benchmarks.
|
|
104
|
+
|
|
105
|
+
Renders live benchmark execution statistics using Rich library components with
|
|
106
|
+
structured progress bars, timing information, request/token metrics, and optional
|
|
107
|
+
scheduler statistics. Updates refresh automatically during benchmark execution.
|
|
108
|
+
|
|
109
|
+
:cvar display_scheduler_stats: Whether to include scheduler statistics in display
|
|
110
|
+
"""
|
|
111
|
+
|
|
112
|
+
def __init__(self, display_scheduler_stats: bool = False):
|
|
113
|
+
"""
|
|
114
|
+
Initialize console progress display with rendering configuration.
|
|
115
|
+
|
|
116
|
+
:param display_scheduler_stats: Whether to display scheduler timing statistics
|
|
117
|
+
"""
|
|
118
|
+
super().__init__()
|
|
119
|
+
Live.__init__(
|
|
120
|
+
self,
|
|
121
|
+
refresh_per_second=4,
|
|
122
|
+
auto_refresh=True,
|
|
123
|
+
redirect_stdout=True,
|
|
124
|
+
redirect_stderr=True,
|
|
125
|
+
)
|
|
126
|
+
self.display_scheduler_stats: bool = display_scheduler_stats
|
|
127
|
+
self.run_progress: Progress | None = None
|
|
128
|
+
self.run_progress_task: TaskID | None = None
|
|
129
|
+
self.tasks_progress: _GenerativeProgressTasks | None = None
|
|
130
|
+
|
|
131
|
+
async def on_initialize(self, profile: Profile):
|
|
132
|
+
"""
|
|
133
|
+
Initialize console display components and begin live rendering.
|
|
134
|
+
|
|
135
|
+
:param profile: Benchmark profile configuration defining execution parameters
|
|
136
|
+
"""
|
|
137
|
+
self.tasks_progress = _GenerativeProgressTasks(
|
|
138
|
+
profile=profile, display_scheduler_stats=self.display_scheduler_stats
|
|
139
|
+
)
|
|
140
|
+
self.run_progress = Progress(
|
|
141
|
+
TextColumn("Generating...", style=f"italic {Colors.progress}"),
|
|
142
|
+
BarColumn(
|
|
143
|
+
bar_width=None,
|
|
144
|
+
complete_style=Colors.progress,
|
|
145
|
+
finished_style=Colors.success,
|
|
146
|
+
),
|
|
147
|
+
TextColumn(
|
|
148
|
+
"({task.fields[completed_benchmarks]}/{task.fields[total_benchmarks]})",
|
|
149
|
+
style=Colors.progress,
|
|
150
|
+
),
|
|
151
|
+
TextColumn("["),
|
|
152
|
+
TimeElapsedColumn(),
|
|
153
|
+
TextColumn("<"),
|
|
154
|
+
TimeRemainingColumn(),
|
|
155
|
+
TextColumn("]"),
|
|
156
|
+
)
|
|
157
|
+
self.run_progress_task = self.run_progress.add_task("")
|
|
158
|
+
self._sync_run_progress()
|
|
159
|
+
self.update(
|
|
160
|
+
Group(
|
|
161
|
+
Panel(
|
|
162
|
+
self.tasks_progress,
|
|
163
|
+
title="Benchmarks",
|
|
164
|
+
title_align="left",
|
|
165
|
+
expand=True,
|
|
166
|
+
),
|
|
167
|
+
self.run_progress,
|
|
168
|
+
)
|
|
169
|
+
)
|
|
170
|
+
self.start()
|
|
171
|
+
|
|
172
|
+
async def on_benchmark_start(self, strategy: SchedulingStrategy):
|
|
173
|
+
"""
|
|
174
|
+
Update display for benchmark strategy execution start.
|
|
175
|
+
|
|
176
|
+
:param strategy: Scheduling strategy configuration being executed
|
|
177
|
+
"""
|
|
178
|
+
if self.tasks_progress is not None:
|
|
179
|
+
self.tasks_progress.start_benchmark(strategy)
|
|
180
|
+
self._sync_run_progress()
|
|
181
|
+
|
|
182
|
+
async def on_benchmark_update(
|
|
183
|
+
self,
|
|
184
|
+
accumulator: GenerativeBenchmarkAccumulator,
|
|
185
|
+
scheduler_state: SchedulerState,
|
|
186
|
+
):
|
|
187
|
+
"""
|
|
188
|
+
Update display with current benchmark progress and metrics.
|
|
189
|
+
|
|
190
|
+
:param accumulator: Current accumulated benchmark metrics and statistics
|
|
191
|
+
:param scheduler_state: Current scheduler execution state and counters
|
|
192
|
+
"""
|
|
193
|
+
if self.tasks_progress is not None:
|
|
194
|
+
self.tasks_progress.update_benchmark(accumulator, scheduler_state)
|
|
195
|
+
self._sync_run_progress()
|
|
196
|
+
|
|
197
|
+
async def on_benchmark_complete(self, benchmark: GenerativeBenchmark):
|
|
198
|
+
"""
|
|
199
|
+
Update display for completed benchmark strategy.
|
|
200
|
+
|
|
201
|
+
:param benchmark: Completed benchmark results with final metrics
|
|
202
|
+
"""
|
|
203
|
+
if self.tasks_progress is not None:
|
|
204
|
+
self.tasks_progress.complete_benchmark(benchmark)
|
|
205
|
+
self._sync_run_progress()
|
|
206
|
+
|
|
207
|
+
async def on_finalize(self):
|
|
208
|
+
"""Stop display rendering and release resources."""
|
|
209
|
+
if self.tasks_progress is not None:
|
|
210
|
+
self.tasks_progress.finalize()
|
|
211
|
+
self._sync_run_progress()
|
|
212
|
+
if self.run_progress is not None and self.run_progress_task is not None:
|
|
213
|
+
self.run_progress.stop_task(self.run_progress_task)
|
|
214
|
+
self.stop()
|
|
215
|
+
self.run_progress = None
|
|
216
|
+
self.run_progress_task = None
|
|
217
|
+
self.tasks_progress = None
|
|
218
|
+
|
|
219
|
+
def _sync_run_progress(self):
|
|
220
|
+
"""Synchronize overall progress display with task progress."""
|
|
221
|
+
if (
|
|
222
|
+
self.run_progress is not None
|
|
223
|
+
and self.run_progress_task is not None
|
|
224
|
+
and self.tasks_progress is not None
|
|
225
|
+
):
|
|
226
|
+
self.run_progress.update(
|
|
227
|
+
self.run_progress_task,
|
|
228
|
+
total=self.tasks_progress.steps_total,
|
|
229
|
+
completed=self.tasks_progress.steps_progress,
|
|
230
|
+
completed_benchmarks=self.tasks_progress.tasks_progress,
|
|
231
|
+
total_benchmarks=self.tasks_progress.tasks_total,
|
|
232
|
+
)
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
# Scaling factor for progress calculations to provide granular progress updates
|
|
236
|
+
_PROGRESS_SCALE = 1000
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
class _GenerativeProgressTasks(Progress):
|
|
240
|
+
def __init__(self, profile: Profile, display_scheduler_stats: bool):
|
|
241
|
+
self.profile: Profile = profile
|
|
242
|
+
self.display_scheduler_stats: bool = display_scheduler_stats
|
|
243
|
+
self.benchmark_task_states: list[_GenerativeProgressTaskState] = []
|
|
244
|
+
self.current_index: int = -1
|
|
245
|
+
|
|
246
|
+
summary_text = "{task.fields[requests_summary]}\n{task.fields[tokens_summary]}"
|
|
247
|
+
if self.display_scheduler_stats:
|
|
248
|
+
summary_text += "\n{task.fields[scheduler_stats]}"
|
|
249
|
+
super().__init__(
|
|
250
|
+
TextColumn("[{task.fields[start_time]}]"),
|
|
251
|
+
SpinnerColumn(style=Colors.progress),
|
|
252
|
+
TaskProgressColumn(style=Colors.progress),
|
|
253
|
+
TextColumn("{task.description}"),
|
|
254
|
+
TextColumn("({task.fields[progress_status]})"),
|
|
255
|
+
TextColumn(" "),
|
|
256
|
+
TextColumn(summary_text),
|
|
257
|
+
)
|
|
258
|
+
|
|
259
|
+
for strategy_type in profile.strategy_types:
|
|
260
|
+
task_state = _GenerativeProgressTaskState(
|
|
261
|
+
strategy_type=strategy_type,
|
|
262
|
+
)
|
|
263
|
+
task_id = self.add_task(**task_state.current)
|
|
264
|
+
task_state.task_id = task_id
|
|
265
|
+
self.benchmark_task_states.append(task_state)
|
|
70
266
|
|
|
71
267
|
@property
|
|
72
|
-
def
|
|
73
|
-
return
|
|
268
|
+
def tasks_total(self) -> int:
|
|
269
|
+
return len(self.benchmark_task_states)
|
|
74
270
|
|
|
75
271
|
@property
|
|
76
|
-
def
|
|
77
|
-
|
|
78
|
-
return None
|
|
272
|
+
def tasks_progress(self) -> int:
|
|
273
|
+
return self.current_index + 1
|
|
79
274
|
|
|
80
|
-
|
|
275
|
+
@property
|
|
276
|
+
def steps_total(self) -> int:
|
|
277
|
+
return _PROGRESS_SCALE * len(self.benchmark_task_states)
|
|
81
278
|
|
|
82
279
|
@property
|
|
83
|
-
def
|
|
84
|
-
|
|
85
|
-
|
|
280
|
+
def steps_progress(self) -> int:
|
|
281
|
+
progress_current_task = (
|
|
282
|
+
self.benchmark_task_states[self.current_index].progress
|
|
283
|
+
if self.current_index < len(self.benchmark_task_states)
|
|
284
|
+
else 0
|
|
285
|
+
)
|
|
286
|
+
progress_total = self.current_index + (progress_current_task or 0)
|
|
287
|
+
|
|
288
|
+
return int(progress_total * _PROGRESS_SCALE)
|
|
289
|
+
|
|
290
|
+
def start_benchmark(self, strategy: SchedulingStrategy):
|
|
291
|
+
self.current_index += 1
|
|
292
|
+
if self.current_index >= len(self.benchmark_task_states):
|
|
293
|
+
# New task past initially estimated, append it to the end
|
|
294
|
+
task_state = _GenerativeProgressTaskState(strategy_type=strategy.type_)
|
|
295
|
+
task_id = self.add_task(**task_state.current)
|
|
296
|
+
task_state.task_id = task_id
|
|
297
|
+
self.benchmark_task_states.append(task_state)
|
|
298
|
+
|
|
299
|
+
current_state = self.benchmark_task_states[self.current_index]
|
|
300
|
+
current_state.start(strategy)
|
|
301
|
+
if current_state.task_id is not None:
|
|
302
|
+
self.update(
|
|
303
|
+
current_state.task_id,
|
|
304
|
+
start=True,
|
|
305
|
+
**current_state.current,
|
|
306
|
+
)
|
|
307
|
+
|
|
308
|
+
def update_benchmark(
|
|
309
|
+
self,
|
|
310
|
+
accumulator: GenerativeBenchmarkAccumulator,
|
|
311
|
+
scheduler_state: SchedulerState,
|
|
312
|
+
):
|
|
313
|
+
current_state = self.benchmark_task_states[self.current_index]
|
|
314
|
+
current_state.update(accumulator, scheduler_state)
|
|
315
|
+
if current_state.task_id is not None:
|
|
316
|
+
self.update(
|
|
317
|
+
current_state.task_id,
|
|
318
|
+
**current_state.current,
|
|
319
|
+
)
|
|
86
320
|
|
|
87
|
-
|
|
88
|
-
|
|
321
|
+
def complete_benchmark(self, benchmark: GenerativeBenchmark):
|
|
322
|
+
current_state = self.benchmark_task_states[self.current_index]
|
|
323
|
+
current_state.complete(benchmark)
|
|
324
|
+
if current_state.task_id is not None:
|
|
325
|
+
self.update(
|
|
326
|
+
current_state.task_id,
|
|
327
|
+
**current_state.current,
|
|
328
|
+
)
|
|
89
329
|
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
number / float(self.max_number) * 1000 if self.max_number else -math.inf
|
|
93
|
-
)
|
|
94
|
-
duration_percent = (
|
|
95
|
-
(time.time() - self.start_time) / self.max_duration * 1000
|
|
96
|
-
if self.max_duration and self.start_time
|
|
97
|
-
else -math.inf
|
|
98
|
-
)
|
|
330
|
+
def finalize(self):
|
|
331
|
+
self.stop()
|
|
99
332
|
|
|
100
|
-
|
|
333
|
+
|
|
334
|
+
@dataclass
|
|
335
|
+
class _GenerativeProgressTaskState:
|
|
336
|
+
strategy_type: str
|
|
337
|
+
task_id: TaskID | None = None
|
|
338
|
+
strategy: SchedulingStrategy | None = None
|
|
339
|
+
benchmark_status: Literal[
|
|
340
|
+
"pending", "warmup", "active", "cooldown", "completed"
|
|
341
|
+
] = "pending"
|
|
342
|
+
progress: float | None = None
|
|
343
|
+
start_time: float = -1.0
|
|
344
|
+
successful_requests: int = 0
|
|
345
|
+
cancelled_requests: int = 0
|
|
346
|
+
errored_requests: int = 0
|
|
347
|
+
request_concurrency: float = 0.0
|
|
348
|
+
requests_per_second: float = 0.0
|
|
349
|
+
request_latency: float = 0.0
|
|
350
|
+
output_tokens: float = 0
|
|
351
|
+
output_tokens_rate: float = 0.0
|
|
352
|
+
prompt_tokens: float = 0
|
|
353
|
+
total_tokens_rate: float = 0.0
|
|
354
|
+
time_to_first_token: float = 0.0
|
|
355
|
+
inter_token_latency: float = 0.0
|
|
356
|
+
queued_time: float = 0.0
|
|
357
|
+
request_targeted_start_delay: float = 0.0
|
|
358
|
+
scheduler_overheads_time: float = 0.0
|
|
101
359
|
|
|
102
360
|
@property
|
|
103
|
-
def
|
|
104
|
-
|
|
361
|
+
def current(self) -> dict[str, Any]:
|
|
362
|
+
return {
|
|
105
363
|
"start_time": self.formatted_start_time,
|
|
364
|
+
"description": str(self.strategy or self.strategy_type),
|
|
106
365
|
"progress_status": self.formatted_progress_status,
|
|
107
366
|
"requests_summary": self.formatted_requests_summary,
|
|
367
|
+
"tokens_summary": self.formatted_tokens_summary,
|
|
368
|
+
"scheduler_stats": self.formatted_scheduler_stats,
|
|
369
|
+
"completed": self.completed,
|
|
370
|
+
"total": self.total,
|
|
108
371
|
}
|
|
109
372
|
|
|
110
|
-
|
|
111
|
-
|
|
373
|
+
@property
|
|
374
|
+
def completed(self) -> float:
|
|
375
|
+
if self.benchmark_status == "pending":
|
|
376
|
+
return 0.0
|
|
112
377
|
|
|
113
|
-
|
|
378
|
+
if self.benchmark_status == "completed":
|
|
379
|
+
return float(_PROGRESS_SCALE)
|
|
380
|
+
|
|
381
|
+
return self.progress * _PROGRESS_SCALE if self.progress is not None else 0.0
|
|
382
|
+
|
|
383
|
+
@property
|
|
384
|
+
def total(self) -> float:
|
|
385
|
+
return _PROGRESS_SCALE
|
|
114
386
|
|
|
115
387
|
@property
|
|
116
388
|
def formatted_start_time(self) -> str:
|
|
117
|
-
if self.start_time
|
|
389
|
+
if self.start_time < 0.0:
|
|
118
390
|
return "--:--:--"
|
|
119
391
|
|
|
120
|
-
return
|
|
392
|
+
return safe_format_timestamp(self.start_time, format_="%H:%M:%S")
|
|
121
393
|
|
|
122
394
|
@property
|
|
123
395
|
def formatted_progress_status(self) -> str:
|
|
124
|
-
if self.
|
|
125
|
-
status = "complete"
|
|
126
|
-
color = Colors.SUCCESS
|
|
127
|
-
elif self.compiling:
|
|
128
|
-
status = "compiling"
|
|
129
|
-
color = Colors.PROGRESS
|
|
130
|
-
elif self.started and self.in_warmup:
|
|
396
|
+
if self.benchmark_status == "warmup":
|
|
131
397
|
status = "warmup"
|
|
132
|
-
color = Colors.
|
|
133
|
-
elif self.
|
|
134
|
-
status = "cooldown"
|
|
135
|
-
color = Colors.PROGRESS
|
|
136
|
-
elif self.started:
|
|
398
|
+
color = Colors.progress
|
|
399
|
+
elif self.benchmark_status == "active":
|
|
137
400
|
status = "running"
|
|
138
|
-
color = Colors.
|
|
401
|
+
color = Colors.progress
|
|
402
|
+
elif self.benchmark_status == "cooldown":
|
|
403
|
+
status = "cooldown"
|
|
404
|
+
color = Colors.progress
|
|
405
|
+
elif self.benchmark_status == "completed":
|
|
406
|
+
status = "complete"
|
|
407
|
+
color = Colors.success
|
|
139
408
|
else:
|
|
140
409
|
status = "pending"
|
|
141
|
-
color = Colors.
|
|
410
|
+
color = Colors.info
|
|
142
411
|
|
|
143
412
|
return f"[{color}]{status.ljust(8)}[/{color}]"
|
|
144
413
|
|
|
145
414
|
@property
|
|
146
415
|
def formatted_requests_summary(self) -> str:
|
|
147
|
-
if
|
|
416
|
+
if self.benchmark_status == "pending":
|
|
148
417
|
return " "
|
|
149
418
|
|
|
150
419
|
return (
|
|
151
|
-
f"[{Colors.
|
|
152
|
-
+
|
|
153
|
-
value=self.
|
|
420
|
+
f"[{Colors.info}]Req:[/{Colors.info}] "
|
|
421
|
+
+ format_value_display(
|
|
422
|
+
value=self.requests_per_second,
|
|
154
423
|
label="req/s",
|
|
155
424
|
total_characters=12,
|
|
156
425
|
digits_places=4,
|
|
157
426
|
decimal_places=1,
|
|
158
427
|
)
|
|
159
428
|
+ ", "
|
|
160
|
-
+
|
|
429
|
+
+ format_value_display(
|
|
161
430
|
value=self.request_latency,
|
|
162
431
|
label="Lat",
|
|
163
432
|
units="s",
|
|
@@ -166,32 +435,32 @@ class BenchmarkerTaskProgressState:
|
|
|
166
435
|
decimal_places=2,
|
|
167
436
|
)
|
|
168
437
|
+ ", "
|
|
169
|
-
+
|
|
170
|
-
value=self.
|
|
438
|
+
+ format_value_display(
|
|
439
|
+
value=self.request_concurrency,
|
|
171
440
|
label="Conc",
|
|
172
441
|
total_characters=12,
|
|
173
442
|
digits_places=4,
|
|
174
443
|
decimal_places=1,
|
|
175
444
|
)
|
|
176
445
|
+ ", "
|
|
177
|
-
+
|
|
178
|
-
value=self.
|
|
446
|
+
+ format_value_display(
|
|
447
|
+
value=self.successful_requests,
|
|
179
448
|
label="Comp",
|
|
180
449
|
total_characters=12,
|
|
181
450
|
digits_places=5,
|
|
182
451
|
decimal_places=0,
|
|
183
452
|
)
|
|
184
453
|
+ ", "
|
|
185
|
-
+
|
|
186
|
-
value=self.
|
|
454
|
+
+ format_value_display(
|
|
455
|
+
value=self.cancelled_requests,
|
|
187
456
|
label="Inc",
|
|
188
457
|
total_characters=12,
|
|
189
458
|
digits_places=5,
|
|
190
459
|
decimal_places=0,
|
|
191
460
|
)
|
|
192
461
|
+ ", "
|
|
193
|
-
+
|
|
194
|
-
value=self.
|
|
462
|
+
+ format_value_display(
|
|
463
|
+
value=self.errored_requests,
|
|
195
464
|
label="Err",
|
|
196
465
|
total_characters=12,
|
|
197
466
|
digits_places=5,
|
|
@@ -199,101 +468,14 @@ class BenchmarkerTaskProgressState:
|
|
|
199
468
|
)
|
|
200
469
|
)
|
|
201
470
|
|
|
202
|
-
@property
|
|
203
|
-
def formatted_scheduler_stats(self) -> str:
|
|
204
|
-
if not self.started:
|
|
205
|
-
return " "
|
|
206
|
-
|
|
207
|
-
return (
|
|
208
|
-
f"[{Colors.INFO}]Sys:[/{Colors.INFO}] "
|
|
209
|
-
+ BenchmarkerTaskProgressState.format_progress_display(
|
|
210
|
-
value=self.worker_overheads_time_ms,
|
|
211
|
-
label="Work OH",
|
|
212
|
-
units="ms",
|
|
213
|
-
total_characters=18,
|
|
214
|
-
digits_places=3,
|
|
215
|
-
decimal_places=1,
|
|
216
|
-
)
|
|
217
|
-
+ ", "
|
|
218
|
-
+ BenchmarkerTaskProgressState.format_progress_display(
|
|
219
|
-
value=self.backend_overheads_time_ms,
|
|
220
|
-
label="Back OH",
|
|
221
|
-
units="ms",
|
|
222
|
-
total_characters=18,
|
|
223
|
-
digits_places=3,
|
|
224
|
-
decimal_places=1,
|
|
225
|
-
)
|
|
226
|
-
+ ", "
|
|
227
|
-
+ BenchmarkerTaskProgressState.format_progress_display(
|
|
228
|
-
value=self.requests_sleep_time_ms,
|
|
229
|
-
label="Req Sleep",
|
|
230
|
-
units="ms",
|
|
231
|
-
total_characters=18,
|
|
232
|
-
digits_places=5,
|
|
233
|
-
decimal_places=0,
|
|
234
|
-
)
|
|
235
|
-
+ ", "
|
|
236
|
-
+ BenchmarkerTaskProgressState.format_progress_display(
|
|
237
|
-
value=self.requests_targeted_start_time_delay_ms,
|
|
238
|
-
label="Start Del",
|
|
239
|
-
units="ms",
|
|
240
|
-
total_characters=18,
|
|
241
|
-
digits_places=5,
|
|
242
|
-
decimal_places=0,
|
|
243
|
-
)
|
|
244
|
-
)
|
|
245
|
-
|
|
246
|
-
@staticmethod
|
|
247
|
-
def format_progress_display(
|
|
248
|
-
value: float,
|
|
249
|
-
label: str,
|
|
250
|
-
units: str = "",
|
|
251
|
-
total_characters: Optional[int] = None,
|
|
252
|
-
digits_places: Optional[int] = None,
|
|
253
|
-
decimal_places: Optional[int] = None,
|
|
254
|
-
) -> str:
|
|
255
|
-
if decimal_places is None and digits_places is None:
|
|
256
|
-
formatted_number = f"{value}:.0f"
|
|
257
|
-
elif digits_places is None:
|
|
258
|
-
formatted_number = f"{value:.{decimal_places}f}"
|
|
259
|
-
elif decimal_places is None:
|
|
260
|
-
formatted_number = f"{value:>{digits_places}f}"
|
|
261
|
-
else:
|
|
262
|
-
formatted_number = f"{value:>{digits_places}.{decimal_places}f}"
|
|
263
|
-
|
|
264
|
-
result = f"{formatted_number}{units} [{Colors.INFO}]{label}[/{Colors.INFO}]"
|
|
265
|
-
|
|
266
|
-
if total_characters is not None:
|
|
267
|
-
total_characters += len(Colors.INFO) * 2 + 5
|
|
268
|
-
|
|
269
|
-
if len(result) < total_characters:
|
|
270
|
-
result = result.rjust(total_characters)
|
|
271
|
-
|
|
272
|
-
return result
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
class GenerativeTextBenchmarkerTaskProgressState(BenchmarkerTaskProgressState):
|
|
276
|
-
output_tokens: float = 0
|
|
277
|
-
prompt_tokens: float = 0
|
|
278
|
-
output_tokens_rate: float = 0
|
|
279
|
-
total_tokens_rate: float = 0
|
|
280
|
-
tokens_ttft: float = 0
|
|
281
|
-
tokens_itl: float = 0
|
|
282
|
-
|
|
283
|
-
@property
|
|
284
|
-
def fields(self) -> dict[str, str]:
|
|
285
|
-
fields = super().fields
|
|
286
|
-
fields["tokens_summary"] = self.formatted_tokens_summary
|
|
287
|
-
return fields
|
|
288
|
-
|
|
289
471
|
@property
|
|
290
472
|
def formatted_tokens_summary(self) -> str:
|
|
291
|
-
if
|
|
473
|
+
if self.benchmark_status == "pending":
|
|
292
474
|
return " "
|
|
293
475
|
|
|
294
476
|
return (
|
|
295
|
-
f"[{Colors.
|
|
296
|
-
+
|
|
477
|
+
f"[{Colors.info}]Tok:[/{Colors.info}] "
|
|
478
|
+
+ format_value_display(
|
|
297
479
|
value=self.output_tokens_rate,
|
|
298
480
|
label="gen/s",
|
|
299
481
|
total_characters=12,
|
|
@@ -301,7 +483,7 @@ class GenerativeTextBenchmarkerTaskProgressState(BenchmarkerTaskProgressState):
|
|
|
301
483
|
decimal_places=1,
|
|
302
484
|
)
|
|
303
485
|
+ ", "
|
|
304
|
-
+
|
|
486
|
+
+ format_value_display(
|
|
305
487
|
value=self.total_tokens_rate,
|
|
306
488
|
label="tot/s",
|
|
307
489
|
total_characters=12,
|
|
@@ -309,8 +491,8 @@ class GenerativeTextBenchmarkerTaskProgressState(BenchmarkerTaskProgressState):
|
|
|
309
491
|
decimal_places=1,
|
|
310
492
|
)
|
|
311
493
|
+ ", "
|
|
312
|
-
+
|
|
313
|
-
value=self.
|
|
494
|
+
+ format_value_display(
|
|
495
|
+
value=self.time_to_first_token,
|
|
314
496
|
label="TTFT",
|
|
315
497
|
units="ms",
|
|
316
498
|
total_characters=12,
|
|
@@ -318,8 +500,8 @@ class GenerativeTextBenchmarkerTaskProgressState(BenchmarkerTaskProgressState):
|
|
|
318
500
|
decimal_places=1,
|
|
319
501
|
)
|
|
320
502
|
+ ", "
|
|
321
|
-
+
|
|
322
|
-
value=self.
|
|
503
|
+
+ format_value_display(
|
|
504
|
+
value=self.inter_token_latency,
|
|
323
505
|
label="ITL",
|
|
324
506
|
units="ms",
|
|
325
507
|
total_characters=12,
|
|
@@ -327,7 +509,7 @@ class GenerativeTextBenchmarkerTaskProgressState(BenchmarkerTaskProgressState):
|
|
|
327
509
|
decimal_places=1,
|
|
328
510
|
)
|
|
329
511
|
+ ", "
|
|
330
|
-
+
|
|
512
|
+
+ format_value_display(
|
|
331
513
|
value=self.prompt_tokens,
|
|
332
514
|
label="Prompt",
|
|
333
515
|
total_characters=12,
|
|
@@ -335,7 +517,7 @@ class GenerativeTextBenchmarkerTaskProgressState(BenchmarkerTaskProgressState):
|
|
|
335
517
|
decimal_places=0,
|
|
336
518
|
)
|
|
337
519
|
+ ", "
|
|
338
|
-
+
|
|
520
|
+
+ format_value_display(
|
|
339
521
|
value=self.output_tokens,
|
|
340
522
|
label="Gen",
|
|
341
523
|
total_characters=12,
|
|
@@ -344,377 +526,192 @@ class GenerativeTextBenchmarkerTaskProgressState(BenchmarkerTaskProgressState):
|
|
|
344
526
|
)
|
|
345
527
|
)
|
|
346
528
|
|
|
529
|
+
@property
|
|
530
|
+
def formatted_scheduler_stats(self) -> str:
|
|
531
|
+
if self.benchmark_status == "pending":
|
|
532
|
+
return " "
|
|
347
533
|
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
self.benchmarker_tasks_progress,
|
|
358
|
-
title="Benchmarks",
|
|
359
|
-
title_align="left",
|
|
360
|
-
expand=True,
|
|
361
|
-
)
|
|
362
|
-
self.benchmarker_progress = Progress(
|
|
363
|
-
TextColumn("Generating...", style=f"italic {Colors.PROGRESS}"),
|
|
364
|
-
BarColumn(
|
|
365
|
-
bar_width=None,
|
|
366
|
-
complete_style=Colors.PROGRESS,
|
|
367
|
-
finished_style=Colors.SUCCESS,
|
|
368
|
-
),
|
|
369
|
-
TextColumn(
|
|
370
|
-
"({task.fields[completed_benchmarks]}/{task.fields[total_benchmarks]})",
|
|
371
|
-
style=Colors.PROGRESS,
|
|
372
|
-
),
|
|
373
|
-
TextColumn("["),
|
|
374
|
-
TimeElapsedColumn(),
|
|
375
|
-
TextColumn("<"),
|
|
376
|
-
TimeRemainingColumn(),
|
|
377
|
-
TextColumn("]"),
|
|
378
|
-
)
|
|
379
|
-
self.benchmarker_live = Live(
|
|
380
|
-
Group(
|
|
381
|
-
self.benchmarker_tasks_panel,
|
|
382
|
-
self.benchmarker_progress,
|
|
383
|
-
),
|
|
384
|
-
redirect_stdout=True,
|
|
385
|
-
redirect_stderr=True,
|
|
386
|
-
)
|
|
387
|
-
self.active_task: Optional[TaskID] = None
|
|
388
|
-
self.benchmarker_tasks: list[BTPS] = []
|
|
389
|
-
self.progress_task: Optional[TaskID] = None
|
|
390
|
-
|
|
391
|
-
def update(self, result: BenchmarkerResult):
|
|
392
|
-
if result.type_ == "run_start":
|
|
393
|
-
if self.started:
|
|
394
|
-
raise RuntimeError("Progress display already started.")
|
|
395
|
-
|
|
396
|
-
self.handle_start(result)
|
|
397
|
-
self.started = True
|
|
398
|
-
elif result.type_ == "run_complete":
|
|
399
|
-
if not self.started:
|
|
400
|
-
raise RuntimeError("Progress display not started.")
|
|
401
|
-
|
|
402
|
-
self.handle_end(result)
|
|
403
|
-
self.started = False
|
|
404
|
-
else:
|
|
405
|
-
if not self.started:
|
|
406
|
-
raise RuntimeError("Progress display not started.")
|
|
407
|
-
|
|
408
|
-
self.handle_update(result)
|
|
409
|
-
|
|
410
|
-
def handle_start(self, result: BenchmarkerResult):
|
|
411
|
-
self.benchmarker_live.start()
|
|
412
|
-
|
|
413
|
-
for index, strategy_type in enumerate(result.profile.strategy_types):
|
|
414
|
-
task_id = self.benchmarker_tasks_progress.add_task(
|
|
415
|
-
description=strategy_type,
|
|
416
|
-
start=False,
|
|
417
|
-
total=None,
|
|
418
|
-
completed=0,
|
|
419
|
-
visible=False,
|
|
534
|
+
return (
|
|
535
|
+
f"[{Colors.info}]Sys:[/{Colors.info}] , "
|
|
536
|
+
+ format_value_display(
|
|
537
|
+
value=self.request_targeted_start_delay,
|
|
538
|
+
label="Start Del",
|
|
539
|
+
units="ms",
|
|
540
|
+
total_characters=18,
|
|
541
|
+
digits_places=5,
|
|
542
|
+
decimal_places=0,
|
|
420
543
|
)
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
544
|
+
+ format_value_display(
|
|
545
|
+
value=self.scheduler_overheads_time,
|
|
546
|
+
label="Sched OH",
|
|
547
|
+
units="ms",
|
|
548
|
+
total_characters=18,
|
|
549
|
+
digits_places=3,
|
|
550
|
+
decimal_places=1,
|
|
426
551
|
)
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
552
|
+
+ ", "
|
|
553
|
+
+ format_value_display(
|
|
554
|
+
value=self.queued_time,
|
|
555
|
+
label="Queued",
|
|
556
|
+
units="ms",
|
|
557
|
+
total_characters=18,
|
|
558
|
+
digits_places=5,
|
|
559
|
+
decimal_places=0,
|
|
433
560
|
)
|
|
434
|
-
|
|
435
|
-
self.progress_task = self.benchmarker_progress.add_task(
|
|
436
|
-
"",
|
|
437
|
-
total=len(self.benchmarker_tasks) * 1000,
|
|
438
|
-
completed_benchmarks=0,
|
|
439
|
-
total_benchmarks=len(self.benchmarker_tasks),
|
|
440
561
|
)
|
|
441
562
|
|
|
442
|
-
def
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
if result.type_ == "scheduler_start":
|
|
446
|
-
self.handle_update_scheduler_start(current_state, result)
|
|
447
|
-
self.active_task = current_state.task_id
|
|
448
|
-
elif result.type_ == "scheduler_update":
|
|
449
|
-
self.handle_update_scheduler_update(current_state, result)
|
|
450
|
-
elif result.type_ == "scheduler_complete":
|
|
451
|
-
self.handle_update_scheduler_complete(current_state, result)
|
|
452
|
-
elif result.type_ == "benchmark_compiled":
|
|
453
|
-
self.handle_update_benchmark_compiled(current_state, result)
|
|
454
|
-
else:
|
|
455
|
-
raise ValueError(f"Unknown result type: {result.type_}")
|
|
563
|
+
def start(self, strategy: SchedulingStrategy):
|
|
564
|
+
self.strategy = strategy
|
|
565
|
+
self.strategy_type = strategy.type_
|
|
456
566
|
|
|
457
|
-
|
|
458
|
-
raise RuntimeError("Progress task not set.")
|
|
459
|
-
|
|
460
|
-
self.benchmarker_tasks_progress.update(
|
|
461
|
-
current_state.task_id,
|
|
462
|
-
description=current_state.description,
|
|
463
|
-
completed=current_state.completed,
|
|
464
|
-
total=current_state.total,
|
|
465
|
-
**current_state.fields, # type: ignore[arg-type]
|
|
466
|
-
)
|
|
467
|
-
self.benchmarker_progress.update(
|
|
468
|
-
self.progress_task,
|
|
469
|
-
completed=(result.current_index * 1000) + current_state.completed,
|
|
470
|
-
total=1000 * len(self.benchmarker_tasks),
|
|
471
|
-
completed_benchmarks=(
|
|
472
|
-
result.current_index + (1 if current_state.ended else 0)
|
|
473
|
-
),
|
|
474
|
-
total_benchmarks=len(self.benchmarker_tasks),
|
|
475
|
-
)
|
|
476
|
-
|
|
477
|
-
if current_state.ended:
|
|
478
|
-
self.benchmarker_tasks_progress.stop_task(current_state.task_id)
|
|
479
|
-
self.active_task = None
|
|
480
|
-
|
|
481
|
-
def handle_update_scheduler_start(
|
|
482
|
-
self, progress_state: BTPS, result: BenchmarkerResult
|
|
483
|
-
):
|
|
484
|
-
if self.active_task is not None:
|
|
485
|
-
raise RuntimeError("Active task already set.")
|
|
486
|
-
|
|
487
|
-
progress_state.strategy = result.current_strategy # type: ignore[assignment]
|
|
488
|
-
progress_state.started = True
|
|
489
|
-
current_aggregator: BenchmarkAggregator = result.current_aggregator # type: ignore[assignment]
|
|
490
|
-
progress_state.start_time = (
|
|
491
|
-
current_aggregator.requests_stats.totals.total.start_time
|
|
492
|
-
)
|
|
493
|
-
progress_state.max_number = current_aggregator.args.max_number
|
|
494
|
-
progress_state.max_duration = current_aggregator.args.max_duration
|
|
495
|
-
|
|
496
|
-
def handle_update_scheduler_update(
|
|
497
|
-
self, progress_state: BTPS, result: BenchmarkerResult
|
|
498
|
-
):
|
|
499
|
-
if self.active_task is None:
|
|
500
|
-
raise RuntimeError("Active task not set.")
|
|
501
|
-
|
|
502
|
-
if self.active_task != progress_state.task_id:
|
|
503
|
-
raise RuntimeError("Active task does not match current task.")
|
|
504
|
-
|
|
505
|
-
current_aggregator: BenchmarkAggregator = result.current_aggregator # type: ignore[assignment]
|
|
506
|
-
progress_state.in_warmup = current_aggregator.in_warmup
|
|
507
|
-
progress_state.in_cooldown = current_aggregator.in_cooldown
|
|
508
|
-
progress_state.requests_rate = (
|
|
509
|
-
current_aggregator.requests_stats.totals.successful.rate
|
|
510
|
-
)
|
|
511
|
-
progress_state.request_latency = (
|
|
512
|
-
current_aggregator.requests_stats.request_time.mean
|
|
513
|
-
)
|
|
514
|
-
progress_state.requests_processing = (
|
|
515
|
-
current_aggregator.scheduler_stats.processing_requests.last
|
|
516
|
-
)
|
|
517
|
-
progress_state.requests_successful = (
|
|
518
|
-
current_aggregator.requests_stats.totals.successful.total
|
|
519
|
-
)
|
|
520
|
-
progress_state.requests_incomplete = (
|
|
521
|
-
current_aggregator.requests_stats.totals.incomplete.total
|
|
522
|
-
)
|
|
523
|
-
progress_state.requests_errored = (
|
|
524
|
-
current_aggregator.requests_stats.totals.errored.total
|
|
525
|
-
)
|
|
526
|
-
progress_state.worker_overheads_time_ms = (
|
|
527
|
-
current_aggregator.requests_stats.scheduled_time_delay.mean_ms
|
|
528
|
-
+ current_aggregator.requests_stats.worker_start_delay.mean_ms
|
|
529
|
-
)
|
|
530
|
-
progress_state.backend_overheads_time_ms = (
|
|
531
|
-
current_aggregator.requests_stats.request_time_delay.mean_ms
|
|
532
|
-
)
|
|
533
|
-
progress_state.requests_sleep_time_ms = (
|
|
534
|
-
current_aggregator.requests_stats.scheduled_time_sleep.mean_ms
|
|
535
|
-
)
|
|
536
|
-
progress_state.requests_targeted_start_time_delay_ms = (
|
|
537
|
-
current_aggregator.requests_stats.request_start_time_targeted_delay.mean_ms
|
|
538
|
-
)
|
|
539
|
-
|
|
540
|
-
def handle_update_scheduler_complete(
|
|
567
|
+
def update(
|
|
541
568
|
self,
|
|
542
|
-
|
|
543
|
-
|
|
569
|
+
accumulator: GenerativeBenchmarkAccumulator,
|
|
570
|
+
scheduler_state: SchedulerState,
|
|
544
571
|
):
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
completed
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
572
|
+
self.progress = (
|
|
573
|
+
(1.0 - scheduler_state.progress.remaining_fraction)
|
|
574
|
+
if scheduler_state.progress.remaining_fraction is not None
|
|
575
|
+
else 0.0
|
|
576
|
+
)
|
|
577
|
+
self._update_processing_states(
|
|
578
|
+
benchmark_status=self._map_status(accumulator.timings.status),
|
|
579
|
+
start_time=accumulator.timings.measure_start,
|
|
580
|
+
successful_requests=scheduler_state.successful_requests,
|
|
581
|
+
cancelled_requests=scheduler_state.cancelled_requests,
|
|
582
|
+
errored_requests=scheduler_state.errored_requests,
|
|
583
|
+
)
|
|
584
|
+
self._update_request_stats(
|
|
585
|
+
request_concurrency=accumulator.concurrency_metric.time_weighted_mean,
|
|
586
|
+
requests_per_second=accumulator.completed_metrics.requests.rate_per_second,
|
|
587
|
+
request_latency=accumulator.completed_metrics.request_latency.mean,
|
|
588
|
+
)
|
|
589
|
+
self._update_token_stats(
|
|
590
|
+
output_tokens=accumulator.completed_metrics.total_tokens.mean,
|
|
591
|
+
output_tokens_rate=accumulator.completed_metrics.output_tokens.rate_per_second,
|
|
592
|
+
prompt_tokens=accumulator.completed_metrics.prompt_tokens.mean,
|
|
593
|
+
total_tokens_rate=accumulator.completed_metrics.total_tokens.rate_per_second,
|
|
594
|
+
time_to_first_token=accumulator.completed_metrics.time_to_first_token_ms.mean,
|
|
595
|
+
inter_token_latency=accumulator.completed_metrics.inter_token_latency_ms.mean,
|
|
596
|
+
converted=True,
|
|
597
|
+
)
|
|
598
|
+
self._update_system_stats(
|
|
599
|
+
request_targeted_start_delay=accumulator.scheduler_metrics.request_targeted_start_delay.mean,
|
|
600
|
+
queued_time=accumulator.scheduler_metrics.queued_time.mean,
|
|
601
|
+
scheduler_overheads_time=accumulator.scheduler_metrics.resolve_end_delay.mean,
|
|
602
|
+
converted=False,
|
|
603
|
+
)
|
|
604
|
+
|
|
605
|
+
def complete(self, benchmark: GenerativeBenchmark):
|
|
606
|
+
self._update_processing_states(
|
|
607
|
+
benchmark_status="completed",
|
|
608
|
+
start_time=benchmark.start_time,
|
|
609
|
+
successful_requests=benchmark.metrics.request_totals.successful,
|
|
610
|
+
cancelled_requests=benchmark.metrics.request_totals.incomplete,
|
|
611
|
+
errored_requests=benchmark.metrics.request_totals.errored,
|
|
612
|
+
)
|
|
613
|
+
self._update_request_stats(
|
|
614
|
+
request_concurrency=benchmark.metrics.request_concurrency.successful.mean,
|
|
615
|
+
requests_per_second=benchmark.metrics.requests_per_second.successful.mean,
|
|
616
|
+
request_latency=benchmark.metrics.request_latency.successful.mean,
|
|
617
|
+
)
|
|
618
|
+
self._update_token_stats(
|
|
619
|
+
output_tokens=benchmark.metrics.output_token_count.successful.mean,
|
|
620
|
+
output_tokens_rate=benchmark.metrics.output_tokens_per_second.successful.mean,
|
|
621
|
+
prompt_tokens=benchmark.metrics.prompt_token_count.successful.mean,
|
|
622
|
+
total_tokens_rate=benchmark.metrics.tokens_per_second.successful.mean,
|
|
623
|
+
time_to_first_token=(
|
|
624
|
+
benchmark.metrics.time_to_first_token_ms.successful.mean
|
|
625
|
+
),
|
|
626
|
+
inter_token_latency=(
|
|
627
|
+
benchmark.metrics.inter_token_latency_ms.successful.mean
|
|
628
|
+
),
|
|
629
|
+
converted=True,
|
|
584
630
|
)
|
|
585
|
-
self.benchmarker_progress.stop_task(self.progress_task)
|
|
586
|
-
self.benchmarker_live.stop()
|
|
587
|
-
self.active_task = None
|
|
588
|
-
self.benchmarker_tasks = []
|
|
589
|
-
self.progress_task = None
|
|
590
|
-
|
|
591
|
-
def create_task_progress_columns(self) -> list[ProgressColumn]:
|
|
592
|
-
columns = [
|
|
593
|
-
TextColumn("[{task.fields[start_time]}]"),
|
|
594
|
-
SpinnerColumn(style=Colors.PROGRESS),
|
|
595
|
-
TaskProgressColumn(style=Colors.PROGRESS),
|
|
596
|
-
TextColumn("{task.description}"),
|
|
597
|
-
TextColumn("({task.fields[progress_status]})"),
|
|
598
|
-
TextColumn(" "),
|
|
599
|
-
]
|
|
600
|
-
|
|
601
|
-
if not self.display_scheduler_stats:
|
|
602
|
-
columns += [
|
|
603
|
-
TextColumn("{task.fields[requests_summary]}\n"),
|
|
604
|
-
]
|
|
605
|
-
else:
|
|
606
|
-
columns += [
|
|
607
|
-
TextColumn(
|
|
608
|
-
"{task.fields[requests_summary]}\n{task.fields[scheduler_stats]}\n"
|
|
609
|
-
),
|
|
610
|
-
]
|
|
611
631
|
|
|
612
|
-
|
|
632
|
+
@staticmethod
|
|
633
|
+
def _map_status(
|
|
634
|
+
status: Literal["pending", "warmup", "active", "cooldown", "completed"],
|
|
635
|
+
) -> Literal["pending", "warmup", "active", "cooldown", "completed"]:
|
|
636
|
+
"""Map accumulator status to internal progress status representation."""
|
|
637
|
+
return status
|
|
613
638
|
|
|
614
|
-
def
|
|
639
|
+
def _update_processing_states(
|
|
615
640
|
self,
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
641
|
+
benchmark_status: Literal[
|
|
642
|
+
"pending", "warmup", "active", "cooldown", "completed"
|
|
643
|
+
]
|
|
644
|
+
| None = None,
|
|
645
|
+
start_time: float | None = None,
|
|
646
|
+
successful_requests: int | None = None,
|
|
647
|
+
cancelled_requests: int | None = None,
|
|
648
|
+
errored_requests: int | None = None,
|
|
649
|
+
):
|
|
650
|
+
if benchmark_status is not None:
|
|
651
|
+
self.benchmark_status = benchmark_status
|
|
652
|
+
if start_time is not None:
|
|
653
|
+
self.start_time = start_time
|
|
654
|
+
if successful_requests is not None:
|
|
655
|
+
self.successful_requests = successful_requests
|
|
656
|
+
if cancelled_requests is not None:
|
|
657
|
+
self.cancelled_requests = cancelled_requests
|
|
658
|
+
if errored_requests is not None:
|
|
659
|
+
self.errored_requests = errored_requests
|
|
660
|
+
|
|
661
|
+
def _update_request_stats(
|
|
632
662
|
self,
|
|
633
|
-
|
|
634
|
-
|
|
663
|
+
request_concurrency: float | None = None,
|
|
664
|
+
requests_per_second: float | None = None,
|
|
665
|
+
request_latency: float | None = None,
|
|
635
666
|
):
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
progress_state.output_tokens_rate = (
|
|
645
|
-
current_aggregator.requests_stats.output_tokens.rate
|
|
646
|
-
)
|
|
647
|
-
progress_state.total_tokens_rate = (
|
|
648
|
-
current_aggregator.requests_stats.total_tokens.rate
|
|
649
|
-
)
|
|
650
|
-
progress_state.tokens_ttft = (
|
|
651
|
-
current_aggregator.requests_stats.time_to_first_token.mean_ms
|
|
652
|
-
)
|
|
653
|
-
progress_state.tokens_itl = (
|
|
654
|
-
current_aggregator.requests_stats.inter_token_latency.mean_ms
|
|
655
|
-
)
|
|
656
|
-
|
|
657
|
-
def handle_update_benchmark_compiled(
|
|
667
|
+
if request_concurrency is not None:
|
|
668
|
+
self.request_concurrency = request_concurrency
|
|
669
|
+
if requests_per_second is not None:
|
|
670
|
+
self.requests_per_second = requests_per_second
|
|
671
|
+
if request_latency is not None:
|
|
672
|
+
self.request_latency = request_latency
|
|
673
|
+
|
|
674
|
+
def _update_token_stats(
|
|
658
675
|
self,
|
|
659
|
-
|
|
660
|
-
|
|
676
|
+
output_tokens: float | None = None,
|
|
677
|
+
output_tokens_rate: float | None = None,
|
|
678
|
+
prompt_tokens: float | None = None,
|
|
679
|
+
total_tokens_rate: float | None = None,
|
|
680
|
+
time_to_first_token: float | None = None,
|
|
681
|
+
inter_token_latency: float | None = None,
|
|
682
|
+
converted: bool = False,
|
|
661
683
|
):
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
|
|
678
|
-
current_benchmark.metrics.output_tokens_per_second.successful.mean
|
|
679
|
-
)
|
|
680
|
-
progress_state.total_tokens_rate = (
|
|
681
|
-
current_benchmark.metrics.tokens_per_second.successful.mean
|
|
682
|
-
)
|
|
683
|
-
progress_state.tokens_ttft = (
|
|
684
|
-
current_benchmark.metrics.time_to_first_token_ms.successful.mean
|
|
685
|
-
)
|
|
686
|
-
progress_state.tokens_itl = (
|
|
687
|
-
current_benchmark.metrics.inter_token_latency_ms.successful.mean
|
|
688
|
-
)
|
|
684
|
+
if output_tokens is not None:
|
|
685
|
+
self.output_tokens = output_tokens
|
|
686
|
+
if output_tokens_rate is not None:
|
|
687
|
+
self.output_tokens_rate = output_tokens_rate
|
|
688
|
+
if prompt_tokens is not None:
|
|
689
|
+
self.prompt_tokens = prompt_tokens
|
|
690
|
+
if total_tokens_rate is not None:
|
|
691
|
+
self.total_tokens_rate = total_tokens_rate
|
|
692
|
+
if time_to_first_token is not None:
|
|
693
|
+
self.time_to_first_token = time_to_first_token * (
|
|
694
|
+
1000 if not converted else 1
|
|
695
|
+
)
|
|
696
|
+
if inter_token_latency is not None:
|
|
697
|
+
self.inter_token_latency = inter_token_latency * (
|
|
698
|
+
1000 if not converted else 1
|
|
699
|
+
)
|
|
689
700
|
|
|
690
|
-
def
|
|
701
|
+
def _update_system_stats(
|
|
691
702
|
self,
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
)
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
if not self.display_scheduler_stats:
|
|
708
|
-
columns += [
|
|
709
|
-
TextColumn(
|
|
710
|
-
"{task.fields[requests_summary]}\n{task.fields[tokens_summary]}",
|
|
711
|
-
),
|
|
712
|
-
]
|
|
713
|
-
else:
|
|
714
|
-
columns += [
|
|
715
|
-
TextColumn(
|
|
716
|
-
"{task.fields[requests_summary]}\n{task.fields[tokens_summary]}\n{task.fields[scheduler_stats]}",
|
|
717
|
-
),
|
|
718
|
-
]
|
|
719
|
-
|
|
720
|
-
return columns
|
|
703
|
+
request_targeted_start_delay: float | None = None,
|
|
704
|
+
queued_time: float | None = None,
|
|
705
|
+
scheduler_overheads_time: float | None = None,
|
|
706
|
+
converted: bool = False,
|
|
707
|
+
):
|
|
708
|
+
if request_targeted_start_delay is not None:
|
|
709
|
+
self.request_targeted_start_delay = request_targeted_start_delay * (
|
|
710
|
+
1000 if not converted else 1
|
|
711
|
+
)
|
|
712
|
+
if queued_time is not None:
|
|
713
|
+
self.queued_time = queued_time * (1000 if not converted else 1)
|
|
714
|
+
if scheduler_overheads_time is not None:
|
|
715
|
+
self.scheduler_overheads_time = scheduler_overheads_time * (
|
|
716
|
+
1000 if not converted else 1
|
|
717
|
+
)
|