guidellm 0.4.0a21__py3-none-any.whl → 0.4.0a169__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of guidellm might be problematic. Click here for more details.

Files changed (115)
  1. guidellm/__init__.py +5 -2
  2. guidellm/__main__.py +452 -252
  3. guidellm/backends/__init__.py +33 -0
  4. guidellm/backends/backend.py +110 -0
  5. guidellm/backends/openai.py +355 -0
  6. guidellm/backends/response_handlers.py +455 -0
  7. guidellm/benchmark/__init__.py +53 -39
  8. guidellm/benchmark/benchmarker.py +150 -317
  9. guidellm/benchmark/entrypoints.py +467 -128
  10. guidellm/benchmark/output.py +519 -771
  11. guidellm/benchmark/profile.py +580 -280
  12. guidellm/benchmark/progress.py +568 -549
  13. guidellm/benchmark/scenarios/__init__.py +40 -0
  14. guidellm/benchmark/scenarios/chat.json +6 -0
  15. guidellm/benchmark/scenarios/rag.json +6 -0
  16. guidellm/benchmark/schemas.py +2086 -0
  17. guidellm/data/__init__.py +28 -4
  18. guidellm/data/collators.py +16 -0
  19. guidellm/data/deserializers/__init__.py +53 -0
  20. guidellm/data/deserializers/deserializer.py +144 -0
  21. guidellm/data/deserializers/file.py +222 -0
  22. guidellm/data/deserializers/huggingface.py +94 -0
  23. guidellm/data/deserializers/memory.py +194 -0
  24. guidellm/data/deserializers/synthetic.py +348 -0
  25. guidellm/data/loaders.py +149 -0
  26. guidellm/data/preprocessors/__init__.py +25 -0
  27. guidellm/data/preprocessors/formatters.py +404 -0
  28. guidellm/data/preprocessors/mappers.py +198 -0
  29. guidellm/data/preprocessors/preprocessor.py +31 -0
  30. guidellm/data/processor.py +31 -0
  31. guidellm/data/schemas.py +13 -0
  32. guidellm/data/utils/__init__.py +6 -0
  33. guidellm/data/utils/dataset.py +94 -0
  34. guidellm/extras/__init__.py +4 -0
  35. guidellm/extras/audio.py +215 -0
  36. guidellm/extras/vision.py +242 -0
  37. guidellm/logger.py +2 -2
  38. guidellm/mock_server/__init__.py +8 -0
  39. guidellm/mock_server/config.py +84 -0
  40. guidellm/mock_server/handlers/__init__.py +17 -0
  41. guidellm/mock_server/handlers/chat_completions.py +280 -0
  42. guidellm/mock_server/handlers/completions.py +280 -0
  43. guidellm/mock_server/handlers/tokenizer.py +142 -0
  44. guidellm/mock_server/models.py +510 -0
  45. guidellm/mock_server/server.py +168 -0
  46. guidellm/mock_server/utils.py +302 -0
  47. guidellm/preprocess/dataset.py +23 -26
  48. guidellm/presentation/builder.py +2 -2
  49. guidellm/presentation/data_models.py +25 -21
  50. guidellm/presentation/injector.py +2 -3
  51. guidellm/scheduler/__init__.py +65 -26
  52. guidellm/scheduler/constraints.py +1035 -0
  53. guidellm/scheduler/environments.py +252 -0
  54. guidellm/scheduler/scheduler.py +140 -368
  55. guidellm/scheduler/schemas.py +272 -0
  56. guidellm/scheduler/strategies.py +519 -0
  57. guidellm/scheduler/worker.py +391 -420
  58. guidellm/scheduler/worker_group.py +707 -0
  59. guidellm/schemas/__init__.py +31 -0
  60. guidellm/schemas/info.py +159 -0
  61. guidellm/schemas/request.py +226 -0
  62. guidellm/schemas/response.py +119 -0
  63. guidellm/schemas/stats.py +228 -0
  64. guidellm/{config.py → settings.py} +32 -21
  65. guidellm/utils/__init__.py +95 -8
  66. guidellm/utils/auto_importer.py +98 -0
  67. guidellm/utils/cli.py +71 -2
  68. guidellm/utils/console.py +183 -0
  69. guidellm/utils/encoding.py +778 -0
  70. guidellm/utils/functions.py +134 -0
  71. guidellm/utils/hf_datasets.py +1 -2
  72. guidellm/utils/hf_transformers.py +4 -4
  73. guidellm/utils/imports.py +9 -0
  74. guidellm/utils/messaging.py +1118 -0
  75. guidellm/utils/mixins.py +115 -0
  76. guidellm/utils/pydantic_utils.py +411 -0
  77. guidellm/utils/random.py +3 -4
  78. guidellm/utils/registry.py +220 -0
  79. guidellm/utils/singleton.py +133 -0
  80. guidellm/{objects → utils}/statistics.py +341 -247
  81. guidellm/utils/synchronous.py +159 -0
  82. guidellm/utils/text.py +163 -50
  83. guidellm/utils/typing.py +41 -0
  84. guidellm/version.py +1 -1
  85. {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a169.dist-info}/METADATA +33 -10
  86. guidellm-0.4.0a169.dist-info/RECORD +95 -0
  87. guidellm/backend/__init__.py +0 -23
  88. guidellm/backend/backend.py +0 -259
  89. guidellm/backend/openai.py +0 -705
  90. guidellm/backend/response.py +0 -136
  91. guidellm/benchmark/aggregator.py +0 -760
  92. guidellm/benchmark/benchmark.py +0 -837
  93. guidellm/benchmark/scenario.py +0 -104
  94. guidellm/data/prideandprejudice.txt.gz +0 -0
  95. guidellm/dataset/__init__.py +0 -22
  96. guidellm/dataset/creator.py +0 -213
  97. guidellm/dataset/entrypoints.py +0 -42
  98. guidellm/dataset/file.py +0 -92
  99. guidellm/dataset/hf_datasets.py +0 -62
  100. guidellm/dataset/in_memory.py +0 -132
  101. guidellm/dataset/synthetic.py +0 -287
  102. guidellm/objects/__init__.py +0 -18
  103. guidellm/objects/pydantic.py +0 -89
  104. guidellm/request/__init__.py +0 -18
  105. guidellm/request/loader.py +0 -284
  106. guidellm/request/request.py +0 -79
  107. guidellm/request/types.py +0 -10
  108. guidellm/scheduler/queues.py +0 -25
  109. guidellm/scheduler/result.py +0 -155
  110. guidellm/scheduler/strategy.py +0 -495
  111. guidellm-0.4.0a21.dist-info/RECORD +0 -62
  112. {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a169.dist-info}/WHEEL +0 -0
  113. {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a169.dist-info}/entry_points.txt +0 -0
  114. {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a169.dist-info}/licenses/LICENSE +0 -0
  115. {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a169.dist-info}/top_level.txt +0 -0
@@ -1,8 +1,25 @@
1
- import math
2
- import time
1
+ """
2
+ Benchmark progress tracking and console display abstractions.
3
+
4
+ Provides progress tracking interfaces and implementations for monitoring benchmark
5
+ execution, displaying real-time statistics, and managing UI updates during
6
+ generative benchmarking operations.
7
+
8
+ Classes:
9
+ BenchmarkerProgress: Abstract base for benchmark progress tracking.
10
+ BenchmarkerProgressGroup: Composite progress handler for multiple instances.
11
+ GenerativeConsoleBenchmarkerProgress: Console-based progress display.
12
+
13
+ Type Variables:
14
+ BenchmarkT: Generic benchmark object type.
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ from abc import ABC, abstractmethod
3
20
  from dataclasses import dataclass
4
21
  from datetime import datetime
5
- from typing import Generic, Optional, TypeVar, Union
22
+ from typing import Any, Generic, Literal
6
23
 
7
24
  from rich.console import Group
8
25
  from rich.live import Live
@@ -10,7 +27,6 @@ from rich.panel import Panel
10
27
  from rich.progress import (
11
28
  BarColumn,
12
29
  Progress,
13
- ProgressColumn,
14
30
  SpinnerColumn,
15
31
  TaskID,
16
32
  TaskProgressColumn,
@@ -19,145 +35,392 @@ from rich.progress import (
19
35
  TimeRemainingColumn,
20
36
  )
21
37
 
22
- from guidellm.benchmark.aggregator import (
23
- BenchmarkAggregator,
24
- GenerativeBenchmarkAggregator,
38
+ from guidellm.benchmark.profile import Profile
39
+ from guidellm.benchmark.schemas import (
40
+ BenchmarkT,
41
+ EstimatedBenchmarkState,
42
+ GenerativeBenchmark,
25
43
  )
26
- from guidellm.benchmark.benchmark import Benchmark, GenerativeBenchmark
27
- from guidellm.benchmark.benchmarker import BenchmarkerResult
28
- from guidellm.scheduler import (
29
- SchedulingStrategy,
30
- StrategyType,
31
- strategy_display_str,
32
- )
33
- from guidellm.utils import Colors
44
+ from guidellm.scheduler import SchedulerState, SchedulingStrategy, StrategyType
45
+ from guidellm.utils import Colors, format_value_display
34
46
 
35
- __all__ = [
36
- "BenchmarkerProgressDisplay",
37
- "BenchmarkerTaskProgressState",
38
- "GenerativeTextBenchmarkerProgressDisplay",
39
- "GenerativeTextBenchmarkerTaskProgressState",
40
- ]
47
+ __all__ = ["BenchmarkerProgress", "GenerativeConsoleBenchmarkerProgress"]
41
48
 
42
49
 
43
- @dataclass
44
- class BenchmarkerTaskProgressState:
45
- display_scheduler_stats: bool
46
-
47
- task_id: TaskID
48
- strategy: Union[StrategyType, SchedulingStrategy]
49
- started: bool = False
50
- compiling: bool = False
51
- ended: bool = False
52
-
53
- start_time: Optional[float] = None
54
- max_number: Optional[float] = None
55
- max_duration: Optional[float] = None
56
- in_warmup: bool = False
57
- in_cooldown: bool = False
58
-
59
- requests_rate: float = 0
60
- request_latency: float = 0
61
- requests_processing: float = 0
62
- requests_successful: float = 0
63
- requests_incomplete: float = 0
64
- requests_errored: float = 0
50
+ class BenchmarkerProgress(Generic[BenchmarkT], ABC):
51
+ """
52
+ Abstract base class for tracking and displaying benchmark progress.
53
+
54
+ Provides lifecycle hooks for monitoring benchmark execution stages including
55
+ initialization, start, updates, completion, and finalization. Supports
56
+ enable/disable functionality for conditional progress tracking.
57
+ """
58
+
59
+ def __init__(self):
60
+ """
61
+ Initialize progress tracker.
62
+
63
+ Sets ``profile`` and ``current_strategy`` to ``None`` until tracking begins.
64
+ """
65
+ self.profile: Profile = None
66
+ self.current_strategy: SchedulingStrategy = None
67
+
68
+ @abstractmethod
69
+ async def on_initialize(self, profile: Profile):
70
+ """
71
+ Initialize progress tracking for benchmark profile.
72
+
73
+ :param profile: Benchmark profile configuration.
74
+ """
75
+
76
+ @abstractmethod
77
+ async def on_benchmark_start(self, strategy: SchedulingStrategy):
78
+ """
79
+ Handle start of new benchmark strategy execution.
80
+
81
+ :param strategy: Scheduling strategy being executed.
82
+ """
83
+
84
+ @abstractmethod
85
+ async def on_benchmark_update(
86
+ self, estimated_state: EstimatedBenchmarkState, scheduler_state: SchedulerState
87
+ ):
88
+ """
89
+ Handle benchmark execution progress update.
65
90
 
66
- worker_overheads_time_ms: float = 0.0
67
- backend_overheads_time_ms: float = 0.0
68
- requests_sleep_time_ms: float = 0.0
69
- requests_targeted_start_time_delay_ms: float = 0.0
91
+ :param estimated_state: Current benchmark metrics and statistics.
92
+ :param scheduler_state: Current scheduler execution state.
93
+ """
94
+
95
+ @abstractmethod
96
+ async def on_benchmark_complete(self, benchmark: BenchmarkT):
97
+ """
98
+ Handle completion of benchmark strategy execution.
99
+
100
+ :param benchmark: Completed benchmark results.
101
+ """
102
+
103
+ @abstractmethod
104
+ async def on_finalize(self):
105
+ """Finalize progress tracking and cleanup resources."""
106
+
107
+
108
+ class GenerativeConsoleBenchmarkerProgress(
109
+ BenchmarkerProgress[GenerativeBenchmark], Live
110
+ ):
111
+ """
112
+ Console-based progress display for generative benchmarks.
113
+
114
+ Provides real-time visual progress tracking using Rich library components,
115
+ displaying benchmark execution statistics, timing information, and progress
116
+ bars in a structured console interface.
117
+ """
118
+
119
+ def __init__(self, display_scheduler_stats: bool = False):
120
+ """
121
+ Initialize console progress display.
122
+
123
+ Configures the underlying Live display (4 refreshes per second, auto refresh,
124
+ :param display_scheduler_stats: Whether to display scheduler statistics.
125
+ """
126
+ BenchmarkerProgress.__init__(self)
127
+ Live.__init__(
128
+ self,
129
+ refresh_per_second=4,
130
+ auto_refresh=True,
131
+ redirect_stdout=True,
132
+ redirect_stderr=True,
133
+ )
134
+ self.display_scheduler_stats: bool = display_scheduler_stats
135
+ self.run_progress: Progress = None
136
+ self.run_progress_task: TaskID = None
137
+ self.tasks_progress: _GenerativeProgressTasks = None
138
+
139
+ async def on_initialize(self, profile: Profile):
140
+ """
141
+ Initialize console display components and start rendering.
142
+
143
+ :param profile: Benchmark profile configuration.
144
+ """
145
+ self.tasks_progress = _GenerativeProgressTasks(
146
+ profile=profile, display_scheduler_stats=self.display_scheduler_stats
147
+ )
148
+ self.run_progress = Progress(
149
+ TextColumn("Generating...", style=f"italic {Colors.progress}"),
150
+ BarColumn(
151
+ bar_width=None,
152
+ complete_style=Colors.progress,
153
+ finished_style=Colors.success,
154
+ ),
155
+ TextColumn(
156
+ "({task.fields[completed_benchmarks]}/{task.fields[total_benchmarks]})",
157
+ style=Colors.progress,
158
+ ),
159
+ TextColumn("["),
160
+ TimeElapsedColumn(),
161
+ TextColumn("<"),
162
+ TimeRemainingColumn(),
163
+ TextColumn("]"),
164
+ )
165
+ self.run_progress_task = self.run_progress.add_task("")
166
+ self._sync_run_progress()
167
+ self.update(
168
+ Group(
169
+ Panel(
170
+ self.tasks_progress,
171
+ title="Benchmarks",
172
+ title_align="left",
173
+ expand=True,
174
+ ),
175
+ self.run_progress,
176
+ )
177
+ )
178
+ self.start()
179
+
180
+ async def on_benchmark_start(self, strategy: SchedulingStrategy):
181
+ """
182
+ Update display for new benchmark strategy start.
183
+
184
+ :param strategy: Scheduling strategy being executed.
185
+ """
186
+ self.tasks_progress.start_benchmark(strategy)
187
+ self._sync_run_progress()
188
+
189
+ async def on_benchmark_update(
190
+ self,
191
+ aggregator_update: EstimatedBenchmarkState | None,
192
+ scheduler_state: SchedulerState,
193
+ ):
194
+ """
195
+ Update display with current benchmark progress.
196
+
197
+ :param aggregator_update: Current benchmark metrics and statistics.
198
+ :param scheduler_state: Current scheduler execution state.
199
+ """
200
+ self.tasks_progress.update_benchmark(aggregator_update, scheduler_state)
201
+ self._sync_run_progress()
202
+
203
+ async def on_benchmark_complete(self, benchmark: GenerativeBenchmark):
204
+ """
205
+ Update display for completed benchmark.
206
+
207
+ :param benchmark: Completed benchmark results.
208
+ """
209
+ self.tasks_progress.complete_benchmark(benchmark)
210
+ self._sync_run_progress()
211
+
212
+ async def on_finalize(self):
213
+ """Stop display rendering and cleanup resources."""
214
+ self.tasks_progress.finalize()
215
+ self._sync_run_progress()
216
+ self.run_progress.stop_task(self.run_progress_task)
217
+ self.stop()
218
+ self.run_progress = None
219
+ self.run_progress_task = None
220
+ self.tasks_progress = None
221
+
222
+ def _sync_run_progress(self):
223
+ """Synchronize overall progress display with task progress."""
224
+ self.run_progress.update(
225
+ self.run_progress_task,
226
+ total=self.tasks_progress.steps_total,
227
+ completed=self.tasks_progress.steps_progress,
228
+ completed_benchmarks=self.tasks_progress.tasks_progress,
229
+ total_benchmarks=self.tasks_progress.tasks_total,
230
+ )
231
+
232
+
233
+ # Scaling factor for progress calculations to provide granular progress updates
234
+ _PROGRESS_SCALE = 1000
235
+
236
+
237
+ class _GenerativeProgressTasks(Progress):
238
+ def __init__(self, profile: Profile, display_scheduler_stats: bool):
239
+ self.profile: Profile = profile
240
+ self.display_scheduler_stats: bool = display_scheduler_stats
241
+ self.benchmark_task_states: list[_GenerativeProgressTaskState] = []
242
+ self.current_index: int = -1
243
+
244
+ summary_text = "{task.fields[requests_summary]}\n{task.fields[tokens_summary]}"
245
+ if self.display_scheduler_stats:
246
+ summary_text += "\n{task.fields[scheduler_stats]}"
247
+ super().__init__(
248
+ TextColumn("[{task.fields[start_time]}]"),
249
+ SpinnerColumn(style=Colors.progress),
250
+ TaskProgressColumn(style=Colors.progress),
251
+ TextColumn("{task.description}"),
252
+ TextColumn("({task.fields[progress_status]})"),
253
+ TextColumn(" "),
254
+ TextColumn(summary_text),
255
+ )
256
+
257
+ for strategy_type in profile.strategy_types:
258
+ task_state = _GenerativeProgressTaskState(
259
+ strategy_type=strategy_type,
260
+ )
261
+ task_id = self.add_task(**task_state.current)
262
+ task_state.task_id = task_id
263
+ self.benchmark_task_states.append(task_state)
70
264
 
71
265
  @property
72
- def description(self) -> str:
73
- return strategy_display_str(self.strategy)
266
+ def tasks_total(self) -> int:
267
+ return len(self.benchmark_task_states)
74
268
 
75
269
  @property
76
- def total(self) -> Optional[float]:
77
- if self.max_number is None and self.max_duration is None:
78
- return None
270
+ def tasks_progress(self) -> int:
271
+ return self.current_index + 1
79
272
 
80
- return 1000
273
+ @property
274
+ def steps_total(self) -> int:
275
+ return _PROGRESS_SCALE * len(self.benchmark_task_states)
81
276
 
82
277
  @property
83
- def completed(self) -> int:
84
- if self.ended:
85
- return 1000
278
+ def steps_progress(self) -> int:
279
+ progress_current_task = (
280
+ self.benchmark_task_states[self.current_index].progress
281
+ if self.current_index < len(self.benchmark_task_states)
282
+ else 0
283
+ )
284
+ progress_total = self.current_index + (progress_current_task or 0)
86
285
 
87
- if self.max_number is None and self.max_duration is None:
88
- return 0
286
+ return progress_total * _PROGRESS_SCALE
287
+
288
+ def start_benchmark(self, strategy: SchedulingStrategy):
289
+ self.current_index += 1
290
+ if self.current_index >= len(self.benchmark_task_states):
291
+ # New task past initially estimated, append it to the end
292
+ task_state = _GenerativeProgressTaskState(strategy_type=strategy.type_)
293
+ task_id = self.add_task(**task_state.current)
294
+ task_state.task_id = task_id
295
+ self.benchmark_task_states.append(task_state)
89
296
 
90
- number = self.requests_successful + self.requests_errored
91
- number_percent = (
92
- number / float(self.max_number) * 1000 if self.max_number else -math.inf
297
+ self.benchmark_task_states[self.current_index].start(strategy)
298
+ self.update(
299
+ self.benchmark_task_states[self.current_index].task_id,
300
+ start=True,
301
+ **self.benchmark_task_states[self.current_index].current,
93
302
  )
94
- duration_percent = (
95
- (time.time() - self.start_time) / self.max_duration * 1000
96
- if self.max_duration and self.start_time
97
- else -math.inf
303
+
304
+ def update_benchmark(
305
+ self,
306
+ aggregator_update: EstimatedBenchmarkState,
307
+ scheduler_state: SchedulerState,
308
+ ):
309
+ self.benchmark_task_states[self.current_index].update(
310
+ aggregator_update, scheduler_state
311
+ )
312
+ self.update(
313
+ self.benchmark_task_states[self.current_index].task_id,
314
+ **self.benchmark_task_states[self.current_index].current,
315
+ )
316
+
317
+ def complete_benchmark(self, benchmark: GenerativeBenchmark):
318
+ self.benchmark_task_states[self.current_index].complete(benchmark)
319
+ self.update(
320
+ self.benchmark_task_states[self.current_index].task_id,
321
+ **self.benchmark_task_states[self.current_index].current,
98
322
  )
99
323
 
100
- return min(int(max(number_percent, duration_percent)), 1000)
324
+ def finalize(self):
325
+ self.stop()
326
+
327
+
328
+ @dataclass
329
+ class _GenerativeProgressTaskState:
330
+ strategy_type: StrategyType
331
+ task_id: TaskID = None
332
+ strategy: SchedulingStrategy | None = None
333
+ benchmark_status: Literal[
334
+ "pending", "in_warmup", "in_progress", "in_cooldown", "completed"
335
+ ] = "pending"
336
+ progress: float | None = None
337
+ start_time: float = -1.0
338
+ successful_requests: int = 0
339
+ cancelled_requests: int = 0
340
+ errored_requests: int = 0
341
+ request_concurrency: int = 0
342
+ requests_per_second: float = 0
343
+ request_latency: float = 0
344
+ output_tokens: int = 0
345
+ output_tokens_rate: float = 0
346
+ prompt_tokens: int = 0
347
+ total_tokens_rate: float = 0
348
+ time_to_first_token: float = 0
349
+ inter_token_latency: float = 0
350
+ queued_time: float = 0
351
+ request_targeted_start_delay: float = 0
352
+ scheduler_overheads_time: float = 0
101
353
 
102
354
  @property
103
- def fields(self) -> dict[str, str]:
104
- fields = {
355
+ def current(self) -> dict[str, Any]:
356
+ return {
105
357
  "start_time": self.formatted_start_time,
358
+ "description": str(self.strategy or self.strategy_type),
106
359
  "progress_status": self.formatted_progress_status,
107
360
  "requests_summary": self.formatted_requests_summary,
361
+ "tokens_summary": self.formatted_tokens_summary,
362
+ "scheduler_stats": self.formatted_scheduler_stats,
363
+ "completed": self.completed,
364
+ "total": self.total,
108
365
  }
109
366
 
110
- if self.display_scheduler_stats:
111
- fields["scheduler_stats"] = self.formatted_scheduler_stats
367
+ @property
368
+ def completed(self) -> float:
369
+ if self.benchmark_status == "pending":
370
+ return 0
371
+
372
+ if self.benchmark_status == "completed":
373
+ return _PROGRESS_SCALE
112
374
 
113
- return fields
375
+ return self.progress * _PROGRESS_SCALE if self.progress is not None else None
376
+
377
+ @property
378
+ def total(self) -> float:
379
+ return _PROGRESS_SCALE
114
380
 
115
381
  @property
116
382
  def formatted_start_time(self) -> str:
117
- if self.start_time is None:
383
+ if self.start_time < 0.0:
118
384
  return "--:--:--"
119
385
 
120
386
  return datetime.fromtimestamp(self.start_time).strftime("%H:%M:%S")
121
387
 
122
388
  @property
123
389
  def formatted_progress_status(self) -> str:
124
- if self.ended:
125
- status = "complete"
126
- color = Colors.SUCCESS
127
- elif self.compiling:
128
- status = "compiling"
129
- color = Colors.PROGRESS
130
- elif self.started and self.in_warmup:
390
+ if self.benchmark_status == "in_warmup":
131
391
  status = "warmup"
132
- color = Colors.PROGRESS
133
- elif self.started and self.in_cooldown:
134
- status = "cooldown"
135
- color = Colors.PROGRESS
136
- elif self.started:
392
+ color = Colors.progress
393
+ elif self.benchmark_status == "in_progress":
137
394
  status = "running"
138
- color = Colors.PROGRESS
395
+ color = Colors.progress
396
+ elif self.benchmark_status == "in_cooldown":
397
+ status = "cooldown"
398
+ color = Colors.progress
399
+ elif self.benchmark_status == "completed":
400
+ status = "complete"
401
+ color = Colors.success
139
402
  else:
140
403
  status = "pending"
141
- color = Colors.INFO
404
+ color = Colors.info
142
405
 
143
406
  return f"[{color}]{status.ljust(8)}[/{color}]"
144
407
 
145
408
  @property
146
409
  def formatted_requests_summary(self) -> str:
147
- if not self.started:
410
+ if self.benchmark_status == "pending":
148
411
  return " "
149
412
 
150
413
  return (
151
- f"[{Colors.INFO}]Req:[/{Colors.INFO}] "
152
- + BenchmarkerTaskProgressState.format_progress_display(
153
- value=self.requests_rate,
414
+ f"[{Colors.info}]Req:[/{Colors.info}] "
415
+ + format_value_display(
416
+ value=self.requests_per_second,
154
417
  label="req/s",
155
418
  total_characters=12,
156
419
  digits_places=4,
157
420
  decimal_places=1,
158
421
  )
159
422
  + ", "
160
- + BenchmarkerTaskProgressState.format_progress_display(
423
+ + format_value_display(
161
424
  value=self.request_latency,
162
425
  label="Lat",
163
426
  units="s",
@@ -166,32 +429,32 @@ class BenchmarkerTaskProgressState:
166
429
  decimal_places=2,
167
430
  )
168
431
  + ", "
169
- + BenchmarkerTaskProgressState.format_progress_display(
170
- value=self.requests_processing,
432
+ + format_value_display(
433
+ value=self.request_concurrency,
171
434
  label="Conc",
172
435
  total_characters=12,
173
436
  digits_places=4,
174
437
  decimal_places=1,
175
438
  )
176
439
  + ", "
177
- + BenchmarkerTaskProgressState.format_progress_display(
178
- value=self.requests_successful,
440
+ + format_value_display(
441
+ value=self.successful_requests,
179
442
  label="Comp",
180
443
  total_characters=12,
181
444
  digits_places=5,
182
445
  decimal_places=0,
183
446
  )
184
447
  + ", "
185
- + BenchmarkerTaskProgressState.format_progress_display(
186
- value=self.requests_incomplete,
448
+ + format_value_display(
449
+ value=self.cancelled_requests,
187
450
  label="Inc",
188
451
  total_characters=12,
189
452
  digits_places=5,
190
453
  decimal_places=0,
191
454
  )
192
455
  + ", "
193
- + BenchmarkerTaskProgressState.format_progress_display(
194
- value=self.requests_errored,
456
+ + format_value_display(
457
+ value=self.errored_requests,
195
458
  label="Err",
196
459
  total_characters=12,
197
460
  digits_places=5,
@@ -199,101 +462,14 @@ class BenchmarkerTaskProgressState:
199
462
  )
200
463
  )
201
464
 
202
- @property
203
- def formatted_scheduler_stats(self) -> str:
204
- if not self.started:
205
- return " "
206
-
207
- return (
208
- f"[{Colors.INFO}]Sys:[/{Colors.INFO}] "
209
- + BenchmarkerTaskProgressState.format_progress_display(
210
- value=self.worker_overheads_time_ms,
211
- label="Work OH",
212
- units="ms",
213
- total_characters=18,
214
- digits_places=3,
215
- decimal_places=1,
216
- )
217
- + ", "
218
- + BenchmarkerTaskProgressState.format_progress_display(
219
- value=self.backend_overheads_time_ms,
220
- label="Back OH",
221
- units="ms",
222
- total_characters=18,
223
- digits_places=3,
224
- decimal_places=1,
225
- )
226
- + ", "
227
- + BenchmarkerTaskProgressState.format_progress_display(
228
- value=self.requests_sleep_time_ms,
229
- label="Req Sleep",
230
- units="ms",
231
- total_characters=18,
232
- digits_places=5,
233
- decimal_places=0,
234
- )
235
- + ", "
236
- + BenchmarkerTaskProgressState.format_progress_display(
237
- value=self.requests_targeted_start_time_delay_ms,
238
- label="Start Del",
239
- units="ms",
240
- total_characters=18,
241
- digits_places=5,
242
- decimal_places=0,
243
- )
244
- )
245
-
246
- @staticmethod
247
- def format_progress_display(
248
- value: float,
249
- label: str,
250
- units: str = "",
251
- total_characters: Optional[int] = None,
252
- digits_places: Optional[int] = None,
253
- decimal_places: Optional[int] = None,
254
- ) -> str:
255
- if decimal_places is None and digits_places is None:
256
- formatted_number = f"{value}:.0f"
257
- elif digits_places is None:
258
- formatted_number = f"{value:.{decimal_places}f}"
259
- elif decimal_places is None:
260
- formatted_number = f"{value:>{digits_places}f}"
261
- else:
262
- formatted_number = f"{value:>{digits_places}.{decimal_places}f}"
263
-
264
- result = f"{formatted_number}{units} [{Colors.INFO}]{label}[/{Colors.INFO}]"
265
-
266
- if total_characters is not None:
267
- total_characters += len(Colors.INFO) * 2 + 5
268
-
269
- if len(result) < total_characters:
270
- result = result.rjust(total_characters)
271
-
272
- return result
273
-
274
-
275
- class GenerativeTextBenchmarkerTaskProgressState(BenchmarkerTaskProgressState):
276
- output_tokens: float = 0
277
- prompt_tokens: float = 0
278
- output_tokens_rate: float = 0
279
- total_tokens_rate: float = 0
280
- tokens_ttft: float = 0
281
- tokens_itl: float = 0
282
-
283
- @property
284
- def fields(self) -> dict[str, str]:
285
- fields = super().fields
286
- fields["tokens_summary"] = self.formatted_tokens_summary
287
- return fields
288
-
289
465
  @property
290
466
  def formatted_tokens_summary(self) -> str:
291
- if not self.started:
467
+ if self.benchmark_status == "pending":
292
468
  return " "
293
469
 
294
470
  return (
295
- f"[{Colors.INFO}]Tok:[/{Colors.INFO}] "
296
- + BenchmarkerTaskProgressState.format_progress_display(
471
+ f"[{Colors.info}]Tok:[/{Colors.info}] "
472
+ + format_value_display(
297
473
  value=self.output_tokens_rate,
298
474
  label="gen/s",
299
475
  total_characters=12,
@@ -301,7 +477,7 @@ class GenerativeTextBenchmarkerTaskProgressState(BenchmarkerTaskProgressState):
301
477
  decimal_places=1,
302
478
  )
303
479
  + ", "
304
- + BenchmarkerTaskProgressState.format_progress_display(
480
+ + format_value_display(
305
481
  value=self.total_tokens_rate,
306
482
  label="tot/s",
307
483
  total_characters=12,
@@ -309,8 +485,8 @@ class GenerativeTextBenchmarkerTaskProgressState(BenchmarkerTaskProgressState):
309
485
  decimal_places=1,
310
486
  )
311
487
  + ", "
312
- + BenchmarkerTaskProgressState.format_progress_display(
313
- value=self.tokens_ttft,
488
+ + format_value_display(
489
+ value=self.time_to_first_token,
314
490
  label="TTFT",
315
491
  units="ms",
316
492
  total_characters=12,
@@ -318,8 +494,8 @@ class GenerativeTextBenchmarkerTaskProgressState(BenchmarkerTaskProgressState):
318
494
  decimal_places=1,
319
495
  )
320
496
  + ", "
321
- + BenchmarkerTaskProgressState.format_progress_display(
322
- value=self.tokens_itl,
497
+ + format_value_display(
498
+ value=self.inter_token_latency,
323
499
  label="ITL",
324
500
  units="ms",
325
501
  total_characters=12,
@@ -327,7 +503,7 @@ class GenerativeTextBenchmarkerTaskProgressState(BenchmarkerTaskProgressState):
327
503
  decimal_places=1,
328
504
  )
329
505
  + ", "
330
- + BenchmarkerTaskProgressState.format_progress_display(
506
+ + format_value_display(
331
507
  value=self.prompt_tokens,
332
508
  label="Prompt",
333
509
  total_characters=12,
@@ -335,7 +511,7 @@ class GenerativeTextBenchmarkerTaskProgressState(BenchmarkerTaskProgressState):
335
511
  decimal_places=0,
336
512
  )
337
513
  + ", "
338
- + BenchmarkerTaskProgressState.format_progress_display(
514
+ + format_value_display(
339
515
  value=self.output_tokens,
340
516
  label="Gen",
341
517
  total_characters=12,
@@ -344,377 +520,220 @@ class GenerativeTextBenchmarkerTaskProgressState(BenchmarkerTaskProgressState):
344
520
  )
345
521
  )
346
522
 
523
+ @property
524
+ def formatted_scheduler_stats(self) -> str:
525
+ if self.benchmark_status == "pending":
526
+ return " "
347
527
 
348
- BTPS = TypeVar("BTPS", bound=BenchmarkerTaskProgressState)
349
-
350
-
351
- class BenchmarkerProgressDisplay(Generic[BTPS]):
352
- def __init__(self, display_scheduler_stats: bool):
353
- self.display_scheduler_stats = display_scheduler_stats
354
- self.started = False
355
- self.benchmarker_tasks_progress = Progress(*self.create_task_progress_columns())
356
- self.benchmarker_tasks_panel = Panel(
357
- self.benchmarker_tasks_progress,
358
- title="Benchmarks",
359
- title_align="left",
360
- expand=True,
361
- )
362
- self.benchmarker_progress = Progress(
363
- TextColumn("Generating...", style=f"italic {Colors.PROGRESS}"),
364
- BarColumn(
365
- bar_width=None,
366
- complete_style=Colors.PROGRESS,
367
- finished_style=Colors.SUCCESS,
368
- ),
369
- TextColumn(
370
- "({task.fields[completed_benchmarks]}/{task.fields[total_benchmarks]})",
371
- style=Colors.PROGRESS,
372
- ),
373
- TextColumn("["),
374
- TimeElapsedColumn(),
375
- TextColumn("<"),
376
- TimeRemainingColumn(),
377
- TextColumn("]"),
378
- )
379
- self.benchmarker_live = Live(
380
- Group(
381
- self.benchmarker_tasks_panel,
382
- self.benchmarker_progress,
383
- ),
384
- redirect_stdout=True,
385
- redirect_stderr=True,
386
- )
387
- self.active_task: Optional[TaskID] = None
388
- self.benchmarker_tasks: list[BTPS] = []
389
- self.progress_task: Optional[TaskID] = None
390
-
391
- def update(self, result: BenchmarkerResult):
392
- if result.type_ == "run_start":
393
- if self.started:
394
- raise RuntimeError("Progress display already started.")
395
-
396
- self.handle_start(result)
397
- self.started = True
398
- elif result.type_ == "run_complete":
399
- if not self.started:
400
- raise RuntimeError("Progress display not started.")
401
-
402
- self.handle_end(result)
403
- self.started = False
404
- else:
405
- if not self.started:
406
- raise RuntimeError("Progress display not started.")
407
-
408
- self.handle_update(result)
409
-
410
- def handle_start(self, result: BenchmarkerResult):
411
- self.benchmarker_live.start()
412
-
413
- for index, strategy_type in enumerate(result.profile.strategy_types):
414
- task_id = self.benchmarker_tasks_progress.add_task(
415
- description=strategy_type,
416
- start=False,
417
- total=None,
418
- completed=0,
419
- visible=False,
528
+ return (
529
+ f"[{Colors.info}]Sys:[/{Colors.info}] , "
530
+ + format_value_display(
531
+ value=self.request_targeted_start_delay,
532
+ label="Start Del",
533
+ units="ms",
534
+ total_characters=18,
535
+ digits_places=5,
536
+ decimal_places=0,
420
537
  )
421
- task_progress_state = self.create_task_progress_state(
422
- task_id=task_id,
423
- index=index,
424
- strategy_type=strategy_type,
425
- result=result,
538
+ + format_value_display(
539
+ value=self.scheduler_overheads_time,
540
+ label="Sched OH",
541
+ units="ms",
542
+ total_characters=18,
543
+ digits_places=3,
544
+ decimal_places=1,
426
545
  )
427
- self.benchmarker_tasks.append(task_progress_state)
428
- self.benchmarker_tasks_progress.update(
429
- task_id,
430
- description=task_progress_state.description,
431
- visible=True,
432
- **task_progress_state.fields, # type: ignore[arg-type]
546
+ + ", "
547
+ + format_value_display(
548
+ value=self.queued_time,
549
+ label="Queued",
550
+ units="ms",
551
+ total_characters=18,
552
+ digits_places=5,
553
+ decimal_places=0,
433
554
  )
434
-
435
- self.progress_task = self.benchmarker_progress.add_task(
436
- "",
437
- total=len(self.benchmarker_tasks) * 1000,
438
- completed_benchmarks=0,
439
- total_benchmarks=len(self.benchmarker_tasks),
440
- )
441
-
442
- def handle_update(self, result: BenchmarkerResult):
443
- current_state: BTPS = self.benchmarker_tasks[result.current_index]
444
-
445
- if result.type_ == "scheduler_start":
446
- self.handle_update_scheduler_start(current_state, result)
447
- self.active_task = current_state.task_id
448
- elif result.type_ == "scheduler_update":
449
- self.handle_update_scheduler_update(current_state, result)
450
- elif result.type_ == "scheduler_complete":
451
- self.handle_update_scheduler_complete(current_state, result)
452
- elif result.type_ == "benchmark_compiled":
453
- self.handle_update_benchmark_compiled(current_state, result)
454
- else:
455
- raise ValueError(f"Unknown result type: {result.type_}")
456
-
457
- if self.progress_task is None:
458
- raise RuntimeError("Progress task not set.")
459
-
460
- self.benchmarker_tasks_progress.update(
461
- current_state.task_id,
462
- description=current_state.description,
463
- completed=current_state.completed,
464
- total=current_state.total,
465
- **current_state.fields, # type: ignore[arg-type]
466
- )
467
- self.benchmarker_progress.update(
468
- self.progress_task,
469
- completed=(result.current_index * 1000) + current_state.completed,
470
- total=1000 * len(self.benchmarker_tasks),
471
- completed_benchmarks=(
472
- result.current_index + (1 if current_state.ended else 0)
473
- ),
474
- total_benchmarks=len(self.benchmarker_tasks),
475
- )
476
-
477
- if current_state.ended:
478
- self.benchmarker_tasks_progress.stop_task(current_state.task_id)
479
- self.active_task = None
480
-
481
- def handle_update_scheduler_start(
482
- self, progress_state: BTPS, result: BenchmarkerResult
483
- ):
484
- if self.active_task is not None:
485
- raise RuntimeError("Active task already set.")
486
-
487
- progress_state.strategy = result.current_strategy # type: ignore[assignment]
488
- progress_state.started = True
489
- current_aggregator: BenchmarkAggregator = result.current_aggregator # type: ignore[assignment]
490
- progress_state.start_time = (
491
- current_aggregator.requests_stats.totals.total.start_time
492
555
  )
493
- progress_state.max_number = current_aggregator.args.max_number
494
- progress_state.max_duration = current_aggregator.args.max_duration
495
-
496
- def handle_update_scheduler_update(
497
- self, progress_state: BTPS, result: BenchmarkerResult
498
- ):
499
- if self.active_task is None:
500
- raise RuntimeError("Active task not set.")
501
556
 
502
- if self.active_task != progress_state.task_id:
503
- raise RuntimeError("Active task does not match current task.")
557
+ def start(self, strategy: SchedulingStrategy):
558
+ self.strategy = strategy
559
+ self.strategy_type = strategy.type_
504
560
 
505
- current_aggregator: BenchmarkAggregator = result.current_aggregator # type: ignore[assignment]
506
- progress_state.in_warmup = current_aggregator.in_warmup
507
- progress_state.in_cooldown = current_aggregator.in_cooldown
508
- progress_state.requests_rate = (
509
- current_aggregator.requests_stats.totals.successful.rate
510
- )
511
- progress_state.request_latency = (
512
- current_aggregator.requests_stats.request_time.mean
513
- )
514
- progress_state.requests_processing = (
515
- current_aggregator.scheduler_stats.processing_requests.last
516
- )
517
- progress_state.requests_successful = (
518
- current_aggregator.requests_stats.totals.successful.total
519
- )
520
- progress_state.requests_incomplete = (
521
- current_aggregator.requests_stats.totals.incomplete.total
522
- )
523
- progress_state.requests_errored = (
524
- current_aggregator.requests_stats.totals.errored.total
525
- )
526
- progress_state.worker_overheads_time_ms = (
527
- current_aggregator.requests_stats.scheduled_time_delay.mean_ms
528
- + current_aggregator.requests_stats.worker_start_delay.mean_ms
529
- )
530
- progress_state.backend_overheads_time_ms = (
531
- current_aggregator.requests_stats.request_time_delay.mean_ms
532
- )
533
- progress_state.requests_sleep_time_ms = (
534
- current_aggregator.requests_stats.scheduled_time_sleep.mean_ms
535
- )
536
- progress_state.requests_targeted_start_time_delay_ms = (
537
- current_aggregator.requests_stats.request_start_time_targeted_delay.mean_ms
538
- )
539
-
540
- def handle_update_scheduler_complete(
561
+ def update(
541
562
  self,
542
- progress_state: BTPS,
543
- result: BenchmarkerResult, # noqa: ARG002
544
- ):
545
- if self.active_task is None:
546
- raise RuntimeError("Active task not set.")
547
-
548
- if self.active_task != progress_state.task_id:
549
- raise RuntimeError("Active task does not match current task.")
550
-
551
- progress_state.in_warmup = False
552
- progress_state.in_cooldown = False
553
- progress_state.compiling = True
554
-
555
- def handle_update_benchmark_compiled(
556
- self, progress_state: BTPS, result: BenchmarkerResult
563
+ estimated_state: EstimatedBenchmarkState,
564
+ scheduler_state: SchedulerState,
557
565
  ):
558
- if self.active_task is None:
559
- raise RuntimeError("Active task not set.")
560
-
561
- if self.active_task != progress_state.task_id:
562
- raise RuntimeError("Active task does not match current task.")
563
-
564
- current_benchmark: Benchmark = result.current_benchmark # type: ignore[assignment]
565
- progress_state.compiling = False
566
- progress_state.ended = True
567
- progress_state.requests_rate = (
568
- current_benchmark.metrics.requests_per_second.successful.mean
569
- )
570
- progress_state.requests_processing = (
571
- current_benchmark.metrics.request_concurrency.successful.mean
566
+ self.progress = (
567
+ (1.0 - scheduler_state.remaining_fraction)
568
+ if scheduler_state.remaining_fraction is not None
569
+ else 0.0
570
+ )
571
+ self._update_processing_states(
572
+ benchmark_status=estimated_state.get_metric(
573
+ group=EstimatedBenchmarkState.benchmark_state_group,
574
+ key="status",
575
+ default=None,
576
+ ),
577
+ start_time=scheduler_state.start_time,
578
+ successful_requests=scheduler_state.successful_requests,
579
+ cancelled_requests=scheduler_state.cancelled_requests,
580
+ errored_requests=scheduler_state.errored_requests,
581
+ )
582
+ self._update_request_stats(
583
+ request_concurrency=estimated_state.get_metric(
584
+ group=EstimatedBenchmarkState.benchmark_metrics_group,
585
+ key="concurrency_requests",
586
+ ),
587
+ requests_per_second=estimated_state.get_metric(
588
+ group=EstimatedBenchmarkState.benchmark_metrics_group,
589
+ key="completed_requests_per_second",
590
+ ),
591
+ request_latency=estimated_state.get_metric(
592
+ group=EstimatedBenchmarkState.benchmark_metrics_group,
593
+ key="completed_request_latency",
594
+ ),
572
595
  )
573
-
574
- def handle_end(self, result: BenchmarkerResult): # noqa: ARG002
575
- if self.progress_task is None:
576
- raise RuntimeError("Progress task not set.")
577
-
578
- self.benchmarker_progress.update(
579
- self.progress_task,
580
- completed=len(self.benchmarker_tasks) * 1000,
581
- total=len(self.benchmarker_tasks) * 1000,
582
- completed_benchmarks=len(self.benchmarker_tasks),
583
- total_benchmarks=len(self.benchmarker_tasks),
596
+ self._update_token_stats(
597
+ output_tokens=estimated_state.get_metric(
598
+ group=EstimatedBenchmarkState.benchmark_metrics_group,
599
+ key="completed_output_tokens_total",
600
+ ),
601
+ output_tokens_rate=estimated_state.get_metric(
602
+ group=EstimatedBenchmarkState.benchmark_metrics_group,
603
+ key="completed_output_tokens",
604
+ ),
605
+ prompt_tokens=estimated_state.get_metric(
606
+ group=EstimatedBenchmarkState.benchmark_metrics_group,
607
+ key="completed_input_tokens_total",
608
+ ),
609
+ total_tokens_rate=estimated_state.get_metric(
610
+ group=EstimatedBenchmarkState.benchmark_metrics_group,
611
+ key="completed_total_tokens",
612
+ ),
613
+ time_to_first_token=estimated_state.get_metric(
614
+ group=EstimatedBenchmarkState.benchmark_metrics_group,
615
+ key="completed_time_to_first_token",
616
+ ),
617
+ inter_token_latency=estimated_state.get_metric(
618
+ group=EstimatedBenchmarkState.benchmark_metrics_group,
619
+ key="completed_inter_token_latency",
620
+ ),
584
621
  )
585
- self.benchmarker_progress.stop_task(self.progress_task)
586
- self.benchmarker_live.stop()
587
- self.active_task = None
588
- self.benchmarker_tasks = []
589
- self.progress_task = None
590
-
591
- def create_task_progress_columns(self) -> list[ProgressColumn]:
592
- columns = [
593
- TextColumn("[{task.fields[start_time]}]"),
594
- SpinnerColumn(style=Colors.PROGRESS),
595
- TaskProgressColumn(style=Colors.PROGRESS),
596
- TextColumn("{task.description}"),
597
- TextColumn("({task.fields[progress_status]})"),
598
- TextColumn(" "),
599
- ]
600
-
601
- if not self.display_scheduler_stats:
602
- columns += [
603
- TextColumn("{task.fields[requests_summary]}\n"),
604
- ]
605
- else:
606
- columns += [
607
- TextColumn(
608
- "{task.fields[requests_summary]}\n{task.fields[scheduler_stats]}\n"
622
+ if estimated_state.get("updated_scheduler_stats"):
623
+ self._update_system_stats(
624
+ request_targeted_start_delay=estimated_state.get_metric(
625
+ group=EstimatedBenchmarkState.scheduler_state_group,
626
+ key="request_targeted_start_delay",
609
627
  ),
610
- ]
611
-
612
- return columns
628
+ queued_time=estimated_state.get_metric(
629
+ group=EstimatedBenchmarkState.scheduler_state_group,
630
+ key="queued_time",
631
+ ),
632
+ scheduler_overheads_time=0.0, # Need to add up metrics here
633
+ )
613
634
 
614
- def create_task_progress_state(
615
- self,
616
- task_id: TaskID,
617
- index: int, # noqa: ARG002
618
- strategy_type: StrategyType,
619
- result: BenchmarkerResult, # noqa: ARG002
620
- ) -> BTPS:
621
- return BenchmarkerTaskProgressState( # type: ignore[return-value]
622
- display_scheduler_stats=self.display_scheduler_stats,
623
- task_id=task_id,
624
- strategy=strategy_type,
635
+ def complete(self, benchmark: GenerativeBenchmark):
636
+ self._update_processing_states(
637
+ benchmark_status="completed",
638
+ start_time=benchmark.start_time,
639
+ successful_requests=benchmark.request_totals.successful,
640
+ cancelled_requests=benchmark.request_totals.incomplete,
641
+ errored_requests=benchmark.request_totals.errored,
642
+ )
643
+ self._update_request_stats(
644
+ request_concurrency=benchmark.metrics.request_concurrency.successful.mean,
645
+ requests_per_second=benchmark.metrics.requests_per_second.successful.mean,
646
+ request_latency=benchmark.metrics.request_latency.successful.mean,
647
+ )
648
+ self._update_token_stats(
649
+ output_tokens=benchmark.metrics.output_token_count.successful.mean,
650
+ output_tokens_rate=benchmark.metrics.output_tokens_per_second.successful.mean,
651
+ prompt_tokens=benchmark.metrics.prompt_token_count.successful.mean,
652
+ total_tokens_rate=benchmark.metrics.tokens_per_second.successful.mean,
653
+ time_to_first_token=(
654
+ benchmark.metrics.time_to_first_token_ms.successful.mean
655
+ ),
656
+ inter_token_latency=(
657
+ benchmark.metrics.inter_token_latency_ms.successful.mean
658
+ ),
659
+ converted=True,
625
660
  )
626
661
 
627
-
628
- class GenerativeTextBenchmarkerProgressDisplay(
629
- BenchmarkerProgressDisplay[GenerativeTextBenchmarkerTaskProgressState]
630
- ):
631
- def handle_update_scheduler_update(
662
+ def _update_processing_states(
632
663
  self,
633
- progress_state: GenerativeTextBenchmarkerTaskProgressState,
634
- result: BenchmarkerResult,
664
+ benchmark_status: Literal[
665
+ "pending", "in_warmup", "in_progress", "in_cooldown", "completed"
666
+ ],
667
+ start_time: float | None = None,
668
+ successful_requests: int | None = None,
669
+ cancelled_requests: int | None = None,
670
+ errored_requests: int | None = None,
635
671
  ):
636
- super().handle_update_scheduler_update(progress_state, result)
637
- current_aggregator: GenerativeBenchmarkAggregator = result.current_aggregator # type: ignore[assignment]
638
- progress_state.output_tokens = (
639
- current_aggregator.requests_stats.output_tokens.mean
640
- )
641
- progress_state.prompt_tokens = (
642
- current_aggregator.requests_stats.prompt_tokens.mean
643
- )
644
- progress_state.output_tokens_rate = (
645
- current_aggregator.requests_stats.output_tokens.rate
646
- )
647
- progress_state.total_tokens_rate = (
648
- current_aggregator.requests_stats.total_tokens.rate
649
- )
650
- progress_state.tokens_ttft = (
651
- current_aggregator.requests_stats.time_to_first_token.mean_ms
652
- )
653
- progress_state.tokens_itl = (
654
- current_aggregator.requests_stats.inter_token_latency.mean_ms
655
- )
656
-
657
- def handle_update_benchmark_compiled(
672
+ if benchmark_status is not None:
673
+ self.benchmark_status = benchmark_status
674
+ if start_time is not None:
675
+ self.start_time = start_time
676
+ if successful_requests is not None:
677
+ self.successful_requests = successful_requests
678
+ if cancelled_requests is not None:
679
+ self.cancelled_requests = cancelled_requests
680
+ if errored_requests is not None:
681
+ self.errored_requests = errored_requests
682
+
683
+ def _update_request_stats(
658
684
  self,
659
- progress_state: GenerativeTextBenchmarkerTaskProgressState,
660
- result: BenchmarkerResult,
685
+ request_concurrency: int | None = None,
686
+ requests_per_second: float | None = None,
687
+ request_latency: float | None = None,
661
688
  ):
662
- super().handle_update_benchmark_compiled(progress_state, result)
663
-
664
- current_benchmark: GenerativeBenchmark = result.current_benchmark # type: ignore[assignment]
665
- progress_state.request_latency = (
666
- current_benchmark.metrics.request_latency.successful.mean
667
- )
668
- progress_state.requests_successful = current_benchmark.request_totals.successful
669
- progress_state.requests_errored = current_benchmark.request_totals.errored
670
- progress_state.requests_incomplete = current_benchmark.request_totals.incomplete
671
- progress_state.output_tokens = (
672
- current_benchmark.metrics.output_token_count.successful.mean
673
- )
674
- progress_state.prompt_tokens = (
675
- current_benchmark.metrics.prompt_token_count.successful.mean
676
- )
677
- progress_state.output_tokens_rate = (
678
- current_benchmark.metrics.output_tokens_per_second.successful.mean
679
- )
680
- progress_state.total_tokens_rate = (
681
- current_benchmark.metrics.tokens_per_second.successful.mean
682
- )
683
- progress_state.tokens_ttft = (
684
- current_benchmark.metrics.time_to_first_token_ms.successful.mean
685
- )
686
- progress_state.tokens_itl = (
687
- current_benchmark.metrics.inter_token_latency_ms.successful.mean
688
- )
689
-
690
- def create_task_progress_state(
689
+ if request_concurrency is not None:
690
+ self.request_concurrency = request_concurrency
691
+ if requests_per_second is not None:
692
+ self.requests_per_second = requests_per_second
693
+ if request_latency is not None:
694
+ self.request_latency = request_latency
695
+
696
+ def _update_token_stats(
691
697
  self,
692
- task_id: TaskID,
693
- index: int, # noqa: ARG002
694
- strategy_type: StrategyType,
695
- result: BenchmarkerResult, # noqa: ARG002
696
- ) -> GenerativeTextBenchmarkerTaskProgressState:
697
- return GenerativeTextBenchmarkerTaskProgressState(
698
- display_scheduler_stats=self.display_scheduler_stats,
699
- task_id=task_id,
700
- strategy=strategy_type,
701
- )
702
-
703
- def create_task_progress_columns(self) -> list[ProgressColumn]:
704
- columns = super().create_task_progress_columns()
705
- columns = columns[:-1] # remove the last display info column
706
-
707
- if not self.display_scheduler_stats:
708
- columns += [
709
- TextColumn(
710
- "{task.fields[requests_summary]}\n{task.fields[tokens_summary]}",
711
- ),
712
- ]
713
- else:
714
- columns += [
715
- TextColumn(
716
- "{task.fields[requests_summary]}\n{task.fields[tokens_summary]}\n{task.fields[scheduler_stats]}",
717
- ),
718
- ]
698
+ output_tokens: int | None = None,
699
+ output_tokens_rate: float | None = None,
700
+ prompt_tokens: int | None = None,
701
+ total_tokens_rate: float | None = None,
702
+ time_to_first_token: float | None = None,
703
+ inter_token_latency: float | None = None,
704
+ converted: bool = False,
705
+ ):
706
+ if output_tokens is not None:
707
+ self.output_tokens = output_tokens
708
+ if output_tokens_rate is not None:
709
+ self.output_tokens_rate = output_tokens_rate
710
+ if prompt_tokens is not None:
711
+ self.prompt_tokens = prompt_tokens
712
+ if total_tokens_rate is not None:
713
+ self.total_tokens_rate = total_tokens_rate
714
+ if time_to_first_token is not None:
715
+ self.time_to_first_token = time_to_first_token * (
716
+ 1000 if not converted else 1
717
+ )
718
+ if inter_token_latency is not None:
719
+ self.inter_token_latency = inter_token_latency * (
720
+ 1000 if not converted else 1
721
+ )
719
722
 
720
- return columns
723
+ def _update_system_stats(
724
+ self,
725
+ request_targeted_start_delay: float | None = None,
726
+ queued_time: float | None = None,
727
+ scheduler_overheads_time: float | None = None,
728
+ converted: bool = False,
729
+ ):
730
+ if request_targeted_start_delay is not None:
731
+ self.request_targeted_start_delay = request_targeted_start_delay * (
732
+ 1000 if not converted else 1
733
+ )
734
+ if queued_time is not None:
735
+ self.queued_time = queued_time * (1000 if not converted else 1)
736
+ if scheduler_overheads_time is not None:
737
+ self.scheduler_overheads_time = scheduler_overheads_time * (
738
+ 1000 if not converted else 1
739
+ )