guidellm 0.4.0a21-py3-none-any.whl → 0.4.0a169-py3-none-any.whl

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.

Potentially problematic release.

Files changed (115)
  1. guidellm/__init__.py +5 -2
  2. guidellm/__main__.py +452 -252
  3. guidellm/backends/__init__.py +33 -0
  4. guidellm/backends/backend.py +110 -0
  5. guidellm/backends/openai.py +355 -0
  6. guidellm/backends/response_handlers.py +455 -0
  7. guidellm/benchmark/__init__.py +53 -39
  8. guidellm/benchmark/benchmarker.py +150 -317
  9. guidellm/benchmark/entrypoints.py +467 -128
  10. guidellm/benchmark/output.py +519 -771
  11. guidellm/benchmark/profile.py +580 -280
  12. guidellm/benchmark/progress.py +568 -549
  13. guidellm/benchmark/scenarios/__init__.py +40 -0
  14. guidellm/benchmark/scenarios/chat.json +6 -0
  15. guidellm/benchmark/scenarios/rag.json +6 -0
  16. guidellm/benchmark/schemas.py +2086 -0
  17. guidellm/data/__init__.py +28 -4
  18. guidellm/data/collators.py +16 -0
  19. guidellm/data/deserializers/__init__.py +53 -0
  20. guidellm/data/deserializers/deserializer.py +144 -0
  21. guidellm/data/deserializers/file.py +222 -0
  22. guidellm/data/deserializers/huggingface.py +94 -0
  23. guidellm/data/deserializers/memory.py +194 -0
  24. guidellm/data/deserializers/synthetic.py +348 -0
  25. guidellm/data/loaders.py +149 -0
  26. guidellm/data/preprocessors/__init__.py +25 -0
  27. guidellm/data/preprocessors/formatters.py +404 -0
  28. guidellm/data/preprocessors/mappers.py +198 -0
  29. guidellm/data/preprocessors/preprocessor.py +31 -0
  30. guidellm/data/processor.py +31 -0
  31. guidellm/data/schemas.py +13 -0
  32. guidellm/data/utils/__init__.py +6 -0
  33. guidellm/data/utils/dataset.py +94 -0
  34. guidellm/extras/__init__.py +4 -0
  35. guidellm/extras/audio.py +215 -0
  36. guidellm/extras/vision.py +242 -0
  37. guidellm/logger.py +2 -2
  38. guidellm/mock_server/__init__.py +8 -0
  39. guidellm/mock_server/config.py +84 -0
  40. guidellm/mock_server/handlers/__init__.py +17 -0
  41. guidellm/mock_server/handlers/chat_completions.py +280 -0
  42. guidellm/mock_server/handlers/completions.py +280 -0
  43. guidellm/mock_server/handlers/tokenizer.py +142 -0
  44. guidellm/mock_server/models.py +510 -0
  45. guidellm/mock_server/server.py +168 -0
  46. guidellm/mock_server/utils.py +302 -0
  47. guidellm/preprocess/dataset.py +23 -26
  48. guidellm/presentation/builder.py +2 -2
  49. guidellm/presentation/data_models.py +25 -21
  50. guidellm/presentation/injector.py +2 -3
  51. guidellm/scheduler/__init__.py +65 -26
  52. guidellm/scheduler/constraints.py +1035 -0
  53. guidellm/scheduler/environments.py +252 -0
  54. guidellm/scheduler/scheduler.py +140 -368
  55. guidellm/scheduler/schemas.py +272 -0
  56. guidellm/scheduler/strategies.py +519 -0
  57. guidellm/scheduler/worker.py +391 -420
  58. guidellm/scheduler/worker_group.py +707 -0
  59. guidellm/schemas/__init__.py +31 -0
  60. guidellm/schemas/info.py +159 -0
  61. guidellm/schemas/request.py +226 -0
  62. guidellm/schemas/response.py +119 -0
  63. guidellm/schemas/stats.py +228 -0
  64. guidellm/{config.py → settings.py} +32 -21
  65. guidellm/utils/__init__.py +95 -8
  66. guidellm/utils/auto_importer.py +98 -0
  67. guidellm/utils/cli.py +71 -2
  68. guidellm/utils/console.py +183 -0
  69. guidellm/utils/encoding.py +778 -0
  70. guidellm/utils/functions.py +134 -0
  71. guidellm/utils/hf_datasets.py +1 -2
  72. guidellm/utils/hf_transformers.py +4 -4
  73. guidellm/utils/imports.py +9 -0
  74. guidellm/utils/messaging.py +1118 -0
  75. guidellm/utils/mixins.py +115 -0
  76. guidellm/utils/pydantic_utils.py +411 -0
  77. guidellm/utils/random.py +3 -4
  78. guidellm/utils/registry.py +220 -0
  79. guidellm/utils/singleton.py +133 -0
  80. guidellm/{objects → utils}/statistics.py +341 -247
  81. guidellm/utils/synchronous.py +159 -0
  82. guidellm/utils/text.py +163 -50
  83. guidellm/utils/typing.py +41 -0
  84. guidellm/version.py +1 -1
  85. {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a169.dist-info}/METADATA +33 -10
  86. guidellm-0.4.0a169.dist-info/RECORD +95 -0
  87. guidellm/backend/__init__.py +0 -23
  88. guidellm/backend/backend.py +0 -259
  89. guidellm/backend/openai.py +0 -705
  90. guidellm/backend/response.py +0 -136
  91. guidellm/benchmark/aggregator.py +0 -760
  92. guidellm/benchmark/benchmark.py +0 -837
  93. guidellm/benchmark/scenario.py +0 -104
  94. guidellm/data/prideandprejudice.txt.gz +0 -0
  95. guidellm/dataset/__init__.py +0 -22
  96. guidellm/dataset/creator.py +0 -213
  97. guidellm/dataset/entrypoints.py +0 -42
  98. guidellm/dataset/file.py +0 -92
  99. guidellm/dataset/hf_datasets.py +0 -62
  100. guidellm/dataset/in_memory.py +0 -132
  101. guidellm/dataset/synthetic.py +0 -287
  102. guidellm/objects/__init__.py +0 -18
  103. guidellm/objects/pydantic.py +0 -89
  104. guidellm/request/__init__.py +0 -18
  105. guidellm/request/loader.py +0 -284
  106. guidellm/request/request.py +0 -79
  107. guidellm/request/types.py +0 -10
  108. guidellm/scheduler/queues.py +0 -25
  109. guidellm/scheduler/result.py +0 -155
  110. guidellm/scheduler/strategy.py +0 -495
  111. guidellm-0.4.0a21.dist-info/RECORD +0 -62
  112. {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a169.dist-info}/WHEEL +0 -0
  113. {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a169.dist-info}/entry_points.txt +0 -0
  114. {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a169.dist-info}/licenses/LICENSE +0 -0
  115. {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a169.dist-info}/top_level.txt +0 -0
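
Entry 64 renames guidellm/config.py to guidellm/settings.py, and the diff below contains the matching import change (from guidellm.config import settings → from guidellm.settings import settings). The following is a minimal, hypothetical sketch of how downstream code could absorb that rename; the try/except fallback is illustrative and not part of the package:

    # Hypothetical compatibility import for code that depends on guidellm.
    # 0.4.0a169 exposes settings from guidellm.settings (entry 64 and the
    # import changes below); 0.4.0a21 exposed it from guidellm.config.
    try:
        from guidellm.settings import settings  # new layout (0.4.0a169)
    except ImportError:
        from guidellm.config import settings  # old layout (0.4.0a21)

    # The settings object supplies, among other things, the table rendering
    # characters referenced later in the diff (settings.table_border_char,
    # settings.table_column_separator_char).
    print(type(settings).__name__)

The diff below is guidellm/benchmark/output.py (entry 10 in the list above).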
@@ -1,698 +1,455 @@
1
+ from __future__ import annotations
2
+
1
3
  import csv
2
4
  import json
3
5
  import math
6
+ from abc import ABC, abstractmethod
4
7
  from collections import OrderedDict
5
8
  from copy import deepcopy
6
9
  from datetime import datetime
7
10
  from pathlib import Path
8
- from typing import Any, Literal, Optional, Union
11
+ from typing import Any, ClassVar
9
12
 
10
- import yaml
11
- from pydantic import Field
13
+ from pydantic import BaseModel, ConfigDict, Field
12
14
  from rich.console import Console
13
15
  from rich.padding import Padding
14
16
  from rich.text import Text
15
17
 
16
- from guidellm.benchmark.benchmark import GenerativeBenchmark, GenerativeMetrics
17
18
  from guidellm.benchmark.profile import (
18
19
  AsyncProfile,
19
20
  ConcurrentProfile,
20
21
  SweepProfile,
21
22
  ThroughputProfile,
22
23
  )
23
- from guidellm.config import settings
24
- from guidellm.objects import (
25
- DistributionSummary,
26
- StandardBaseModel,
27
- StatusDistributionSummary,
24
+ from guidellm.benchmark.schemas import (
25
+ GenerativeBenchmark,
26
+ GenerativeBenchmarksReport,
27
+ GenerativeMetrics,
28
28
  )
29
29
  from guidellm.presentation import UIDataBuilder
30
30
  from guidellm.presentation.injector import create_report
31
- from guidellm.scheduler import strategy_display_str
32
- from guidellm.utils import Colors, split_text_list_by_length
33
- from guidellm.utils.dict import recursive_key_update
34
- from guidellm.utils.text import camelize_str
31
+ from guidellm.settings import settings
32
+ from guidellm.utils import (
33
+ Colors,
34
+ DistributionSummary,
35
+ RegistryMixin,
36
+ StatusDistributionSummary,
37
+ camelize_str,
38
+ recursive_key_update,
39
+ safe_format_timestamp,
40
+ split_text_list_by_length,
41
+ )
35
42
 
36
43
  __all__ = [
37
- "GenerativeBenchmarksConsole",
38
- "GenerativeBenchmarksReport",
44
+ "GenerativeBenchmarkerCSV",
45
+ "GenerativeBenchmarkerConsole",
46
+ "GenerativeBenchmarkerHTML",
47
+ "GenerativeBenchmarkerOutput",
39
48
  ]
40
49
 
41
50
 
42
- class GenerativeBenchmarksReport(StandardBaseModel):
43
- """
44
- A pydantic model representing a completed benchmark report.
45
- Contains a list of benchmarks along with convenience methods for finalizing
46
- and saving the report.
47
- """
48
-
49
- @staticmethod
50
- def load_file(path: Union[str, Path]) -> "GenerativeBenchmarksReport":
51
- """
52
- Load a report from a file. The file type is determined by the file extension.
53
- If the file is a directory, it expects a file named benchmarks.json under the
54
- directory.
55
-
56
- :param path: The path to load the report from.
57
- :return: The loaded report.
58
- """
59
- path, type_ = GenerativeBenchmarksReport._file_setup(path)
60
-
61
- if type_ == "json":
62
- with path.open("r") as file:
63
- model_dict = json.load(file)
64
-
65
- return GenerativeBenchmarksReport.model_validate(model_dict)
66
-
67
- if type_ == "yaml":
68
- with path.open("r") as file:
69
- model_dict = yaml.safe_load(file)
70
-
71
- return GenerativeBenchmarksReport.model_validate(model_dict)
72
-
73
- if type_ == "csv":
74
- raise ValueError(f"CSV file type is not supported for loading: {path}.")
75
-
76
- if type_ == "html":
77
- raise ValueError(f"HTML file type is not supported for loading: {path}.")
78
-
79
- raise ValueError(f"Unsupported file type: {type_} for {path}.")
80
-
81
- benchmarks: list[GenerativeBenchmark] = Field(
82
- description="The list of completed benchmarks contained within the report.",
83
- default_factory=list,
51
+ class GenerativeBenchmarkerOutput(
52
+ BaseModel, RegistryMixin[type["GenerativeBenchmarkerOutput"]], ABC
53
+ ):
54
+ model_config = ConfigDict(
55
+ extra="ignore",
56
+ arbitrary_types_allowed=True,
57
+ validate_assignment=True,
58
+ from_attributes=True,
59
+ use_enum_values=True,
84
60
  )
85
61
 
86
- def set_sample_size(
87
- self, sample_size: Optional[int]
88
- ) -> "GenerativeBenchmarksReport":
62
+ @classmethod
63
+ @abstractmethod
64
+ def validated_kwargs(cls, *args, **kwargs) -> dict[str, Any]:
89
65
  """
90
- Set the sample size for each benchmark in the report. In doing this, it will
91
- reduce the contained requests of each benchmark to the sample size.
92
- If sample size is None, it will return the report as is.
66
+ Validate and process arguments for constraint creation.
93
67
 
94
- :param sample_size: The sample size to set for each benchmark.
95
- If None, the report will be returned as is.
96
- :return: The report with the sample size set for each benchmark.
97
- """
68
+ Must be implemented by subclasses to handle their specific parameter patterns.
98
69
 
99
- if sample_size is not None:
100
- for benchmark in self.benchmarks:
101
- benchmark.set_sample_size(sample_size)
102
-
103
- return self
104
-
105
- def save_file(self, path: Union[str, Path]) -> Path:
106
- """
107
- Save the report to a file. The file type is determined by the file extension.
108
- If the file is a directory, it will save the report to a file named
109
- benchmarks.json under the directory.
110
-
111
- :param path: The path to save the report to.
112
- :return: The path to the saved report.
113
- """
114
- path, type_ = GenerativeBenchmarksReport._file_setup(path)
115
-
116
- if type_ == "json":
117
- return self.save_json(path)
118
-
119
- if type_ == "yaml":
120
- return self.save_yaml(path)
121
-
122
- if type_ == "csv":
123
- return self.save_csv(path)
124
-
125
- if type_ == "html":
126
- return self.save_html(path)
127
-
128
- raise ValueError(f"Unsupported file type: {type_} for {path}.")
129
-
130
- def save_json(self, path: Union[str, Path]) -> Path:
70
+ :param args: Positional arguments passed to the constraint
71
+ :param kwargs: Keyword arguments passed to the constraint
72
+ :return: Validated dictionary of parameters for constraint creation
73
+ :raises NotImplementedError: Must be implemented by subclasses
131
74
  """
132
- Save the report to a JSON file containing all of the report data which is
133
- reloadable using the pydantic model. If the file is a directory, it will save
134
- the report to a file named benchmarks.json under the directory.
75
+ ...
135
76
 
136
- :param path: The path to save the report to.
137
- :return: The path to the saved report.
138
- """
139
- path, type_ = GenerativeBenchmarksReport._file_setup(path, "json")
140
-
141
- if type_ != "json":
142
- raise ValueError(
143
- f"Unsupported file type for saving a JSON: {type_} for {path}."
77
+ @classmethod
78
+ def resolve(
79
+ cls,
80
+ output_formats: (
81
+ tuple[str, ...]
82
+ | list[str]
83
+ | dict[
84
+ str,
85
+ Any | dict[str, Any] | GenerativeBenchmarkerOutput,
86
+ ]
87
+ | None
88
+ ),
89
+ output_path: str | Path | None,
90
+ ) -> dict[str, GenerativeBenchmarkerOutput]:
91
+ if not output_formats:
92
+ return {}
93
+
94
+ if isinstance(output_formats, list | tuple):
95
+ # support list of output keys: ["csv", "json"]
96
+ # support list of files: ["path/to/file.json", "path/to/file.csv"]
97
+ formats_list = output_formats
98
+ output_formats = {}
99
+ for output_format in formats_list:
100
+ if not isinstance(output_format, str):
101
+ raise TypeError(
102
+ f"Expected string format, got {type(output_format)} for "
103
+ f"{output_format} in {formats_list}"
104
+ )
105
+ try:
106
+ if cls.is_registered(output_format):
107
+ output_formats[output_format] = {}
108
+ else:
109
+ # treat it as a file save location
110
+ path = Path(output_format)
111
+ format_type = path.suffix[1:].lower()
112
+ output_formats[format_type] = {"output_path": path}
113
+
114
+ except Exception as err:
115
+ raise ValueError(
116
+ f"Failed to resolve output format '{output_format}': {err}"
117
+ ) from err
118
+
119
+ resolved = {}
120
+
121
+ for key, val in output_formats.items():
122
+ if isinstance(val, GenerativeBenchmarkerOutput):
123
+ resolved[key] = val
124
+ else:
125
+ output_class = cls.get_registered_object(key)
126
+ kwargs = {"output_path": output_path}
127
+
128
+ if isinstance(val, dict):
129
+ kwargs.update(val)
130
+ kwargs = output_class.validated_kwargs(**kwargs)
131
+ else:
132
+ kwargs = output_class.validated_kwargs(val, **kwargs)
133
+
134
+ resolved[key] = output_class(**kwargs)
135
+
136
+ return resolved
137
+
138
+ @abstractmethod
139
+ async def finalize(self, report: GenerativeBenchmarksReport) -> Any: ...
140
+
141
+
142
+ @GenerativeBenchmarkerOutput.register(["json", "yaml"])
143
+ class GenerativeBenchmarkerSerialized(GenerativeBenchmarkerOutput):
144
+ @classmethod
145
+ def validated_kwargs(
146
+ cls, output_path: str | Path | None, **_kwargs
147
+ ) -> dict[str, Any]:
148
+ new_kwargs = {}
149
+ if output_path is not None:
150
+ new_kwargs["output_path"] = (
151
+ Path(output_path) if not isinstance(output_path, Path) else output_path
144
152
  )
153
+ return new_kwargs
145
154
 
146
- model_dict = self.model_dump()
147
-
148
- with path.open("w", encoding="utf-8") as file:
149
- json.dump(model_dict, file, ensure_ascii=False, indent=4)
150
-
151
- return path
155
+ output_path: Path = Field(default_factory=lambda: Path.cwd())
152
156
 
153
- def save_yaml(self, path: Union[str, Path]) -> Path:
154
- """
155
- Save the report to a YAML file containing all of the report data which is
156
- reloadable using the pydantic model. If the file is a directory, it will save
157
- the report to a file named benchmarks.yaml under the directory.
158
-
159
- :param path: The path to save the report to.
160
- :return: The path to the saved report.
161
- """
157
+ async def finalize(self, report: GenerativeBenchmarksReport) -> Path:
158
+ return report.save_file(self.output_path)
162
159
 
163
- path, type_ = GenerativeBenchmarksReport._file_setup(path, "yaml")
164
-
165
- if type_ != "yaml":
166
- raise ValueError(
167
- f"Unsupported file type for saving a YAML: {type_} for {path}."
168
- )
169
160
 
170
- model_dict = self.model_dump()
171
- model_yaml = yaml.dump(model_dict)
161
+ @GenerativeBenchmarkerOutput.register("console")
162
+ class GenerativeBenchmarkerConsole(GenerativeBenchmarkerOutput):
163
+ """Console output formatter for benchmark results with rich formatting."""
172
164
 
173
- with path.open("w") as file:
174
- file.write(model_yaml)
165
+ @classmethod
166
+ def validated_kwargs(cls, *_args, **_kwargs) -> dict[str, Any]:
167
+ return {}
175
168
 
176
- return path
169
+ console: Console = Field(default_factory=Console)
177
170
 
178
- def save_csv(self, path: Union[str, Path]) -> Path:
171
+ async def finalize(self, report: GenerativeBenchmarksReport) -> str:
179
172
  """
180
- Save the report to a CSV file containing the summarized statistics and values
181
- for each report. Note, this data is not reloadable using the pydantic model.
182
- If the file is a directory, it will save the report to a file named
183
- benchmarks.csv under the directory.
173
+ Print the complete benchmark report to the console.
184
174
 
185
- :param path: The path to save the report to.
186
- :return: The path to the saved report.
175
+ :param report: The completed benchmark report.
176
+ :return:
187
177
  """
188
- path, type_ = GenerativeBenchmarksReport._file_setup(path, "csv")
178
+ self._print_benchmarks_metadata(report.benchmarks)
179
+ self._print_benchmarks_info(report.benchmarks)
180
+ self._print_benchmarks_stats(report.benchmarks)
189
181
 
190
- if type_ != "csv":
191
- raise ValueError(
192
- f"Unsupported file type for saving a CSV: {type_} for {path}."
193
- )
182
+ return "printed to console"
194
183
 
195
- with path.open("w", newline="") as file:
196
- writer = csv.writer(file)
197
- headers: list[str] = []
198
- rows: list[list[Union[str, float, list[float]]]] = []
199
-
200
- for benchmark in self.benchmarks:
201
- benchmark_headers: list[str] = []
202
- benchmark_values: list[Union[str, float, list[float]]] = []
203
-
204
- desc_headers, desc_values = self._benchmark_desc_headers_and_values(
205
- benchmark
206
- )
207
- benchmark_headers += desc_headers
208
- benchmark_values += desc_values
209
-
210
- for status in StatusDistributionSummary.model_fields:
211
- status_headers, status_values = (
212
- self._benchmark_status_headers_and_values(benchmark, status)
213
- )
214
- benchmark_headers += status_headers
215
- benchmark_values += status_values
216
-
217
- benchmark_extra_headers, benchmark_extra_values = (
218
- self._benchmark_extras_headers_and_values(benchmark)
219
- )
220
- benchmark_headers += benchmark_extra_headers
221
- benchmark_values += benchmark_extra_values
222
-
223
- if not headers:
224
- headers = benchmark_headers
225
- rows.append(benchmark_values)
226
-
227
- writer.writerow(headers)
228
- for row in rows:
229
- writer.writerow(row)
230
-
231
- return path
232
-
233
- def save_html(self, path: Union[str, Path]) -> Path:
234
- """
235
- Download html, inject report data and save to a file.
236
-
237
- :param path: The path to create the report at.
238
- :return: The path to the report.
239
- """
240
-
241
- data_builder = UIDataBuilder(self.benchmarks)
242
- data = data_builder.to_dict()
243
- camel_data = recursive_key_update(deepcopy(data), camelize_str)
244
- ui_api_data = {}
245
- for k, v in camel_data.items():
246
- key = f"window.{k} = {{}};"
247
- value = f"window.{k} = {json.dumps(v, indent=2)};\n"
248
- ui_api_data[key] = value
249
- return create_report(ui_api_data, path)
250
-
251
- @staticmethod
252
- def _file_setup(
253
- path: Union[str, Path],
254
- default_file_type: Literal["json", "yaml", "csv", "html"] = "json",
255
- ) -> tuple[Path, Literal["json", "yaml", "csv", "html"]]:
256
- path = Path(path) if not isinstance(path, Path) else path
257
-
258
- if path.is_dir():
259
- path = path / f"benchmarks.{default_file_type}"
260
-
261
- path.parent.mkdir(parents=True, exist_ok=True)
262
- path_suffix = path.suffix.lower()
263
-
264
- if path_suffix == ".json":
265
- return path, "json"
266
-
267
- if path_suffix in [".yaml", ".yml"]:
268
- return path, "yaml"
269
-
270
- if path_suffix in [".csv"]:
271
- return path, "csv"
184
+ def _print_benchmarks_metadata(self, benchmarks: list[GenerativeBenchmark]):
185
+ start_time = benchmarks[0].run_stats.start_time
186
+ end_time = benchmarks[-1].run_stats.end_time
187
+ duration = end_time - start_time
272
188
 
273
- if path_suffix in [".html"]:
274
- return path, "html"
189
+ self._print_section_header("Benchmarks Metadata")
190
+ self._print_labeled_line("Run id", str(benchmarks[0].run_id))
191
+ self._print_labeled_line("Duration", f"{duration:.1f} seconds")
192
+ self._print_labeled_line("Profile", self._get_profile_str(benchmarks[0]))
275
193
 
276
- raise ValueError(
277
- f"Unsupported file extension: {path_suffix} for {path}; "
278
- "expected json, yaml, csv, or html."
279
- )
280
-
281
- @staticmethod
282
- def _benchmark_desc_headers_and_values(
283
- benchmark: GenerativeBenchmark,
284
- ) -> tuple[list[str], list[Union[str, float]]]:
194
+ def _print_benchmarks_info(self, benchmarks: list[GenerativeBenchmark]):
195
+ sections = {
196
+ "Metadata": (0, 3),
197
+ "Requests Made": (4, 6),
198
+ "Prompt Tok/Req": (7, 9),
199
+ "Output Tok/Req": (10, 12),
200
+ "Prompt Tok Total": (13, 15),
201
+ "Output Tok Total": (16, 18),
202
+ }
285
203
  headers = [
286
- "Type",
287
- "Run Id",
288
- "Id",
289
- "Name",
204
+ "Benchmark",
290
205
  "Start Time",
291
206
  "End Time",
292
- "Duration",
293
- ]
294
- values: list[Union[str, float]] = [
295
- benchmark.type_,
296
- benchmark.run_id,
297
- benchmark.id_,
298
- strategy_display_str(benchmark.args.strategy),
299
- datetime.fromtimestamp(benchmark.start_time).strftime("%Y-%m-%d %H:%M:%S"),
300
- datetime.fromtimestamp(benchmark.end_time).strftime("%Y-%m-%d %H:%M:%S"),
301
- benchmark.duration,
302
- ]
303
-
304
- if len(headers) != len(values):
305
- raise ValueError("Headers and values length mismatch.")
306
-
307
- return headers, values
308
-
309
- @staticmethod
310
- def _benchmark_extras_headers_and_values(
311
- benchmark: GenerativeBenchmark,
312
- ) -> tuple[list[str], list[str]]:
313
- headers = ["Args", "Worker", "Request Loader", "Extras"]
314
- values: list[str] = [
315
- json.dumps(benchmark.args.model_dump()),
316
- json.dumps(benchmark.worker.model_dump()),
317
- json.dumps(benchmark.request_loader.model_dump()),
318
- json.dumps(benchmark.extras),
319
- ]
320
-
321
- if len(headers) != len(values):
322
- raise ValueError("Headers and values length mismatch.")
323
-
324
- return headers, values
325
-
326
- @staticmethod
327
- def _benchmark_status_headers_and_values(
328
- benchmark: GenerativeBenchmark, status: str
329
- ) -> tuple[list[str], list[Union[float, list[float]]]]:
330
- headers = [
331
- f"{status.capitalize()} Requests",
332
- ]
333
- values = [
334
- getattr(benchmark.request_totals, status),
207
+ "Duration (s)",
208
+ "Comp",
209
+ "Inc",
210
+ "Err",
211
+ "Comp",
212
+ "Inc",
213
+ "Err",
214
+ "Comp",
215
+ "Inc",
216
+ "Err",
217
+ "Comp",
218
+ "Inc",
219
+ "Err",
220
+ "Comp",
221
+ "Inc",
222
+ "Err",
335
223
  ]
336
224
 
337
- for metric in GenerativeMetrics.model_fields:
338
- metric_headers, metric_values = (
339
- GenerativeBenchmarksReport._benchmark_status_metrics_stats(
340
- benchmark, status, metric
341
- )
225
+ rows = []
226
+ for benchmark in benchmarks:
227
+ rows.append(
228
+ [
229
+ str(benchmark.scheduler.strategy),
230
+ safe_format_timestamp(benchmark.start_time),
231
+ safe_format_timestamp(benchmark.end_time),
232
+ f"{(benchmark.end_time - benchmark.start_time):.1f}",
233
+ f"{benchmark.request_totals.successful:.0f}",
234
+ f"{benchmark.request_totals.incomplete:.0f}",
235
+ f"{benchmark.request_totals.errored:.0f}",
236
+ f"{benchmark.metrics.prompt_token_count.successful.mean:.1f}",
237
+ f"{benchmark.metrics.prompt_token_count.incomplete.mean:.1f}",
238
+ f"{benchmark.metrics.prompt_token_count.errored.mean:.1f}",
239
+ f"{benchmark.metrics.output_token_count.successful.mean:.1f}",
240
+ f"{benchmark.metrics.output_token_count.incomplete.mean:.1f}",
241
+ f"{benchmark.metrics.output_token_count.errored.mean:.1f}",
242
+ f"{benchmark.metrics.prompt_token_count.successful.total_sum:.0f}",
243
+ f"{benchmark.metrics.prompt_token_count.incomplete.total_sum:.0f}",
244
+ f"{benchmark.metrics.prompt_token_count.errored.total_sum:.0f}",
245
+ f"{benchmark.metrics.output_token_count.successful.total_sum:.0f}",
246
+ f"{benchmark.metrics.output_token_count.incomplete.total_sum:.0f}",
247
+ f"{benchmark.metrics.output_token_count.errored.total_sum:.0f}",
248
+ ]
342
249
  )
343
- headers += metric_headers
344
- values += metric_values
345
250
 
346
- if len(headers) != len(values):
347
- raise ValueError("Headers and values length mismatch.")
251
+ self._print_table(headers, rows, "Benchmarks Info", sections)
348
252
 
349
- return headers, values
350
-
351
- @staticmethod
352
- def _benchmark_status_metrics_stats(
353
- benchmark: GenerativeBenchmark,
354
- status: str,
355
- metric: str,
356
- ) -> tuple[list[str], list[Union[float, list[float]]]]:
357
- status_display = status.capitalize()
358
- metric_display = metric.replace("_", " ").capitalize()
359
- status_dist_summary: StatusDistributionSummary = getattr(
360
- benchmark.metrics, metric
361
- )
362
- dist_summary: DistributionSummary = getattr(status_dist_summary, status)
253
+ def _print_benchmarks_stats(self, benchmarks: list[GenerativeBenchmark]):
254
+ sections = {
255
+ "Metadata": (0, 0),
256
+ "Request Stats": (1, 2),
257
+ "Out Tok/sec": (3, 3),
258
+ "Tot Tok/sec": (4, 4),
259
+ "Req Latency (sec)": (5, 7),
260
+ "TTFT (ms)": (8, 10),
261
+ "ITL (ms)": (11, 13),
262
+ "TPOT (ms)": (14, 16),
263
+ }
363
264
  headers = [
364
- f"{status_display} {metric_display} mean",
365
- f"{status_display} {metric_display} median",
366
- f"{status_display} {metric_display} std dev",
367
- (
368
- f"{status_display} {metric_display} "
369
- "[min, 0.1, 1, 5, 10, 25, 75, 90, 95, 99, max]"
370
- ),
371
- ]
372
- values: list[Union[float, list[float]]] = [
373
- dist_summary.mean,
374
- dist_summary.median,
375
- dist_summary.std_dev,
376
- [
377
- dist_summary.min,
378
- dist_summary.percentiles.p001,
379
- dist_summary.percentiles.p01,
380
- dist_summary.percentiles.p05,
381
- dist_summary.percentiles.p10,
382
- dist_summary.percentiles.p25,
383
- dist_summary.percentiles.p75,
384
- dist_summary.percentiles.p90,
385
- dist_summary.percentiles.p95,
386
- dist_summary.percentiles.p99,
387
- dist_summary.max,
388
- ],
265
+ "Benchmark",
266
+ "Per Second",
267
+ "Concurrency",
268
+ "mean",
269
+ "mean",
270
+ "mean",
271
+ "median",
272
+ "p99",
273
+ "mean",
274
+ "median",
275
+ "p99",
276
+ "mean",
277
+ "median",
278
+ "p99",
279
+ "mean",
280
+ "median",
281
+ "p99",
389
282
  ]
390
283
 
391
- if len(headers) != len(values):
392
- raise ValueError("Headers and values length mismatch.")
393
-
394
- return headers, values
395
-
396
-
397
- class GenerativeBenchmarksConsole:
398
- """
399
- A class for outputting progress and benchmark results to the console.
400
- Utilizes the rich library for formatting, enabling colored and styled output.
401
- """
402
-
403
- def __init__(self, enabled: bool = True):
404
- """
405
- :param enabled: Whether to enable console output. Defaults to True.
406
- If False, all console output will be suppressed.
407
- """
408
- self.enabled = enabled
409
- self.benchmarks: Optional[list[GenerativeBenchmark]] = None
410
- self.console = Console()
284
+ rows = []
285
+ for benchmark in benchmarks:
286
+ rows.append(
287
+ [
288
+ str(benchmark.scheduler.strategy),
289
+ f"{benchmark.metrics.requests_per_second.successful.mean:.2f}",
290
+ f"{benchmark.metrics.request_concurrency.successful.mean:.2f}",
291
+ f"{benchmark.metrics.output_tokens_per_second.successful.mean:.1f}",
292
+ f"{benchmark.metrics.tokens_per_second.successful.mean:.1f}",
293
+ f"{benchmark.metrics.request_latency.successful.mean:.2f}",
294
+ f"{benchmark.metrics.request_latency.successful.median:.2f}",
295
+ f"{benchmark.metrics.request_latency.successful.percentiles.p99:.2f}",
296
+ f"{benchmark.metrics.time_to_first_token_ms.successful.mean:.1f}",
297
+ f"{benchmark.metrics.time_to_first_token_ms.successful.median:.1f}",
298
+ f"{benchmark.metrics.time_to_first_token_ms.successful.percentiles.p99:.1f}",
299
+ f"{benchmark.metrics.inter_token_latency_ms.successful.mean:.1f}",
300
+ f"{benchmark.metrics.inter_token_latency_ms.successful.median:.1f}",
301
+ f"{benchmark.metrics.inter_token_latency_ms.successful.percentiles.p99:.1f}",
302
+ f"{benchmark.metrics.time_per_output_token_ms.successful.mean:.1f}",
303
+ f"{benchmark.metrics.time_per_output_token_ms.successful.median:.1f}",
304
+ f"{benchmark.metrics.time_per_output_token_ms.successful.percentiles.p99:.1f}",
305
+ ]
306
+ )
411
307
 
412
- @property
413
- def benchmarks_profile_str(self) -> str:
414
- """
415
- :return: A string representation of the profile used for the benchmarks.
416
- """
417
- profile = self.benchmarks[0].args.profile if self.benchmarks else None
308
+ self._print_table(headers, rows, "Benchmarks Stats", sections)
418
309
 
310
+ def _get_profile_str(self, benchmark: GenerativeBenchmark) -> str:
311
+ profile = benchmark.benchmarker.profile
419
312
  if profile is None:
420
313
  return "None"
421
314
 
422
- profile_args = OrderedDict(
423
- {
424
- "type": profile.type_,
425
- "strategies": profile.strategy_types,
426
- }
427
- )
428
-
429
- if isinstance(profile, ConcurrentProfile):
430
- profile_args["streams"] = str(profile.streams)
431
- elif isinstance(profile, ThroughputProfile):
432
- profile_args["max_concurrency"] = str(profile.max_concurrency)
433
- elif isinstance(profile, AsyncProfile):
434
- profile_args["max_concurrency"] = str(profile.max_concurrency)
435
- profile_args["rate"] = str(profile.rate)
436
- profile_args["initial_burst"] = str(profile.initial_burst)
437
- elif isinstance(profile, SweepProfile):
438
- profile_args["sweep_size"] = str(profile.sweep_size)
439
-
440
- return ", ".join(f"{key}={value}" for key, value in profile_args.items())
441
-
442
- @property
443
- def benchmarks_args_str(self) -> str:
444
- """
445
- :return: A string representation of the arguments used for the benchmarks.
446
- """
447
- args = self.benchmarks[0].args if self.benchmarks else None
448
-
449
- if args is None:
450
- return "None"
451
-
452
- args_dict = OrderedDict(
453
- {
454
- "max_number": args.max_number,
455
- "max_duration": args.max_duration,
456
- "warmup_number": args.warmup_number,
457
- "warmup_duration": args.warmup_duration,
458
- "cooldown_number": args.cooldown_number,
459
- "cooldown_duration": args.cooldown_duration,
460
- }
461
- )
462
-
463
- return ", ".join(f"{key}={value}" for key, value in args_dict.items())
464
-
465
- @property
466
- def benchmarks_worker_desc_str(self) -> str:
467
- """
468
- :return: A string representation of the worker used for the benchmarks.
469
- """
470
- return str(self.benchmarks[0].worker) if self.benchmarks else "None"
471
-
472
- @property
473
- def benchmarks_request_loader_desc_str(self) -> str:
474
- """
475
- :return: A string representation of the request loader used for the benchmarks.
476
- """
477
- return str(self.benchmarks[0].request_loader) if self.benchmarks else "None"
478
-
479
- @property
480
- def benchmarks_extras_str(self) -> str:
481
- """
482
- :return: A string representation of the extras used for the benchmarks.
483
- """
484
- extras = self.benchmarks[0].extras if self.benchmarks else None
315
+ profile_args = OrderedDict(
316
+ {
317
+ "type": profile.type_,
318
+ "strategies": getattr(profile, "strategy_types", []),
319
+ }
320
+ )
485
321
 
486
- if not extras:
487
- return "None"
322
+ if isinstance(profile, ConcurrentProfile):
323
+ profile_args["streams"] = str(profile.streams)
324
+ elif isinstance(profile, ThroughputProfile):
325
+ profile_args["max_concurrency"] = str(profile.max_concurrency)
326
+ elif isinstance(profile, AsyncProfile):
327
+ profile_args["max_concurrency"] = str(profile.max_concurrency)
328
+ profile_args["rate"] = str(profile.rate)
329
+ elif isinstance(profile, SweepProfile):
330
+ profile_args["sweep_size"] = str(profile.sweep_size)
488
331
 
489
- return ", ".join(f"{key}={value}" for key, value in extras.items())
332
+ return ", ".join(f"{key}={value}" for key, value in profile_args.items())
490
333
 
491
- def print_section_header(self, title: str, indent: int = 0, new_lines: int = 2):
492
- """
493
- Print out a styled section header to the console.
494
- The title is underlined, bolded, and colored with the INFO color.
495
-
496
- :param title: The title of the section.
497
- :param indent: The number of spaces to indent the title.
498
- Defaults to 0.
499
- :param new_lines: The number of new lines to print before the title.
500
- Defaults to 2.
501
- """
502
- self.print_line(
503
- value=f"{title}:",
504
- style=f"bold underline {Colors.INFO}",
334
+ def _print_section_header(self, title: str, indent: int = 0, new_lines: int = 2):
335
+ self._print_line(
336
+ f"{title}:",
337
+ f"bold underline {Colors.info}",
505
338
  indent=indent,
506
339
  new_lines=new_lines,
507
340
  )
508
341
 
509
- def print_labeled_line(
342
+ def _print_labeled_line(
510
343
  self, label: str, value: str, indent: int = 4, new_lines: int = 0
511
344
  ):
512
- """
513
- Print out a styled, labeled line (label: value) to the console.
514
- The label is bolded and colored with the INFO color,
515
- and the value is italicized.
516
-
517
- :param label: The label of the line.
518
- :param value: The value of the line.
519
- :param indent: The number of spaces to indent the line.
520
- Defaults to 4.
521
- :param new_lines: The number of new lines to print before the line.
522
- Defaults to 0.
523
- """
524
- self.print_line(
525
- value=[label + ":", value],
526
- style=["bold " + Colors.INFO, "italic"],
345
+ self._print_line(
346
+ [label + ":", value],
347
+ ["bold " + Colors.info, "italic"],
527
348
  new_lines=new_lines,
528
349
  indent=indent,
529
350
  )
530
351
 
531
- def print_line(
352
+ def _print_line(
532
353
  self,
533
- value: Union[str, list[str]],
534
- style: Union[str, list[str]] = "",
354
+ value: str | list[str],
355
+ style: str | list[str] = "",
535
356
  indent: int = 0,
536
357
  new_lines: int = 0,
537
358
  ):
538
- """
539
- Print out a a value to the console as a line with optional indentation.
540
-
541
- :param value: The value to print.
542
- :param style: The style to apply to the value.
543
- Defaults to none.
544
- :param indent: The number of spaces to indent the line.
545
- Defaults to 0.
546
- :param new_lines: The number of new lines to print before the value.
547
- Defaults to 0.
548
- """
549
- if not self.enabled:
550
- return
551
-
552
359
  text = Text()
553
-
554
360
  for _ in range(new_lines):
555
361
  text.append("\n")
556
362
 
557
363
  if not isinstance(value, list):
558
364
  value = [value]
559
-
560
365
  if not isinstance(style, list):
561
366
  style = [style for _ in range(len(value))]
562
367
 
563
368
  if len(value) != len(style):
564
369
  raise ValueError(
565
- f"Value and style length mismatch. Value length: {len(value)}, "
566
- f"Style length: {len(style)}."
370
+ f"Value and style length mismatch: {len(value)} vs {len(style)}"
567
371
  )
568
372
 
569
- for val, sty in zip(value, style):
373
+ for val, sty in zip(value, style, strict=False):
570
374
  text.append(val, style=sty)
571
375
 
572
376
  self.console.print(Padding.indent(text, indent))
573
377
 
574
- def print_table(
378
+ def _print_table(
575
379
  self,
576
380
  headers: list[str],
577
381
  rows: list[list[Any]],
578
382
  title: str,
579
- sections: Optional[dict[str, tuple[int, int]]] = None,
580
- max_char_per_col: int = 2**10,
383
+ sections: dict[str, tuple[int, int]] | None = None,
384
+ max_char_per_col: int = 1024,
581
385
  indent: int = 0,
582
386
  new_lines: int = 2,
583
387
  ):
584
- """
585
- Print a table to the console with the given headers and rows.
586
-
587
- :param headers: The headers of the table.
588
- :param rows: The rows of the table.
589
- :param title: The title of the table.
590
- :param sections: The sections of the table grouping columns together.
591
- This is a mapping of the section display name to a tuple of the start and
592
- end column indices. If None, no sections are added (default).
593
- :param max_char_per_col: The maximum number of characters per column.
594
- :param indent: The number of spaces to indent the table.
595
- Defaults to 0.
596
- :param new_lines: The number of new lines to print before the table.
597
- Defaults to 0.
598
- """
599
-
600
388
  if rows and any(len(row) != len(headers) for row in rows):
601
389
  raise ValueError(
602
- f"Headers and rows length mismatch. Headers length: {len(headers)}, "
603
- f"Row length: {len(rows[0]) if rows else 'N/A'}."
390
+ "Headers and rows length mismatch: "
391
+ f"{len(headers)} vs {len(rows[0]) if rows else 'N/A'}"
604
392
  )
605
393
 
606
- max_characters_per_column = self.calculate_max_chars_per_column(
394
+ max_chars_per_column = self._calculate_max_chars_per_column(
607
395
  headers, rows, sections, max_char_per_col
608
396
  )
609
397
 
610
- self.print_section_header(title, indent=indent, new_lines=new_lines)
611
- self.print_table_divider(
612
- max_characters_per_column, include_separators=False, indent=indent
613
- )
398
+ self._print_section_header(title, indent=indent, new_lines=new_lines)
399
+ self._print_table_divider(max_chars_per_column, False, indent)
614
400
  if sections:
615
- self.print_table_sections(
616
- sections, max_characters_per_column, indent=indent
617
- )
618
- self.print_table_row(
619
- split_text_list_by_length(headers, max_characters_per_column),
620
- style=f"bold {Colors.INFO}",
621
- indent=indent,
622
- )
623
- self.print_table_divider(
624
- max_characters_per_column, include_separators=True, indent=indent
401
+ self._print_table_sections(sections, max_chars_per_column, indent)
402
+ self._print_table_row(
403
+ split_text_list_by_length(headers, max_chars_per_column),
404
+ f"bold {Colors.info}",
405
+ indent,
625
406
  )
407
+ self._print_table_divider(max_chars_per_column, True, indent)
626
408
  for row in rows:
627
- self.print_table_row(
628
- split_text_list_by_length(row, max_characters_per_column),
629
- style="italic",
630
- indent=indent,
409
+ self._print_table_row(
410
+ split_text_list_by_length(row, max_chars_per_column),
411
+ "italic",
412
+ indent,
631
413
  )
632
- self.print_table_divider(
633
- max_characters_per_column, include_separators=False, indent=indent
634
- )
414
+ self._print_table_divider(max_chars_per_column, False, indent)
635
415
 
636
- def calculate_max_chars_per_column(
416
+ def _calculate_max_chars_per_column(
637
417
  self,
638
418
  headers: list[str],
639
419
  rows: list[list[Any]],
640
- sections: Optional[dict[str, tuple[int, int]]],
420
+ sections: dict[str, tuple[int, int]] | None,
641
421
  max_char_per_col: int,
642
422
  ) -> list[int]:
643
- """
644
- Calculate the maximum number of characters per column in the table.
645
- This is done by checking the length of the headers, rows, and optional sections
646
- to ensure all columns are accounted for and spaced correctly.
647
-
648
- :param headers: The headers of the table.
649
- :param rows: The rows of the table.
650
- :param sections: The sections of the table grouping columns together.
651
- This is a mapping of the section display name to a tuple of the start and
652
- end column indices. If None, no sections are added (default).
653
- :param max_char_per_col: The maximum number of characters per column.
654
- :return: A list of the maximum number of characters per column.
655
- """
656
- max_characters_per_column = []
423
+ """Calculate maximum characters per column for table formatting."""
424
+ max_chars_per_column = []
657
425
  for ind in range(len(headers)):
658
- max_characters_per_column.append(min(len(headers[ind]), max_char_per_col))
659
-
426
+ max_chars_per_column.append(min(len(headers[ind]), max_char_per_col))
660
427
  for row in rows:
661
- max_characters_per_column[ind] = max(
662
- max_characters_per_column[ind], len(str(row[ind]))
428
+ max_chars_per_column[ind] = max(
429
+ max_chars_per_column[ind], len(str(row[ind]))
663
430
  )
664
431
 
665
432
  if not sections:
666
- return max_characters_per_column
433
+ return max_chars_per_column
667
434
 
668
- for section in sections:
669
- start_col, end_col = sections[section]
670
- min_section_len = len(section) + (
671
- end_col - start_col
672
- ) # ensure we have enough space for separators
435
+ for section, (start_col, end_col) in sections.items():
436
+ min_section_len = len(section) + (end_col - start_col)
673
437
  chars_in_columns = sum(
674
- max_characters_per_column[start_col : end_col + 1]
438
+ max_chars_per_column[start_col : end_col + 1]
675
439
  ) + 2 * (end_col - start_col)
676
440
  if min_section_len > chars_in_columns:
677
441
  add_chars_per_col = math.ceil(
678
442
  (min_section_len - chars_in_columns) / (end_col - start_col + 1)
679
443
  )
680
444
  for col in range(start_col, end_col + 1):
681
- max_characters_per_column[col] += add_chars_per_col
445
+ max_chars_per_column[col] += add_chars_per_col
682
446
 
683
- return max_characters_per_column
447
+ return max_chars_per_column
684
448
 
685
- def print_table_divider(
449
+ def _print_table_divider(
686
450
  self, max_chars_per_column: list[int], include_separators: bool, indent: int = 0
687
451
  ):
688
- """
689
- Print a divider line for the table (top and bottom of table with '=' characters)
690
-
691
- :param max_chars_per_column: The maximum number of characters per column.
692
- :param include_separators: Whether to include separators between columns.
693
- :param indent: The number of spaces to indent the line.
694
- Defaults to 0.
695
- """
452
+ """Print table divider line."""
696
453
  if include_separators:
697
454
  columns = [
698
455
  settings.table_headers_border_char * max_chars
@@ -705,29 +462,15 @@ class GenerativeBenchmarksConsole:
705
462
  settings.table_border_char * (max_chars + 2)
706
463
  for max_chars in max_chars_per_column
707
464
  ]
708
-
709
465
  columns[-1] = columns[-1][:-2]
710
- self.print_line(value=columns, style=Colors.INFO, indent=indent)
466
+ self._print_line(columns, Colors.info, indent)
711
467
 
712
- def print_table_sections(
468
+ def _print_table_sections(
713
469
  self,
714
470
  sections: dict[str, tuple[int, int]],
715
471
  max_chars_per_column: list[int],
716
472
  indent: int = 0,
717
473
  ):
718
- """
719
- Print the sections of the table with corresponding separators to the columns
720
- the sections are mapped to to ensure it is compliant with a CSV format.
721
- For example, a section named "Metadata" with columns 0-3 will print this:
722
- Metadata ,,,,
723
- Where the spaces plus the separators at the end will span the columns 0-3.
724
- All columns must be accounted for in the sections.
725
-
726
- :param sections: The sections of the table.
727
- :param max_chars_per_column: The maximum number of characters per column.
728
- :param indent: The number of spaces to indent the line.
729
- Defaults to 0.
730
- """
731
474
  section_tuples = [(start, end, name) for name, (start, end) in sections.items()]
732
475
  section_tuples.sort(key=lambda x: x[0])
733
476
 
@@ -751,30 +494,23 @@ class GenerativeBenchmarksConsole:
751
494
  end_col - start_col + 1
752
495
  )
753
496
  num_separators = end_col - start_col
754
- line_values.append(section)
755
- line_styles.append("bold " + Colors.INFO)
756
- line_values.append(
757
- " " * (section_length - len(section) - num_separators - 2)
497
+ line_values.extend(
498
+ [
499
+ section,
500
+ " " * (section_length - len(section) - num_separators - 2),
501
+ settings.table_column_separator_char * num_separators,
502
+ settings.table_column_separator_char + " ",
503
+ ]
758
504
  )
759
- line_styles.append("")
760
- line_values.append(settings.table_column_separator_char * num_separators)
761
- line_styles.append("")
762
- line_values.append(settings.table_column_separator_char + " ")
763
- line_styles.append(Colors.INFO)
505
+ line_styles.extend(["bold " + Colors.info, "", "", Colors.info])
506
+
764
507
  line_values = line_values[:-1]
765
508
  line_styles = line_styles[:-1]
766
- self.print_line(value=line_values, style=line_styles, indent=indent)
509
+ self._print_line(line_values, line_styles, indent)
767
510
 
768
- def print_table_row(
511
+ def _print_table_row(
769
512
  self, column_lines: list[list[str]], style: str, indent: int = 0
770
513
  ):
771
- """
772
- Print a single row of a table to the console.
773
-
774
- :param column_lines: The lines of text to print for each column.
775
- :param indent: The number of spaces to indent the line.
776
- Defaults to 0.
777
- """
778
514
  for row in range(len(column_lines[0])):
779
515
  print_line = []
780
516
  print_styles = []
@@ -786,212 +522,224 @@ class GenerativeBenchmarksConsole:
786
522
  " ",
787
523
  ]
788
524
  )
789
- print_styles.extend([style, Colors.INFO, ""])
525
+ print_styles.extend([style, Colors.info, ""])
790
526
  print_line = print_line[:-2]
791
527
  print_styles = print_styles[:-2]
792
- self.print_line(value=print_line, style=print_styles, indent=indent)
528
+ self._print_line(print_line, print_styles, indent)
793
529
 
794
- def print_benchmarks_metadata(self):
795
- """
796
- Print out the metadata of the benchmarks to the console including the run id,
797
- duration, profile, args, worker, request loader, and extras.
798
- """
799
530
 
800
- if not self.benchmarks:
801
- raise ValueError(
802
- "No benchmarks to print metadata for. Please set benchmarks first."
803
- )
531
+ @GenerativeBenchmarkerOutput.register("csv")
532
+ class GenerativeBenchmarkerCSV(GenerativeBenchmarkerOutput):
533
+ """CSV output formatter for benchmark results."""
804
534
 
805
- start_time = self.benchmarks[0].run_stats.start_time
806
- end_time = self.benchmarks[-1].run_stats.end_time
807
- duration = end_time - start_time
535
+ DEFAULT_FILE: ClassVar[str] = "benchmarks.csv"
808
536
 
809
- self.print_section_header(title="Benchmarks Metadata")
810
- self.print_labeled_line(
811
- label="Run id",
812
- value=str(self.benchmarks[0].run_id),
813
- )
814
- self.print_labeled_line(
815
- label="Duration",
816
- value=f"{duration:.1f} seconds",
817
- )
818
- self.print_labeled_line(
819
- label="Profile",
820
- value=self.benchmarks_profile_str,
821
- )
822
- self.print_labeled_line(
823
- label="Args",
824
- value=self.benchmarks_args_str,
825
- )
826
- self.print_labeled_line(
827
- label="Worker",
828
- value=self.benchmarks_worker_desc_str,
829
- )
830
- self.print_labeled_line(
831
- label="Request Loader",
832
- value=self.benchmarks_request_loader_desc_str,
833
- )
834
- self.print_labeled_line(
835
- label="Extras",
836
- value=self.benchmarks_extras_str,
837
- )
537
+ @classmethod
538
+ def validated_kwargs(
539
+ cls, output_path: str | Path | None, **_kwargs
540
+ ) -> dict[str, Any]:
541
+ new_kwargs = {}
542
+ if output_path is not None:
543
+ new_kwargs["output_path"] = (
544
+ Path(output_path) if not isinstance(output_path, Path) else output_path
545
+ )
546
+ return new_kwargs
838
547
 
839
- def print_benchmarks_info(self):
548
+ output_path: Path = Field(default_factory=lambda: Path.cwd())
549
+
550
+ async def finalize(self, report: GenerativeBenchmarksReport) -> Path:
840
551
  """
841
- Print out the benchmark information to the console including the start time,
842
- end time, duration, request totals, and token totals for each benchmark.
552
+ Save the benchmark report as a CSV file.
553
+
554
+ :param report: The completed benchmark report.
555
+ :return: Path to the saved CSV file.
843
556
  """
844
- if not self.benchmarks:
845
- raise ValueError(
846
- "No benchmarks to print info for. Please set benchmarks first."
847
- )
557
+ output_path = self.output_path
558
+ if output_path.is_dir():
559
+ output_path = output_path / GenerativeBenchmarkerCSV.DEFAULT_FILE
560
+ output_path.parent.mkdir(parents=True, exist_ok=True)
848
561
 
849
- sections = {
850
- "Metadata": (0, 3),
851
- "Requests Made": (4, 6),
852
- "Prompt Tok/Req": (7, 9),
853
- "Output Tok/Req": (10, 12),
854
- "Prompt Tok Total": (13, 15),
855
- "Output Tok Total": (16, 18),
856
- }
562
+ with output_path.open("w", newline="") as file:
563
+ writer = csv.writer(file)
564
+ headers: list[str] = []
565
+ rows: list[list[str | float | list[float]]] = []
566
+
567
+ for benchmark in report.benchmarks:
568
+ benchmark_headers: list[str] = []
569
+ benchmark_values: list[str | float | list[float]] = []
570
+
571
+ # Add basic run description info
572
+ desc_headers, desc_values = self._get_benchmark_desc_headers_and_values(
573
+ benchmark
574
+ )
575
+ benchmark_headers.extend(desc_headers)
576
+ benchmark_values.extend(desc_values)
577
+
578
+ # Add status-based metrics
579
+ for status in StatusDistributionSummary.model_fields:
580
+ status_headers, status_values = (
581
+ self._get_benchmark_status_headers_and_values(benchmark, status)
582
+ )
583
+ benchmark_headers.extend(status_headers)
584
+ benchmark_values.extend(status_values)
585
+
586
+ # Add extra fields
587
+ extras_headers, extras_values = (
588
+ self._get_benchmark_extras_headers_and_values(benchmark)
589
+ )
590
+ benchmark_headers.extend(extras_headers)
591
+ benchmark_values.extend(extras_values)
592
+
593
+ if not headers:
594
+ headers = benchmark_headers
595
+ rows.append(benchmark_values)
596
+
597
+ writer.writerow(headers)
598
+ for row in rows:
599
+ writer.writerow(row)
600
+
601
+ return output_path
602
+
603
+ def _get_benchmark_desc_headers_and_values(
604
+ self, benchmark: GenerativeBenchmark
605
+ ) -> tuple[list[str], list[str | float]]:
606
+ """Get description headers and values for a benchmark."""
857
607
  headers = [
858
- "Benchmark",
608
+ "Type",
609
+ "Run Id",
610
+ "Id",
611
+ "Name",
859
612
  "Start Time",
860
613
  "End Time",
861
- "Duration (s)",
862
- "Comp",
863
- "Inc",
864
- "Err",
865
- "Comp",
866
- "Inc",
867
- "Err",
868
- "Comp",
869
- "Inc",
870
- "Err",
871
- "Comp",
872
- "Inc",
873
- "Err",
874
- "Comp",
875
- "Inc",
876
- "Err",
614
+ "Duration",
877
615
  ]
878
- rows = []
616
+ values: list[str | float] = [
617
+ benchmark.type_,
618
+ benchmark.run_id,
619
+ benchmark.id_,
620
+ str(benchmark.scheduler.strategy),
621
+ datetime.fromtimestamp(benchmark.start_time).strftime("%Y-%m-%d %H:%M:%S"),
622
+ datetime.fromtimestamp(benchmark.end_time).strftime("%Y-%m-%d %H:%M:%S"),
623
+ benchmark.duration,
624
+ ]
625
+ return headers, values
879
626
 
880
- for benchmark in self.benchmarks:
881
- rows.append(
882
- [
883
- strategy_display_str(benchmark.args.strategy),
884
- f"{datetime.fromtimestamp(benchmark.start_time).strftime('%H:%M:%S')}",
885
- f"{datetime.fromtimestamp(benchmark.end_time).strftime('%H:%M:%S')}",
886
- f"{(benchmark.end_time - benchmark.start_time):.1f}",
887
- f"{benchmark.request_totals.successful:.0f}",
888
- f"{benchmark.request_totals.incomplete:.0f}",
889
- f"{benchmark.request_totals.errored:.0f}",
890
- f"{benchmark.metrics.prompt_token_count.successful.mean:.1f}",
891
- f"{benchmark.metrics.prompt_token_count.incomplete.mean:.1f}",
892
- f"{benchmark.metrics.prompt_token_count.errored.mean:.1f}",
893
- f"{benchmark.metrics.output_token_count.successful.mean:.1f}",
894
- f"{benchmark.metrics.output_token_count.incomplete.mean:.1f}",
895
- f"{benchmark.metrics.output_token_count.errored.mean:.1f}",
896
- f"{benchmark.metrics.prompt_token_count.successful.total_sum:.0f}",
897
- f"{benchmark.metrics.prompt_token_count.incomplete.total_sum:.0f}",
898
- f"{benchmark.metrics.prompt_token_count.errored.total_sum:.0f}",
899
- f"{benchmark.metrics.output_token_count.successful.total_sum:.0f}",
900
- f"{benchmark.metrics.output_token_count.incomplete.total_sum:.0f}",
901
- f"{benchmark.metrics.output_token_count.errored.total_sum:.0f}",
902
- ]
627
+ def _get_benchmark_status_headers_and_values(
628
+ self, benchmark: GenerativeBenchmark, status: str
629
+ ) -> tuple[list[str], list[float | list[float]]]:
630
+ """Get status-based metrics headers and values for a benchmark."""
631
+ headers = [f"{status.capitalize()} Requests"]
632
+ values = [getattr(benchmark.request_totals, status)]
633
+
634
+ for metric in GenerativeMetrics.model_fields:
635
+ metric_headers, metric_values = self._get_benchmark_status_metrics_stats(
636
+ benchmark, status, metric
903
637
  )
638
+ headers.extend(metric_headers)
639
+ values.extend(metric_values)
904
640
 
905
- self.print_table(
906
- headers=headers, rows=rows, title="Benchmarks Info", sections=sections
907
- )
641
+ return headers, values
908
642
 
909
- def print_benchmarks_stats(self):
910
- """
911
- Print out the benchmark statistics to the console including the requests per
912
- second, request concurrency, output tokens per second, total tokens per second,
913
- request latency, time to first token, inter token latency, and time per output
914
- token for each benchmark.
915
- """
916
- if not self.benchmarks:
917
- raise ValueError(
918
- "No benchmarks to print stats for. Please set benchmarks first."
919
- )
643
+ def _get_benchmark_status_metrics_stats(
644
+ self, benchmark: GenerativeBenchmark, status: str, metric: str
645
+ ) -> tuple[list[str], list[float | list[float]]]:
646
+ """Get statistical metrics for a specific status and metric."""
647
+ status_display = status.capitalize()
648
+ metric_display = metric.replace("_", " ").capitalize()
649
+ status_dist_summary: StatusDistributionSummary = getattr(
650
+ benchmark.metrics, metric
651
+ )
652
+ if not hasattr(status_dist_summary, status):
653
+ return [], []
654
+ dist_summary: DistributionSummary = getattr(status_dist_summary, status)
920
655
 
921
- sections = {
922
- "Metadata": (0, 0),
923
- "Request Stats": (1, 2),
924
- "Out Tok/sec": (3, 3),
925
- "Tot Tok/sec": (4, 4),
926
- "Req Latency (sec)": (5, 7),
927
- "TTFT (ms)": (8, 10),
928
- "ITL (ms)": (11, 13),
929
- "TPOT (ms)": (14, 16),
930
- }
931
656
  headers = [
932
- "Benchmark",
933
- "Per Second",
934
- "Concurrency",
935
- "mean",
936
- "mean",
937
- "mean",
938
- "median",
939
- "p99",
940
- "mean",
941
- "median",
942
- "p99",
943
- "mean",
944
- "median",
945
- "p99",
946
- "mean",
947
- "median",
948
- "p99",
657
+ f"{status_display} {metric_display} mean",
658
+ f"{status_display} {metric_display} median",
659
+ f"{status_display} {metric_display} std dev",
660
+ (
661
+ f"{status_display} {metric_display} "
662
+ "[min, 0.1, 1, 5, 10, 25, 75, 90, 95, 99, max]"
663
+ ),
949
664
  ]
950
- rows = []
665
+ values: list[float | list[float]] = [
666
+ dist_summary.mean,
667
+ dist_summary.median,
668
+ dist_summary.std_dev,
669
+ [
670
+ dist_summary.min,
671
+ dist_summary.percentiles.p001,
672
+ dist_summary.percentiles.p01,
673
+ dist_summary.percentiles.p05,
674
+ dist_summary.percentiles.p10,
675
+ dist_summary.percentiles.p25,
676
+ dist_summary.percentiles.p75,
677
+ dist_summary.percentiles.p90,
678
+ dist_summary.percentiles.p95,
679
+ dist_summary.percentiles.p99,
680
+ dist_summary.max,
681
+ ],
682
+ ]
683
+ return headers, values
951
684
 
952
- for benchmark in self.benchmarks:
953
- rows.append(
954
- [
955
- strategy_display_str(benchmark.args.strategy),
956
- f"{benchmark.metrics.requests_per_second.successful.mean:.2f}",
957
- f"{benchmark.metrics.request_concurrency.successful.mean:.2f}",
958
- f"{benchmark.metrics.output_tokens_per_second.successful.mean:.1f}",
959
- f"{benchmark.metrics.tokens_per_second.successful.mean:.1f}",
960
- f"{benchmark.metrics.request_latency.successful.mean:.2f}",
961
- f"{benchmark.metrics.request_latency.successful.median:.2f}",
962
- f"{benchmark.metrics.request_latency.successful.percentiles.p99:.2f}",
963
- f"{benchmark.metrics.time_to_first_token_ms.successful.mean:.1f}",
964
- f"{benchmark.metrics.time_to_first_token_ms.successful.median:.1f}",
965
- f"{benchmark.metrics.time_to_first_token_ms.successful.percentiles.p99:.1f}",
966
- f"{benchmark.metrics.inter_token_latency_ms.successful.mean:.1f}",
967
- f"{benchmark.metrics.inter_token_latency_ms.successful.median:.1f}",
968
- f"{benchmark.metrics.inter_token_latency_ms.successful.percentiles.p99:.1f}",
969
- f"{benchmark.metrics.time_per_output_token_ms.successful.mean:.1f}",
970
- f"{benchmark.metrics.time_per_output_token_ms.successful.median:.1f}",
971
- f"{benchmark.metrics.time_per_output_token_ms.successful.percentiles.p99:.1f}",
972
- ]
685
+ def _get_benchmark_extras_headers_and_values(
686
+ self,
687
+ benchmark: GenerativeBenchmark,
688
+ ) -> tuple[list[str], list[str]]:
689
+ headers = ["Profile", "Backend", "Generator Data"]
690
+ values: list[str] = [
691
+ benchmark.benchmarker.profile.model_dump_json(),
692
+ json.dumps(benchmark.benchmarker.backend),
693
+ json.dumps(benchmark.benchmarker.requests["data"]),
694
+ ]
695
+
696
+ if len(headers) != len(values):
697
+ raise ValueError("Headers and values length mismatch.")
698
+
699
+ return headers, values
700
+
701
+
702
+ @GenerativeBenchmarkerOutput.register("html")
703
+ class GenerativeBenchmarkerHTML(GenerativeBenchmarkerOutput):
704
+ """HTML output formatter for benchmark results."""
705
+
706
+ DEFAULT_FILE: ClassVar[str] = "benchmarks.html"
707
+
708
+ @classmethod
709
+ def validated_kwargs(
710
+ cls, output_path: str | Path | None, **_kwargs
711
+ ) -> dict[str, Any]:
712
+ new_kwargs = {}
713
+ if output_path is not None:
714
+ new_kwargs["output_path"] = (
715
+ Path(output_path) if not isinstance(output_path, Path) else output_path
973
716
  )
717
+ return new_kwargs
974
718
 
975
- self.print_table(
976
- headers=headers,
977
- rows=rows,
978
- title="Benchmarks Stats",
979
- sections=sections,
980
- )
719
+ output_path: Path = Field(default_factory=lambda: Path.cwd())
981
720
 
982
- def print_full_report(self):
721
+ async def finalize(self, report: GenerativeBenchmarksReport) -> Path:
983
722
  """
984
- Print out the benchmark statistics to the console.
985
- Temporarily enables the console if it's disabled.
723
+ Save the benchmark report as an HTML file.
986
724
 
987
- Format:
988
- - Metadata
989
- - Info
990
- - Stats
725
+ :param report: The completed benchmark report.
726
+ :return: Path to the saved HTML file.
991
727
  """
992
- orig_enabled = self.enabled
993
- self.enabled = True
994
- self.print_benchmarks_metadata()
995
- self.print_benchmarks_info()
996
- self.print_benchmarks_stats()
997
- self.enabled = orig_enabled
728
+ output_path = self.output_path
729
+ if output_path.is_dir():
730
+ output_path = output_path / GenerativeBenchmarkerHTML.DEFAULT_FILE
731
+ output_path.parent.mkdir(parents=True, exist_ok=True)
732
+
733
+ data_builder = UIDataBuilder(report.benchmarks)
734
+ data = data_builder.to_dict()
735
+ camel_data = recursive_key_update(deepcopy(data), camelize_str)
736
+
737
+ ui_api_data = {}
738
+ for k, v in camel_data.items():
739
+ placeholder_key = f"window.{k} = {{}};"
740
+ replacement_value = f"window.{k} = {json.dumps(v, indent=2)};\n"
741
+ ui_api_data[placeholder_key] = replacement_value
742
+
743
+ create_report(ui_api_data, output_path)
744
+
745
+ return output_path
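
The new GenerativeBenchmarkerOutput base class shown above resolves output format keys or file destinations into registered handler instances (console, csv, html, plus the serialized json/yaml form) and writes a report through their async finalize methods. Below is a minimal usage sketch; the helper name write_report_outputs is hypothetical, and the report argument is assumed to be a GenerativeBenchmarksReport produced by a completed benchmark run:

    from pathlib import Path

    from guidellm.benchmark.output import GenerativeBenchmarkerOutput
    from guidellm.benchmark.schemas import GenerativeBenchmarksReport


    async def write_report_outputs(report: GenerativeBenchmarksReport) -> dict:
        # "console" is a registered format key; "benchmarks.csv" is not, so
        # resolve() treats it as a file destination and picks the "csv"
        # handler from its suffix.
        outputs = GenerativeBenchmarkerOutput.resolve(
            output_formats=["console", "benchmarks.csv"],
            output_path=Path.cwd(),
        )
        results = {}
        for name, handler in outputs.items():
            # File-based handlers return the path they wrote; the console
            # handler prints its tables and returns a short summary string.
            results[name] = await handler.finalize(report)
        return results

Running the helper requires an event loop, e.g. asyncio.run(write_report_outputs(report)).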