guidellm-0.3.1-py3-none-any.whl → guidellm-0.6.0a5-py3-none-any.whl

This diff compares the contents of publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (141)
  1. guidellm/__init__.py +5 -2
  2. guidellm/__main__.py +524 -255
  3. guidellm/backends/__init__.py +33 -0
  4. guidellm/backends/backend.py +109 -0
  5. guidellm/backends/openai.py +340 -0
  6. guidellm/backends/response_handlers.py +428 -0
  7. guidellm/benchmark/__init__.py +69 -39
  8. guidellm/benchmark/benchmarker.py +160 -316
  9. guidellm/benchmark/entrypoints.py +560 -127
  10. guidellm/benchmark/outputs/__init__.py +24 -0
  11. guidellm/benchmark/outputs/console.py +633 -0
  12. guidellm/benchmark/outputs/csv.py +721 -0
  13. guidellm/benchmark/outputs/html.py +473 -0
  14. guidellm/benchmark/outputs/output.py +169 -0
  15. guidellm/benchmark/outputs/serialized.py +69 -0
  16. guidellm/benchmark/profiles.py +718 -0
  17. guidellm/benchmark/progress.py +553 -556
  18. guidellm/benchmark/scenarios/__init__.py +40 -0
  19. guidellm/benchmark/scenarios/chat.json +6 -0
  20. guidellm/benchmark/scenarios/rag.json +6 -0
  21. guidellm/benchmark/schemas/__init__.py +66 -0
  22. guidellm/benchmark/schemas/base.py +402 -0
  23. guidellm/benchmark/schemas/generative/__init__.py +55 -0
  24. guidellm/benchmark/schemas/generative/accumulator.py +841 -0
  25. guidellm/benchmark/schemas/generative/benchmark.py +163 -0
  26. guidellm/benchmark/schemas/generative/entrypoints.py +381 -0
  27. guidellm/benchmark/schemas/generative/metrics.py +927 -0
  28. guidellm/benchmark/schemas/generative/report.py +158 -0
  29. guidellm/data/__init__.py +34 -4
  30. guidellm/data/builders.py +541 -0
  31. guidellm/data/collators.py +16 -0
  32. guidellm/data/config.py +120 -0
  33. guidellm/data/deserializers/__init__.py +49 -0
  34. guidellm/data/deserializers/deserializer.py +141 -0
  35. guidellm/data/deserializers/file.py +223 -0
  36. guidellm/data/deserializers/huggingface.py +94 -0
  37. guidellm/data/deserializers/memory.py +194 -0
  38. guidellm/data/deserializers/synthetic.py +246 -0
  39. guidellm/data/entrypoints.py +52 -0
  40. guidellm/data/loaders.py +190 -0
  41. guidellm/data/preprocessors/__init__.py +27 -0
  42. guidellm/data/preprocessors/formatters.py +410 -0
  43. guidellm/data/preprocessors/mappers.py +196 -0
  44. guidellm/data/preprocessors/preprocessor.py +30 -0
  45. guidellm/data/processor.py +29 -0
  46. guidellm/data/schemas.py +175 -0
  47. guidellm/data/utils/__init__.py +6 -0
  48. guidellm/data/utils/dataset.py +94 -0
  49. guidellm/extras/__init__.py +4 -0
  50. guidellm/extras/audio.py +220 -0
  51. guidellm/extras/vision.py +242 -0
  52. guidellm/logger.py +2 -2
  53. guidellm/mock_server/__init__.py +8 -0
  54. guidellm/mock_server/config.py +84 -0
  55. guidellm/mock_server/handlers/__init__.py +17 -0
  56. guidellm/mock_server/handlers/chat_completions.py +280 -0
  57. guidellm/mock_server/handlers/completions.py +280 -0
  58. guidellm/mock_server/handlers/tokenizer.py +142 -0
  59. guidellm/mock_server/models.py +510 -0
  60. guidellm/mock_server/server.py +238 -0
  61. guidellm/mock_server/utils.py +302 -0
  62. guidellm/scheduler/__init__.py +69 -26
  63. guidellm/scheduler/constraints/__init__.py +49 -0
  64. guidellm/scheduler/constraints/constraint.py +325 -0
  65. guidellm/scheduler/constraints/error.py +411 -0
  66. guidellm/scheduler/constraints/factory.py +182 -0
  67. guidellm/scheduler/constraints/request.py +312 -0
  68. guidellm/scheduler/constraints/saturation.py +722 -0
  69. guidellm/scheduler/environments.py +252 -0
  70. guidellm/scheduler/scheduler.py +137 -368
  71. guidellm/scheduler/schemas.py +358 -0
  72. guidellm/scheduler/strategies.py +617 -0
  73. guidellm/scheduler/worker.py +413 -419
  74. guidellm/scheduler/worker_group.py +712 -0
  75. guidellm/schemas/__init__.py +65 -0
  76. guidellm/schemas/base.py +417 -0
  77. guidellm/schemas/info.py +188 -0
  78. guidellm/schemas/request.py +235 -0
  79. guidellm/schemas/request_stats.py +349 -0
  80. guidellm/schemas/response.py +124 -0
  81. guidellm/schemas/statistics.py +1018 -0
  82. guidellm/{config.py → settings.py} +31 -24
  83. guidellm/utils/__init__.py +71 -8
  84. guidellm/utils/auto_importer.py +98 -0
  85. guidellm/utils/cli.py +132 -5
  86. guidellm/utils/console.py +566 -0
  87. guidellm/utils/encoding.py +778 -0
  88. guidellm/utils/functions.py +159 -0
  89. guidellm/utils/hf_datasets.py +1 -2
  90. guidellm/utils/hf_transformers.py +4 -4
  91. guidellm/utils/imports.py +9 -0
  92. guidellm/utils/messaging.py +1118 -0
  93. guidellm/utils/mixins.py +115 -0
  94. guidellm/utils/random.py +3 -4
  95. guidellm/utils/registry.py +220 -0
  96. guidellm/utils/singleton.py +133 -0
  97. guidellm/utils/synchronous.py +159 -0
  98. guidellm/utils/text.py +163 -50
  99. guidellm/utils/typing.py +41 -0
  100. guidellm/version.py +2 -2
  101. guidellm-0.6.0a5.dist-info/METADATA +364 -0
  102. guidellm-0.6.0a5.dist-info/RECORD +109 -0
  103. guidellm/backend/__init__.py +0 -23
  104. guidellm/backend/backend.py +0 -259
  105. guidellm/backend/openai.py +0 -708
  106. guidellm/backend/response.py +0 -136
  107. guidellm/benchmark/aggregator.py +0 -760
  108. guidellm/benchmark/benchmark.py +0 -837
  109. guidellm/benchmark/output.py +0 -997
  110. guidellm/benchmark/profile.py +0 -409
  111. guidellm/benchmark/scenario.py +0 -104
  112. guidellm/data/prideandprejudice.txt.gz +0 -0
  113. guidellm/dataset/__init__.py +0 -22
  114. guidellm/dataset/creator.py +0 -213
  115. guidellm/dataset/entrypoints.py +0 -42
  116. guidellm/dataset/file.py +0 -92
  117. guidellm/dataset/hf_datasets.py +0 -62
  118. guidellm/dataset/in_memory.py +0 -132
  119. guidellm/dataset/synthetic.py +0 -287
  120. guidellm/objects/__init__.py +0 -18
  121. guidellm/objects/pydantic.py +0 -89
  122. guidellm/objects/statistics.py +0 -953
  123. guidellm/preprocess/__init__.py +0 -3
  124. guidellm/preprocess/dataset.py +0 -374
  125. guidellm/presentation/__init__.py +0 -28
  126. guidellm/presentation/builder.py +0 -27
  127. guidellm/presentation/data_models.py +0 -232
  128. guidellm/presentation/injector.py +0 -66
  129. guidellm/request/__init__.py +0 -18
  130. guidellm/request/loader.py +0 -284
  131. guidellm/request/request.py +0 -79
  132. guidellm/request/types.py +0 -10
  133. guidellm/scheduler/queues.py +0 -25
  134. guidellm/scheduler/result.py +0 -155
  135. guidellm/scheduler/strategy.py +0 -495
  136. guidellm-0.3.1.dist-info/METADATA +0 -329
  137. guidellm-0.3.1.dist-info/RECORD +0 -62
  138. {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/WHEEL +0 -0
  139. {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/entry_points.txt +0 -0
  140. {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/licenses/LICENSE +0 -0
  141. {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/top_level.txt +0 -0
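For downstream code, the moves in this list imply import-path migrations. Below is a minimal sketch of the two most visible ones, assuming the public names are re-exported as shown in the diff that follows (the full re-export surface of 0.6.0a5 is not verified here):

# 0.3.1
#   from guidellm.backend import Backend, BackendType
#   from guidellm.config import settings

# 0.6.0a5 (guidellm/backend/ -> guidellm/backends/, config.py -> settings.py)
from guidellm.backends import Backend, BackendType
from guidellm.settings import settings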
guidellm/benchmark/entrypoints.py
@@ -1,165 +1,598 @@
-from collections.abc import Iterable
+"""
+Primary interface for executing and re-importing generative text benchmarks.
+
+This module orchestrates comprehensive benchmarking workflows by coordinating backend
+initialization, data loading, profile configuration, and output generation. It provides
+two main entry points: `benchmark_generative_text` for executing new benchmarks and
+`reimport_benchmarks_report` for re-exporting existing results. The resolution functions
+convert user-provided arguments into fully configured components, handling backend
+validation, data preprocessing, profile constraints, and output format specifications.
+"""
+
+from __future__ import annotations
+
+from collections.abc import Callable, Mapping, MutableMapping
 from pathlib import Path
-from typing import Any, Literal, Optional, Union
+from typing import Any, Literal
 
-from datasets import Dataset, DatasetDict, IterableDataset, IterableDatasetDict
-from transformers import (  # type: ignore[import]
-    PreTrainedTokenizerBase,
-)
+from torch.utils.data import Sampler
+from transformers import PreTrainedTokenizerBase
+from typing_extensions import TypeAliasType
 
-from guidellm.backend import Backend, BackendType
-from guidellm.benchmark.benchmarker import GenerativeBenchmarker
-from guidellm.benchmark.output import (
-    GenerativeBenchmarksConsole,
+from guidellm.backends import Backend, BackendType
+from guidellm.benchmark.benchmarker import Benchmarker
+from guidellm.benchmark.outputs import (
+    GenerativeBenchmarkerConsole,
+    GenerativeBenchmarkerOutput,
+)
+from guidellm.benchmark.profiles import Profile, ProfileType
+from guidellm.benchmark.progress import GenerativeConsoleBenchmarkerProgress
+from guidellm.benchmark.schemas import (
+    BenchmarkGenerativeTextArgs,
+    GenerativeBenchmark,
+    GenerativeBenchmarkAccumulator,
     GenerativeBenchmarksReport,
 )
-from guidellm.benchmark.profile import ProfileType, create_profile
-from guidellm.benchmark.progress import GenerativeTextBenchmarkerProgressDisplay
-from guidellm.benchmark.scenario import GenerativeTextScenario, Scenario
-from guidellm.request import GenerativeRequestLoader
-from guidellm.scheduler import StrategyType
+from guidellm.benchmark.schemas.base import TransientPhaseConfig
+from guidellm.data import (
+    DataLoader,
+    DatasetPreprocessor,
+    GenerativeRequestCollator,
+    PreprocessorRegistry,
+    ProcessorFactory,
+    RequestFormatter,
+)
+from guidellm.data.preprocessors import GenerativeColumnMapper
+from guidellm.scheduler import (
+    ConstraintInitializer,
+    NonDistributedEnvironment,
+    StrategyType,
+)
+from guidellm.schemas import GenerationRequest, GenerationResponse
+from guidellm.settings import settings
+from guidellm.utils import Console, InfoMixin
+
+__all__ = [
+    "benchmark_generative_text",
+    "reimport_benchmarks_report",
+]
+
+
+# Type Aliases
+
+OutputFormatT = TypeAliasType(
+    "OutputFormatT",
+    tuple[str, ...]
+    | list[str]
+    | Mapping[str, str | dict[str, Any] | GenerativeBenchmarkerOutput]
+    | None,
+)
+"""Output format specification as strings, mappings, or configured output instances"""
+
+ProcessorInputT = TypeAliasType("ProcessorInputT", str | Path | PreTrainedTokenizerBase)
+"""Processor input as model identifier, path to tokenizer, or tokenizer instance"""
+
+
+# Helper Functions
+
+
+async def resolve_backend(
+    backend: BackendType | Backend,
+    target: str,
+    model: str | None,
+    console: Console | None = None,
+    **backend_kwargs: dict[str, Any],
+) -> tuple[Backend, str]:
+    """
+    Initialize and validate a backend instance for benchmarking execution.
+
+    Handles backend creation from type identifiers or pre-configured instances,
+    performs startup validation, and resolves the default model if not specified.
+    The backend is shut down after validation to ensure clean state for subsequent
+    benchmark execution.
+
+    :param backend: Backend type identifier or pre-configured Backend instance
+    :param target: Target endpoint URL or connection string for the backend
+    :param model: Model identifier to use with the backend, or None to use default
+    :param console: Console instance for progress reporting, or None
+    :param backend_kwargs: Additional keyword arguments passed to backend initialization
+    :return: Tuple of initialized Backend instance and resolved model identifier
+    """
+    console_step = (
+        console.print_update_step(title=f"Initializing backend {backend}")
+        if console
+        else None
+    )
+    backend_instance = (
+        Backend.create(backend, target=target, model=model, **(backend_kwargs or {}))
+        if not isinstance(backend, Backend)
+        else backend
+    )
+
+    if console_step:
+        console_step.update(
+            f"{backend_instance.__class__.__name__} backend initialized"
+        )
+
+    await backend_instance.process_startup()
+    await backend_instance.validate()
+
+    if model is None:
+        if console_step:
+            console_step.update(
+                title="Resolving default model from backend.default_model",
+                status_level="info",
+            )
+        model = await backend_instance.default_model()
 
+    await backend_instance.process_shutdown()
 
-async def benchmark_with_scenario(scenario: Scenario, **kwargs):
+    if console_step:
+        console_step.finish(
+            title=(
+                f"{backend_instance.__class__.__name__} backend validated "
+                f"with model {model}"
+            ),
+            details=backend_instance.info,
+            status_level="success",
+        )
+
+    return backend_instance, model
+
+
+async def resolve_processor(
+    processor: ProcessorInputT | None,
+    model: str | None,
+    console: Console | None = None,
+) -> ProcessorInputT | None:
     """
-    Run a benchmark using a scenario and specify any extra arguments
+    Resolve the tokenization processor, defaulting to model if not provided.
+
+    :param processor: Processor identifier, path, tokenizer instance, or None
+    :param model: Model identifier to use as fallback processor
+    :param console: Console instance for progress reporting, or None
+    :return: Resolved processor or None if neither processor nor model provided
     """
+    console_step = (
+        console.print_update_step(title=f"Resolving processor {processor}")
+        if console
+        else None
+    )
 
-    if isinstance(scenario, GenerativeTextScenario):
-        return await benchmark_generative_text(**vars(scenario), **kwargs)
+    if processor is not None:
+        if console_step:
+            console_step.finish(
+                title="Processor resolved",
+                details=f"Using processor '{processor}'",
+                status_level="success",
+            )
     else:
-        raise ValueError(f"Unsupported Scenario type {type(scenario)}")
+        processor = model
+        if console_step:
+            console_step.finish(
+                title="Processor resolved",
+                details=f"Using model '{processor}' as processor",
+                status_level="success",
+            )
 
+    return processor
 
-async def benchmark_generative_text(
-    target: str,
-    backend_type: BackendType,
-    backend_args: Optional[dict[str, Any]],
-    model: Optional[str],
-    processor: Optional[Optional[Union[str, Path, PreTrainedTokenizerBase]]],
-    processor_args: Optional[dict[str, Any]],
-    data: Union[
-        str,
-        Path,
-        Iterable[Union[str, dict[str, Any]]],
-        Dataset,
-        DatasetDict,
-        IterableDataset,
-        IterableDatasetDict,
-    ],
-    data_args: Optional[dict[str, Any]],
-    data_sampler: Optional[Literal["random"]],
-    rate_type: Union[StrategyType, ProfileType],
-    rate: Optional[Union[float, list[float]]],
-    max_seconds: Optional[float],
-    max_requests: Optional[int],
-    warmup_percent: Optional[float],
-    cooldown_percent: Optional[float],
-    output_path: Optional[Union[str, Path]],
-    output_extras: Optional[dict[str, Any]],
-    output_sampling: Optional[int],
+
+async def resolve_request_loader(
+    data: list[Any],
+    model: str,
+    data_args: list[dict[str, Any]] | None,
+    data_samples: int,
+    processor: ProcessorInputT | None,
+    processor_args: dict[str, Any] | None,
+    data_column_mapper: (
+        DatasetPreprocessor
+        | dict[str, str | list[str]]
+        | Literal["generative_column_mapper"]
+    ),
+    data_request_formatter: (RequestFormatter | dict[str, str] | str),
+    data_collator: Callable | Literal["generative"] | None,
+    data_sampler: Sampler[int] | Literal["shuffle"] | None,
+    data_num_workers: int | None,
    random_seed: int,
-    show_progress: bool = True,
-    show_progress_scheduler_stats: bool = False,
-    output_console: bool = True,
-) -> tuple[GenerativeBenchmarksReport, Optional[Path]]:
-    console = GenerativeBenchmarksConsole(enabled=show_progress)
-    console.print_line("Creating backend...")
-    backend = Backend.create(
-        backend_type, target=target, model=model, **(backend_args or {})
-    )
-    await backend.validate()
-    console.print_line(
-        f"Backend {backend_type} connected to {target} for model {backend.model}."
+    console: Console | None = None,
+    **dataloader_kwargs: dict[str, Any] | None,
+) -> DataLoader[GenerationRequest]:
+    """
+    Construct a DataLoader for GenerationRequest objects from raw data inputs.
+
+    Initializes and configures the data pipeline including column mapping, request
+    formatting, collation, and sampling. Resolves string-based preprocessor identifiers
+    from the PreprocessorRegistry and creates appropriate instances with provided
+    configurations.
+
+    :param data: List of data sources to load requests from
+    :param model: Model identifier for request formatting
+    :param data_args: Arguments for each data source in the data list
+    :param data_samples: Number of samples to draw from the dataset
+    :param processor: Processor for tokenization operations
+    :param processor_args: Arguments for processor initialization
+    :param data_column_mapper: Preprocessor or mapping for standardizing column names
+    :param data_request_formatter: Preprocessor or config for formatting requests
+    :param data_collator: Collation function or type for batching requests
+    :param data_sampler: Sampler instance or type for data sampling
+    :param data_num_workers: Number of worker processes for data loading
+    :param random_seed: Seed for reproducible random operations
+    :param console: Console instance for progress reporting, or None
+    :param dataloader_kwargs: Additional arguments passed to DataLoader initialization
+    :return: Configured DataLoader instance for GenerationRequest objects
+    :raises ValueError: If request formatter type is not registered in
+        PreprocessorRegistry
+    :raises TypeError: If registered request formatter is not a RequestFormatter
+        subclass
+    """
+    console_step = (
+        console.print_update_step(title=f"Initializing request loader from {data}")
+        if console
+        else None
     )
 
-    if processor is None:
-        processor = backend.model
+    data_column_mapper_instance: DatasetPreprocessor
+    if isinstance(data_column_mapper, DatasetPreprocessor):
+        data_column_mapper_instance = data_column_mapper
+    else:
+        column_mappings = (
+            data_column_mapper if isinstance(data_column_mapper, dict) else None
+        )
+        data_column_mapper_instance = GenerativeColumnMapper(
+            column_mappings=column_mappings  # type: ignore[arg-type]
+        )
+
+    data_request_formatter_instance: RequestFormatter
+    if isinstance(data_request_formatter, RequestFormatter):
+        data_request_formatter_instance = data_request_formatter
+    else:
+        if isinstance(data_request_formatter, str):
+            request_type = data_request_formatter
+            formatter_kwargs: dict[str, Any] = {}
+        else:
+            # Extract request_type from formatter dictionary
+            formatter_dict = dict(data_request_formatter)
+            request_type = formatter_dict.pop("request_type", settings.preferred_route)
+            formatter_kwargs = formatter_dict
 
-    console.print_line("Creating request loader...")
-    request_loader = GenerativeRequestLoader(
+        if (
+            formatter_class := PreprocessorRegistry.get_registered_object(request_type)
+        ) is None:
+            raise ValueError(
+                f"Request formatter '{request_type}' is not registered in the "
+                f"PreprocessorRegistry."
+            )
+        if not issubclass(formatter_class, RequestFormatter):
+            raise TypeError(
+                f"Request formatter '{request_type}' is not a subclass of "
+                f"RequestFormatter."
+            )
+
+        data_request_formatter_instance = formatter_class(
+            model=model,
+            **formatter_kwargs,
+        )
+
+    # Cast to proper types for the DataLoader preprocessors list
+    preprocessors_list: list[DatasetPreprocessor] = [
+        data_column_mapper_instance,
+        data_request_formatter_instance,
+    ]
+
+    request_loader: DataLoader[GenerationRequest] = DataLoader(
         data=data,
         data_args=data_args,
-        processor=processor,
-        processor_args=processor_args,
-        shuffle=data_sampler == "random",
-        iter_type=(
-            "finite"  # assume a finite dataset is our limit
-            if max_requests is None and max_seconds is None
-            else "infinite"  # default to infinite so we don't run out of data
+        data_samples=data_samples,
+        processor_factory=ProcessorFactory(
+            processor=processor if processor is not None else model,
+            processor_args=processor_args,
         ),
+        preprocessors=preprocessors_list,
+        collator=(
+            data_collator if callable(data_collator) else GenerativeRequestCollator()
+        ),
+        sampler=data_sampler,
+        num_workers=data_num_workers,
         random_seed=random_seed,
+        **(dataloader_kwargs or {}),
     )
-    unique_requests = request_loader.num_unique_items(raise_err=False)
-    console.print_line(
-        f"Created loader with {unique_requests} unique requests from {data}.\n\n"
-        if unique_requests > 0
-        else f"Created loader with unknown number unique requests from {data}.\n\n"
+
+    if console_step:
+        console_step.finish(
+            title=(
+                f"Request loader initialized with "
+                f"{data_samples if data_samples > 0 else 'inf'} "
+                "unique requests"
+            ),
+            details=InfoMixin.extract_from_obj(request_loader),
+            status_level="success",
+        )
+
+    return request_loader
+
+
+async def resolve_profile(
+    profile: StrategyType | ProfileType | Profile,
+    rate: list[float] | None,
+    random_seed: int,
+    rampup: float,
+    constraints: MutableMapping[str, ConstraintInitializer | Any],
+    max_seconds: int | float | None,
+    max_requests: int | None,
+    max_errors: int | None,
+    max_error_rate: float | None,
+    max_global_error_rate: float | None,
+    over_saturation: dict[str, Any] | None = None,
+    console: Console | None = None,
+) -> Profile:
+    """
+    Resolve and configure a benchmark profile with rate and constraint settings.
+
+    Constructs a Profile instance from type identifiers or validates pre-configured
+    profiles. Constraint parameters are merged into the constraints dictionary before
+    profile creation.
+
+    :param profile: Profile type identifier or pre-configured Profile instance
+    :param rate: Request rate(s) for the benchmark execution
+    :param random_seed: Seed for reproducible random operations
+    :param rampup: Ramp-up duration for the benchmark execution
+        (passed to the profile as rampup_duration)
+    :param constraints: Dictionary of constraint initializers for benchmark limits
+    :param max_seconds: Maximum duration in seconds for the benchmark
+    :param max_requests: Maximum number of requests to process
+    :param max_errors: Maximum number of errors before stopping
+    :param max_error_rate: Maximum error rate threshold before stopping
+    :param max_global_error_rate: Maximum global error rate threshold before stopping
+    :param over_saturation: Over-saturation detection configuration (dict)
+    :param console: Console instance for progress reporting, or None
+    :return: Configured Profile instance ready for benchmarking
+    :raises ValueError: If constraints are provided with a pre-configured Profile
+    """
+    console_step = (
+        console.print_update_step(title=f"Resolving profile {profile}")
+        if console
+        else None
     )
 
-    profile = create_profile(rate_type=rate_type, rate=rate)
-    benchmarker = GenerativeBenchmarker(
-        backend=backend,
-        request_loader=request_loader,
-        request_loader_description=request_loader.description,
-        benchmark_save_extras=output_extras,
+    for key, val in {
+        "max_seconds": max_seconds,
+        "max_requests": max_requests,
+        "max_errors": max_errors,
+        "max_error_rate": max_error_rate,
+        "max_global_error_rate": max_global_error_rate,
+        "over_saturation": over_saturation,
+    }.items():
+        if val is not None:
+            constraints[key] = val
+
+    if not isinstance(profile, Profile):
+        profile = Profile.create(
+            rate_type=profile,
+            rate=rate,
+            random_seed=random_seed,
+            rampup_duration=rampup,
+            constraints={**constraints},
+        )
+    elif constraints:
+        raise ValueError(
+            "Constraints must be empty when providing a Profile instance. "
+            f"Provided constraints: {constraints} ; provided profile: {profile}"
+        )
+    elif rampup > 0.0:
+        raise ValueError(
+            "Ramp-up duration must not be set when providing a Profile instance. "
+            f"Provided rampup: {rampup} ; provided profile: {profile}"
+        )
+
+    if console_step:
+        console_step.finish(
+            title=f"{profile.__class__.__name__} profile resolved",
+            details=InfoMixin.extract_from_obj(profile),
+            status_level="success",
+        )
+
+    return profile
+
+
+async def resolve_output_formats(
+    outputs: list[str] | tuple[str],
+    output_dir: str | Path | None,
+    console: Console | None = None,
+) -> dict[str, GenerativeBenchmarkerOutput]:
+    """
+    Resolve output format specifications into configured output handler instances.
+
+    :param outputs: Specification of desired output files/types
+    :param output_dir: Base path for output file generation, or None for default
+    :param console: Console instance for progress reporting, or None
+    :return: Dictionary mapping format names to configured output handler instances
+    """
+    console_step = (
+        console.print_update_step(title="Resolving output formats") if console else None
+    )
+
+    resolved = GenerativeBenchmarkerOutput.resolve(
+        outputs=outputs, output_dir=output_dir
+    )
+
+    if console_step:
+        console_step.finish(
+            title="Output formats resolved",
+            details={key: str(val) for key, val in resolved.items()},
+            status_level="success",
+        )
+
+    return resolved
+
+
+# Main Entrypoints Functions
+
+
+async def benchmark_generative_text(
+    args: BenchmarkGenerativeTextArgs,
+    progress: GenerativeConsoleBenchmarkerProgress | None = None,
+    console: Console | None = None,
+    **constraints: str | ConstraintInitializer | Any,
+) -> tuple[GenerativeBenchmarksReport, dict[str, Any]]:
+    """
+    Execute a comprehensive generative text benchmarking workflow.
+
+    Orchestrates the full benchmarking pipeline by resolving all components from
+    provided arguments, executing benchmark runs across configured profiles, and
+    finalizing results in specified output formats. Components include backend
+    initialization, data loading, profile configuration, and output generation.
+
+    :param args: Configuration arguments for the benchmark execution
+    :param progress: Progress tracker for benchmark execution, or None for no tracking
+    :param console: Console instance for status reporting, or None for silent operation
+    :param constraints: Additional constraint initializers for benchmark limits
+    :return: Tuple of GenerativeBenchmarksReport and dictionary of output format
+        results
+    """
+    backend, model = await resolve_backend(
+        backend=args.backend,
+        target=args.target,
+        model=args.model,
+        console=console,
+        **(args.backend_kwargs or {}),
+    )
+    processor = await resolve_processor(
+        processor=args.processor, model=model, console=console
+    )
+    request_loader = await resolve_request_loader(
+        data=args.data,
+        model=model,
+        data_args=args.data_args,
+        data_samples=args.data_samples,
         processor=processor,
-        processor_args=processor_args,
+        processor_args=args.processor_args,
+        data_column_mapper=args.data_column_mapper,
+        data_request_formatter=args.data_request_formatter,
+        data_collator=args.data_collator,
+        data_sampler=args.data_sampler,
+        data_num_workers=args.data_num_workers,
+        random_seed=args.random_seed,
+        console=console,
+        **(args.dataloader_kwargs or {}),
     )
-    progress = (
-        GenerativeTextBenchmarkerProgressDisplay(
-            display_scheduler_stats=show_progress_scheduler_stats
+
+    warmup = TransientPhaseConfig.create_from_value(args.warmup)
+    cooldown = TransientPhaseConfig.create_from_value(args.cooldown)
+    if console:
+        console.print_update(
+            title="Resolved transient phase configurations",
+            details="\n".join(
+                [
+                    f"Warmup: {warmup}",
+                    f"Cooldown: {cooldown}",
+                    f"Rampup (Throughput/Concurrent): {args.rampup}",
+                ]
+            ),
+            status="success",
         )
-        if show_progress
-        else None
+
+    profile = await resolve_profile(
+        profile=args.profile,
+        rate=args.rate,
+        random_seed=args.random_seed,
+        rampup=args.rampup,
+        constraints=constraints,
+        max_seconds=args.max_seconds,
+        max_requests=args.max_requests,
+        max_errors=args.max_errors,
+        max_error_rate=args.max_error_rate,
+        max_global_error_rate=args.max_global_error_rate,
+        over_saturation=args.over_saturation,
+        console=console,
     )
-    report = GenerativeBenchmarksReport()
+    output_formats = await resolve_output_formats(
+        outputs=args.outputs, output_dir=args.output_dir, console=console
+    )
+
+    report = GenerativeBenchmarksReport(args=args)
+    if console:
+        console.print_update(
+            title="Setup complete, starting benchmarks...", status="success"
+        )
+        console.print("\n\n")
 
-    async for result in benchmarker.run(
+    benchmarker: Benchmarker[
+        GenerativeBenchmark, GenerationRequest, GenerationResponse
+    ] = Benchmarker()
+    async for benchmark in benchmarker.run(
+        accumulator_class=GenerativeBenchmarkAccumulator,
+        benchmark_class=GenerativeBenchmark,
+        requests=request_loader,
+        backend=backend,
         profile=profile,
-        max_number_per_strategy=max_requests,
-        max_duration_per_strategy=max_seconds,
-        warmup_percent_per_strategy=warmup_percent,
-        cooldown_percent_per_strategy=cooldown_percent,
+        environment=NonDistributedEnvironment(),
+        progress=progress,
+        sample_requests=args.sample_requests,
+        warmup=warmup,
+        cooldown=cooldown,
+        prefer_response_metrics=args.prefer_response_metrics,
     ):
-        if progress:
-            progress.update(result)
-
-        if result.type_ == "benchmark_compiled":
-            if result.current_benchmark is None:
-                raise ValueError("Current benchmark is None")
-            report.benchmarks.append(
-                result.current_benchmark.set_sample_size(output_sampling)
-            )
-
-    if output_console:
-        console.benchmarks = report.benchmarks
-        console.print_full_report()
+        if benchmark:
+            report.benchmarks.append(benchmark)
 
-    if output_path:
-        console.print_line("\nSaving benchmarks report...")
-        saved_path = report.save_file(output_path)
-        console.print_line(f"Benchmarks report saved to {saved_path}")
-    else:
-        saved_path = None
+    output_format_results = {}
+    for key, output in output_formats.items():
+        output_result = await output.finalize(report)
+        output_format_results[key] = output_result
 
-    console.print_line("\nBenchmarking complete.")
+    if console:
+        await GenerativeBenchmarkerConsole(console=console).finalize(report)
+        console.print("\n\n")
+        console.print_update(
+            title=(
+                "Benchmarking complete, generated "
+                f"{len(report.benchmarks)} benchmark(s)"
+            ),
+            status="success",
+        )
+        for key, value in output_format_results.items():
+            console.print_update(title=f" {key:<8}: {value}", status="debug")
 
-    return report, saved_path
+    return report, output_format_results
 
 
-def reimport_benchmarks_report(file: Path, output_path: Optional[Path]) -> None:
+async def reimport_benchmarks_report(
+    file: Path,
+    output_path: Path | None,
+    output_formats: OutputFormatT = ("console", "json", "html", "csv"),
+) -> tuple[GenerativeBenchmarksReport, dict[str, Any]]:
     """
-    The command-line entry point for re-importing and displaying an
-    existing benchmarks report. Can also specify
-    Assumes the file provided exists.
+    Load and re-export an existing benchmarks report in specified output formats.
+
+    :param file: Path to the existing benchmark report file to load
+    :param output_path: Base path for output file generation, or None for default
+    :param output_formats: Specification of desired output formats for the report
+    :return: Tuple of loaded GenerativeBenchmarksReport and dictionary of output
+        results
     """
-    console = GenerativeBenchmarksConsole(enabled=True)
-    report = GenerativeBenchmarksReport.load_file(file)
-    console.benchmarks = report.benchmarks
-    console.print_full_report()
-
-    if output_path:
-        console.print_line("\nSaving benchmarks report...")
-        saved_path = report.save_file(output_path)
-        console.print_line(f"Benchmarks report saved to {saved_path}")
+    console = Console()
+
+    with console.print_update_step(
+        title=f"Loading benchmarks from {file}..."
+    ) as console_step:
+        report = GenerativeBenchmarksReport.load_file(file)
+        console_step.finish(
+            "Import of old benchmarks complete;"
+            f" loaded {len(report.benchmarks)} benchmark(s)"
+        )
+
+    resolved_output_formats = await resolve_output_formats(
+        output_formats,  # type: ignore[arg-type]
+        output_path,
+        console=console,
+    )
+    output_format_results = {}
+    for key, output in resolved_output_formats.items():
+        output_result = await output.finalize(report)
+        output_format_results[key] = output_result
+
+    for key, value in output_format_results.items():
+        console.print_update(title=f" {key:<8}: {value}", status="debug")
+
+    return report, output_format_results
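
Taken together, this hunk replaces the flat keyword-argument entrypoint with an args-object API. The following is a minimal usage sketch derived only from the signatures and args.* accesses visible above; the exact constructor of BenchmarkGenerativeTextArgs (required fields, defaults) is an assumption, as are the illustrative values:

import asyncio
from pathlib import Path

from guidellm.benchmark.entrypoints import (
    benchmark_generative_text,
    reimport_benchmarks_report,
)
from guidellm.benchmark.schemas import BenchmarkGenerativeTextArgs
from guidellm.utils import Console


async def main() -> None:
    # Field names mirror the args.* accesses in the diff; values are illustrative.
    args = BenchmarkGenerativeTextArgs(
        target="http://localhost:8000",  # args.target: endpoint URL
        data=["prompts.jsonl"],          # args.data: list of data sources
        profile="synchronous",           # args.profile: StrategyType/ProfileType id
        max_seconds=60,                  # merged into the profile's constraints
    )
    # Both entrypoints are now async and return (report, output_format_results).
    report, output_results = await benchmark_generative_text(args, console=Console())
    print(f"{len(report.benchmarks)} benchmark(s); outputs: {output_results}")

    # Re-export a previously saved report in selected formats.
    await reimport_benchmarks_report(
        file=Path("benchmarks.json"),
        output_path=None,
        output_formats=("console", "json"),
    )


asyncio.run(main())

Note the contract change from 0.3.1: the old function returned (report, saved_path) and handled console printing itself, while the new one delegates all rendering to resolved GenerativeBenchmarkerOutput handlers and returns their results keyed by format name.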