guidellm 0.4.0a18__py3-none-any.whl → 0.4.0a155__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of guidellm might be problematic.

Files changed (116)
  1. guidellm/__init__.py +5 -2
  2. guidellm/__main__.py +451 -252
  3. guidellm/backends/__init__.py +33 -0
  4. guidellm/backends/backend.py +110 -0
  5. guidellm/backends/openai.py +355 -0
  6. guidellm/backends/response_handlers.py +455 -0
  7. guidellm/benchmark/__init__.py +53 -39
  8. guidellm/benchmark/benchmarker.py +148 -317
  9. guidellm/benchmark/entrypoints.py +466 -128
  10. guidellm/benchmark/output.py +517 -771
  11. guidellm/benchmark/profile.py +580 -280
  12. guidellm/benchmark/progress.py +568 -549
  13. guidellm/benchmark/scenarios/__init__.py +40 -0
  14. guidellm/benchmark/scenarios/chat.json +6 -0
  15. guidellm/benchmark/scenarios/rag.json +6 -0
  16. guidellm/benchmark/schemas.py +2085 -0
  17. guidellm/data/__init__.py +28 -4
  18. guidellm/data/collators.py +16 -0
  19. guidellm/data/deserializers/__init__.py +53 -0
  20. guidellm/data/deserializers/deserializer.py +109 -0
  21. guidellm/data/deserializers/file.py +222 -0
  22. guidellm/data/deserializers/huggingface.py +94 -0
  23. guidellm/data/deserializers/memory.py +192 -0
  24. guidellm/data/deserializers/synthetic.py +346 -0
  25. guidellm/data/loaders.py +145 -0
  26. guidellm/data/preprocessors/__init__.py +25 -0
  27. guidellm/data/preprocessors/formatters.py +412 -0
  28. guidellm/data/preprocessors/mappers.py +198 -0
  29. guidellm/data/preprocessors/preprocessor.py +29 -0
  30. guidellm/data/processor.py +30 -0
  31. guidellm/data/schemas.py +13 -0
  32. guidellm/data/utils/__init__.py +10 -0
  33. guidellm/data/utils/dataset.py +94 -0
  34. guidellm/data/utils/functions.py +18 -0
  35. guidellm/extras/__init__.py +4 -0
  36. guidellm/extras/audio.py +215 -0
  37. guidellm/extras/vision.py +242 -0
  38. guidellm/logger.py +2 -2
  39. guidellm/mock_server/__init__.py +8 -0
  40. guidellm/mock_server/config.py +84 -0
  41. guidellm/mock_server/handlers/__init__.py +17 -0
  42. guidellm/mock_server/handlers/chat_completions.py +280 -0
  43. guidellm/mock_server/handlers/completions.py +280 -0
  44. guidellm/mock_server/handlers/tokenizer.py +142 -0
  45. guidellm/mock_server/models.py +510 -0
  46. guidellm/mock_server/server.py +168 -0
  47. guidellm/mock_server/utils.py +302 -0
  48. guidellm/preprocess/dataset.py +23 -26
  49. guidellm/presentation/builder.py +2 -2
  50. guidellm/presentation/data_models.py +25 -21
  51. guidellm/presentation/injector.py +2 -3
  52. guidellm/scheduler/__init__.py +65 -26
  53. guidellm/scheduler/constraints.py +1035 -0
  54. guidellm/scheduler/environments.py +252 -0
  55. guidellm/scheduler/scheduler.py +140 -368
  56. guidellm/scheduler/schemas.py +272 -0
  57. guidellm/scheduler/strategies.py +519 -0
  58. guidellm/scheduler/worker.py +391 -420
  59. guidellm/scheduler/worker_group.py +707 -0
  60. guidellm/schemas/__init__.py +31 -0
  61. guidellm/schemas/info.py +159 -0
  62. guidellm/schemas/request.py +216 -0
  63. guidellm/schemas/response.py +119 -0
  64. guidellm/schemas/stats.py +228 -0
  65. guidellm/{config.py → settings.py} +32 -21
  66. guidellm/utils/__init__.py +95 -8
  67. guidellm/utils/auto_importer.py +98 -0
  68. guidellm/utils/cli.py +46 -2
  69. guidellm/utils/console.py +183 -0
  70. guidellm/utils/encoding.py +778 -0
  71. guidellm/utils/functions.py +134 -0
  72. guidellm/utils/hf_datasets.py +1 -2
  73. guidellm/utils/hf_transformers.py +4 -4
  74. guidellm/utils/imports.py +9 -0
  75. guidellm/utils/messaging.py +1118 -0
  76. guidellm/utils/mixins.py +115 -0
  77. guidellm/utils/pydantic_utils.py +411 -0
  78. guidellm/utils/random.py +3 -4
  79. guidellm/utils/registry.py +220 -0
  80. guidellm/utils/singleton.py +133 -0
  81. guidellm/{objects → utils}/statistics.py +341 -247
  82. guidellm/utils/synchronous.py +159 -0
  83. guidellm/utils/text.py +163 -50
  84. guidellm/utils/typing.py +41 -0
  85. guidellm/version.py +1 -1
  86. {guidellm-0.4.0a18.dist-info → guidellm-0.4.0a155.dist-info}/METADATA +33 -10
  87. guidellm-0.4.0a155.dist-info/RECORD +96 -0
  88. guidellm/backend/__init__.py +0 -23
  89. guidellm/backend/backend.py +0 -259
  90. guidellm/backend/openai.py +0 -705
  91. guidellm/backend/response.py +0 -136
  92. guidellm/benchmark/aggregator.py +0 -760
  93. guidellm/benchmark/benchmark.py +0 -837
  94. guidellm/benchmark/scenario.py +0 -104
  95. guidellm/data/prideandprejudice.txt.gz +0 -0
  96. guidellm/dataset/__init__.py +0 -22
  97. guidellm/dataset/creator.py +0 -213
  98. guidellm/dataset/entrypoints.py +0 -42
  99. guidellm/dataset/file.py +0 -92
  100. guidellm/dataset/hf_datasets.py +0 -62
  101. guidellm/dataset/in_memory.py +0 -132
  102. guidellm/dataset/synthetic.py +0 -287
  103. guidellm/objects/__init__.py +0 -18
  104. guidellm/objects/pydantic.py +0 -89
  105. guidellm/request/__init__.py +0 -18
  106. guidellm/request/loader.py +0 -284
  107. guidellm/request/request.py +0 -79
  108. guidellm/request/types.py +0 -10
  109. guidellm/scheduler/queues.py +0 -25
  110. guidellm/scheduler/result.py +0 -155
  111. guidellm/scheduler/strategy.py +0 -495
  112. guidellm-0.4.0a18.dist-info/RECORD +0 -62
  113. {guidellm-0.4.0a18.dist-info → guidellm-0.4.0a155.dist-info}/WHEEL +0 -0
  114. {guidellm-0.4.0a18.dist-info → guidellm-0.4.0a155.dist-info}/entry_points.txt +0 -0
  115. {guidellm-0.4.0a18.dist-info → guidellm-0.4.0a155.dist-info}/licenses/LICENSE +0 -0
  116. {guidellm-0.4.0a18.dist-info → guidellm-0.4.0a155.dist-info}/top_level.txt +0 -0
guidellm/benchmark/entrypoints.py
@@ -1,165 +1,503 @@
-from collections.abc import Iterable
+"""
+High-level entry points for executing generative text benchmarks.
+
+This module provides the primary interface for running generative text benchmarks
+through the `benchmark_generative_text` function and re-importing existing benchmark
+reports via `reimport_benchmarks_report`. It orchestrates the initialization and
+coordination of backends, data loaders, profiles, and output formats to execute
+comprehensive benchmarking workflows. The module handles all resolution logic for
+converting user-provided arguments into fully configured components ready for
+benchmarking execution.
+"""
+
+from __future__ import annotations
+
+from collections.abc import Callable
 from pathlib import Path
-from typing import Any, Literal, Optional, Union
+from typing import Any, Literal

-from datasets import Dataset, DatasetDict, IterableDataset, IterableDatasetDict
-from transformers import ( # type: ignore[import]
-    PreTrainedTokenizerBase,
-)
+from torch.utils.data import Sampler
+from transformers import PreTrainedTokenizerBase
+from typing_extensions import TypeAliasType

-from guidellm.backend import Backend, BackendType
-from guidellm.benchmark.benchmarker import GenerativeBenchmarker
-from guidellm.benchmark.output import (
-    GenerativeBenchmarksConsole,
+from guidellm.backends import Backend, BackendType
+from guidellm.benchmark.benchmarker import Benchmarker
+from guidellm.benchmark.output import GenerativeBenchmarkerOutput
+from guidellm.benchmark.profile import Profile, ProfileType
+from guidellm.benchmark.progress import GenerativeConsoleBenchmarkerProgress
+from guidellm.benchmark.schemas import (
+    BenchmarkGenerativeTextArgs,
+    GenerativeBenchmark,
     GenerativeBenchmarksReport,
 )
-from guidellm.benchmark.profile import ProfileType, create_profile
-from guidellm.benchmark.progress import GenerativeTextBenchmarkerProgressDisplay
-from guidellm.benchmark.scenario import GenerativeTextScenario, Scenario
-from guidellm.request import GenerativeRequestLoader
-from guidellm.scheduler import StrategyType
+from guidellm.data import (
+    DataLoader,
+    DatasetPreprocessor,
+    GenerativeRequestCollator,
+    PreprocessorRegistry,
+    ProcessorFactory,
+)
+from guidellm.data.preprocessors import GenerativeColumnMapper
+from guidellm.scheduler import (
+    ConstraintInitializer,
+    NonDistributedEnvironment,
+    StrategyType,
+)
+from guidellm.schemas import GenerationRequest, GenerationResponse
+from guidellm.utils import Console, InfoMixin
+
+__all__ = [
+    "benchmark_generative_text",
+    "reimport_benchmarks_report",
+]
+
+
+# Helper Functions
+
+OutputFormatT = TypeAliasType(
+    "OutputFormatT",
+    tuple[str, ...]
+    | list[str]
+    | dict[str, str | dict[str, Any] | GenerativeBenchmarkerOutput]
+    | None,
+)

+ProcessorInputT = TypeAliasType("ProcessorInputT", str | Path | PreTrainedTokenizerBase)

-async def benchmark_with_scenario(scenario: Scenario, **kwargs):
+
+async def resolve_backend(
+    backend: BackendType | Backend,
+    target: str,
+    model: str | None,
+    console: Console | None = None,
+    **backend_kwargs: dict[str, Any],
+) -> tuple[Backend, str | None]:
     """
-    Run a benchmark using a scenario and specify any extra arguments
+    Initialize and validate a backend instance for benchmarking.
+
+    :param backend: Backend type identifier or pre-configured Backend instance
+    :param target: Target endpoint URL or connection string for the backend
+    :param model: Model identifier to use with the backend, or None to use default
+    :param console: Console instance for progress reporting, or None
+    :param backend_kwargs: Additional keyword arguments passed to backend initialization
+    :return: Tuple of initialized Backend instance and resolved model identifier
     """
+    console_step = (
+        console.print_update_step(title=f"Initializing backend {backend}")
+        if console
+        else None
+    )
+    backend = (
+        Backend.create(backend, target=target, model=model, **(backend_kwargs or {}))
+        if not isinstance(backend, Backend)
+        else backend
+    )
+
+    if console_step:
+        console_step.update(f"{backend.__class__.__name__} backend initialized")
+
+    await backend.process_startup()
+    await backend.validate()
+
+    if model is None:
+        if console_step:
+            console_step.update(
+                title="Resolving default model from backend.default_model",
+                status_level="info",
+            )
+        model = await backend.default_model()
+
+    await backend.process_shutdown()
+
+    if console_step:
+        console_step.finish(
+            title=(
+                f"{backend.__class__.__name__} backend validated with model {model}"
+            ),
+            details=backend.info,
+            status_level="success",
+        )
+
+    return backend, model
+

-    if isinstance(scenario, GenerativeTextScenario):
-        return await benchmark_generative_text(**vars(scenario), **kwargs)
+async def resolve_processor(
+    processor: ProcessorInputT | None,
+    model: str | None,
+    console: Console | None = None,
+) -> ProcessorInputT | None:
+    """
+    Resolve the processor for tokenization, defaulting to model if not provided.
+
+    :param processor: Processor identifier, path, tokenizer instance, or None
+    :param model: Model identifier to use as fallback processor
+    :param console: Console instance for progress reporting, or None
+    :return: Resolved processor or None if neither processor nor model provided
+    """
+    console_step = (
+        console.print_update_step(title=f"Resolving processor {processor}")
+        if console
+        else None
+    )
+
+    if processor is not None:
+        if console_step:
+            console_step.finish(
+                title="Processor resolved",
+                details=f"Using processor '{processor}'",
+                status_level="success",
+            )
     else:
-        raise ValueError(f"Unsupported Scenario type {type(scenario)}")
+        processor = model
+        if console_step:
+            console_step.finish(
+                title="Processor resolved",
+                details=f"Using model '{processor}' as processor",
+                status_level="success",
+            )

+    return processor

-async def benchmark_generative_text(
-    target: str,
-    backend_type: BackendType,
-    backend_args: Optional[dict[str, Any]],
-    model: Optional[str],
-    processor: Optional[Optional[Union[str, Path, PreTrainedTokenizerBase]]],
-    processor_args: Optional[dict[str, Any]],
-    data: Union[
-        str,
-        Path,
-        Iterable[Union[str, dict[str, Any]]],
-        Dataset,
-        DatasetDict,
-        IterableDataset,
-        IterableDatasetDict,
-    ],
-    data_args: Optional[dict[str, Any]],
-    data_sampler: Optional[Literal["random"]],
-    rate_type: Union[StrategyType, ProfileType],
-    rate: Optional[Union[float, list[float]]],
-    max_seconds: Optional[float],
-    max_requests: Optional[int],
-    warmup_percent: Optional[float],
-    cooldown_percent: Optional[float],
-    output_path: Optional[Union[str, Path]],
-    output_extras: Optional[dict[str, Any]],
-    output_sampling: Optional[int],
+
+async def resolve_request_loader(
+    data: list[Any],
+    model: str | None,
+    data_args: list[dict[str, Any]] | None,
+    data_samples: int,
+    processor: ProcessorInputT | None,
+    processor_args: dict[str, Any] | None,
+    data_column_mapper: (
+        DatasetPreprocessor | dict[str, str] | Literal["generative_column_mapper"]
+    ),
+    data_request_formatter: (DatasetPreprocessor | dict[str, str] | str),
+    data_collator: Callable | Literal["generative"] | None,
+    data_sampler: Sampler[int] | Literal["shuffle"] | None,
+    data_num_workers: int | None,
     random_seed: int,
-    show_progress: bool = True,
-    show_progress_scheduler_stats: bool = False,
-    output_console: bool = True,
-) -> tuple[GenerativeBenchmarksReport, Optional[Path]]:
-    console = GenerativeBenchmarksConsole(enabled=show_progress)
-    console.print_line("Creating backend...")
-    backend = Backend.create(
-        backend_type, target=target, model=model, **(backend_args or {})
-    )
-    await backend.validate()
-    console.print_line(
-        f"Backend {backend_type} connected to {target} for model {backend.model}."
+    console: Console | None = None,
+    **dataloader_kwargs: dict[str, Any] | None,
+) -> DataLoader[GenerationRequest]:
+    """
+    Construct a DataLoader for GenerationRequest objects from raw data inputs.
+
+    :param data: List of data sources to load requests from
+    :param model: Model identifier for request formatting
+    :param data_args: Arguments for each data source in the data list
+    :param data_samples: Number of samples to draw from the dataset
+    :param processor: Processor for tokenization operations
+    :param processor_args: Arguments for processor initialization
+    :param data_column_mapper: Preprocessor or mapping for standardizing column names
+    :param data_request_formatter: Preprocessor or config for formatting requests
+    :param data_collator: Collation function or type for batching requests
+    :param data_sampler: Sampler instance or type for data sampling
+    :param data_num_workers: Number of worker processes for data loading
+    :param random_seed: Seed for reproducible random operations
+    :param console: Console instance for progress reporting, or None
+    :param dataloader_kwargs: Additional arguments passed to DataLoader initialization
+    :return: Configured DataLoader instance for GenerationRequest objects
+    """
+    console_step = (
+        console.print_update_step(title=f"Initializing request loader from {data}")
+        if console
+        else None
     )

-    if processor is None:
-        processor = backend.model
+    if not isinstance(data_column_mapper, DatasetPreprocessor):
+        column_mappings = (
+            data_column_mapper if isinstance(data_column_mapper, dict) else None
+        )
+        data_column_mapper = GenerativeColumnMapper(
+            column_mappings=column_mappings,
+        )
+    if not isinstance(data_request_formatter, DatasetPreprocessor):
+        request_type = (
+            data_request_formatter
+            if isinstance(data_request_formatter, str)
+            else data_request_formatter.pop("request_type", "chat_completions")
+        )
+        data_request_formatter = PreprocessorRegistry.get_registered_object(
+            request_type
+        )(
+            model=model,
+            **(
+                data_request_formatter
+                if isinstance(data_request_formatter, dict)
+                else {}
+            ),
+        )

-    console.print_line("Creating request loader...")
-    request_loader = GenerativeRequestLoader(
+    request_loader = DataLoader(
         data=data,
         data_args=data_args,
-        processor=processor,
-        processor_args=processor_args,
-        shuffle=data_sampler == "random",
-        iter_type=(
-            "finite" # assume a finite dataset is our limit
-            if max_requests is None and max_seconds is None
-            else "infinite" # default to infinite so we don't run out of data
+        data_samples=data_samples,
+        processor_factory=ProcessorFactory(
+            processor=processor, processor_args=processor_args
+        ),
+        preprocessors=[data_column_mapper, data_request_formatter],
+        collator=(
+            data_collator if callable(data_collator) else GenerativeRequestCollator()
         ),
+        sampler=data_sampler,
+        num_workers=data_num_workers,
         random_seed=random_seed,
+        **(dataloader_kwargs or {}),
     )
-    unique_requests = request_loader.num_unique_items(raise_err=False)
-    console.print_line(
-        f"Created loader with {unique_requests} unique requests from {data}.\n\n"
-        if unique_requests > 0
-        else f"Created loader with unknown number unique requests from {data}.\n\n"
+
+    if console_step:
+        console_step.finish(
+            title=(
+                f"Request loader initialized with "
+                f"{data_samples if data_samples > 0 else 'inf'} "
+                f"unique requests from {data}"
+            ),
+            details=InfoMixin.extract_from_obj(request_loader),
+            status_level="success",
+        )
+
+    return request_loader
+
+
+async def resolve_profile(
+    profile: StrategyType | ProfileType | Profile,
+    rate: float | list[float] | None,
+    random_seed: int,
+    constraints: dict[str, ConstraintInitializer | Any],
+    max_seconds: int | float | None,
+    max_requests: int | None,
+    max_errors: int | None,
+    max_error_rate: float | None,
+    max_global_error_rate: float | None,
+    console: Console | None = None,
+) -> Profile:
+    """
+    Resolve and configure a benchmark profile with rate and constraint settings.
+
+    :param profile: Profile type identifier or pre-configured Profile instance
+    :param rate: Request rate(s) for the benchmark execution
+    :param random_seed: Seed for reproducible random operations
+    :param constraints: Dictionary of constraint initializers for benchmark limits
+    :param max_seconds: Maximum duration in seconds for the benchmark
+    :param max_requests: Maximum number of requests to process
+    :param max_errors: Maximum number of errors before stopping
+    :param max_error_rate: Maximum error rate threshold before stopping
+    :param max_global_error_rate: Maximum global error rate threshold before stopping
+    :param console: Console instance for progress reporting, or None
+    :return: Configured Profile instance ready for benchmarking
+    :raises ValueError: If constraints are provided with a pre-configured Profile
+    """
+    console_step = (
+        console.print_update_step(title=f"Resolving profile {profile}")
+        if console
+        else None
     )

-    profile = create_profile(rate_type=rate_type, rate=rate)
-    benchmarker = GenerativeBenchmarker(
-        backend=backend,
-        request_loader=request_loader,
-        request_loader_description=request_loader.description,
-        benchmark_save_extras=output_extras,
-        processor=processor,
-        processor_args=processor_args,
+    for key, val in {
+        "max_seconds": max_seconds,
+        "max_requests": max_requests,
+        "max_errors": max_errors,
+        "max_error_rate": max_error_rate,
+        "max_global_error_rate": max_global_error_rate,
+    }.items():
+        if val is not None:
+            constraints[key] = val
+    if not isinstance(profile, Profile):
+        profile = Profile.create(
+            rate_type=profile,
+            rate=rate,
+            random_seed=random_seed,
+            constraints={**constraints},
+        )
+    elif constraints:
+        raise ValueError(
+            "Constraints must be empty when providing a Profile instance. "
+            f"Provided constraints: {constraints} ; provided profile: {profile}"
+        )
+
+    if console_step:
+        console_step.finish(
+            title=f"{profile.__class__.__name__} profile resolved",
+            details=InfoMixin.extract_from_obj(profile),
+            status_level="success",
+        )
+
+    return profile
+
+
+async def resolve_output_formats(
+    output_formats: OutputFormatT,
+    output_path: str | Path | None,
+    console: Console | None = None,
+) -> dict[str, GenerativeBenchmarkerOutput]:
+    """
+    Resolve output format specifications into configured output handler instances.
+
+    :param output_formats: Specification of desired output formats
+    :param output_path: Base path for output file generation, or None for default
+    :param console: Console instance for progress reporting, or None
+    :return: Dictionary mapping format names to configured output handler instances
+    """
+    console_step = (
+        console.print_update_step(title="Resolving output formats") if console else None
     )
-    progress = (
-        GenerativeTextBenchmarkerProgressDisplay(
-            display_scheduler_stats=show_progress_scheduler_stats
+
+    resolved = GenerativeBenchmarkerOutput.resolve(
+        output_formats=output_formats, output_path=output_path
+    )
+
+    if console_step:
+        console_step.finish(
+            title="Output formats resolved",
+            details={key: str(val) for key, val in resolved.items()},
+            status_level="success",
         )
-        if show_progress
-        else None
+
+    return resolved
+
+
+# Main Entrypoints Functions
+
+
+async def benchmark_generative_text(
+    args: BenchmarkGenerativeTextArgs,
+    progress: GenerativeConsoleBenchmarkerProgress | None = None,
+    console: Console | None = None,
+    **constraints: dict[str, ConstraintInitializer | Any],
+) -> tuple[GenerativeBenchmarksReport, dict[str, Any]]:
+    """
+    Execute a comprehensive generative text benchmarking workflow.
+
+    Orchestrates the full benchmarking pipeline by resolving all components (backend,
+    data loader, profile, outputs) from provided arguments, executing the benchmark
+    runs, and finalizing results in the specified output formats.
+
+    :param args: Configuration arguments for the benchmark execution
+    :param progress: Progress tracker for benchmark execution, or None for no tracking
+    :param console: Console instance for status reporting, or None for silent operation
+    :param constraints: Additional constraint initializers for benchmark limits
+    :return: Tuple of GenerativeBenchmarksReport and dictionary of output format results
+    """
+    backend, model = await resolve_backend(
+        backend=args.backend,
+        target=args.target,
+        model=args.model,
+        console=console,
+        **(args.backend_kwargs or {}),
     )
-    report = GenerativeBenchmarksReport()
+    processor = await resolve_processor(
+        processor=args.processor, model=model, console=console
+    )
+    request_loader = await resolve_request_loader(
+        data=args.data,
+        model=model,
+        data_args=args.data_args,
+        data_samples=args.data_samples,
+        processor=processor,
+        processor_args=args.processor_args,
+        data_column_mapper=args.data_column_mapper,
+        data_request_formatter=args.data_request_formatter,
+        data_collator=args.data_collator,
+        data_sampler=args.data_sampler,
+        data_num_workers=args.data_num_workers,
+        random_seed=args.random_seed,
+        console=console,
+        **(args.dataloader_kwargs or {}),
+    )
+    profile = await resolve_profile(
+        profile=args.profile,
+        rate=args.rate,
+        random_seed=args.random_seed,
+        constraints=constraints,
+        max_seconds=args.max_seconds,
+        max_requests=args.max_requests,
+        max_errors=args.max_errors,
+        max_error_rate=args.max_error_rate,
+        max_global_error_rate=args.max_global_error_rate,
+        console=console,
+    )
+    output_formats = await resolve_output_formats(
+        output_formats=args.output_formats,
+        output_path=args.output_path,
+        console=console,
+    )
+
+    report = GenerativeBenchmarksReport(args=args)
+    if console:
+        console.print_update(
+            title="Setup complete, starting benchmarks...", status="success"
+        )
+        console.print("\n\n")

-    async for result in benchmarker.run(
+    benchmarker: Benchmarker[
+        GenerativeBenchmark, GenerationRequest, GenerationResponse
+    ] = Benchmarker()
+    async for benchmark in benchmarker.run(
+        benchmark_class=args.benchmark_cls,
+        requests=request_loader,
+        backend=backend,
         profile=profile,
-        max_number_per_strategy=max_requests,
-        max_duration_per_strategy=max_seconds,
-        warmup_percent_per_strategy=warmup_percent,
-        cooldown_percent_per_strategy=cooldown_percent,
+        environment=NonDistributedEnvironment(),
+        progress=progress,
+        sample_requests=args.sample_requests,
+        warmup=args.warmup,
+        cooldown=args.cooldown,
+        prefer_response_metrics=args.prefer_response_metrics,
     ):
-        if progress:
-            progress.update(result)
-
-        if result.type_ == "benchmark_compiled":
-            if result.current_benchmark is None:
-                raise ValueError("Current benchmark is None")
-            report.benchmarks.append(
-                result.current_benchmark.set_sample_size(output_sampling)
-            )
+        if benchmark:
+            report.benchmarks.append(benchmark)

-    if output_console:
-        console.benchmarks = report.benchmarks
-        console.print_full_report()
+    output_format_results = {}
+    for key, output in output_formats.items():
+        output_result = await output.finalize(report)
+        output_format_results[key] = output_result

-    if output_path:
-        console.print_line("\nSaving benchmarks report...")
-        saved_path = report.save_file(output_path)
-        console.print_line(f"Benchmarks report saved to {saved_path}")
-    else:
-        saved_path = None
-
-    console.print_line("\nBenchmarking complete.")
+    if console:
+        console.print("\n\n")
+        console.print_update(
+            title=(
+                "Benchmarking complete, generated "
+                f"{len(report.benchmarks)} benchmark(s)"
+            ),
+            status="success",
+        )
+        for key, value in output_format_results.items():
+            console.print_update(title=f" {key:<8}: {value}", status="debug")

-    return report, saved_path
+    return report, output_format_results


-def reimport_benchmarks_report(file: Path, output_path: Optional[Path]) -> None:
+async def reimport_benchmarks_report(
+    file: Path,
+    output_path: Path | None,
+    output_formats: OutputFormatT = ("console", "json", "html", "csv"),
+) -> tuple[GenerativeBenchmarksReport, dict[str, Any]]:
     """
-    The command-line entry point for re-importing and displaying an
-    existing benchmarks report. Can also specify
-    Assumes the file provided exists.
+    Load and re-export an existing benchmarks report in specified formats.
+
+    :param file: Path to the existing benchmark report file to load
+    :param output_path: Base path for output file generation, or None for default
+    :param output_formats: Specification of desired output formats for the report
+    :return: Tuple of loaded GenerativeBenchmarksReport and dictionary of output results
     """
-    console = GenerativeBenchmarksConsole(enabled=True)
-    report = GenerativeBenchmarksReport.load_file(file)
-    console.benchmarks = report.benchmarks
-    console.print_full_report()
-
-    if output_path:
-        console.print_line("\nSaving benchmarks report...")
-        saved_path = report.save_file(output_path)
-        console.print_line(f"Benchmarks report saved to {saved_path}")
+    console = Console()
+
+    with console.print_update_step(
+        title=f"Loading benchmarks from {file}..."
+    ) as console_step:
+        report = GenerativeBenchmarksReport.load_file(file)
+        console_step.finish(
+            "Import of old benchmarks complete;"
+            f" loaded {len(report.benchmarks)} benchmark(s)"
+        )
+
+    output_formats = await resolve_output_formats(
+        output_formats, output_path, console=console
+    )
+    output_format_results = {}
+    for key, output in output_formats.items():
+        output_result = await output.finalize(report)
+        output_format_results[key] = output_result
+
+    for key, value in output_format_results.items():
+        console.print_update(title=f" {key:<8}: {value}", status="debug")

+    return report, output_format_results
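
For orientation, the sketch below shows how the reworked entrypoint in this release might be invoked, based only on the signatures visible in the diff above: benchmark_generative_text now takes a single BenchmarkGenerativeTextArgs object instead of the long keyword list it replaces, and returns the report plus per-format output results. The argument values are illustrative, the import path for the entrypoint is inferred from guidellm/benchmark/entrypoints.py in the file list, and it is an assumption that BenchmarkGenerativeTextArgs accepts its fields (target, data, profile, max_seconds) as constructor keywords.

import asyncio

from guidellm.benchmark.entrypoints import benchmark_generative_text
from guidellm.benchmark.schemas import BenchmarkGenerativeTextArgs
from guidellm.utils import Console

# Illustrative values only; field names follow the args.* attributes referenced
# in the diff (target, data, profile, max_seconds). Keyword construction is assumed.
args = BenchmarkGenerativeTextArgs(
    target="http://localhost:8000",
    data=["prompts.jsonl"],
    profile="synchronous",
    max_seconds=60,
)

# The entrypoint is async; it yields the GenerativeBenchmarksReport and a dict of
# output-format results, mirroring the return annotation shown in the diff.
report, output_results = asyncio.run(
    benchmark_generative_text(args=args, console=Console())
)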