guidellm 0.4.0a21__py3-none-any.whl → 0.4.0a169__py3-none-any.whl
This diff compares the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
Potentially problematic release: this version of guidellm might be problematic.
- guidellm/__init__.py +5 -2
- guidellm/__main__.py +452 -252
- guidellm/backends/__init__.py +33 -0
- guidellm/backends/backend.py +110 -0
- guidellm/backends/openai.py +355 -0
- guidellm/backends/response_handlers.py +455 -0
- guidellm/benchmark/__init__.py +53 -39
- guidellm/benchmark/benchmarker.py +150 -317
- guidellm/benchmark/entrypoints.py +467 -128
- guidellm/benchmark/output.py +519 -771
- guidellm/benchmark/profile.py +580 -280
- guidellm/benchmark/progress.py +568 -549
- guidellm/benchmark/scenarios/__init__.py +40 -0
- guidellm/benchmark/scenarios/chat.json +6 -0
- guidellm/benchmark/scenarios/rag.json +6 -0
- guidellm/benchmark/schemas.py +2086 -0
- guidellm/data/__init__.py +28 -4
- guidellm/data/collators.py +16 -0
- guidellm/data/deserializers/__init__.py +53 -0
- guidellm/data/deserializers/deserializer.py +144 -0
- guidellm/data/deserializers/file.py +222 -0
- guidellm/data/deserializers/huggingface.py +94 -0
- guidellm/data/deserializers/memory.py +194 -0
- guidellm/data/deserializers/synthetic.py +348 -0
- guidellm/data/loaders.py +149 -0
- guidellm/data/preprocessors/__init__.py +25 -0
- guidellm/data/preprocessors/formatters.py +404 -0
- guidellm/data/preprocessors/mappers.py +198 -0
- guidellm/data/preprocessors/preprocessor.py +31 -0
- guidellm/data/processor.py +31 -0
- guidellm/data/schemas.py +13 -0
- guidellm/data/utils/__init__.py +6 -0
- guidellm/data/utils/dataset.py +94 -0
- guidellm/extras/__init__.py +4 -0
- guidellm/extras/audio.py +215 -0
- guidellm/extras/vision.py +242 -0
- guidellm/logger.py +2 -2
- guidellm/mock_server/__init__.py +8 -0
- guidellm/mock_server/config.py +84 -0
- guidellm/mock_server/handlers/__init__.py +17 -0
- guidellm/mock_server/handlers/chat_completions.py +280 -0
- guidellm/mock_server/handlers/completions.py +280 -0
- guidellm/mock_server/handlers/tokenizer.py +142 -0
- guidellm/mock_server/models.py +510 -0
- guidellm/mock_server/server.py +168 -0
- guidellm/mock_server/utils.py +302 -0
- guidellm/preprocess/dataset.py +23 -26
- guidellm/presentation/builder.py +2 -2
- guidellm/presentation/data_models.py +25 -21
- guidellm/presentation/injector.py +2 -3
- guidellm/scheduler/__init__.py +65 -26
- guidellm/scheduler/constraints.py +1035 -0
- guidellm/scheduler/environments.py +252 -0
- guidellm/scheduler/scheduler.py +140 -368
- guidellm/scheduler/schemas.py +272 -0
- guidellm/scheduler/strategies.py +519 -0
- guidellm/scheduler/worker.py +391 -420
- guidellm/scheduler/worker_group.py +707 -0
- guidellm/schemas/__init__.py +31 -0
- guidellm/schemas/info.py +159 -0
- guidellm/schemas/request.py +226 -0
- guidellm/schemas/response.py +119 -0
- guidellm/schemas/stats.py +228 -0
- guidellm/{config.py → settings.py} +32 -21
- guidellm/utils/__init__.py +95 -8
- guidellm/utils/auto_importer.py +98 -0
- guidellm/utils/cli.py +71 -2
- guidellm/utils/console.py +183 -0
- guidellm/utils/encoding.py +778 -0
- guidellm/utils/functions.py +134 -0
- guidellm/utils/hf_datasets.py +1 -2
- guidellm/utils/hf_transformers.py +4 -4
- guidellm/utils/imports.py +9 -0
- guidellm/utils/messaging.py +1118 -0
- guidellm/utils/mixins.py +115 -0
- guidellm/utils/pydantic_utils.py +411 -0
- guidellm/utils/random.py +3 -4
- guidellm/utils/registry.py +220 -0
- guidellm/utils/singleton.py +133 -0
- guidellm/{objects → utils}/statistics.py +341 -247
- guidellm/utils/synchronous.py +159 -0
- guidellm/utils/text.py +163 -50
- guidellm/utils/typing.py +41 -0
- guidellm/version.py +1 -1
- {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a169.dist-info}/METADATA +33 -10
- guidellm-0.4.0a169.dist-info/RECORD +95 -0
- guidellm/backend/__init__.py +0 -23
- guidellm/backend/backend.py +0 -259
- guidellm/backend/openai.py +0 -705
- guidellm/backend/response.py +0 -136
- guidellm/benchmark/aggregator.py +0 -760
- guidellm/benchmark/benchmark.py +0 -837
- guidellm/benchmark/scenario.py +0 -104
- guidellm/data/prideandprejudice.txt.gz +0 -0
- guidellm/dataset/__init__.py +0 -22
- guidellm/dataset/creator.py +0 -213
- guidellm/dataset/entrypoints.py +0 -42
- guidellm/dataset/file.py +0 -92
- guidellm/dataset/hf_datasets.py +0 -62
- guidellm/dataset/in_memory.py +0 -132
- guidellm/dataset/synthetic.py +0 -287
- guidellm/objects/__init__.py +0 -18
- guidellm/objects/pydantic.py +0 -89
- guidellm/request/__init__.py +0 -18
- guidellm/request/loader.py +0 -284
- guidellm/request/request.py +0 -79
- guidellm/request/types.py +0 -10
- guidellm/scheduler/queues.py +0 -25
- guidellm/scheduler/result.py +0 -155
- guidellm/scheduler/strategy.py +0 -495
- guidellm-0.4.0a21.dist-info/RECORD +0 -62
- {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a169.dist-info}/WHEEL +0 -0
- {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a169.dist-info}/entry_points.txt +0 -0
- {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a169.dist-info}/licenses/LICENSE +0 -0
- {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a169.dist-info}/top_level.txt +0 -0
@@ -1,165 +1,504 @@
-
+"""
+High-level entry points for executing generative text benchmarks.
+
+This module provides the primary interface for running generative text benchmarks
+through the `benchmark_generative_text` function and re-importing existing benchmark
+reports via `reimport_benchmarks_report`. It orchestrates the initialization and
+coordination of backends, data loaders, profiles, and output formats to execute
+comprehensive benchmarking workflows. The module handles all resolution logic for
+converting user-provided arguments into fully configured components ready for
+benchmarking execution.
+"""
+
+from __future__ import annotations
+
+from collections.abc import Callable
 from pathlib import Path
-from typing import Any, Literal
+from typing import Any, Literal
 
-from
-from transformers import
-)
+from torch.utils.data import Sampler
+from transformers import PreTrainedTokenizerBase
+from typing_extensions import TypeAliasType
 
-from guidellm.
-from guidellm.benchmark.benchmarker import
-from guidellm.benchmark.output import
+from guidellm.backends import Backend, BackendType
+from guidellm.benchmark.benchmarker import Benchmarker
+from guidellm.benchmark.output import GenerativeBenchmarkerOutput
+from guidellm.benchmark.profile import Profile, ProfileType
+from guidellm.benchmark.progress import GenerativeConsoleBenchmarkerProgress
+from guidellm.benchmark.schemas import (
+    BenchmarkGenerativeTextArgs,
+    GenerativeBenchmark,
     GenerativeBenchmarksReport,
 )
-from guidellm.
+from guidellm.data import (
+    DataLoader,
+    DatasetPreprocessor,
+    GenerativeRequestCollator,
+    PreprocessorRegistry,
+    ProcessorFactory,
+)
+from guidellm.data.preprocessors import GenerativeColumnMapper
+from guidellm.scheduler import (
+    ConstraintInitializer,
+    NonDistributedEnvironment,
+    StrategyType,
+)
+from guidellm.schemas import GenerationRequest, GenerationResponse
+from guidellm.utils import Console, InfoMixin
+
+__all__ = [
+    "benchmark_generative_text",
+    "reimport_benchmarks_report",
+]
+
+
+# Helper Functions
+
+OutputFormatT = TypeAliasType(
+    "OutputFormatT",
+    tuple[str, ...]
+    | list[str]
+    | dict[str, str | dict[str, Any] | GenerativeBenchmarkerOutput]
+    | None,
+)
 
+ProcessorInputT = TypeAliasType("ProcessorInputT", str | Path | PreTrainedTokenizerBase)
 
+
+async def resolve_backend(
+    backend: BackendType | Backend,
+    target: str,
+    model: str | None,
+    console: Console | None = None,
+    **backend_kwargs: dict[str, Any],
+) -> tuple[Backend, str | None]:
     """
-
+    Initialize and validate a backend instance for benchmarking.
+
+    :param backend: Backend type identifier or pre-configured Backend instance
+    :param target: Target endpoint URL or connection string for the backend
+    :param model: Model identifier to use with the backend, or None to use default
+    :param console: Console instance for progress reporting, or None
+    :param backend_kwargs: Additional keyword arguments passed to backend initialization
+    :return: Tuple of initialized Backend instance and resolved model identifier
     """
+    console_step = (
+        console.print_update_step(title=f"Initializing backend {backend}")
+        if console
+        else None
+    )
+    backend = (
+        Backend.create(backend, target=target, model=model, **(backend_kwargs or {}))
+        if not isinstance(backend, Backend)
+        else backend
+    )
+
+    if console_step:
+        console_step.update(f"{backend.__class__.__name__} backend initialized")
+
+    await backend.process_startup()
+    await backend.validate()
+
+    if model is None:
+        if console_step:
+            console_step.update(
+                title="Resolving default model from backend.default_model",
+                status_level="info",
+            )
+        model = await backend.default_model()
+
+    await backend.process_shutdown()
+
+    if console_step:
+        console_step.finish(
+            title=(
+                f"{backend.__class__.__name__} backend validated with model {model}"
+            ),
+            details=backend.info,
+            status_level="success",
+        )
+
+    return backend, model
+
 
+async def resolve_processor(
+    processor: ProcessorInputT | None,
+    model: str | None,
+    console: Console | None = None,
+) -> ProcessorInputT | None:
+    """
+    Resolve the processor for tokenization, defaulting to model if not provided.
+
+    :param processor: Processor identifier, path, tokenizer instance, or None
+    :param model: Model identifier to use as fallback processor
+    :param console: Console instance for progress reporting, or None
+    :return: Resolved processor or None if neither processor nor model provided
+    """
+    console_step = (
+        console.print_update_step(title=f"Resolving processor {processor}")
+        if console
+        else None
+    )
+
+    if processor is not None:
+        if console_step:
+            console_step.finish(
+                title="Processor resolved",
+                details=f"Using processor '{processor}'",
+                status_level="success",
+            )
     else:
+        processor = model
+        if console_step:
+            console_step.finish(
+                title="Processor resolved",
+                details=f"Using model '{processor}' as processor",
+                status_level="success",
+            )
 
+    return processor
 
-    ],
-    data_args: Optional[dict[str, Any]],
-    data_sampler: Optional[Literal["random"]],
-    rate_type: Union[StrategyType, ProfileType],
-    rate: Optional[Union[float, list[float]]],
-    max_seconds: Optional[float],
-    max_requests: Optional[int],
-    warmup_percent: Optional[float],
-    cooldown_percent: Optional[float],
-    output_path: Optional[Union[str, Path]],
-    output_extras: Optional[dict[str, Any]],
-    output_sampling: Optional[int],
+
+async def resolve_request_loader(
+    data: list[Any],
+    model: str | None,
+    data_args: list[dict[str, Any]] | None,
+    data_samples: int,
+    processor: ProcessorInputT | None,
+    processor_args: dict[str, Any] | None,
+    data_column_mapper: (
+        DatasetPreprocessor | dict[str, str] | Literal["generative_column_mapper"]
+    ),
+    data_request_formatter: (DatasetPreprocessor | dict[str, str] | str),
+    data_collator: Callable | Literal["generative"] | None,
+    data_sampler: Sampler[int] | Literal["shuffle"] | None,
+    data_num_workers: int | None,
     random_seed: int,
+    console: Console | None = None,
+    **dataloader_kwargs: dict[str, Any] | None,
+) -> DataLoader[GenerationRequest]:
+    """
+    Construct a DataLoader for GenerationRequest objects from raw data inputs.
+
+    :param data: List of data sources to load requests from
+    :param model: Model identifier for request formatting
+    :param data_args: Arguments for each data source in the data list
+    :param data_samples: Number of samples to draw from the dataset
+    :param processor: Processor for tokenization operations
+    :param processor_args: Arguments for processor initialization
+    :param data_column_mapper: Preprocessor or mapping for standardizing column names
+    :param data_request_formatter: Preprocessor or config for formatting requests
+    :param data_collator: Collation function or type for batching requests
+    :param data_sampler: Sampler instance or type for data sampling
+    :param data_num_workers: Number of worker processes for data loading
+    :param random_seed: Seed for reproducible random operations
+    :param console: Console instance for progress reporting, or None
+    :param dataloader_kwargs: Additional arguments passed to DataLoader initialization
+    :return: Configured DataLoader instance for GenerationRequest objects
+    """
+    console_step = (
+        console.print_update_step(title=f"Initializing request loader from {data}")
+        if console
+        else None
     )
 
-    if
+    if not isinstance(data_column_mapper, DatasetPreprocessor):
+        column_mappings = (
+            data_column_mapper if isinstance(data_column_mapper, dict) else None
+        )
+        data_column_mapper = GenerativeColumnMapper(
+            column_mappings=column_mappings,
+        )
+    if not isinstance(data_request_formatter, DatasetPreprocessor):
+        request_type = (
+            data_request_formatter
+            if isinstance(data_request_formatter, str)
+            else data_request_formatter.pop("request_type", "chat_completions")
+        )
+        data_request_formatter = PreprocessorRegistry.get_registered_object(
+            request_type
+        )(
+            model=model,
+            **(
+                data_request_formatter
+                if isinstance(data_request_formatter, dict)
+                else {}
+            ),
+        )
 
-    request_loader = GenerativeRequestLoader(
+    request_loader = DataLoader(
         data=data,
         data_args=data_args,
+        data_samples=data_samples,
+        processor_factory=ProcessorFactory(
+            processor=processor, processor_args=processor_args
+        ),
+        preprocessors=[data_column_mapper, data_request_formatter],
+        collator=(
+            data_collator if callable(data_collator) else GenerativeRequestCollator()
         ),
+        sampler=data_sampler,
+        num_workers=data_num_workers,
         random_seed=random_seed,
+        **(dataloader_kwargs or {}),
     )
+
+    if console_step:
+        console_step.finish(
+            title=(
+                f"Request loader initialized with "
+                f"{data_samples if data_samples > 0 else 'inf'} "
+                f"unique requests from {data}"
+            ),
+            details=InfoMixin.extract_from_obj(request_loader),
+            status_level="success",
+        )
+
+    return request_loader
+
+
+async def resolve_profile(
+    profile: StrategyType | ProfileType | Profile,
+    rate: float | list[float] | None,
+    random_seed: int,
+    constraints: dict[str, ConstraintInitializer | Any],
+    max_seconds: int | float | None,
+    max_requests: int | None,
+    max_errors: int | None,
+    max_error_rate: float | None,
+    max_global_error_rate: float | None,
+    console: Console | None = None,
+) -> Profile:
+    """
+    Resolve and configure a benchmark profile with rate and constraint settings.
+
+    :param profile: Profile type identifier or pre-configured Profile instance
+    :param rate: Request rate(s) for the benchmark execution
+    :param random_seed: Seed for reproducible random operations
+    :param constraints: Dictionary of constraint initializers for benchmark limits
+    :param max_seconds: Maximum duration in seconds for the benchmark
+    :param max_requests: Maximum number of requests to process
+    :param max_errors: Maximum number of errors before stopping
+    :param max_error_rate: Maximum error rate threshold before stopping
+    :param max_global_error_rate: Maximum global error rate threshold before stopping
+    :param console: Console instance for progress reporting, or None
+    :return: Configured Profile instance ready for benchmarking
+    :raises ValueError: If constraints are provided with a pre-configured Profile
+    """
+    console_step = (
+        console.print_update_step(title=f"Resolving profile {profile}")
+        if console
+        else None
    )
 
+    for key, val in {
+        "max_seconds": max_seconds,
+        "max_requests": max_requests,
+        "max_errors": max_errors,
+        "max_error_rate": max_error_rate,
+        "max_global_error_rate": max_global_error_rate,
+    }.items():
+        if val is not None:
+            constraints[key] = val
+    if not isinstance(profile, Profile):
+        profile = Profile.create(
+            rate_type=profile,
+            rate=rate,
+            random_seed=random_seed,
+            constraints={**constraints},
+        )
+    elif constraints:
+        raise ValueError(
+            "Constraints must be empty when providing a Profile instance. "
+            f"Provided constraints: {constraints} ; provided profile: {profile}"
+        )
+
+    if console_step:
+        console_step.finish(
+            title=f"{profile.__class__.__name__} profile resolved",
+            details=InfoMixin.extract_from_obj(profile),
+            status_level="success",
+        )
+
+    return profile
+
+
+async def resolve_output_formats(
+    output_formats: OutputFormatT,
+    output_path: str | Path | None,
+    console: Console | None = None,
+) -> dict[str, GenerativeBenchmarkerOutput]:
+    """
+    Resolve output format specifications into configured output handler instances.
+
+    :param output_formats: Specification of desired output formats
+    :param output_path: Base path for output file generation, or None for default
+    :param console: Console instance for progress reporting, or None
+    :return: Dictionary mapping format names to configured output handler instances
+    """
+    console_step = (
+        console.print_update_step(title="Resolving output formats") if console else None
     )
+
+    resolved = GenerativeBenchmarkerOutput.resolve(
+        output_formats=output_formats, output_path=output_path
+    )
+
+    if console_step:
+        console_step.finish(
+            title="Output formats resolved",
+            details={key: str(val) for key, val in resolved.items()},
+            status_level="success",
         )
+
+    return resolved
+
+
+# Main Entrypoints Functions
+
+
+async def benchmark_generative_text(
+    args: BenchmarkGenerativeTextArgs,
+    progress: GenerativeConsoleBenchmarkerProgress | None = None,
+    console: Console | None = None,
+    **constraints: dict[str, ConstraintInitializer | Any],
+) -> tuple[GenerativeBenchmarksReport, dict[str, Any]]:
+    """
+    Execute a comprehensive generative text benchmarking workflow.
+
+    Orchestrates the full benchmarking pipeline by resolving all components (backend,
+    data loader, profile, outputs) from provided arguments, executing the benchmark
+    runs, and finalizing results in the specified output formats.
+
+    :param args: Configuration arguments for the benchmark execution
+    :param progress: Progress tracker for benchmark execution, or None for no tracking
+    :param console: Console instance for status reporting, or None for silent operation
+    :param constraints: Additional constraint initializers for benchmark limits
+    :return: Tuple of GenerativeBenchmarksReport and dictionary of output format results
+    """
+    backend, model = await resolve_backend(
+        backend=args.backend,
+        target=args.target,
+        model=args.model,
+        console=console,
+        **(args.backend_kwargs or {}),
     )
+    processor = await resolve_processor(
+        processor=args.processor, model=model, console=console
+    )
+    request_loader = await resolve_request_loader(
+        data=args.data,
+        model=model,
+        data_args=args.data_args,
+        data_samples=args.data_samples,
+        processor=processor,
+        processor_args=args.processor_args,
+        data_column_mapper=args.data_column_mapper,
+        data_request_formatter=args.data_request_formatter,
+        data_collator=args.data_collator,
+        data_sampler=args.data_sampler,
+        data_num_workers=args.data_num_workers,
+        random_seed=args.random_seed,
+        console=console,
+        **(args.dataloader_kwargs or {}),
+    )
+    profile = await resolve_profile(
+        profile=args.profile,
+        rate=args.rate,
+        random_seed=args.random_seed,
+        constraints=constraints,
+        max_seconds=args.max_seconds,
+        max_requests=args.max_requests,
+        max_errors=args.max_errors,
+        max_error_rate=args.max_error_rate,
+        max_global_error_rate=args.max_global_error_rate,
+        console=console,
+    )
+    output_formats = await resolve_output_formats(
+        output_formats=args.output_formats,
+        output_path=args.output_path,
+        console=console,
+    )
+
+    report = GenerativeBenchmarksReport(args=args)
+    if console:
+        console.print_update(
+            title="Setup complete, starting benchmarks...", status="success"
+        )
+        console.print("\n\n")
 
+    benchmarker: Benchmarker[
+        GenerativeBenchmark, GenerationRequest, GenerationResponse
+    ] = Benchmarker()
+    async for benchmark in benchmarker.run(
+        benchmark_class=args.benchmark_cls,
+        requests=request_loader,
+        backend=backend,
         profile=profile,
+        environment=NonDistributedEnvironment(),
+        data=args.data,
+        progress=progress,
+        sample_requests=args.sample_requests,
+        warmup=args.warmup,
+        cooldown=args.cooldown,
+        prefer_response_metrics=args.prefer_response_metrics,
     ):
-        if
-        if result.type_ == "benchmark_compiled":
-            if result.current_benchmark is None:
-                raise ValueError("Current benchmark is None")
-            report.benchmarks.append(
-                result.current_benchmark.set_sample_size(output_sampling)
-            )
+        if benchmark:
+            report.benchmarks.append(benchmark)
 
+    output_format_results = {}
+    for key, output in output_formats.items():
+        output_result = await output.finalize(report)
+        output_format_results[key] = output_result
 
-    if
-        console.
+    if console:
+        console.print("\n\n")
+        console.print_update(
+            title=(
+                "Benchmarking complete, generated "
+                f"{len(report.benchmarks)} benchmark(s)"
+            ),
+            status="success",
+        )
+        for key, value in output_format_results.items():
+            console.print_update(title=f" {key:<8}: {value}", status="debug")
 
-    return report,
+    return report, output_format_results
 
 
-def reimport_benchmarks_report(
+async def reimport_benchmarks_report(
+    file: Path,
+    output_path: Path | None,
+    output_formats: OutputFormatT = ("console", "json", "html", "csv"),
+) -> tuple[GenerativeBenchmarksReport, dict[str, Any]]:
     """
+    Load and re-export an existing benchmarks report in specified formats.
+
+    :param file: Path to the existing benchmark report file to load
+    :param output_path: Base path for output file generation, or None for default
+    :param output_formats: Specification of desired output formats for the report
+    :return: Tuple of loaded GenerativeBenchmarksReport and dictionary of output results
     """
-    console =
-    console.
+    console = Console()
+
+    with console.print_update_step(
+        title=f"Loading benchmarks from {file}..."
+    ) as console_step:
+        report = GenerativeBenchmarksReport.load_file(file)
+        console_step.finish(
+            "Import of old benchmarks complete;"
+            f" loaded {len(report.benchmarks)} benchmark(s)"
+        )
+
+    output_formats = await resolve_output_formats(
+        output_formats, output_path, console=console
+    )
+    output_format_results = {}
+    for key, output in output_formats.items():
+        output_result = await output.finalize(report)
+        output_format_results[key] = output_result
+
+    for key, value in output_format_results.items():
+        console.print_update(title=f" {key:<8}: {value}", status="debug")
+
+    return report, output_format_results
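To make the new call shape concrete, here is a minimal usage sketch of the reworked entrypoints. The import paths, function signatures, and field names are taken from the diff above; the concrete values (the target URL, profile identifier, and data file) and the assumption that BenchmarkGenerativeTextArgs can be constructed with keyword arguments while other fields default are illustrative, not confirmed from the package.

# Hypothetical driver for the 0.4.0a169 entrypoints. Import paths and field
# names follow the diff above; the concrete values and keyword-argument
# construction of BenchmarkGenerativeTextArgs are assumptions for illustration.
import asyncio

from guidellm.benchmark.entrypoints import benchmark_generative_text
from guidellm.benchmark.schemas import BenchmarkGenerativeTextArgs
from guidellm.utils import Console


async def main() -> None:
    args = BenchmarkGenerativeTextArgs(
        target="http://localhost:8000",  # OpenAI-compatible endpoint (assumed value)
        data=["prompts.jsonl"],          # any source the data deserializers accept
        profile="synchronous",           # StrategyType/ProfileType identifier (assumed value)
        max_seconds=60,                  # forwarded into the max_seconds constraint
        output_path="results",
    )
    # Extra keyword arguments to benchmark_generative_text become additional
    # constraint initializers merged in by resolve_profile().
    report, outputs = await benchmark_generative_text(args, console=Console())
    print(f"Generated {len(report.benchmarks)} benchmark(s); outputs: {list(outputs)}")


asyncio.run(main())

Note that reimport_benchmarks_report is now a coroutine as well (the diff changes its def to async def), so re-exporting an existing report likewise needs an event loop, e.g. asyncio.run(reimport_benchmarks_report(Path("benchmarks.json"), None)).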