guidellm 0.3.1__py3-none-any.whl → 0.6.0a5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- guidellm/__init__.py +5 -2
- guidellm/__main__.py +524 -255
- guidellm/backends/__init__.py +33 -0
- guidellm/backends/backend.py +109 -0
- guidellm/backends/openai.py +340 -0
- guidellm/backends/response_handlers.py +428 -0
- guidellm/benchmark/__init__.py +69 -39
- guidellm/benchmark/benchmarker.py +160 -316
- guidellm/benchmark/entrypoints.py +560 -127
- guidellm/benchmark/outputs/__init__.py +24 -0
- guidellm/benchmark/outputs/console.py +633 -0
- guidellm/benchmark/outputs/csv.py +721 -0
- guidellm/benchmark/outputs/html.py +473 -0
- guidellm/benchmark/outputs/output.py +169 -0
- guidellm/benchmark/outputs/serialized.py +69 -0
- guidellm/benchmark/profiles.py +718 -0
- guidellm/benchmark/progress.py +553 -556
- guidellm/benchmark/scenarios/__init__.py +40 -0
- guidellm/benchmark/scenarios/chat.json +6 -0
- guidellm/benchmark/scenarios/rag.json +6 -0
- guidellm/benchmark/schemas/__init__.py +66 -0
- guidellm/benchmark/schemas/base.py +402 -0
- guidellm/benchmark/schemas/generative/__init__.py +55 -0
- guidellm/benchmark/schemas/generative/accumulator.py +841 -0
- guidellm/benchmark/schemas/generative/benchmark.py +163 -0
- guidellm/benchmark/schemas/generative/entrypoints.py +381 -0
- guidellm/benchmark/schemas/generative/metrics.py +927 -0
- guidellm/benchmark/schemas/generative/report.py +158 -0
- guidellm/data/__init__.py +34 -4
- guidellm/data/builders.py +541 -0
- guidellm/data/collators.py +16 -0
- guidellm/data/config.py +120 -0
- guidellm/data/deserializers/__init__.py +49 -0
- guidellm/data/deserializers/deserializer.py +141 -0
- guidellm/data/deserializers/file.py +223 -0
- guidellm/data/deserializers/huggingface.py +94 -0
- guidellm/data/deserializers/memory.py +194 -0
- guidellm/data/deserializers/synthetic.py +246 -0
- guidellm/data/entrypoints.py +52 -0
- guidellm/data/loaders.py +190 -0
- guidellm/data/preprocessors/__init__.py +27 -0
- guidellm/data/preprocessors/formatters.py +410 -0
- guidellm/data/preprocessors/mappers.py +196 -0
- guidellm/data/preprocessors/preprocessor.py +30 -0
- guidellm/data/processor.py +29 -0
- guidellm/data/schemas.py +175 -0
- guidellm/data/utils/__init__.py +6 -0
- guidellm/data/utils/dataset.py +94 -0
- guidellm/extras/__init__.py +4 -0
- guidellm/extras/audio.py +220 -0
- guidellm/extras/vision.py +242 -0
- guidellm/logger.py +2 -2
- guidellm/mock_server/__init__.py +8 -0
- guidellm/mock_server/config.py +84 -0
- guidellm/mock_server/handlers/__init__.py +17 -0
- guidellm/mock_server/handlers/chat_completions.py +280 -0
- guidellm/mock_server/handlers/completions.py +280 -0
- guidellm/mock_server/handlers/tokenizer.py +142 -0
- guidellm/mock_server/models.py +510 -0
- guidellm/mock_server/server.py +238 -0
- guidellm/mock_server/utils.py +302 -0
- guidellm/scheduler/__init__.py +69 -26
- guidellm/scheduler/constraints/__init__.py +49 -0
- guidellm/scheduler/constraints/constraint.py +325 -0
- guidellm/scheduler/constraints/error.py +411 -0
- guidellm/scheduler/constraints/factory.py +182 -0
- guidellm/scheduler/constraints/request.py +312 -0
- guidellm/scheduler/constraints/saturation.py +722 -0
- guidellm/scheduler/environments.py +252 -0
- guidellm/scheduler/scheduler.py +137 -368
- guidellm/scheduler/schemas.py +358 -0
- guidellm/scheduler/strategies.py +617 -0
- guidellm/scheduler/worker.py +413 -419
- guidellm/scheduler/worker_group.py +712 -0
- guidellm/schemas/__init__.py +65 -0
- guidellm/schemas/base.py +417 -0
- guidellm/schemas/info.py +188 -0
- guidellm/schemas/request.py +235 -0
- guidellm/schemas/request_stats.py +349 -0
- guidellm/schemas/response.py +124 -0
- guidellm/schemas/statistics.py +1018 -0
- guidellm/{config.py → settings.py} +31 -24
- guidellm/utils/__init__.py +71 -8
- guidellm/utils/auto_importer.py +98 -0
- guidellm/utils/cli.py +132 -5
- guidellm/utils/console.py +566 -0
- guidellm/utils/encoding.py +778 -0
- guidellm/utils/functions.py +159 -0
- guidellm/utils/hf_datasets.py +1 -2
- guidellm/utils/hf_transformers.py +4 -4
- guidellm/utils/imports.py +9 -0
- guidellm/utils/messaging.py +1118 -0
- guidellm/utils/mixins.py +115 -0
- guidellm/utils/random.py +3 -4
- guidellm/utils/registry.py +220 -0
- guidellm/utils/singleton.py +133 -0
- guidellm/utils/synchronous.py +159 -0
- guidellm/utils/text.py +163 -50
- guidellm/utils/typing.py +41 -0
- guidellm/version.py +2 -2
- guidellm-0.6.0a5.dist-info/METADATA +364 -0
- guidellm-0.6.0a5.dist-info/RECORD +109 -0
- guidellm/backend/__init__.py +0 -23
- guidellm/backend/backend.py +0 -259
- guidellm/backend/openai.py +0 -708
- guidellm/backend/response.py +0 -136
- guidellm/benchmark/aggregator.py +0 -760
- guidellm/benchmark/benchmark.py +0 -837
- guidellm/benchmark/output.py +0 -997
- guidellm/benchmark/profile.py +0 -409
- guidellm/benchmark/scenario.py +0 -104
- guidellm/data/prideandprejudice.txt.gz +0 -0
- guidellm/dataset/__init__.py +0 -22
- guidellm/dataset/creator.py +0 -213
- guidellm/dataset/entrypoints.py +0 -42
- guidellm/dataset/file.py +0 -92
- guidellm/dataset/hf_datasets.py +0 -62
- guidellm/dataset/in_memory.py +0 -132
- guidellm/dataset/synthetic.py +0 -287
- guidellm/objects/__init__.py +0 -18
- guidellm/objects/pydantic.py +0 -89
- guidellm/objects/statistics.py +0 -953
- guidellm/preprocess/__init__.py +0 -3
- guidellm/preprocess/dataset.py +0 -374
- guidellm/presentation/__init__.py +0 -28
- guidellm/presentation/builder.py +0 -27
- guidellm/presentation/data_models.py +0 -232
- guidellm/presentation/injector.py +0 -66
- guidellm/request/__init__.py +0 -18
- guidellm/request/loader.py +0 -284
- guidellm/request/request.py +0 -79
- guidellm/request/types.py +0 -10
- guidellm/scheduler/queues.py +0 -25
- guidellm/scheduler/result.py +0 -155
- guidellm/scheduler/strategy.py +0 -495
- guidellm-0.3.1.dist-info/METADATA +0 -329
- guidellm-0.3.1.dist-info/RECORD +0 -62
- {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/WHEEL +0 -0
- {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/entry_points.txt +0 -0
- {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/licenses/LICENSE +0 -0
- {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/top_level.txt +0 -0
guidellm/benchmark/entrypoints.py

@@ -1,165 +1,598 @@
-
+"""
+Primary interface for executing and re-importing generative text benchmarks.
+
+This module orchestrates comprehensive benchmarking workflows by coordinating backend
+initialization, data loading, profile configuration, and output generation. It provides
+two main entry points: `benchmark_generative_text` for executing new benchmarks and
+`reimport_benchmarks_report` for re-exporting existing results. The resolution functions
+convert user-provided arguments into fully configured components, handling backend
+validation, data preprocessing, profile constraints, and output format specifications.
+"""
+
+from __future__ import annotations
+
+from collections.abc import Callable, Mapping, MutableMapping
 from pathlib import Path
-from typing import Any, Literal
+from typing import Any, Literal
 
-from
-from transformers import
-
-)
+from torch.utils.data import Sampler
+from transformers import PreTrainedTokenizerBase
+from typing_extensions import TypeAliasType
 
-from guidellm.
-from guidellm.benchmark.benchmarker import
-from guidellm.benchmark.
-
+from guidellm.backends import Backend, BackendType
+from guidellm.benchmark.benchmarker import Benchmarker
+from guidellm.benchmark.outputs import (
+    GenerativeBenchmarkerConsole,
+    GenerativeBenchmarkerOutput,
+)
+from guidellm.benchmark.profiles import Profile, ProfileType
+from guidellm.benchmark.progress import GenerativeConsoleBenchmarkerProgress
+from guidellm.benchmark.schemas import (
+    BenchmarkGenerativeTextArgs,
+    GenerativeBenchmark,
+    GenerativeBenchmarkAccumulator,
     GenerativeBenchmarksReport,
 )
-from guidellm.benchmark.
-from guidellm.
-
-
-
+from guidellm.benchmark.schemas.base import TransientPhaseConfig
+from guidellm.data import (
+    DataLoader,
+    DatasetPreprocessor,
+    GenerativeRequestCollator,
+    PreprocessorRegistry,
+    ProcessorFactory,
+    RequestFormatter,
+)
+from guidellm.data.preprocessors import GenerativeColumnMapper
+from guidellm.scheduler import (
+    ConstraintInitializer,
+    NonDistributedEnvironment,
+    StrategyType,
+)
+from guidellm.schemas import GenerationRequest, GenerationResponse
+from guidellm.settings import settings
+from guidellm.utils import Console, InfoMixin
+
+__all__ = [
+    "benchmark_generative_text",
+    "reimport_benchmarks_report",
+]
+
+
+# Type Aliases
+
+OutputFormatT = TypeAliasType(
+    "OutputFormatT",
+    tuple[str, ...]
+    | list[str]
+    | Mapping[str, str | dict[str, Any] | GenerativeBenchmarkerOutput]
+    | None,
+)
+"""Output format specification as strings, mappings, or configured output instances"""
+
+ProcessorInputT = TypeAliasType("ProcessorInputT", str | Path | PreTrainedTokenizerBase)
+"""Processor input as model identifier, path to tokenizer, or tokenizer instance"""
+
+
+# Helper Functions
+
+
+async def resolve_backend(
+    backend: BackendType | Backend,
+    target: str,
+    model: str | None,
+    console: Console | None = None,
+    **backend_kwargs: dict[str, Any],
+) -> tuple[Backend, str]:
+    """
+    Initialize and validate a backend instance for benchmarking execution.
+
+    Handles backend creation from type identifiers or pre-configured instances,
+    performs startup validation, and resolves the default model if not specified.
+    The backend is shut down after validation to ensure clean state for subsequent
+    benchmark execution.
+
+    :param backend: Backend type identifier or pre-configured Backend instance
+    :param target: Target endpoint URL or connection string for the backend
+    :param model: Model identifier to use with the backend, or None to use default
+    :param console: Console instance for progress reporting, or None
+    :param backend_kwargs: Additional keyword arguments passed to backend initialization
+    :return: Tuple of initialized Backend instance and resolved model identifier
+    """
+    console_step = (
+        console.print_update_step(title=f"Initializing backend {backend}")
+        if console
+        else None
+    )
+    backend_instance = (
+        Backend.create(backend, target=target, model=model, **(backend_kwargs or {}))
+        if not isinstance(backend, Backend)
+        else backend
+    )
+
+    if console_step:
+        console_step.update(
+            f"{backend_instance.__class__.__name__} backend initialized"
+        )
+
+    await backend_instance.process_startup()
+    await backend_instance.validate()
+
+    if model is None:
+        if console_step:
+            console_step.update(
+                title="Resolving default model from backend.default_model",
+                status_level="info",
+            )
+        model = await backend_instance.default_model()
 
+    await backend_instance.process_shutdown()
 
-
+    if console_step:
+        console_step.finish(
+            title=(
+                f"{backend_instance.__class__.__name__} backend validated "
+                f"with model {model}"
+            ),
+            details=backend_instance.info,
+            status_level="success",
+        )
+
+    return backend_instance, model
+
+
+async def resolve_processor(
+    processor: ProcessorInputT | None,
+    model: str | None,
+    console: Console | None = None,
+) -> ProcessorInputT | None:
     """
-
+    Resolve the tokenization processor, defaulting to model if not provided.
+
+    :param processor: Processor identifier, path, tokenizer instance, or None
+    :param model: Model identifier to use as fallback processor
+    :param console: Console instance for progress reporting, or None
+    :return: Resolved processor or None if neither processor nor model provided
     """
+    console_step = (
+        console.print_update_step(title=f"Resolving processor {processor}")
+        if console
+        else None
+    )
 
-    if
-
+    if processor is not None:
+        if console_step:
+            console_step.finish(
+                title="Processor resolved",
+                details=f"Using processor '{processor}'",
+                status_level="success",
+            )
     else:
-
+        processor = model
+        if console_step:
+            console_step.finish(
+                title="Processor resolved",
+                details=f"Using model '{processor}' as processor",
+                status_level="success",
+            )
 
+    return processor
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    ],
-
-    data_sampler: Optional[Literal["random"]],
-    rate_type: Union[StrategyType, ProfileType],
-    rate: Optional[Union[float, list[float]]],
-    max_seconds: Optional[float],
-    max_requests: Optional[int],
-    warmup_percent: Optional[float],
-    cooldown_percent: Optional[float],
-    output_path: Optional[Union[str, Path]],
-    output_extras: Optional[dict[str, Any]],
-    output_sampling: Optional[int],
+
+async def resolve_request_loader(
+    data: list[Any],
+    model: str,
+    data_args: list[dict[str, Any]] | None,
+    data_samples: int,
+    processor: ProcessorInputT | None,
+    processor_args: dict[str, Any] | None,
+    data_column_mapper: (
+        DatasetPreprocessor
+        | dict[str, str | list[str]]
+        | Literal["generative_column_mapper"]
+    ),
+    data_request_formatter: (RequestFormatter | dict[str, str] | str),
+    data_collator: Callable | Literal["generative"] | None,
+    data_sampler: Sampler[int] | Literal["shuffle"] | None,
+    data_num_workers: int | None,
     random_seed: int,
-
-
-
-
-
-
-
-
-
-
-
-
+    console: Console | None = None,
+    **dataloader_kwargs: dict[str, Any] | None,
+) -> DataLoader[GenerationRequest]:
+    """
+    Construct a DataLoader for GenerationRequest objects from raw data inputs.
+
+    Initializes and configures the data pipeline including column mapping, request
+    formatting, collation, and sampling. Resolves string-based preprocessor identifiers
+    from the PreprocessorRegistry and creates appropriate instances with provided
+    configurations.
+
+    :param data: List of data sources to load requests from
+    :param model: Model identifier for request formatting
+    :param data_args: Arguments for each data source in the data list
+    :param data_samples: Number of samples to draw from the dataset
+    :param processor: Processor for tokenization operations
+    :param processor_args: Arguments for processor initialization
+    :param data_column_mapper: Preprocessor or mapping for standardizing column names
+    :param data_request_formatter: Preprocessor or config for formatting requests
+    :param data_collator: Collation function or type for batching requests
+    :param data_sampler: Sampler instance or type for data sampling
+    :param data_num_workers: Number of worker processes for data loading
+    :param random_seed: Seed for reproducible random operations
+    :param console: Console instance for progress reporting, or None
+    :param dataloader_kwargs: Additional arguments passed to DataLoader initialization
+    :return: Configured DataLoader instance for GenerationRequest objects
+    :raises ValueError: If request formatter type is not registered in
+        PreprocessorRegistry
+    :raises TypeError: If registered request formatter is not a RequestFormatter
+        subclass
+    """
+    console_step = (
+        console.print_update_step(title=f"Initializing request loader from {data}")
+        if console
+        else None
     )
 
-
-
+    data_column_mapper_instance: DatasetPreprocessor
+    if isinstance(data_column_mapper, DatasetPreprocessor):
+        data_column_mapper_instance = data_column_mapper
+    else:
+        column_mappings = (
+            data_column_mapper if isinstance(data_column_mapper, dict) else None
+        )
+        data_column_mapper_instance = GenerativeColumnMapper(
+            column_mappings=column_mappings  # type: ignore[arg-type]
+        )
+
+    data_request_formatter_instance: RequestFormatter
+    if isinstance(data_request_formatter, RequestFormatter):
+        data_request_formatter_instance = data_request_formatter
+    else:
+        if isinstance(data_request_formatter, str):
+            request_type = data_request_formatter
+            formatter_kwargs: dict[str, Any] = {}
+        else:
+            # Extract request_type from formatter dictionary
+            formatter_dict = dict(data_request_formatter)
+            request_type = formatter_dict.pop("request_type", settings.preferred_route)
+            formatter_kwargs = formatter_dict
 
-
-
+        if (
+            formatter_class := PreprocessorRegistry.get_registered_object(request_type)
+        ) is None:
+            raise ValueError(
+                f"Request formatter '{request_type}' is not registered in the "
+                f"PreprocessorRegistry."
+            )
+        if not issubclass(formatter_class, RequestFormatter):
+            raise TypeError(
+                f"Request formatter '{request_type}' is not a subclass of "
+                f"RequestFormatter."
+            )
+
+        data_request_formatter_instance = formatter_class(
+            model=model,
+            **formatter_kwargs,
+        )
+
+    # Cast to proper types for the DataLoader preprocessors list
+    preprocessors_list: list[DatasetPreprocessor] = [
+        data_column_mapper_instance,
+        data_request_formatter_instance,
+    ]
+
+    request_loader: DataLoader[GenerationRequest] = DataLoader(
         data=data,
         data_args=data_args,
-
-
-
-
-        "finite"  # assume a finite dataset is our limit
-        if max_requests is None and max_seconds is None
-        else "infinite"  # default to infinite so we don't run out of data
+        data_samples=data_samples,
+        processor_factory=ProcessorFactory(
+            processor=processor if processor is not None else model,
+            processor_args=processor_args,
        ),
+        preprocessors=preprocessors_list,
+        collator=(
+            data_collator if callable(data_collator) else GenerativeRequestCollator()
+        ),
+        sampler=data_sampler,
+        num_workers=data_num_workers,
        random_seed=random_seed,
+        **(dataloader_kwargs or {}),
    )
-
-
-
-
-
+
+    if console_step:
+        console_step.finish(
+            title=(
+                f"Request loader initialized with "
+                f"{data_samples if data_samples > 0 else 'inf'} "
+                "unique requests"
+            ),
+            details=InfoMixin.extract_from_obj(request_loader),
+            status_level="success",
+        )
+
+    return request_loader
+
+
+async def resolve_profile(
+    profile: StrategyType | ProfileType | Profile,
+    rate: list[float] | None,
+    random_seed: int,
+    rampup: float,
+    constraints: MutableMapping[str, ConstraintInitializer | Any],
+    max_seconds: int | float | None,
+    max_requests: int | None,
+    max_errors: int | None,
+    max_error_rate: float | None,
+    max_global_error_rate: float | None,
+    over_saturation: dict[str, Any] | None = None,
+    console: Console | None = None,
+) -> Profile:
+    """
+    Resolve and configure a benchmark profile with rate and constraint settings.
+
+    Constructs a Profile instance from type identifiers or validates pre-configured
+    profiles. Constraint parameters are merged into the constraints dictionary before
+    profile creation.
+
+    :param profile: Profile type identifier or pre-configured Profile instance
+    :param rate: Request rate(s) for the benchmark execution
+    :param random_seed: Seed for reproducible random operations
+    :param warmup: Warm-up phase configuration for the benchmark execution
+        (used for ramp-up duration calculation)
+    :param constraints: Dictionary of constraint initializers for benchmark limits
+    :param max_seconds: Maximum duration in seconds for the benchmark
+    :param max_requests: Maximum number of requests to process
+    :param max_errors: Maximum number of errors before stopping
+    :param max_error_rate: Maximum error rate threshold before stopping
+    :param max_global_error_rate: Maximum global error rate threshold before stopping
+    :param over_saturation: Over-saturation detection configuration (dict)
+    :param console: Console instance for progress reporting, or None
+    :return: Configured Profile instance ready for benchmarking
+    :raises ValueError: If constraints are provided with a pre-configured Profile
+    """
+    console_step = (
+        console.print_update_step(title=f"Resolving profile {profile}")
+        if console
+        else None
    )
 
-
-
-
-
-
-
+    for key, val in {
+        "max_seconds": max_seconds,
+        "max_requests": max_requests,
+        "max_errors": max_errors,
+        "max_error_rate": max_error_rate,
+        "max_global_error_rate": max_global_error_rate,
+        "over_saturation": over_saturation,
+    }.items():
+        if val is not None:
+            constraints[key] = val
+
+    if not isinstance(profile, Profile):
+        profile = Profile.create(
+            rate_type=profile,
+            rate=rate,
+            random_seed=random_seed,
+            rampup_duration=rampup,
+            constraints={**constraints},
+        )
+    elif constraints:
+        raise ValueError(
+            "Constraints must be empty when providing a Profile instance. "
+            f"Provided constraints: {constraints} ; provided profile: {profile}"
+        )
+    elif rampup > 0.0:
+        raise ValueError(
+            "Ramp-up duration must not be set when providing a Profile instance. "
+            f"Provided rampup: {rampup} ; provided profile: {profile}"
+        )
+
+    if console_step:
+        console_step.finish(
+            title=f"{profile.__class__.__name__} profile resolved",
+            details=InfoMixin.extract_from_obj(profile),
+            status_level="success",
+        )
+
+    return profile
+
+
+async def resolve_output_formats(
+    outputs: list[str] | tuple[str],
+    output_dir: str | Path | None,
+    console: Console | None = None,
+) -> dict[str, GenerativeBenchmarkerOutput]:
+    """
+    Resolve output format specifications into configured output handler instances.
+
+    :param outputs: Specification of desired output files/types
+    :param output_dir: Base path for output file generation, or None for default
+    :param console: Console instance for progress reporting, or None
+    :return: Dictionary mapping format names to configured output handler instances
+    """
+    console_step = (
+        console.print_update_step(title="Resolving output formats") if console else None
+    )
+
+    resolved = GenerativeBenchmarkerOutput.resolve(
+        outputs=outputs, output_dir=output_dir
+    )
+
+    if console_step:
+        console_step.finish(
+            title="Output formats resolved",
+            details={key: str(val) for key, val in resolved.items()},
+            status_level="success",
+        )
+
+    return resolved
+
+
+# Main Entrypoints Functions
+
+
+async def benchmark_generative_text(
+    args: BenchmarkGenerativeTextArgs,
+    progress: GenerativeConsoleBenchmarkerProgress | None = None,
+    console: Console | None = None,
+    **constraints: str | ConstraintInitializer | Any,
+) -> tuple[GenerativeBenchmarksReport, dict[str, Any]]:
+    """
+    Execute a comprehensive generative text benchmarking workflow.
+
+    Orchestrates the full benchmarking pipeline by resolving all components from
+    provided arguments, executing benchmark runs across configured profiles, and
+    finalizing results in specified output formats. Components include backend
+    initialization, data loading, profile configuration, and output generation.
+
+    :param args: Configuration arguments for the benchmark execution
+    :param progress: Progress tracker for benchmark execution, or None for no tracking
+    :param console: Console instance for status reporting, or None for silent operation
+    :param constraints: Additional constraint initializers for benchmark limits
+    :return: Tuple of GenerativeBenchmarksReport and dictionary of output format
+        results
+    """
+    backend, model = await resolve_backend(
+        backend=args.backend,
+        target=args.target,
+        model=args.model,
+        console=console,
+        **(args.backend_kwargs or {}),
+    )
+    processor = await resolve_processor(
+        processor=args.processor, model=model, console=console
+    )
+    request_loader = await resolve_request_loader(
+        data=args.data,
+        model=model,
+        data_args=args.data_args,
+        data_samples=args.data_samples,
        processor=processor,
-        processor_args=processor_args,
+        processor_args=args.processor_args,
+        data_column_mapper=args.data_column_mapper,
+        data_request_formatter=args.data_request_formatter,
+        data_collator=args.data_collator,
+        data_sampler=args.data_sampler,
+        data_num_workers=args.data_num_workers,
+        random_seed=args.random_seed,
+        console=console,
+        **(args.dataloader_kwargs or {}),
    )
-
-
-
+
+    warmup = TransientPhaseConfig.create_from_value(args.warmup)
+    cooldown = TransientPhaseConfig.create_from_value(args.cooldown)
+    if console:
+        console.print_update(
+            title="Resolved transient phase configurations",
+            details="\n".join(
+                [
+                    f"Warmup: {warmup}",
+                    f"Cooldown: {cooldown}",
+                    f"Rampup (Throughput/Concurrent): {args.rampup}",
+                ]
+            ),
+            status="success",
        )
-
-
+
+    profile = await resolve_profile(
+        profile=args.profile,
+        rate=args.rate,
+        random_seed=args.random_seed,
+        rampup=args.rampup,
+        constraints=constraints,
+        max_seconds=args.max_seconds,
+        max_requests=args.max_requests,
+        max_errors=args.max_errors,
+        max_error_rate=args.max_error_rate,
+        max_global_error_rate=args.max_global_error_rate,
+        over_saturation=args.over_saturation,
+        console=console,
    )
-
+    output_formats = await resolve_output_formats(
+        outputs=args.outputs, output_dir=args.output_dir, console=console
+    )
+
+    report = GenerativeBenchmarksReport(args=args)
+    if console:
+        console.print_update(
+            title="Setup complete, starting benchmarks...", status="success"
+        )
+        console.print("\n\n")
 
-
+    benchmarker: Benchmarker[
+        GenerativeBenchmark, GenerationRequest, GenerationResponse
+    ] = Benchmarker()
+    async for benchmark in benchmarker.run(
+        accumulator_class=GenerativeBenchmarkAccumulator,
+        benchmark_class=GenerativeBenchmark,
+        requests=request_loader,
+        backend=backend,
        profile=profile,
-
-
-
-
+        environment=NonDistributedEnvironment(),
+        progress=progress,
+        sample_requests=args.sample_requests,
+        warmup=warmup,
+        cooldown=cooldown,
+        prefer_response_metrics=args.prefer_response_metrics,
    ):
-        if
-
-
-        if result.type_ == "benchmark_compiled":
-            if result.current_benchmark is None:
-                raise ValueError("Current benchmark is None")
-            report.benchmarks.append(
-                result.current_benchmark.set_sample_size(output_sampling)
-            )
-
-    if output_console:
-        console.benchmarks = report.benchmarks
-        console.print_full_report()
+        if benchmark:
+            report.benchmarks.append(benchmark)
 
-
-
-
-
-    else:
-        saved_path = None
+    output_format_results = {}
+    for key, output in output_formats.items():
+        output_result = await output.finalize(report)
+        output_format_results[key] = output_result
 
-    console
+    if console:
+        await GenerativeBenchmarkerConsole(console=console).finalize(report)
+        console.print("\n\n")
+        console.print_update(
+            title=(
+                "Benchmarking complete, generated "
+                f"{len(report.benchmarks)} benchmark(s)"
+            ),
+            status="success",
+        )
+        for key, value in output_format_results.items():
+            console.print_update(title=f" {key:<8}: {value}", status="debug")
 
-    return report,
+    return report, output_format_results
 
 
-def reimport_benchmarks_report(
+async def reimport_benchmarks_report(
+    file: Path,
+    output_path: Path | None,
+    output_formats: OutputFormatT = ("console", "json", "html", "csv"),
+) -> tuple[GenerativeBenchmarksReport, dict[str, Any]]:
    """
-
-
-
+    Load and re-export an existing benchmarks report in specified output formats.
+
+    :param file: Path to the existing benchmark report file to load
+    :param output_path: Base path for output file generation, or None for default
+    :param output_formats: Specification of desired output formats for the report
+    :return: Tuple of loaded GenerativeBenchmarksReport and dictionary of output
+        results
    """
-    console =
-
-    console.
-
-
-
-
-
-
+    console = Console()
+
+    with console.print_update_step(
+        title=f"Loading benchmarks from {file}..."
+    ) as console_step:
+        report = GenerativeBenchmarksReport.load_file(file)
+        console_step.finish(
+            "Import of old benchmarks complete;"
+            f" loaded {len(report.benchmarks)} benchmark(s)"
+        )
+
+    resolved_output_formats = await resolve_output_formats(
+        output_formats,  # type: ignore[arg-type]
+        output_path,
+        console=console,
+    )
+    output_format_results = {}
+    for key, output in resolved_output_formats.items():
+        output_result = await output.finalize(report)
+        output_format_results[key] = output_result
+
+    for key, value in output_format_results.items():
+        console.print_update(title=f" {key:<8}: {value}", status="debug")
+
+    return report, output_format_results
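
For orientation, a minimal usage sketch of the two reworked entrypoints, based only on the signatures visible in the diff above. Both functions are now async, and `benchmark_generative_text` takes a single `BenchmarkGenerativeTextArgs` schema instead of the old flat keyword arguments; the concrete values shown below (`target`, `data`, `profile`, `rate`, file paths) are illustrative assumptions, not values taken from the package.

```python
# Minimal sketch of the 0.6.0a5 entrypoints; signatures come from
# entrypoints.py above, argument values are assumptions for illustration.
import asyncio
from pathlib import Path

from guidellm.benchmark.entrypoints import (
    benchmark_generative_text,
    reimport_benchmarks_report,
)
from guidellm.benchmark.schemas import BenchmarkGenerativeTextArgs


async def main() -> None:
    args = BenchmarkGenerativeTextArgs(
        target="http://localhost:8000",  # assumed OpenAI-compatible endpoint
        data=["prompts.jsonl"],          # assumed local dataset source
        profile="constant",              # assumed profile/strategy identifier
        rate=[10.0],
        max_seconds=60,
    )
    report, output_results = await benchmark_generative_text(args)
    print(f"Generated {len(report.benchmarks)} benchmark(s): {output_results}")

    # Re-export a previously saved report; this entrypoint is also async
    # in 0.6.0a5 (it was a plain def in 0.3.1).
    await reimport_benchmarks_report(
        file=Path("benchmarks.json"),  # assumed path to a saved report
        output_path=None,
        output_formats=("console", "json"),
    )


asyncio.run(main())
```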
|