guidellm 0.3.1__py3-none-any.whl → 0.6.0a5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (141)
  1. guidellm/__init__.py +5 -2
  2. guidellm/__main__.py +524 -255
  3. guidellm/backends/__init__.py +33 -0
  4. guidellm/backends/backend.py +109 -0
  5. guidellm/backends/openai.py +340 -0
  6. guidellm/backends/response_handlers.py +428 -0
  7. guidellm/benchmark/__init__.py +69 -39
  8. guidellm/benchmark/benchmarker.py +160 -316
  9. guidellm/benchmark/entrypoints.py +560 -127
  10. guidellm/benchmark/outputs/__init__.py +24 -0
  11. guidellm/benchmark/outputs/console.py +633 -0
  12. guidellm/benchmark/outputs/csv.py +721 -0
  13. guidellm/benchmark/outputs/html.py +473 -0
  14. guidellm/benchmark/outputs/output.py +169 -0
  15. guidellm/benchmark/outputs/serialized.py +69 -0
  16. guidellm/benchmark/profiles.py +718 -0
  17. guidellm/benchmark/progress.py +553 -556
  18. guidellm/benchmark/scenarios/__init__.py +40 -0
  19. guidellm/benchmark/scenarios/chat.json +6 -0
  20. guidellm/benchmark/scenarios/rag.json +6 -0
  21. guidellm/benchmark/schemas/__init__.py +66 -0
  22. guidellm/benchmark/schemas/base.py +402 -0
  23. guidellm/benchmark/schemas/generative/__init__.py +55 -0
  24. guidellm/benchmark/schemas/generative/accumulator.py +841 -0
  25. guidellm/benchmark/schemas/generative/benchmark.py +163 -0
  26. guidellm/benchmark/schemas/generative/entrypoints.py +381 -0
  27. guidellm/benchmark/schemas/generative/metrics.py +927 -0
  28. guidellm/benchmark/schemas/generative/report.py +158 -0
  29. guidellm/data/__init__.py +34 -4
  30. guidellm/data/builders.py +541 -0
  31. guidellm/data/collators.py +16 -0
  32. guidellm/data/config.py +120 -0
  33. guidellm/data/deserializers/__init__.py +49 -0
  34. guidellm/data/deserializers/deserializer.py +141 -0
  35. guidellm/data/deserializers/file.py +223 -0
  36. guidellm/data/deserializers/huggingface.py +94 -0
  37. guidellm/data/deserializers/memory.py +194 -0
  38. guidellm/data/deserializers/synthetic.py +246 -0
  39. guidellm/data/entrypoints.py +52 -0
  40. guidellm/data/loaders.py +190 -0
  41. guidellm/data/preprocessors/__init__.py +27 -0
  42. guidellm/data/preprocessors/formatters.py +410 -0
  43. guidellm/data/preprocessors/mappers.py +196 -0
  44. guidellm/data/preprocessors/preprocessor.py +30 -0
  45. guidellm/data/processor.py +29 -0
  46. guidellm/data/schemas.py +175 -0
  47. guidellm/data/utils/__init__.py +6 -0
  48. guidellm/data/utils/dataset.py +94 -0
  49. guidellm/extras/__init__.py +4 -0
  50. guidellm/extras/audio.py +220 -0
  51. guidellm/extras/vision.py +242 -0
  52. guidellm/logger.py +2 -2
  53. guidellm/mock_server/__init__.py +8 -0
  54. guidellm/mock_server/config.py +84 -0
  55. guidellm/mock_server/handlers/__init__.py +17 -0
  56. guidellm/mock_server/handlers/chat_completions.py +280 -0
  57. guidellm/mock_server/handlers/completions.py +280 -0
  58. guidellm/mock_server/handlers/tokenizer.py +142 -0
  59. guidellm/mock_server/models.py +510 -0
  60. guidellm/mock_server/server.py +238 -0
  61. guidellm/mock_server/utils.py +302 -0
  62. guidellm/scheduler/__init__.py +69 -26
  63. guidellm/scheduler/constraints/__init__.py +49 -0
  64. guidellm/scheduler/constraints/constraint.py +325 -0
  65. guidellm/scheduler/constraints/error.py +411 -0
  66. guidellm/scheduler/constraints/factory.py +182 -0
  67. guidellm/scheduler/constraints/request.py +312 -0
  68. guidellm/scheduler/constraints/saturation.py +722 -0
  69. guidellm/scheduler/environments.py +252 -0
  70. guidellm/scheduler/scheduler.py +137 -368
  71. guidellm/scheduler/schemas.py +358 -0
  72. guidellm/scheduler/strategies.py +617 -0
  73. guidellm/scheduler/worker.py +413 -419
  74. guidellm/scheduler/worker_group.py +712 -0
  75. guidellm/schemas/__init__.py +65 -0
  76. guidellm/schemas/base.py +417 -0
  77. guidellm/schemas/info.py +188 -0
  78. guidellm/schemas/request.py +235 -0
  79. guidellm/schemas/request_stats.py +349 -0
  80. guidellm/schemas/response.py +124 -0
  81. guidellm/schemas/statistics.py +1018 -0
  82. guidellm/{config.py → settings.py} +31 -24
  83. guidellm/utils/__init__.py +71 -8
  84. guidellm/utils/auto_importer.py +98 -0
  85. guidellm/utils/cli.py +132 -5
  86. guidellm/utils/console.py +566 -0
  87. guidellm/utils/encoding.py +778 -0
  88. guidellm/utils/functions.py +159 -0
  89. guidellm/utils/hf_datasets.py +1 -2
  90. guidellm/utils/hf_transformers.py +4 -4
  91. guidellm/utils/imports.py +9 -0
  92. guidellm/utils/messaging.py +1118 -0
  93. guidellm/utils/mixins.py +115 -0
  94. guidellm/utils/random.py +3 -4
  95. guidellm/utils/registry.py +220 -0
  96. guidellm/utils/singleton.py +133 -0
  97. guidellm/utils/synchronous.py +159 -0
  98. guidellm/utils/text.py +163 -50
  99. guidellm/utils/typing.py +41 -0
  100. guidellm/version.py +2 -2
  101. guidellm-0.6.0a5.dist-info/METADATA +364 -0
  102. guidellm-0.6.0a5.dist-info/RECORD +109 -0
  103. guidellm/backend/__init__.py +0 -23
  104. guidellm/backend/backend.py +0 -259
  105. guidellm/backend/openai.py +0 -708
  106. guidellm/backend/response.py +0 -136
  107. guidellm/benchmark/aggregator.py +0 -760
  108. guidellm/benchmark/benchmark.py +0 -837
  109. guidellm/benchmark/output.py +0 -997
  110. guidellm/benchmark/profile.py +0 -409
  111. guidellm/benchmark/scenario.py +0 -104
  112. guidellm/data/prideandprejudice.txt.gz +0 -0
  113. guidellm/dataset/__init__.py +0 -22
  114. guidellm/dataset/creator.py +0 -213
  115. guidellm/dataset/entrypoints.py +0 -42
  116. guidellm/dataset/file.py +0 -92
  117. guidellm/dataset/hf_datasets.py +0 -62
  118. guidellm/dataset/in_memory.py +0 -132
  119. guidellm/dataset/synthetic.py +0 -287
  120. guidellm/objects/__init__.py +0 -18
  121. guidellm/objects/pydantic.py +0 -89
  122. guidellm/objects/statistics.py +0 -953
  123. guidellm/preprocess/__init__.py +0 -3
  124. guidellm/preprocess/dataset.py +0 -374
  125. guidellm/presentation/__init__.py +0 -28
  126. guidellm/presentation/builder.py +0 -27
  127. guidellm/presentation/data_models.py +0 -232
  128. guidellm/presentation/injector.py +0 -66
  129. guidellm/request/__init__.py +0 -18
  130. guidellm/request/loader.py +0 -284
  131. guidellm/request/request.py +0 -79
  132. guidellm/request/types.py +0 -10
  133. guidellm/scheduler/queues.py +0 -25
  134. guidellm/scheduler/result.py +0 -155
  135. guidellm/scheduler/strategy.py +0 -495
  136. guidellm-0.3.1.dist-info/METADATA +0 -329
  137. guidellm-0.3.1.dist-info/RECORD +0 -62
  138. {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/WHEEL +0 -0
  139. {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/entry_points.txt +0 -0
  140. {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/licenses/LICENSE +0 -0
  141. {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/top_level.txt +0 -0
guidellm/benchmark/schemas/generative/benchmark.py +163 -0
@@ -0,0 +1,163 @@
+ """
+ Benchmark data models and metrics for generative AI performance measurement.
+
+ Provides comprehensive data structures for capturing, storing, and analyzing
+ benchmark results from scheduler-driven generative AI workload executions.
+ Core abstractions include base benchmark interfaces, generative-specific
+ metrics with token/latency distributions, request-level statistics tracking,
+ and multi-benchmark reporting capabilities. These models enable detailed
+ performance analysis including throughput, latency, concurrency patterns, and
+ domain-specific metrics for text, image, video, and audio generation tasks.
+ """
+
+ from __future__ import annotations
+
+ from typing import Literal
+
+ from pydantic import Field, computed_field
+
+ from guidellm.benchmark.schemas.base import Benchmark, BenchmarkConfig
+ from guidellm.benchmark.schemas.generative.accumulator import (
+     GenerativeBenchmarkAccumulator,
+ )
+ from guidellm.benchmark.schemas.generative.metrics import (
+     GenerativeMetrics,
+     SchedulerMetrics,
+ )
+ from guidellm.scheduler import SchedulerState
+ from guidellm.schemas import (
+     GenerativeRequestStats,
+     StatusBreakdown,
+     StatusDistributionSummary,
+ )
+
+ __all__ = ["GenerativeBenchmark"]
+
+
+ class GenerativeBenchmark(Benchmark[GenerativeBenchmarkAccumulator]):
+     """
+     Complete generative AI benchmark results with specialized metrics.
+
+     Encapsulates comprehensive performance data from scheduler-driven generative
+     workload executions including request-level statistics, token/latency
+     distributions, throughput analysis, and concurrency patterns. Provides
+     computed fields for temporal analysis and status-grouped request details
+     for detailed post-execution reporting.
+     """
+
+     type_: Literal["generative_benchmark"] = "generative_benchmark"  # type: ignore[assignment]
+
+     config: BenchmarkConfig = Field(
+         description="Configuration parameters for this benchmark execution",
+     )
+     scheduler_state: SchedulerState = Field(
+         description="Final state of the scheduler after benchmark completion",
+     )
+     scheduler_metrics: SchedulerMetrics = Field(
+         description="Scheduler timing and performance statistics",
+     )
+     metrics: GenerativeMetrics = Field(
+         description="Performance metrics and statistical distributions",
+     )
+     requests: StatusBreakdown[
+         list[GenerativeRequestStats],
+         list[GenerativeRequestStats],
+         list[GenerativeRequestStats],
+         None,
+     ] = Field(
+         description=(
+             "Request details grouped by status: successful, incomplete, errored"
+         ),
+     )
+
+     @computed_field  # type: ignore[prop-decorator]
+     @property
+     def start_time(self) -> float:
+         """
+         :return: Benchmark start time in seconds since epoch
+         """
+         return self.scheduler_metrics.measure_start_time
+
+     @computed_field  # type: ignore[prop-decorator]
+     @property
+     def end_time(self) -> float:
+         """
+         :return: Benchmark end time in seconds since epoch
+         """
+         return self.scheduler_metrics.measure_end_time
+
+     @computed_field  # type: ignore[prop-decorator]
+     @property
+     def duration(self) -> float:
+         """
+         :return: Total benchmark execution duration in seconds
+         """
+         return self.end_time - self.start_time
+
+     @computed_field  # type: ignore[prop-decorator]
+     @property
+     def warmup_duration(self) -> float:
+         """
+         :return: Warmup phase duration in seconds
+         """
+         return (
+             self.scheduler_metrics.measure_start_time
+             - self.scheduler_metrics.request_start_time
+         )
+
+     @computed_field  # type: ignore[prop-decorator]
+     @property
+     def cooldown_duration(self) -> float:
+         """
+         :return: Cooldown phase duration in seconds
+         """
+         return (
+             self.scheduler_metrics.request_end_time
+             - self.scheduler_metrics.measure_end_time
+         )
+
+     @property
+     def request_latency(self) -> StatusDistributionSummary:
+         """
+         :return: Statistical distribution of request latencies across all requests
+         """
+         return self.metrics.request_latency
+
+     @property
+     def request_throughput(self) -> StatusDistributionSummary:
+         """
+         :return: Statistical distribution of throughput measured in requests per second
+         """
+         return self.metrics.requests_per_second
+
+     @property
+     def request_concurrency(self) -> StatusDistributionSummary:
+         """
+         :return: Statistical distribution of concurrent requests throughout execution
+         """
+         return self.metrics.request_concurrency
+
+     @classmethod
+     def compile(
+         cls,
+         accumulator: GenerativeBenchmarkAccumulator,
+         scheduler_state: SchedulerState,
+     ) -> GenerativeBenchmark:
+         """
+         Compile final benchmark results from accumulated execution state.
+
+         :param accumulator: Accumulated benchmark state with request statistics
+         :param scheduler_state: Final scheduler state after execution completion
+         :return: Compiled generative benchmark instance with complete metrics
+         """
+         return GenerativeBenchmark(
+             config=accumulator.config,
+             scheduler_state=scheduler_state,
+             scheduler_metrics=SchedulerMetrics.compile(accumulator, scheduler_state),
+             metrics=GenerativeMetrics.compile(accumulator),
+             requests=StatusBreakdown(
+                 successful=accumulator.completed.get_sampled(),
+                 incomplete=accumulator.incomplete.get_sampled(),
+                 errored=accumulator.errored.get_sampled(),
+                 total=None,
+             ),
+         )
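
The computed fields above are pure arithmetic over the four SchedulerMetrics timestamps, splitting a run into warmup, measured, and cooldown phases. A minimal sketch of that arithmetic, using made-up epoch values in place of real scheduler output:

    # Hypothetical timestamps (seconds since epoch); the names mirror the
    # SchedulerMetrics fields referenced above, the values are illustrative.
    request_start_time = 1_000.0  # first request issued (warmup begins)
    measure_start_time = 1_010.0  # measurement window opens
    measure_end_time = 1_070.0    # measurement window closes
    request_end_time = 1_075.0    # last request finishes (cooldown ends)

    warmup_duration = measure_start_time - request_start_time  # 10.0 s
    duration = measure_end_time - measure_start_time           # 60.0 s measured
    cooldown_duration = request_end_time - measure_end_time    # 5.0 s

    # The three phases tile the full request window end to end.
    assert warmup_duration + duration + cooldown_duration == (
        request_end_time - request_start_time
    )
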
guidellm/benchmark/schemas/generative/entrypoints.py +381 -0
@@ -0,0 +1,381 @@
+ """
+ Configuration entrypoints for generative text benchmark execution.
+
+ Defines parameter schemas and construction logic for creating benchmark runs from
+ scenario files or runtime arguments. Provides flexible configuration loading with
+ support for built-in scenarios, custom YAML/JSON files, and programmatic overrides.
+ Handles serialization of complex types including backends, processors, and profiles
+ for persistent storage and reproduction of benchmark configurations.
+ """
+
+ from __future__ import annotations
+
+ import inspect
+ import json
+ from collections.abc import Callable
+ from pathlib import Path
+ from typing import Any, Literal
+
+ import yaml
+ from pydantic import (
+     AliasChoices,
+     AliasGenerator,
+     ConfigDict,
+     Field,
+     NonNegativeFloat,
+     ValidationError,
+     ValidatorFunctionWrapHandler,
+     field_serializer,
+     field_validator,
+ )
+ from torch.utils.data import Sampler
+ from transformers import PreTrainedTokenizerBase
+
+ from guidellm.backends import Backend, BackendType
+ from guidellm.benchmark.profiles import Profile, ProfileType
+ from guidellm.benchmark.scenarios import get_builtin_scenarios
+ from guidellm.benchmark.schemas.base import TransientPhaseConfig
+ from guidellm.data import DatasetPreprocessor, RequestFormatter
+ from guidellm.scheduler import StrategyType
+ from guidellm.schemas import StandardBaseModel
+
+ __all__ = ["BenchmarkGenerativeTextArgs"]
+
+
+ class BenchmarkGenerativeTextArgs(StandardBaseModel):
+     """
+     Configuration arguments for generative text benchmark execution.
+
+     Defines all parameters for benchmark setup including target endpoint, data
+     sources, backend configuration, processing pipeline, output formatting, and
+     execution constraints. Supports loading from scenario files and merging with
+     runtime overrides for flexible benchmark construction from multiple sources.
+
+     Example::
+
+         # Load from built-in scenario with overrides
+         args = BenchmarkGenerativeTextArgs.create(
+             scenario="chat",
+             target="http://localhost:8000/v1",
+             max_requests=1000
+         )
+
+         # Create from keyword arguments only
+         args = BenchmarkGenerativeTextArgs(
+             target="http://localhost:8000/v1",
+             data=["path/to/dataset.json"],
+             profile="fixed",
+             rate=10.0
+         )
+     """
+
+     @classmethod
+     def create(
+         cls, scenario: Path | str | None, **kwargs: dict[str, Any]
+     ) -> BenchmarkGenerativeTextArgs:
+         """
+         Create benchmark args from scenario file and keyword arguments.
+
+         Loads base configuration from scenario file (built-in or custom) and merges
+         with provided keyword arguments. Arguments explicitly set via kwargs override
+         scenario values, while defaulted kwargs are ignored to preserve scenario
+         settings.
+
+         :param scenario: Path to scenario file, built-in scenario name, or None
+         :param kwargs: Keyword arguments to override scenario values
+         :return: Configured benchmark args instance
+         :raises ValueError: If scenario is not found or file format is unsupported
+         """
+         constructor_kwargs = {}
+
+         if scenario is not None:
+             if isinstance(scenario, str) and scenario in (
+                 builtin_scenarios := get_builtin_scenarios()
+             ):
+                 scenario_path = builtin_scenarios[scenario]
+             elif Path(scenario).exists() and Path(scenario).is_file():
+                 scenario_path = Path(scenario)
+             else:
+                 raise ValueError(f"Scenario '{scenario}' not found.")
+
+             with scenario_path.open() as file:
+                 if scenario_path.suffix == ".json":
+                     scenario_data = json.load(file)
+                 elif scenario_path.suffix in {".yaml", ".yml"}:
+                     scenario_data = yaml.safe_load(file)
+                 else:
+                     raise ValueError(
+                         f"Unsupported scenario file format: {scenario_path.suffix}"
+                     )
+             if "args" in scenario_data:
+                 # loading from a report file
+                 scenario_data = scenario_data["args"]
+             constructor_kwargs.update(scenario_data)
+
+         # Apply overrides from kwargs
+         constructor_kwargs.update(kwargs)
+
+         return cls.model_validate(constructor_kwargs)
+
+     @classmethod
+     def get_default(cls: type[BenchmarkGenerativeTextArgs], field: str) -> Any:
+         """
+         Retrieve default value for a model field.
+
+         Extracts the default value from field metadata, handling both static defaults
+         and factory functions.
+
+         :param field: Field name to retrieve default value for
+         :return: Default value for the field
+         :raises ValueError: If field does not exist
+         """
+         if field not in cls.model_fields:
+             raise ValueError(f"Field '{field}' not found in {cls.__name__}")
+
+         field_info = cls.model_fields[field]
+         factory = field_info.default_factory
+
+         if factory is None:
+             return field_info.default
+
+         if len(inspect.signature(factory).parameters) == 0:
+             return factory()  # type: ignore[call-arg]
+         else:
+             return factory({})  # type: ignore[call-arg]
+
+     model_config = ConfigDict(
+         extra="ignore",
+         use_enum_values=True,
+         from_attributes=True,
+         arbitrary_types_allowed=True,
+         validate_by_alias=True,
+         validate_by_name=True,
+         alias_generator=AliasGenerator(
+             # Support field names with hyphens
+             validation_alias=lambda field_name: AliasChoices(
+                 field_name, field_name.replace("_", "-")
+             ),
+         ),
+     )
+
+     # Required
+     target: str = Field(description="Target endpoint URL for benchmark execution")
+     data: list[Any] = Field(
+         description="List of dataset sources or data files",
+         default_factory=list,
+         min_length=1,
+     )
+     # Benchmark configuration
+     profile: StrategyType | ProfileType | Profile = Field(
+         default="sweep", description="Benchmark profile or scheduling strategy type"
+     )
+     rate: list[float] | None = Field(
+         default=None, description="Request rate(s) for rate-based scheduling"
+     )
+     # Backend configuration
+     backend: BackendType | Backend = Field(
+         default="openai_http", description="Backend type or instance for execution"
+     )
+     backend_kwargs: dict[str, Any] | None = Field(
+         default=None, description="Additional backend configuration arguments"
+     )
+     model: str | None = Field(default=None, description="Model identifier for backend")
+     # Data configuration
+     processor: str | Path | PreTrainedTokenizerBase | None = Field(
+         default=None, description="Tokenizer path, name, or instance for processing"
+     )
+     processor_args: dict[str, Any] | None = Field(
+         default=None, description="Additional tokenizer configuration arguments"
+     )
+     data_args: list[dict[str, Any]] | None = Field(
+         default_factory=list,  # type: ignore[arg-type]
+         description="Per-dataset configuration arguments",
+     )
+     data_samples: int = Field(
+         default=-1, description="Number of samples to use from datasets (-1 for all)"
+     )
+     data_column_mapper: (
+         DatasetPreprocessor
+         | dict[str, str | list[str]]
+         | Literal["generative_column_mapper"]
+     ) = Field(
+         default="generative_column_mapper",
+         description="Column mapping preprocessor for dataset fields",
+     )
+     data_request_formatter: RequestFormatter | dict[str, Any] | str = Field(
+         default="chat_completions",
+         description="Request formatting preprocessor or template name",
+         validation_alias=AliasChoices(
+             "data_request_formatter",
+             "data-request-formatter",
+             "request_type",
+             "request-type",
+         ),
+     )
+     data_collator: Callable | Literal["generative"] | None = Field(
+         default="generative", description="Data collator for batch processing"
+     )
+     data_sampler: Sampler[int] | Literal["shuffle"] | None = Field(
+         default=None, description="Data sampler for request ordering"
+     )
+     data_num_workers: int | None = Field(
+         default=1, description="Number of workers for data loading"
+     )
+     dataloader_kwargs: dict[str, Any] | None = Field(
+         default=None, description="Additional dataloader configuration arguments"
+     )
+     random_seed: int = Field(default=42, description="Random seed for reproducibility")
+     # Output configuration
+     outputs: list[str] | tuple[str] = Field(
+         default_factory=lambda: ["json", "csv", "html"],
+         description=(
+             "The aliases of the output types to create (with their default "
+             "filenames), or the file names and extensions of the output types "
+             "to create"
+         ),
+     )
+     output_dir: str | Path = Field(
+         default_factory=Path.cwd,
+         description="The directory path to save file output types in",
+     )
+     # Benchmarker configuration
+     sample_requests: int | None = Field(
+         default=10,
+         description="Number of requests to sample for detailed metrics (None for all)",
+     )
+     warmup: int | float | dict | TransientPhaseConfig | None = Field(
+         default=None,
+         description=(
+             "Warmup phase config: time or requests before measurement starts "
+             "(overlapping requests count toward measurement)"
+         ),
+     )
+     cooldown: int | float | dict | TransientPhaseConfig | None = Field(
+         default=None,
+         description=(
+             "Cooldown phase config: time or requests after measurement ends "
+             "(overlapping requests count toward measurement)"
+         ),
+     )
+     rampup: NonNegativeFloat = Field(
+         default=0.0,
+         description=(
+             "The time, in seconds, to ramp up the request rate over. "
+             "Only applicable for Throughput/Concurrent strategies"
+         ),
+     )
+     prefer_response_metrics: bool = Field(
+         default=True,
+         description="Whether to prefer backend response metrics over request metrics",
+     )
+     # Constraints configuration
+     max_seconds: int | float | None = Field(
+         default=None, description="Maximum benchmark execution time in seconds"
+     )
+     max_requests: int | None = Field(
+         default=None, description="Maximum number of requests to execute"
+     )
+     max_errors: int | None = Field(
+         default=None, description="Maximum number of errors before stopping"
+     )
+     max_error_rate: float | None = Field(
+         default=None, description="Maximum error rate (0-1) before stopping"
+     )
+     max_global_error_rate: float | None = Field(
+         default=None, description="Maximum global error rate (0-1) before stopping"
+     )
+     over_saturation: dict[str, Any] | None = Field(
+         default=None,
+         description=(
+             "Over-saturation detection configuration. A dict with configuration "
+             "parameters (enabled, min_seconds, max_window_seconds, "
+             "moe_threshold, etc.)."
+         ),
+     )
+
+     @field_validator("data", "data_args", "rate", mode="wrap")
+     @classmethod
+     def single_to_list(
+         cls, value: Any, handler: ValidatorFunctionWrapHandler
+     ) -> list[Any]:
+         """
+         Ensure the field value is always a list, wrapping single values as needed.
+
+         :param value: Input value for the validated field
+         :param handler: Inner validator to run against the value
+         :return: Validated list of values
+         """
+         try:
+             return handler(value)
+         except ValidationError as err:
+             # If validation fails, try wrapping the value in a list
+             if err.errors()[0]["type"] == "list_type":
+                 return handler([value])
+             else:
+                 raise
+
+     @field_serializer("backend")
+     def serialize_backend(self, backend: BackendType | Backend) -> str:
+         """Serialize backend to type string."""
+         return backend.type_ if isinstance(backend, Backend) else backend
+
+     @field_serializer("data")
+     def serialize_data(self, data: list[Any]) -> list[str | None]:
+         """Serialize data items to strings."""
+         return [
+             item if isinstance(item, str | type(None)) else str(item) for item in data
+         ]
+
+     @field_serializer("data_collator")
+     def serialize_data_collator(
+         self, data_collator: Callable | Literal["generative"] | None
+     ) -> str | None:
+         """Serialize data_collator to string or None."""
+         return data_collator if isinstance(data_collator, str) else None
+
+     @field_serializer("data_column_mapper")
+     def serialize_data_column_mapper(
+         self,
+         data_column_mapper: (
+             DatasetPreprocessor
+             | dict[str, str | list[str]]
+             | Literal["generative_column_mapper"]
+         ),
+     ) -> dict | str:
+         """Serialize data_column_mapper to dict or string."""
+         return data_column_mapper if isinstance(data_column_mapper, dict | str) else {}
+
+     @field_serializer("data_request_formatter")
+     def serialize_data_request_formatter(
+         self, data_request_formatter: RequestFormatter | dict[str, Any] | str
+     ) -> dict | str:
+         """Serialize data_request_formatter to dict or string."""
+         return (
+             data_request_formatter
+             if isinstance(data_request_formatter, dict | str)
+             else {}
+         )
+
+     @field_serializer("data_sampler")
+     def serialize_data_sampler(
+         self, data_sampler: Sampler[int] | Literal["shuffle"] | None
+     ) -> str | None:
+         """Serialize data_sampler to string or None."""
+         return data_sampler if isinstance(data_sampler, str) else None
+
+     @field_serializer("output_dir")
+     def serialize_output_dir(self, output_dir: str | Path) -> str | None:
+         """Serialize output_dir to string."""
+         return str(output_dir) if output_dir is not None else None
+
+     @field_serializer("processor")
+     def serialize_processor(
+         self, processor: str | Path | PreTrainedTokenizerBase | None
+     ) -> str | None:
+         """Serialize processor to string."""
+         if processor is None:
+             return None
+         return processor if isinstance(processor, str) else str(processor)
+
+     @field_serializer("profile")
+     def serialize_profile(self, profile: StrategyType | ProfileType | Profile) -> str:
+         """Serialize profile to type string."""
+         return profile.type_ if isinstance(profile, Profile) else profile
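
Taken together, the AliasGenerator in model_config and the single_to_list wrap validator mean a scenario file may use hyphenated keys and scalar values where the model declares snake_case list fields. A minimal sketch of that normalization, assuming guidellm 0.6.0a5 is installed and the class is importable from this module path:

    from guidellm.benchmark.schemas.generative.entrypoints import (
        BenchmarkGenerativeTextArgs,
    )

    # Hyphenated keys resolve through the generated validation aliases, and
    # scalar "data"/"rate" values are wrapped into lists by single_to_list.
    args = BenchmarkGenerativeTextArgs.model_validate(
        {
            "target": "http://localhost:8000/v1",
            "data": "path/to/dataset.json",  # coerced to ["path/to/dataset.json"]
            "rate": 10.0,                    # coerced to [10.0]
            "max-seconds": 120,              # matches the max_seconds field
        }
    )
    assert args.data == ["path/to/dataset.json"]
    assert args.rate == [10.0]

BenchmarkGenerativeTextArgs.create("chat", ...) follows the same path: the built-in scenario JSON seeds constructor_kwargs, explicit keyword overrides are applied on top, and the merged dict goes through model_validate.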