guidellm 0.3.1__py3-none-any.whl → 0.6.0a5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- guidellm/__init__.py +5 -2
- guidellm/__main__.py +524 -255
- guidellm/backends/__init__.py +33 -0
- guidellm/backends/backend.py +109 -0
- guidellm/backends/openai.py +340 -0
- guidellm/backends/response_handlers.py +428 -0
- guidellm/benchmark/__init__.py +69 -39
- guidellm/benchmark/benchmarker.py +160 -316
- guidellm/benchmark/entrypoints.py +560 -127
- guidellm/benchmark/outputs/__init__.py +24 -0
- guidellm/benchmark/outputs/console.py +633 -0
- guidellm/benchmark/outputs/csv.py +721 -0
- guidellm/benchmark/outputs/html.py +473 -0
- guidellm/benchmark/outputs/output.py +169 -0
- guidellm/benchmark/outputs/serialized.py +69 -0
- guidellm/benchmark/profiles.py +718 -0
- guidellm/benchmark/progress.py +553 -556
- guidellm/benchmark/scenarios/__init__.py +40 -0
- guidellm/benchmark/scenarios/chat.json +6 -0
- guidellm/benchmark/scenarios/rag.json +6 -0
- guidellm/benchmark/schemas/__init__.py +66 -0
- guidellm/benchmark/schemas/base.py +402 -0
- guidellm/benchmark/schemas/generative/__init__.py +55 -0
- guidellm/benchmark/schemas/generative/accumulator.py +841 -0
- guidellm/benchmark/schemas/generative/benchmark.py +163 -0
- guidellm/benchmark/schemas/generative/entrypoints.py +381 -0
- guidellm/benchmark/schemas/generative/metrics.py +927 -0
- guidellm/benchmark/schemas/generative/report.py +158 -0
- guidellm/data/__init__.py +34 -4
- guidellm/data/builders.py +541 -0
- guidellm/data/collators.py +16 -0
- guidellm/data/config.py +120 -0
- guidellm/data/deserializers/__init__.py +49 -0
- guidellm/data/deserializers/deserializer.py +141 -0
- guidellm/data/deserializers/file.py +223 -0
- guidellm/data/deserializers/huggingface.py +94 -0
- guidellm/data/deserializers/memory.py +194 -0
- guidellm/data/deserializers/synthetic.py +246 -0
- guidellm/data/entrypoints.py +52 -0
- guidellm/data/loaders.py +190 -0
- guidellm/data/preprocessors/__init__.py +27 -0
- guidellm/data/preprocessors/formatters.py +410 -0
- guidellm/data/preprocessors/mappers.py +196 -0
- guidellm/data/preprocessors/preprocessor.py +30 -0
- guidellm/data/processor.py +29 -0
- guidellm/data/schemas.py +175 -0
- guidellm/data/utils/__init__.py +6 -0
- guidellm/data/utils/dataset.py +94 -0
- guidellm/extras/__init__.py +4 -0
- guidellm/extras/audio.py +220 -0
- guidellm/extras/vision.py +242 -0
- guidellm/logger.py +2 -2
- guidellm/mock_server/__init__.py +8 -0
- guidellm/mock_server/config.py +84 -0
- guidellm/mock_server/handlers/__init__.py +17 -0
- guidellm/mock_server/handlers/chat_completions.py +280 -0
- guidellm/mock_server/handlers/completions.py +280 -0
- guidellm/mock_server/handlers/tokenizer.py +142 -0
- guidellm/mock_server/models.py +510 -0
- guidellm/mock_server/server.py +238 -0
- guidellm/mock_server/utils.py +302 -0
- guidellm/scheduler/__init__.py +69 -26
- guidellm/scheduler/constraints/__init__.py +49 -0
- guidellm/scheduler/constraints/constraint.py +325 -0
- guidellm/scheduler/constraints/error.py +411 -0
- guidellm/scheduler/constraints/factory.py +182 -0
- guidellm/scheduler/constraints/request.py +312 -0
- guidellm/scheduler/constraints/saturation.py +722 -0
- guidellm/scheduler/environments.py +252 -0
- guidellm/scheduler/scheduler.py +137 -368
- guidellm/scheduler/schemas.py +358 -0
- guidellm/scheduler/strategies.py +617 -0
- guidellm/scheduler/worker.py +413 -419
- guidellm/scheduler/worker_group.py +712 -0
- guidellm/schemas/__init__.py +65 -0
- guidellm/schemas/base.py +417 -0
- guidellm/schemas/info.py +188 -0
- guidellm/schemas/request.py +235 -0
- guidellm/schemas/request_stats.py +349 -0
- guidellm/schemas/response.py +124 -0
- guidellm/schemas/statistics.py +1018 -0
- guidellm/{config.py → settings.py} +31 -24
- guidellm/utils/__init__.py +71 -8
- guidellm/utils/auto_importer.py +98 -0
- guidellm/utils/cli.py +132 -5
- guidellm/utils/console.py +566 -0
- guidellm/utils/encoding.py +778 -0
- guidellm/utils/functions.py +159 -0
- guidellm/utils/hf_datasets.py +1 -2
- guidellm/utils/hf_transformers.py +4 -4
- guidellm/utils/imports.py +9 -0
- guidellm/utils/messaging.py +1118 -0
- guidellm/utils/mixins.py +115 -0
- guidellm/utils/random.py +3 -4
- guidellm/utils/registry.py +220 -0
- guidellm/utils/singleton.py +133 -0
- guidellm/utils/synchronous.py +159 -0
- guidellm/utils/text.py +163 -50
- guidellm/utils/typing.py +41 -0
- guidellm/version.py +2 -2
- guidellm-0.6.0a5.dist-info/METADATA +364 -0
- guidellm-0.6.0a5.dist-info/RECORD +109 -0
- guidellm/backend/__init__.py +0 -23
- guidellm/backend/backend.py +0 -259
- guidellm/backend/openai.py +0 -708
- guidellm/backend/response.py +0 -136
- guidellm/benchmark/aggregator.py +0 -760
- guidellm/benchmark/benchmark.py +0 -837
- guidellm/benchmark/output.py +0 -997
- guidellm/benchmark/profile.py +0 -409
- guidellm/benchmark/scenario.py +0 -104
- guidellm/data/prideandprejudice.txt.gz +0 -0
- guidellm/dataset/__init__.py +0 -22
- guidellm/dataset/creator.py +0 -213
- guidellm/dataset/entrypoints.py +0 -42
- guidellm/dataset/file.py +0 -92
- guidellm/dataset/hf_datasets.py +0 -62
- guidellm/dataset/in_memory.py +0 -132
- guidellm/dataset/synthetic.py +0 -287
- guidellm/objects/__init__.py +0 -18
- guidellm/objects/pydantic.py +0 -89
- guidellm/objects/statistics.py +0 -953
- guidellm/preprocess/__init__.py +0 -3
- guidellm/preprocess/dataset.py +0 -374
- guidellm/presentation/__init__.py +0 -28
- guidellm/presentation/builder.py +0 -27
- guidellm/presentation/data_models.py +0 -232
- guidellm/presentation/injector.py +0 -66
- guidellm/request/__init__.py +0 -18
- guidellm/request/loader.py +0 -284
- guidellm/request/request.py +0 -79
- guidellm/request/types.py +0 -10
- guidellm/scheduler/queues.py +0 -25
- guidellm/scheduler/result.py +0 -155
- guidellm/scheduler/strategy.py +0 -495
- guidellm-0.3.1.dist-info/METADATA +0 -329
- guidellm-0.3.1.dist-info/RECORD +0 -62
- {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/WHEEL +0 -0
- {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/entry_points.txt +0 -0
- {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/licenses/LICENSE +0 -0
- {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/top_level.txt +0 -0
guidellm/benchmark/profile.py
DELETED
|
@@ -1,409 +0,0 @@
|
|
|
1
|
-
from collections.abc import Sequence
|
|
2
|
-
from typing import Literal, Optional, Union
|
|
3
|
-
|
|
4
|
-
import numpy as np
|
|
5
|
-
from pydantic import Field, computed_field
|
|
6
|
-
|
|
7
|
-
from guidellm.config import settings
|
|
8
|
-
from guidellm.objects import StandardBaseModel
|
|
9
|
-
from guidellm.scheduler import (
|
|
10
|
-
AsyncConstantStrategy,
|
|
11
|
-
AsyncPoissonStrategy,
|
|
12
|
-
ConcurrentStrategy,
|
|
13
|
-
SchedulingStrategy,
|
|
14
|
-
StrategyType,
|
|
15
|
-
SynchronousStrategy,
|
|
16
|
-
ThroughputStrategy,
|
|
17
|
-
)
|
|
18
|
-
|
|
19
|
-
__all__ = [
|
|
20
|
-
"AsyncProfile",
|
|
21
|
-
"ConcurrentProfile",
|
|
22
|
-
"Profile",
|
|
23
|
-
"ProfileType",
|
|
24
|
-
"SweepProfile",
|
|
25
|
-
"SynchronousProfile",
|
|
26
|
-
"ThroughputProfile",
|
|
27
|
-
"create_profile",
|
|
28
|
-
]
|
|
29
|
-
|
|
30
|
-
ProfileType = Literal["synchronous", "concurrent", "throughput", "async", "sweep"]
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
class Profile(StandardBaseModel):
|
|
34
|
-
type_: Literal["profile"] = Field(
|
|
35
|
-
description="The type of benchmarking profile to use.",
|
|
36
|
-
)
|
|
37
|
-
completed_strategies: int = Field(
|
|
38
|
-
default=0,
|
|
39
|
-
description="The number of scheduling strategies generated so far.",
|
|
40
|
-
)
|
|
41
|
-
measured_rates: list[float] = Field(
|
|
42
|
-
default_factory=list,
|
|
43
|
-
description=("The average rates measured for the strategies that have run."),
|
|
44
|
-
)
|
|
45
|
-
measured_concurrencies: list[float] = Field(
|
|
46
|
-
default_factory=list,
|
|
47
|
-
description=(
|
|
48
|
-
"The average concurrency measured for the strategies that have run."
|
|
49
|
-
),
|
|
50
|
-
)
|
|
51
|
-
|
|
52
|
-
def completed_strategy(self, average_rate: float, average_concurrency: float):
|
|
53
|
-
self.measured_rates.append(average_rate)
|
|
54
|
-
self.measured_concurrencies.append(average_concurrency)
|
|
55
|
-
self.completed_strategies += 1
|
|
56
|
-
|
|
57
|
-
@computed_field # type: ignore[misc]
|
|
58
|
-
@property
|
|
59
|
-
def strategy_types(self) -> list[StrategyType]:
|
|
60
|
-
return []
|
|
61
|
-
|
|
62
|
-
def next_strategy(self) -> Optional[SchedulingStrategy]:
|
|
63
|
-
return None
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
class SynchronousProfile(Profile):
|
|
67
|
-
type_: Literal["synchronous"] = "synchronous" # type: ignore[assignment]
|
|
68
|
-
|
|
69
|
-
@property
|
|
70
|
-
def strategy_types(self) -> list[StrategyType]:
|
|
71
|
-
return [self.type_]
|
|
72
|
-
|
|
73
|
-
def next_strategy(self) -> Optional[SchedulingStrategy]:
|
|
74
|
-
if self.completed_strategies >= 1:
|
|
75
|
-
return None
|
|
76
|
-
|
|
77
|
-
return SynchronousStrategy()
|
|
78
|
-
|
|
79
|
-
@staticmethod
|
|
80
|
-
def from_standard_args(
|
|
81
|
-
rate_type: Union[StrategyType, ProfileType],
|
|
82
|
-
rate: Optional[Union[float, Sequence[float]]],
|
|
83
|
-
**kwargs,
|
|
84
|
-
) -> "SynchronousProfile":
|
|
85
|
-
if rate_type != "synchronous":
|
|
86
|
-
raise ValueError("Rate type must be 'synchronous' for synchronous profile.")
|
|
87
|
-
|
|
88
|
-
if rate is not None:
|
|
89
|
-
raise ValueError(
|
|
90
|
-
"Rate does not apply to synchronous profile, it must be set to None."
|
|
91
|
-
)
|
|
92
|
-
|
|
93
|
-
if kwargs:
|
|
94
|
-
raise ValueError(
|
|
95
|
-
"No additional arguments are allowed for synchronous profile."
|
|
96
|
-
)
|
|
97
|
-
|
|
98
|
-
return SynchronousProfile()
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
class ConcurrentProfile(Profile):
|
|
102
|
-
type_: Literal["concurrent"] = "concurrent" # type: ignore[assignment]
|
|
103
|
-
streams: Union[int, Sequence[int]] = Field(
|
|
104
|
-
description="The number of concurrent streams to use.",
|
|
105
|
-
)
|
|
106
|
-
|
|
107
|
-
@property
|
|
108
|
-
def strategy_types(self) -> list[StrategyType]:
|
|
109
|
-
num_strategies = len(self.streams) if isinstance(self.streams, Sequence) else 1
|
|
110
|
-
|
|
111
|
-
return [self.type_] * num_strategies
|
|
112
|
-
|
|
113
|
-
def next_strategy(self) -> Optional[SchedulingStrategy]:
|
|
114
|
-
streams = self.streams if isinstance(self.streams, Sequence) else [self.streams]
|
|
115
|
-
|
|
116
|
-
if self.completed_strategies >= len(streams):
|
|
117
|
-
return None
|
|
118
|
-
|
|
119
|
-
return ConcurrentStrategy(
|
|
120
|
-
streams=streams[self.completed_strategies],
|
|
121
|
-
)
|
|
122
|
-
|
|
123
|
-
@staticmethod
|
|
124
|
-
def from_standard_args(
|
|
125
|
-
rate_type: Union[StrategyType, ProfileType],
|
|
126
|
-
rate: Optional[Union[float, Sequence[float]]],
|
|
127
|
-
**kwargs,
|
|
128
|
-
) -> "ConcurrentProfile":
|
|
129
|
-
if rate_type != "concurrent":
|
|
130
|
-
raise ValueError("Rate type must be 'concurrent' for concurrent profile.")
|
|
131
|
-
|
|
132
|
-
if not rate:
|
|
133
|
-
raise ValueError("Rate (streams) must be provided for concurrent profile.")
|
|
134
|
-
|
|
135
|
-
if not isinstance(rate, Sequence):
|
|
136
|
-
rate = [rate]
|
|
137
|
-
|
|
138
|
-
if not all(stream.is_integer() and stream > 0 for stream in rate):
|
|
139
|
-
raise ValueError(
|
|
140
|
-
f"All rate values (streams) must be positive integers, received {rate}"
|
|
141
|
-
)
|
|
142
|
-
|
|
143
|
-
if kwargs:
|
|
144
|
-
raise ValueError(
|
|
145
|
-
"No additional arguments are allowed for concurrent profile."
|
|
146
|
-
)
|
|
147
|
-
|
|
148
|
-
return ConcurrentProfile(streams=[int(rat) for rat in rate])
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
class ThroughputProfile(Profile):
|
|
152
|
-
type_: Literal["throughput"] = "throughput" # type: ignore[assignment]
|
|
153
|
-
max_concurrency: Optional[int] = Field(
|
|
154
|
-
default=None,
|
|
155
|
-
description="The maximum number of concurrent requests that can be scheduled.",
|
|
156
|
-
)
|
|
157
|
-
|
|
158
|
-
@property
|
|
159
|
-
def strategy_types(self) -> list[StrategyType]:
|
|
160
|
-
return [self.type_]
|
|
161
|
-
|
|
162
|
-
def next_strategy(self) -> Optional[SchedulingStrategy]:
|
|
163
|
-
if self.completed_strategies >= 1:
|
|
164
|
-
return None
|
|
165
|
-
|
|
166
|
-
return ThroughputStrategy(
|
|
167
|
-
max_concurrency=self.max_concurrency,
|
|
168
|
-
)
|
|
169
|
-
|
|
170
|
-
@staticmethod
|
|
171
|
-
def from_standard_args(
|
|
172
|
-
rate_type: Union[StrategyType, ProfileType],
|
|
173
|
-
rate: Optional[Union[float, Sequence[float]]],
|
|
174
|
-
**kwargs,
|
|
175
|
-
) -> "ThroughputProfile":
|
|
176
|
-
if rate_type != "throughput":
|
|
177
|
-
raise ValueError("Rate type must be 'throughput' for throughput profile.")
|
|
178
|
-
|
|
179
|
-
if rate is not None:
|
|
180
|
-
raise ValueError(
|
|
181
|
-
"Rate does not apply to throughput profile, it must be set to None."
|
|
182
|
-
)
|
|
183
|
-
|
|
184
|
-
return ThroughputProfile(**kwargs)
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
class AsyncProfile(ThroughputProfile):
|
|
188
|
-
type_: Literal["async"] = "async" # type: ignore[assignment]
|
|
189
|
-
strategy_type: Literal["constant", "poisson"] = Field(
|
|
190
|
-
description="The type of asynchronous strategy to use.",
|
|
191
|
-
)
|
|
192
|
-
rate: Union[float, Sequence[float]] = Field(
|
|
193
|
-
description="The rate of requests per second to use.",
|
|
194
|
-
)
|
|
195
|
-
initial_burst: bool = Field(
|
|
196
|
-
default=True,
|
|
197
|
-
description=(
|
|
198
|
-
"True to send an initial burst of requests (math.floor(self.rate)) "
|
|
199
|
-
"to reach target rate. False to not send an initial burst."
|
|
200
|
-
),
|
|
201
|
-
)
|
|
202
|
-
random_seed: int = Field(
|
|
203
|
-
default=42,
|
|
204
|
-
description=(
|
|
205
|
-
"The random seed to use for the asynchronous strategy. "
|
|
206
|
-
"This is used to generate random numbers for the Poisson strategy."
|
|
207
|
-
),
|
|
208
|
-
)
|
|
209
|
-
|
|
210
|
-
@property
|
|
211
|
-
def strategy_types(self) -> list[StrategyType]:
|
|
212
|
-
num_strategies = len(self.rate) if isinstance(self.rate, Sequence) else 1
|
|
213
|
-
|
|
214
|
-
return [self.strategy_type] * num_strategies
|
|
215
|
-
|
|
216
|
-
def next_strategy(self) -> Optional[SchedulingStrategy]:
|
|
217
|
-
rate = self.rate if isinstance(self.rate, Sequence) else [self.rate]
|
|
218
|
-
|
|
219
|
-
if self.completed_strategies >= len(rate):
|
|
220
|
-
return None
|
|
221
|
-
|
|
222
|
-
if self.strategy_type == "constant":
|
|
223
|
-
return AsyncConstantStrategy(
|
|
224
|
-
rate=rate[self.completed_strategies],
|
|
225
|
-
initial_burst=self.initial_burst,
|
|
226
|
-
max_concurrency=self.max_concurrency,
|
|
227
|
-
)
|
|
228
|
-
elif self.strategy_type == "poisson":
|
|
229
|
-
return AsyncPoissonStrategy(
|
|
230
|
-
rate=rate[self.completed_strategies],
|
|
231
|
-
initial_burst=self.initial_burst,
|
|
232
|
-
max_concurrency=self.max_concurrency,
|
|
233
|
-
random_seed=self.random_seed,
|
|
234
|
-
)
|
|
235
|
-
else:
|
|
236
|
-
raise ValueError(f"Invalid strategy type: {self.strategy_type}")
|
|
237
|
-
|
|
238
|
-
@staticmethod
|
|
239
|
-
def from_standard_args( # type: ignore[override]
|
|
240
|
-
rate_type: Union[StrategyType, ProfileType],
|
|
241
|
-
rate: Optional[Union[float, Sequence[float]]],
|
|
242
|
-
random_seed: int,
|
|
243
|
-
**kwargs,
|
|
244
|
-
) -> "AsyncProfile":
|
|
245
|
-
if rate_type not in ("async", "constant", "poisson"):
|
|
246
|
-
raise ValueError(
|
|
247
|
-
"Rate type must be in ('async', 'constant', 'poisson') "
|
|
248
|
-
f"for async profile. Received: {rate_type}"
|
|
249
|
-
)
|
|
250
|
-
|
|
251
|
-
if not rate:
|
|
252
|
-
raise ValueError("Rate must be provided for async profile.")
|
|
253
|
-
|
|
254
|
-
if not isinstance(rate, Sequence):
|
|
255
|
-
rate = [rate]
|
|
256
|
-
|
|
257
|
-
if not all(isinstance(r, (float, int)) and r > 0 for r in rate):
|
|
258
|
-
raise ValueError(
|
|
259
|
-
f"All rate values must be positive numbers, received {rate}"
|
|
260
|
-
)
|
|
261
|
-
|
|
262
|
-
if rate_type == "async":
|
|
263
|
-
rate_type = "constant" # default to constant if not specified
|
|
264
|
-
|
|
265
|
-
return AsyncProfile(
|
|
266
|
-
strategy_type=rate_type, # type: ignore[arg-type]
|
|
267
|
-
rate=rate,
|
|
268
|
-
random_seed=random_seed,
|
|
269
|
-
**kwargs,
|
|
270
|
-
)
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
class SweepProfile(AsyncProfile):
|
|
274
|
-
type_: Literal["sweep"] = "sweep" # type: ignore[assignment]
|
|
275
|
-
sweep_size: int = Field(
|
|
276
|
-
description="The number of strategies to generate for the sweep.",
|
|
277
|
-
)
|
|
278
|
-
rate: float = -1
|
|
279
|
-
rate_type: Literal["constant", "poisson"] = "constant"
|
|
280
|
-
|
|
281
|
-
@property
|
|
282
|
-
def strategy_types(self) -> list[StrategyType]:
|
|
283
|
-
return (
|
|
284
|
-
["synchronous"] + ["throughput"] + [self.rate_type] * (self.sweep_size - 2) # type: ignore[return-value]
|
|
285
|
-
)
|
|
286
|
-
|
|
287
|
-
def next_strategy(self) -> Optional[SchedulingStrategy]:
|
|
288
|
-
if self.completed_strategies >= self.sweep_size:
|
|
289
|
-
return None
|
|
290
|
-
|
|
291
|
-
if self.completed_strategies == 0:
|
|
292
|
-
return SynchronousStrategy()
|
|
293
|
-
|
|
294
|
-
if self.completed_strategies == 1:
|
|
295
|
-
return ThroughputStrategy(
|
|
296
|
-
max_concurrency=self.max_concurrency,
|
|
297
|
-
)
|
|
298
|
-
|
|
299
|
-
min_rate = self.measured_rates[0]
|
|
300
|
-
max_rate = self.measured_rates[1]
|
|
301
|
-
rates = np.linspace(min_rate, max_rate, self.sweep_size - 1)[1:]
|
|
302
|
-
|
|
303
|
-
if self.rate_type == "constant":
|
|
304
|
-
return AsyncConstantStrategy(
|
|
305
|
-
rate=rates[self.completed_strategies - 2],
|
|
306
|
-
initial_burst=self.initial_burst,
|
|
307
|
-
max_concurrency=self.max_concurrency,
|
|
308
|
-
)
|
|
309
|
-
elif self.rate_type == "poisson":
|
|
310
|
-
return AsyncPoissonStrategy(
|
|
311
|
-
rate=rates[self.completed_strategies - 2],
|
|
312
|
-
initial_burst=self.initial_burst,
|
|
313
|
-
max_concurrency=self.max_concurrency,
|
|
314
|
-
)
|
|
315
|
-
else:
|
|
316
|
-
raise ValueError(f"Invalid strategy type: {self.rate_type}")
|
|
317
|
-
|
|
318
|
-
@staticmethod
|
|
319
|
-
def from_standard_args( # type: ignore[override]
|
|
320
|
-
rate_type: Union[StrategyType, ProfileType],
|
|
321
|
-
rate: Optional[Union[float, Sequence[float]]],
|
|
322
|
-
random_seed: int,
|
|
323
|
-
**kwargs,
|
|
324
|
-
) -> "SweepProfile":
|
|
325
|
-
if rate_type != "sweep":
|
|
326
|
-
raise ValueError("Rate type must be 'sweep' for sweep profile.")
|
|
327
|
-
|
|
328
|
-
if "sweep_size" in kwargs:
|
|
329
|
-
raise ValueError("Sweep size must not be provided, use rate instead.")
|
|
330
|
-
|
|
331
|
-
if isinstance(rate, Sequence):
|
|
332
|
-
if len(rate) != 1:
|
|
333
|
-
raise ValueError(
|
|
334
|
-
"Rate must be a single value for sweep profile, received "
|
|
335
|
-
f"{len(rate)} values."
|
|
336
|
-
)
|
|
337
|
-
rate = rate[0]
|
|
338
|
-
|
|
339
|
-
if not rate:
|
|
340
|
-
rate = settings.default_sweep_number
|
|
341
|
-
|
|
342
|
-
if not rate:
|
|
343
|
-
raise ValueError(
|
|
344
|
-
"Rate (sweep_size) must be provided for concurrent profile."
|
|
345
|
-
)
|
|
346
|
-
|
|
347
|
-
if (
|
|
348
|
-
not isinstance(rate, (int, float))
|
|
349
|
-
or (isinstance(rate, float) and not rate.is_integer())
|
|
350
|
-
or rate <= 1
|
|
351
|
-
):
|
|
352
|
-
raise ValueError(
|
|
353
|
-
f"Rate (sweep_size) must be a positive integer > 1, received {rate} "
|
|
354
|
-
f"with type {type(rate)}"
|
|
355
|
-
)
|
|
356
|
-
|
|
357
|
-
if not kwargs:
|
|
358
|
-
kwargs = {}
|
|
359
|
-
|
|
360
|
-
if "strategy_type" not in kwargs:
|
|
361
|
-
kwargs["strategy_type"] = "constant"
|
|
362
|
-
|
|
363
|
-
return SweepProfile(sweep_size=int(rate), random_seed=random_seed, **kwargs)
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
def create_profile(
|
|
367
|
-
rate_type: Union[StrategyType, ProfileType],
|
|
368
|
-
rate: Optional[Union[float, Sequence[float]]],
|
|
369
|
-
random_seed: int = 42,
|
|
370
|
-
**kwargs,
|
|
371
|
-
) -> "Profile":
|
|
372
|
-
if rate_type == "synchronous":
|
|
373
|
-
return SynchronousProfile.from_standard_args(
|
|
374
|
-
rate_type=rate_type,
|
|
375
|
-
rate=rate,
|
|
376
|
-
**kwargs,
|
|
377
|
-
)
|
|
378
|
-
|
|
379
|
-
if rate_type == "concurrent":
|
|
380
|
-
return ConcurrentProfile.from_standard_args(
|
|
381
|
-
rate_type=rate_type,
|
|
382
|
-
rate=rate,
|
|
383
|
-
**kwargs,
|
|
384
|
-
)
|
|
385
|
-
|
|
386
|
-
if rate_type == "throughput":
|
|
387
|
-
return ThroughputProfile.from_standard_args(
|
|
388
|
-
rate_type=rate_type,
|
|
389
|
-
rate=rate,
|
|
390
|
-
**kwargs,
|
|
391
|
-
)
|
|
392
|
-
|
|
393
|
-
if rate_type in ("async", "constant", "poisson"):
|
|
394
|
-
return AsyncProfile.from_standard_args(
|
|
395
|
-
rate_type=rate_type,
|
|
396
|
-
rate=rate,
|
|
397
|
-
random_seed=random_seed,
|
|
398
|
-
**kwargs,
|
|
399
|
-
)
|
|
400
|
-
|
|
401
|
-
if rate_type == "sweep":
|
|
402
|
-
return SweepProfile.from_standard_args(
|
|
403
|
-
rate_type=rate_type,
|
|
404
|
-
rate=rate,
|
|
405
|
-
random_seed=random_seed,
|
|
406
|
-
**kwargs,
|
|
407
|
-
)
|
|
408
|
-
|
|
409
|
-
raise ValueError(f"Invalid profile type: {rate_type}")
|
guidellm/benchmark/scenario.py
DELETED
|
@@ -1,104 +0,0 @@
|
|
|
1
|
-
from collections.abc import Iterable
|
|
2
|
-
from functools import cache
|
|
3
|
-
from pathlib import Path
|
|
4
|
-
from typing import Annotated, Any, Literal, Optional, TypeVar, Union
|
|
5
|
-
|
|
6
|
-
from datasets import Dataset, DatasetDict, IterableDataset, IterableDatasetDict
|
|
7
|
-
from pydantic import BeforeValidator, Field, NonNegativeInt, PositiveFloat, PositiveInt
|
|
8
|
-
from transformers.tokenization_utils_base import ( # type: ignore[import]
|
|
9
|
-
PreTrainedTokenizerBase,
|
|
10
|
-
)
|
|
11
|
-
|
|
12
|
-
from guidellm.backend.backend import BackendType
|
|
13
|
-
from guidellm.benchmark.profile import ProfileType
|
|
14
|
-
from guidellm.objects.pydantic import StandardBaseModel
|
|
15
|
-
from guidellm.scheduler.strategy import StrategyType
|
|
16
|
-
|
|
17
|
-
__ALL__ = ["Scenario", "GenerativeTextScenario", "get_builtin_scenarios"]
|
|
18
|
-
|
|
19
|
-
SCENARIO_DIR = Path(__file__).parent / "scenarios/"
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
@cache
|
|
23
|
-
def get_builtin_scenarios() -> list[str]:
|
|
24
|
-
"""Returns list of builtin scenario names."""
|
|
25
|
-
return [p.stem for p in SCENARIO_DIR.glob("*.json")]
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
def parse_float_list(value: Union[str, float, list[float]]) -> list[float]:
|
|
29
|
-
"""
|
|
30
|
-
Parse a comma separated string to a list of float
|
|
31
|
-
or convert single float list of one or pass float
|
|
32
|
-
list through.
|
|
33
|
-
"""
|
|
34
|
-
if isinstance(value, (int, float)):
|
|
35
|
-
return [value]
|
|
36
|
-
elif isinstance(value, list):
|
|
37
|
-
return value
|
|
38
|
-
|
|
39
|
-
values = value.split(",") if "," in value else [value]
|
|
40
|
-
|
|
41
|
-
try:
|
|
42
|
-
return [float(val) for val in values]
|
|
43
|
-
except ValueError as err:
|
|
44
|
-
raise ValueError(
|
|
45
|
-
"must be a number or comma-separated list of numbers."
|
|
46
|
-
) from err
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
T = TypeVar("T", bound="Scenario")
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
class Scenario(StandardBaseModel):
|
|
53
|
-
"""
|
|
54
|
-
Parent Scenario class with common options for all benchmarking types.
|
|
55
|
-
"""
|
|
56
|
-
|
|
57
|
-
target: str
|
|
58
|
-
|
|
59
|
-
@classmethod
|
|
60
|
-
def from_builtin(cls: type[T], name: str, overrides: Optional[dict] = None) -> T:
|
|
61
|
-
filename = SCENARIO_DIR / f"{name}.json"
|
|
62
|
-
|
|
63
|
-
if not filename.is_file():
|
|
64
|
-
raise ValueError(f"{name} is not a valid builtin scenario")
|
|
65
|
-
|
|
66
|
-
return cls.from_file(filename, overrides)
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
class GenerativeTextScenario(Scenario):
|
|
70
|
-
"""
|
|
71
|
-
Scenario class for generative text benchmarks.
|
|
72
|
-
"""
|
|
73
|
-
|
|
74
|
-
class Config:
|
|
75
|
-
# NOTE: This prevents errors due to unvalidatable
|
|
76
|
-
# types like PreTrainedTokenizerBase
|
|
77
|
-
arbitrary_types_allowed = True
|
|
78
|
-
|
|
79
|
-
backend_type: BackendType = "openai_http"
|
|
80
|
-
backend_args: Optional[dict[str, Any]] = None
|
|
81
|
-
model: Optional[str] = None
|
|
82
|
-
processor: Optional[Union[str, Path, PreTrainedTokenizerBase]] = None
|
|
83
|
-
processor_args: Optional[dict[str, Any]] = None
|
|
84
|
-
data: Union[
|
|
85
|
-
str,
|
|
86
|
-
Path,
|
|
87
|
-
Iterable[Union[str, dict[str, Any]]],
|
|
88
|
-
Dataset,
|
|
89
|
-
DatasetDict,
|
|
90
|
-
IterableDataset,
|
|
91
|
-
IterableDatasetDict,
|
|
92
|
-
]
|
|
93
|
-
data_args: Optional[dict[str, Any]] = None
|
|
94
|
-
data_sampler: Optional[Literal["random"]] = None
|
|
95
|
-
rate_type: Union[StrategyType, ProfileType]
|
|
96
|
-
rate: Annotated[
|
|
97
|
-
Optional[list[PositiveFloat]], BeforeValidator(parse_float_list)
|
|
98
|
-
] = None
|
|
99
|
-
max_seconds: Optional[PositiveFloat] = None
|
|
100
|
-
max_requests: Optional[PositiveInt] = None
|
|
101
|
-
warmup_percent: Annotated[Optional[float], Field(gt=0, le=1)] = None
|
|
102
|
-
cooldown_percent: Annotated[Optional[float], Field(gt=0, le=1)] = None
|
|
103
|
-
output_sampling: Optional[NonNegativeInt] = None
|
|
104
|
-
random_seed: int = 42
|
|
Binary file
|
guidellm/dataset/__init__.py
DELETED
|
@@ -1,22 +0,0 @@
|
|
|
1
|
-
from .creator import ColumnInputTypes, DatasetCreator
|
|
2
|
-
from .entrypoints import load_dataset
|
|
3
|
-
from .file import FileDatasetCreator
|
|
4
|
-
from .hf_datasets import HFDatasetsCreator
|
|
5
|
-
from .in_memory import InMemoryDatasetCreator
|
|
6
|
-
from .synthetic import (
|
|
7
|
-
SyntheticDatasetConfig,
|
|
8
|
-
SyntheticDatasetCreator,
|
|
9
|
-
SyntheticTextItemsGenerator,
|
|
10
|
-
)
|
|
11
|
-
|
|
12
|
-
__all__ = [
|
|
13
|
-
"ColumnInputTypes",
|
|
14
|
-
"DatasetCreator",
|
|
15
|
-
"FileDatasetCreator",
|
|
16
|
-
"HFDatasetsCreator",
|
|
17
|
-
"InMemoryDatasetCreator",
|
|
18
|
-
"SyntheticDatasetConfig",
|
|
19
|
-
"SyntheticDatasetCreator",
|
|
20
|
-
"SyntheticTextItemsGenerator",
|
|
21
|
-
"load_dataset",
|
|
22
|
-
]
|