guidellm 0.3.1__py3-none-any.whl → 0.6.0a5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- guidellm/__init__.py +5 -2
- guidellm/__main__.py +524 -255
- guidellm/backends/__init__.py +33 -0
- guidellm/backends/backend.py +109 -0
- guidellm/backends/openai.py +340 -0
- guidellm/backends/response_handlers.py +428 -0
- guidellm/benchmark/__init__.py +69 -39
- guidellm/benchmark/benchmarker.py +160 -316
- guidellm/benchmark/entrypoints.py +560 -127
- guidellm/benchmark/outputs/__init__.py +24 -0
- guidellm/benchmark/outputs/console.py +633 -0
- guidellm/benchmark/outputs/csv.py +721 -0
- guidellm/benchmark/outputs/html.py +473 -0
- guidellm/benchmark/outputs/output.py +169 -0
- guidellm/benchmark/outputs/serialized.py +69 -0
- guidellm/benchmark/profiles.py +718 -0
- guidellm/benchmark/progress.py +553 -556
- guidellm/benchmark/scenarios/__init__.py +40 -0
- guidellm/benchmark/scenarios/chat.json +6 -0
- guidellm/benchmark/scenarios/rag.json +6 -0
- guidellm/benchmark/schemas/__init__.py +66 -0
- guidellm/benchmark/schemas/base.py +402 -0
- guidellm/benchmark/schemas/generative/__init__.py +55 -0
- guidellm/benchmark/schemas/generative/accumulator.py +841 -0
- guidellm/benchmark/schemas/generative/benchmark.py +163 -0
- guidellm/benchmark/schemas/generative/entrypoints.py +381 -0
- guidellm/benchmark/schemas/generative/metrics.py +927 -0
- guidellm/benchmark/schemas/generative/report.py +158 -0
- guidellm/data/__init__.py +34 -4
- guidellm/data/builders.py +541 -0
- guidellm/data/collators.py +16 -0
- guidellm/data/config.py +120 -0
- guidellm/data/deserializers/__init__.py +49 -0
- guidellm/data/deserializers/deserializer.py +141 -0
- guidellm/data/deserializers/file.py +223 -0
- guidellm/data/deserializers/huggingface.py +94 -0
- guidellm/data/deserializers/memory.py +194 -0
- guidellm/data/deserializers/synthetic.py +246 -0
- guidellm/data/entrypoints.py +52 -0
- guidellm/data/loaders.py +190 -0
- guidellm/data/preprocessors/__init__.py +27 -0
- guidellm/data/preprocessors/formatters.py +410 -0
- guidellm/data/preprocessors/mappers.py +196 -0
- guidellm/data/preprocessors/preprocessor.py +30 -0
- guidellm/data/processor.py +29 -0
- guidellm/data/schemas.py +175 -0
- guidellm/data/utils/__init__.py +6 -0
- guidellm/data/utils/dataset.py +94 -0
- guidellm/extras/__init__.py +4 -0
- guidellm/extras/audio.py +220 -0
- guidellm/extras/vision.py +242 -0
- guidellm/logger.py +2 -2
- guidellm/mock_server/__init__.py +8 -0
- guidellm/mock_server/config.py +84 -0
- guidellm/mock_server/handlers/__init__.py +17 -0
- guidellm/mock_server/handlers/chat_completions.py +280 -0
- guidellm/mock_server/handlers/completions.py +280 -0
- guidellm/mock_server/handlers/tokenizer.py +142 -0
- guidellm/mock_server/models.py +510 -0
- guidellm/mock_server/server.py +238 -0
- guidellm/mock_server/utils.py +302 -0
- guidellm/scheduler/__init__.py +69 -26
- guidellm/scheduler/constraints/__init__.py +49 -0
- guidellm/scheduler/constraints/constraint.py +325 -0
- guidellm/scheduler/constraints/error.py +411 -0
- guidellm/scheduler/constraints/factory.py +182 -0
- guidellm/scheduler/constraints/request.py +312 -0
- guidellm/scheduler/constraints/saturation.py +722 -0
- guidellm/scheduler/environments.py +252 -0
- guidellm/scheduler/scheduler.py +137 -368
- guidellm/scheduler/schemas.py +358 -0
- guidellm/scheduler/strategies.py +617 -0
- guidellm/scheduler/worker.py +413 -419
- guidellm/scheduler/worker_group.py +712 -0
- guidellm/schemas/__init__.py +65 -0
- guidellm/schemas/base.py +417 -0
- guidellm/schemas/info.py +188 -0
- guidellm/schemas/request.py +235 -0
- guidellm/schemas/request_stats.py +349 -0
- guidellm/schemas/response.py +124 -0
- guidellm/schemas/statistics.py +1018 -0
- guidellm/{config.py → settings.py} +31 -24
- guidellm/utils/__init__.py +71 -8
- guidellm/utils/auto_importer.py +98 -0
- guidellm/utils/cli.py +132 -5
- guidellm/utils/console.py +566 -0
- guidellm/utils/encoding.py +778 -0
- guidellm/utils/functions.py +159 -0
- guidellm/utils/hf_datasets.py +1 -2
- guidellm/utils/hf_transformers.py +4 -4
- guidellm/utils/imports.py +9 -0
- guidellm/utils/messaging.py +1118 -0
- guidellm/utils/mixins.py +115 -0
- guidellm/utils/random.py +3 -4
- guidellm/utils/registry.py +220 -0
- guidellm/utils/singleton.py +133 -0
- guidellm/utils/synchronous.py +159 -0
- guidellm/utils/text.py +163 -50
- guidellm/utils/typing.py +41 -0
- guidellm/version.py +2 -2
- guidellm-0.6.0a5.dist-info/METADATA +364 -0
- guidellm-0.6.0a5.dist-info/RECORD +109 -0
- guidellm/backend/__init__.py +0 -23
- guidellm/backend/backend.py +0 -259
- guidellm/backend/openai.py +0 -708
- guidellm/backend/response.py +0 -136
- guidellm/benchmark/aggregator.py +0 -760
- guidellm/benchmark/benchmark.py +0 -837
- guidellm/benchmark/output.py +0 -997
- guidellm/benchmark/profile.py +0 -409
- guidellm/benchmark/scenario.py +0 -104
- guidellm/data/prideandprejudice.txt.gz +0 -0
- guidellm/dataset/__init__.py +0 -22
- guidellm/dataset/creator.py +0 -213
- guidellm/dataset/entrypoints.py +0 -42
- guidellm/dataset/file.py +0 -92
- guidellm/dataset/hf_datasets.py +0 -62
- guidellm/dataset/in_memory.py +0 -132
- guidellm/dataset/synthetic.py +0 -287
- guidellm/objects/__init__.py +0 -18
- guidellm/objects/pydantic.py +0 -89
- guidellm/objects/statistics.py +0 -953
- guidellm/preprocess/__init__.py +0 -3
- guidellm/preprocess/dataset.py +0 -374
- guidellm/presentation/__init__.py +0 -28
- guidellm/presentation/builder.py +0 -27
- guidellm/presentation/data_models.py +0 -232
- guidellm/presentation/injector.py +0 -66
- guidellm/request/__init__.py +0 -18
- guidellm/request/loader.py +0 -284
- guidellm/request/request.py +0 -79
- guidellm/request/types.py +0 -10
- guidellm/scheduler/queues.py +0 -25
- guidellm/scheduler/result.py +0 -155
- guidellm/scheduler/strategy.py +0 -495
- guidellm-0.3.1.dist-info/METADATA +0 -329
- guidellm-0.3.1.dist-info/RECORD +0 -62
- {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/WHEEL +0 -0
- {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/entry_points.txt +0 -0
- {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/licenses/LICENSE +0 -0
- {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,312 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Request-based constraint implementations.
|
|
3
|
+
|
|
4
|
+
Provides constraint types for limiting benchmark execution based on request counts
|
|
5
|
+
and time duration. These constraints monitor request creation, processing, and
|
|
6
|
+
elapsed time to determine when to stop benchmark execution.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import time
|
|
12
|
+
from typing import Any, Literal, cast
|
|
13
|
+
|
|
14
|
+
from pydantic import Field, field_validator
|
|
15
|
+
|
|
16
|
+
from guidellm.scheduler.constraints.constraint import (
|
|
17
|
+
Constraint,
|
|
18
|
+
PydanticConstraintInitializer,
|
|
19
|
+
)
|
|
20
|
+
from guidellm.scheduler.constraints.factory import ConstraintsInitializerFactory
|
|
21
|
+
from guidellm.scheduler.schemas import (
|
|
22
|
+
SchedulerProgress,
|
|
23
|
+
SchedulerState,
|
|
24
|
+
SchedulerUpdateAction,
|
|
25
|
+
)
|
|
26
|
+
from guidellm.schemas import RequestInfo, StandardBaseModel
|
|
27
|
+
from guidellm.utils import InfoMixin
|
|
28
|
+
|
|
29
|
+
__all__ = [
|
|
30
|
+
"MaxDurationConstraint",
|
|
31
|
+
"MaxNumberConstraint",
|
|
32
|
+
"RequestsExhaustedConstraint",
|
|
33
|
+
]
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
@ConstraintsInitializerFactory.register(  # type: ignore[arg-type]
    ["max_number", "max_num", "max_requests", "max_req"]
)
class MaxNumberConstraint(PydanticConstraintInitializer):
    """
    Constraint that limits execution based on maximum request counts.

    Stops request queuing when created requests reach the limit and stops local
    request processing when processed requests reach the limit. Provides progress
    tracking based on the remaining request count.

    ``max_num`` may be a single number or a list of numbers. For a list,
    ``current_index`` selects the active entry (clamped to the last entry); it
    is advanced by one on every :meth:`create_constraint` call.
    """

    type_: Literal["max_number"] = "max_number"  # type: ignore[assignment]
    max_num: int | float | list[int | float] = Field(
        description="Maximum number of requests allowed before triggering constraint",
    )
    current_index: int = Field(
        default=-1, description="Current index for list-based max_num values"
    )

    @classmethod
    def validated_kwargs(
        cls, max_num: int | float | list[int | float] | None = None, **kwargs
    ) -> dict[str, Any]:
        """
        Validate and process arguments for MaxNumberConstraint creation.

        :param max_num: Maximum number of requests to allow; when omitted (None)
            the first alias found in kwargs is used instead
        :param kwargs: Supports the aliases max_number, max_requests, max_req,
            and an optional current_index
        :return: Dictionary with the max_num and current_index fields
        """
        # max_num defaults to None so that alias-only calls reach this fallback
        # instead of failing on a missing positional argument. "max_num" itself
        # can never appear in kwargs because it is bound to the named parameter.
        for alias in ("max_number", "max_requests", "max_req"):
            if max_num is None:
                max_num = kwargs.get(alias)

        return {"max_num": max_num, "current_index": kwargs.get("current_index", -1)}

    def create_constraint(self, **_kwargs) -> Constraint:
        """
        Advance to the next max_num entry and return a copy as the constraint.

        Increments ``current_index`` on this initializer and then returns a
        ``model_copy()`` of it, so the returned constraint carries the new index.

        :param _kwargs: Additional keyword arguments (unused)
        :return: Copy of this instance to be used as the constraint
        """
        self.current_index += 1

        return cast("Constraint", self.model_copy())

    def __call__(
        self, state: SchedulerState, request_info: RequestInfo
    ) -> SchedulerUpdateAction:
        """
        Evaluate constraint against current scheduler state and request count.

        :param state: Current scheduler state with request counts
        :param request_info: Request information; its completed_at value is used
            as the stop time once no requests remain
        :return: Action indicating whether to continue or stop operations
        """
        # A never-advanced initializer has index -1; treat it as the first entry.
        current_index = max(0, self.current_index)
        max_num = (
            self.max_num
            if isinstance(self.max_num, int | float)
            else self.max_num[min(current_index, len(self.max_num) - 1)]
        )

        create_exceeded = state.created_requests >= max_num
        processed_exceeded = state.processed_requests >= max_num
        remaining_requests = min(max(0, max_num - state.processed_requests), max_num)
        # Fall back to the current wall-clock time when completed_at is unset.
        stop_time = (
            None if remaining_requests > 0 else request_info.completed_at or time.time()
        )

        return SchedulerUpdateAction(
            request_queuing="stop" if create_exceeded else "continue",
            request_processing="stop_local" if processed_exceeded else "continue",
            metadata={
                "max_number": max_num,
                "create_exceeded": create_exceeded,
                "processed_exceeded": processed_exceeded,
                "created_requests": state.created_requests,
                "processed_requests": state.processed_requests,
                "remaining_requests": remaining_requests,
                "stop_time": stop_time,
            },
            progress=SchedulerProgress(
                remaining_requests=remaining_requests,
                total_requests=max_num,
                stop_time=stop_time,
            ),
        )

    @field_validator("max_num")
    @classmethod
    def _validate_max_num(
        cls, value: int | float | list[int | float]
    ) -> int | float | list[int | float]:
        """
        Ensure every max_num entry is a positive number.

        :param value: Single number or list of numbers to check
        :return: The single number when only one entry was given, otherwise the list
        :raises ValueError: If any entry is falsy, non-numeric, or not positive
        """
        if not isinstance(value, list):
            value = [value]
        for val in value:
            if not val:
                raise ValueError(
                    f"max_num must be set and truthful, received {value} ({val} failed)"
                )
            if not isinstance(val, int | float) or val <= 0:
                raise ValueError(
                    f"max_num must be a positive num, received {value} ({val} failed)"
                )

        # Collapse one-element lists back to a scalar.
        return value[0] if isinstance(value, list) and len(value) == 1 else value
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
@ConstraintsInitializerFactory.register(
    ["max_duration", "max_dur", "max_sec", "max_seconds", "max_min", "max_minutes"]
)
class MaxDurationConstraint(PydanticConstraintInitializer):
    """
    Constraint that limits execution based on maximum time duration.

    Stops both request queuing and local request processing once the time
    elapsed since the scheduler's request start time (or its start time when
    that is unset) reaches the maximum duration. Provides progress tracking
    based on the remaining duration.

    ``max_duration`` may be a single number of seconds or a list of them. For a
    list, ``current_index`` selects the active entry (clamped to the last
    entry); it is advanced by one on every :meth:`create_constraint` call.
    """

    type_: Literal["max_duration"] = "max_duration"  # type: ignore[assignment]
    max_duration: int | float | list[int | float] = Field(
        description="Maximum duration in seconds before triggering constraint"
    )
    current_index: int = Field(default=-1, description="Current index in duration list")

    @classmethod
    def validated_kwargs(
        cls, max_duration: int | float | list[int | float] | None = None, **kwargs
    ) -> dict[str, Any]:
        """
        Validate and process arguments for MaxDurationConstraint creation.

        :param max_duration: Maximum duration in seconds; when omitted (None) the
            first alias found in kwargs is used instead
        :param kwargs: Supports the second-based aliases max_dur, max_sec,
            max_seconds, the minute-based aliases max_min, max_minutes (converted
            to seconds), and an optional current_index
        :return: Dictionary with the max_duration and current_index fields
        """
        seconds_aliases = ["max_dur", "max_sec", "max_seconds"]
        for alias in seconds_aliases:
            if max_duration is None:
                max_duration = kwargs.get(alias)
        minutes_aliases = ["max_min", "max_minutes"]
        for alias in minutes_aliases:
            minutes = kwargs.get(alias)
            if minutes is not None and max_duration is None:
                # Convert element-wise for lists: `list * 60` would repeat the
                # list sixty times rather than scale each entry to seconds.
                max_duration = (
                    [entry * 60 for entry in minutes]
                    if isinstance(minutes, list)
                    else minutes * 60
                )

        return {
            "max_duration": max_duration,
            "current_index": kwargs.get("current_index", -1),
        }

    def create_constraint(self, **_kwargs) -> Constraint:
        """
        Advance to the next max_duration entry and return a copy as the constraint.

        Increments ``current_index`` on this initializer and then returns a
        ``model_copy()`` of it, so the returned constraint carries the new index.

        :param _kwargs: Additional keyword arguments (unused)
        :return: Copy of this instance to be used as the constraint
        """
        self.current_index += 1

        return cast("Constraint", self.model_copy())

    def __call__(
        self, state: SchedulerState, request_info: RequestInfo
    ) -> SchedulerUpdateAction:
        """
        Evaluate constraint against current scheduler state and elapsed time.

        :param state: Current scheduler state with start time
        :param request_info: Individual request information (unused)
        :return: Action indicating whether to continue or stop operations
        """
        _ = request_info  # Unused parameters
        # A never-advanced initializer has index -1; treat it as the first entry.
        current_index = max(0, self.current_index)
        max_duration = (
            self.max_duration
            if isinstance(self.max_duration, int | float)
            else self.max_duration[min(current_index, len(self.max_duration) - 1)]
        )

        # Prefer the request start time; fall back to the scheduler start time.
        start_time = state.start_requests_time or state.start_time
        current_time = time.time()
        elapsed = current_time - start_time
        duration_exceeded = elapsed >= max_duration
        remaining_duration = min(max(0.0, max_duration - elapsed), max_duration)
        # Report the nominal deadline rather than the moment it was detected.
        stop_time = None if not duration_exceeded else start_time + max_duration

        return SchedulerUpdateAction(
            request_queuing="stop" if duration_exceeded else "continue",
            request_processing="stop_local" if duration_exceeded else "continue",
            metadata={
                "max_duration": max_duration,
                "elapsed_time": elapsed,
                "duration_exceeded": duration_exceeded,
                "start_time": start_time,
                "current_time": current_time,
                "stop_time": stop_time,
            },
            progress=SchedulerProgress(
                remaining_duration=remaining_duration,
                total_duration=max_duration,
                stop_time=stop_time,
            ),
        )

    @field_validator("max_duration")
    @classmethod
    def _validate_max_duration(
        cls, value: int | float | list[int | float]
    ) -> int | float | list[int | float]:
        """
        Ensure every max_duration entry is a positive number.

        :param value: Single number or list of numbers to check
        :return: The single number when only one entry was given, otherwise the list
        :raises ValueError: If any entry is falsy, non-numeric, or not positive
        """
        if not isinstance(value, list):
            value = [value]
        for val in value:
            if not val:
                raise ValueError(
                    "max_duration must be set and truthful, "
                    f"received {value} ({val} failed)"
                )
            if not isinstance(val, int | float) or val <= 0:
                raise ValueError(
                    "max_duration must be a positive num, "
                    f"received {value} ({val} failed)"
                )

        # Collapse one-element lists back to a scalar.
        return value[0] if isinstance(value, list) and len(value) == 1 else value
|
|
269
|
+
|
|
270
|
+
|
|
271
|
+
class RequestsExhaustedConstraint(StandardBaseModel, InfoMixin):
    """
    Constraint that stops execution once a fixed number of requests is reached.

    Stops request queuing when the created request count reaches
    ``num_requests`` and stops local request processing when the processed
    request count reaches it.
    """

    type_: Literal["requests_exhausted"] = "requests_exhausted"  # type: ignore[assignment]
    num_requests: int

    @property
    def info(self) -> dict[str, Any]:
        """
        Serializable view of this constraint.

        :return: Dictionary produced by dumping the model's fields
        """
        return self.model_dump()

    def __call__(
        self, state: SchedulerState, request: RequestInfo
    ) -> SchedulerUpdateAction:
        """
        Evaluate the constraint against the current scheduler request counts.

        :param state: Current scheduler state with request counts
        :param request: Request information; its completed_at value is used as
            the stop time once no requests remain
        :return: Action indicating whether to continue or stop operations
        """
        limit = self.num_requests
        queuing_done = state.created_requests >= limit
        processing_done = state.processed_requests >= limit
        left = max(0, limit - state.processed_requests)

        if left > 0:
            finished_at = None
        else:
            # Fall back to the current wall-clock time when completed_at is unset.
            finished_at = request.completed_at or time.time()

        details = {
            "num_requests": limit,
            "create_exceeded": queuing_done,
            "processed_exceeded": processing_done,
            "created_requests": state.created_requests,
            "processed_requests": state.processed_requests,
            "remaining_requests": left,
            "stop_time": finished_at,
        }
        progress = SchedulerProgress(
            remaining_requests=left,
            total_requests=limit,
            stop_time=finished_at,
        )

        return SchedulerUpdateAction(
            request_queuing="stop" if queuing_done else "continue",
            request_processing="stop_local" if processing_done else "continue",
            metadata=details,
            progress=progress,
        )
|