guidellm 0.4.0a21__py3-none-any.whl → 0.4.0a155__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of guidellm might be problematic. Click here for more details.
- guidellm/__init__.py +5 -2
- guidellm/__main__.py +451 -252
- guidellm/backends/__init__.py +33 -0
- guidellm/backends/backend.py +110 -0
- guidellm/backends/openai.py +355 -0
- guidellm/backends/response_handlers.py +455 -0
- guidellm/benchmark/__init__.py +53 -39
- guidellm/benchmark/benchmarker.py +148 -317
- guidellm/benchmark/entrypoints.py +466 -128
- guidellm/benchmark/output.py +517 -771
- guidellm/benchmark/profile.py +580 -280
- guidellm/benchmark/progress.py +568 -549
- guidellm/benchmark/scenarios/__init__.py +40 -0
- guidellm/benchmark/scenarios/chat.json +6 -0
- guidellm/benchmark/scenarios/rag.json +6 -0
- guidellm/benchmark/schemas.py +2085 -0
- guidellm/data/__init__.py +28 -4
- guidellm/data/collators.py +16 -0
- guidellm/data/deserializers/__init__.py +53 -0
- guidellm/data/deserializers/deserializer.py +109 -0
- guidellm/data/deserializers/file.py +222 -0
- guidellm/data/deserializers/huggingface.py +94 -0
- guidellm/data/deserializers/memory.py +192 -0
- guidellm/data/deserializers/synthetic.py +346 -0
- guidellm/data/loaders.py +145 -0
- guidellm/data/preprocessors/__init__.py +25 -0
- guidellm/data/preprocessors/formatters.py +412 -0
- guidellm/data/preprocessors/mappers.py +198 -0
- guidellm/data/preprocessors/preprocessor.py +29 -0
- guidellm/data/processor.py +30 -0
- guidellm/data/schemas.py +13 -0
- guidellm/data/utils/__init__.py +10 -0
- guidellm/data/utils/dataset.py +94 -0
- guidellm/data/utils/functions.py +18 -0
- guidellm/extras/__init__.py +4 -0
- guidellm/extras/audio.py +215 -0
- guidellm/extras/vision.py +242 -0
- guidellm/logger.py +2 -2
- guidellm/mock_server/__init__.py +8 -0
- guidellm/mock_server/config.py +84 -0
- guidellm/mock_server/handlers/__init__.py +17 -0
- guidellm/mock_server/handlers/chat_completions.py +280 -0
- guidellm/mock_server/handlers/completions.py +280 -0
- guidellm/mock_server/handlers/tokenizer.py +142 -0
- guidellm/mock_server/models.py +510 -0
- guidellm/mock_server/server.py +168 -0
- guidellm/mock_server/utils.py +302 -0
- guidellm/preprocess/dataset.py +23 -26
- guidellm/presentation/builder.py +2 -2
- guidellm/presentation/data_models.py +25 -21
- guidellm/presentation/injector.py +2 -3
- guidellm/scheduler/__init__.py +65 -26
- guidellm/scheduler/constraints.py +1035 -0
- guidellm/scheduler/environments.py +252 -0
- guidellm/scheduler/scheduler.py +140 -368
- guidellm/scheduler/schemas.py +272 -0
- guidellm/scheduler/strategies.py +519 -0
- guidellm/scheduler/worker.py +391 -420
- guidellm/scheduler/worker_group.py +707 -0
- guidellm/schemas/__init__.py +31 -0
- guidellm/schemas/info.py +159 -0
- guidellm/schemas/request.py +216 -0
- guidellm/schemas/response.py +119 -0
- guidellm/schemas/stats.py +228 -0
- guidellm/{config.py → settings.py} +32 -21
- guidellm/utils/__init__.py +95 -8
- guidellm/utils/auto_importer.py +98 -0
- guidellm/utils/cli.py +46 -2
- guidellm/utils/console.py +183 -0
- guidellm/utils/encoding.py +778 -0
- guidellm/utils/functions.py +134 -0
- guidellm/utils/hf_datasets.py +1 -2
- guidellm/utils/hf_transformers.py +4 -4
- guidellm/utils/imports.py +9 -0
- guidellm/utils/messaging.py +1118 -0
- guidellm/utils/mixins.py +115 -0
- guidellm/utils/pydantic_utils.py +411 -0
- guidellm/utils/random.py +3 -4
- guidellm/utils/registry.py +220 -0
- guidellm/utils/singleton.py +133 -0
- guidellm/{objects → utils}/statistics.py +341 -247
- guidellm/utils/synchronous.py +159 -0
- guidellm/utils/text.py +163 -50
- guidellm/utils/typing.py +41 -0
- guidellm/version.py +1 -1
- {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a155.dist-info}/METADATA +33 -10
- guidellm-0.4.0a155.dist-info/RECORD +96 -0
- guidellm/backend/__init__.py +0 -23
- guidellm/backend/backend.py +0 -259
- guidellm/backend/openai.py +0 -705
- guidellm/backend/response.py +0 -136
- guidellm/benchmark/aggregator.py +0 -760
- guidellm/benchmark/benchmark.py +0 -837
- guidellm/benchmark/scenario.py +0 -104
- guidellm/data/prideandprejudice.txt.gz +0 -0
- guidellm/dataset/__init__.py +0 -22
- guidellm/dataset/creator.py +0 -213
- guidellm/dataset/entrypoints.py +0 -42
- guidellm/dataset/file.py +0 -92
- guidellm/dataset/hf_datasets.py +0 -62
- guidellm/dataset/in_memory.py +0 -132
- guidellm/dataset/synthetic.py +0 -287
- guidellm/objects/__init__.py +0 -18
- guidellm/objects/pydantic.py +0 -89
- guidellm/request/__init__.py +0 -18
- guidellm/request/loader.py +0 -284
- guidellm/request/request.py +0 -79
- guidellm/request/types.py +0 -10
- guidellm/scheduler/queues.py +0 -25
- guidellm/scheduler/result.py +0 -155
- guidellm/scheduler/strategy.py +0 -495
- guidellm-0.4.0a21.dist-info/RECORD +0 -62
- {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a155.dist-info}/WHEEL +0 -0
- {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a155.dist-info}/entry_points.txt +0 -0
- {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a155.dist-info}/licenses/LICENSE +0 -0
- {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a155.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,1035 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Constraint system for scheduler behavior control and request processing limits.
|
|
3
|
+
|
|
4
|
+
Provides flexible constraints for managing scheduler behavior with configurable
|
|
5
|
+
thresholds based on time, error rates, and request counts. Constraints evaluate
|
|
6
|
+
scheduler state and individual requests to determine whether processing should
|
|
7
|
+
continue or stop based on predefined limits. The constraint system enables
|
|
8
|
+
sophisticated benchmark stopping criteria through composable constraint types.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import time
|
|
14
|
+
from abc import ABC, abstractmethod
|
|
15
|
+
from typing import Any, Literal, Protocol, runtime_checkable
|
|
16
|
+
|
|
17
|
+
from pydantic import Field, field_validator
|
|
18
|
+
|
|
19
|
+
from guidellm.scheduler.schemas import (
|
|
20
|
+
SchedulerState,
|
|
21
|
+
SchedulerUpdateAction,
|
|
22
|
+
SchedulerUpdateActionProgress,
|
|
23
|
+
)
|
|
24
|
+
from guidellm.schemas import RequestInfo
|
|
25
|
+
from guidellm.settings import settings
|
|
26
|
+
from guidellm.utils import InfoMixin, RegistryMixin, StandardBaseModel
|
|
27
|
+
|
|
28
|
+
__all__ = [
|
|
29
|
+
"Constraint",
|
|
30
|
+
"ConstraintInitializer",
|
|
31
|
+
"ConstraintsInitializerFactory",
|
|
32
|
+
"MaxDurationConstraint",
|
|
33
|
+
"MaxErrorRateConstraint",
|
|
34
|
+
"MaxErrorsConstraint",
|
|
35
|
+
"MaxGlobalErrorRateConstraint",
|
|
36
|
+
"MaxNumberConstraint",
|
|
37
|
+
"PydanticConstraintInitializer",
|
|
38
|
+
"RequestsExhaustedConstraint",
|
|
39
|
+
"SerializableConstraintInitializer",
|
|
40
|
+
"UnserializableConstraintInitializer",
|
|
41
|
+
]
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
@runtime_checkable
class Constraint(Protocol):
    """
    Structural type for callables that steer scheduler behavior.

    Any object with a compatible ``__call__`` satisfies this protocol. Because the
    class is ``runtime_checkable``, an ``isinstance`` check only verifies that a
    ``__call__`` attribute is present; it does not validate the signature.
    """

    def __call__(
        self, state: SchedulerState, request: RequestInfo
    ) -> SchedulerUpdateAction:
        """
        Judge one request against the scheduler's current state.

        :param state: Scheduler state the constraint inspects
        :param request: Information about the individual request being evaluated
        :return: Action telling the scheduler whether to continue or stop
        """
        ...
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
@runtime_checkable
class ConstraintInitializer(Protocol):
    """
    Structural type for objects that can build a :class:`Constraint`.

    Only the presence of ``create_constraint`` is required; ``isinstance`` checks
    against this runtime-checkable protocol do not inspect the signature.
    """

    def create_constraint(self, **kwargs) -> Constraint:
        """
        Build a constraint from the initializer's configuration.

        :param kwargs: Extra configuration parameters for constraint creation
        :return: Callable constraint ready to be evaluated by the scheduler
        """
        ...
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
@runtime_checkable
class SerializableConstraintInitializer(Protocol):
    """
    Structural type for constraint initializers that can be persisted.

    An object satisfies this protocol when it exposes ``validated_kwargs``,
    ``model_validate``, ``model_dump`` and ``create_constraint``. Runtime
    ``isinstance``/``issubclass`` checks only test that these attributes exist.
    """

    @classmethod
    def validated_kwargs(cls, *args, **kwargs) -> dict[str, Any]:
        """
        Normalize raw arguments into the keyword set used to build the initializer.

        :param args: Positional configuration values
        :param kwargs: Keyword configuration values
        :return: Dictionary of validated parameters for constructing the initializer
        """
        ...

    # NOTE(review): ConstraintsInitializerFactory.deserialize in this module calls
    # ``model_validate(initializer_dict)`` with a positional dict, which differs
    # from the keyword-only shape declared here -- confirm the intended signature.
    @classmethod
    def model_validate(cls, **kwargs) -> ConstraintInitializer:
        """
        Build a validated initializer from configuration values.

        :param kwargs: Configuration used to create the initializer
        :return: Validated constraint initializer instance
        """
        ...

    def model_dump(self) -> dict[str, Any]:
        """
        Convert this initializer into a plain dictionary.

        :return: Dictionary representation of the initializer
        """
        ...

    def create_constraint(self, **kwargs) -> Constraint:
        """
        Build the constraint described by this initializer.

        :param kwargs: Extra configuration parameters
        :return: Callable constraint ready to be evaluated by the scheduler
        """
        ...
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
class ConstraintsInitializerFactory(RegistryMixin[ConstraintInitializer]):
    """
    Registry-backed factory for constraint initializers and constraints.

    Constraint initializer types register themselves under one or more keys. The
    factory can then build an initializer or a ready-to-use constraint from a
    key, convert initializers to and from dictionaries, and turn a mapping of
    mixed specifications (constraints, initializers, dicts, plain values) into
    callable constraints.

    Example:
    ::
        from guidellm.scheduler import ConstraintsInitializerFactory

        # Register new constraint type
        @ConstraintsInitializerFactory.register("new_constraint")
        class NewConstraint:
            def create_constraint(self, **kwargs) -> Constraint:
                return lambda state, request: SchedulerUpdateAction()

        # Create and use constraint
        constraint = ConstraintsInitializerFactory.create_constraint("new_constraint")
    """

    @classmethod
    def create(cls, key: str, *args, **kwargs) -> ConstraintInitializer:
        """
        Instantiate the initializer registered under ``key``.

        Registered classes that satisfy ``SerializableConstraintInitializer`` have
        their arguments normalized through ``validated_kwargs`` first; anything
        else is called directly with the given arguments.

        :param key: Registered constraint initializer key
        :param args: Positional arguments for initializer creation
        :param kwargs: Keyword arguments for initializer creation
        :return: Configured constraint initializer instance
        :raises ValueError: If the key is not registered in the factory
        """
        if cls.registry is None or key not in cls.registry:
            raise ValueError(f"Unknown constraint initializer key: {key}")

        initializer_class = cls.registry[key]
        uses_validated_kwargs = isinstance(initializer_class, type) and issubclass(
            initializer_class, SerializableConstraintInitializer
        )

        if uses_validated_kwargs:
            return initializer_class(
                **initializer_class.validated_kwargs(*args, **kwargs)  # type: ignore[misc]
            )

        return initializer_class(*args, **kwargs)  # type: ignore[operator]

    @classmethod
    def serialize(cls, initializer: ConstraintInitializer) -> dict[str, Any]:
        """
        Convert an initializer into a dictionary.

        Initializers that do not satisfy ``SerializableConstraintInitializer`` are
        wrapped in an ``UnserializableConstraintInitializer`` placeholder that
        carries the information extracted from the original object.

        :param initializer: Constraint initializer to serialize
        :return: Dictionary representation or unserializable placeholder
        """
        if isinstance(initializer, SerializableConstraintInitializer):
            return initializer.model_dump()

        placeholder = UnserializableConstraintInitializer(
            orig_info=InfoMixin.extract_from_obj(initializer)
        )
        return placeholder.model_dump()

    @classmethod
    def deserialize(
        cls, initializer_dict: dict[str, Any]
    ) -> SerializableConstraintInitializer | UnserializableConstraintInitializer:
        """
        Rebuild an initializer from its dictionary form.

        The ``type_`` entry selects the class: ``"unserializable"`` yields the
        placeholder type, any other value is looked up in the registry.

        :param initializer_dict: Dictionary representation of constraint initializer
        :return: Reconstructed constraint initializer instance
        :raises ValueError: If constraint type is unknown or cannot be deserialized
        """
        type_key = initializer_dict.get("type_")

        if type_key == "unserializable":
            return UnserializableConstraintInitializer.model_validate(initializer_dict)

        if cls.registry is not None and type_key and type_key in cls.registry:
            initializer_class = cls.registry[type_key]
            if hasattr(initializer_class, "model_validate"):
                return initializer_class.model_validate(initializer_dict)  # type: ignore[return-value]
            return initializer_class(**initializer_dict)  # type: ignore[return-value,operator]

        raise ValueError(
            f"Cannot deserialize unknown constraint initializer: "
            f"{initializer_dict.get('type_', 'unknown')}"
        )

    @classmethod
    def create_constraint(cls, key: str, *args, **kwargs) -> Constraint:
        """
        Build a constraint directly from a registered key.

        :param key: Registered constraint initializer key
        :param args: Positional arguments for constraint creation
        :param kwargs: Keyword arguments for constraint creation
        :return: Configured constraint function ready for evaluation
        :raises ValueError: If the key is not registered in the factory
        """
        initializer = cls.create(key, *args, **kwargs)
        return initializer.create_constraint()

    @classmethod
    def resolve(
        cls,
        initializers: dict[
            str,
            Any | dict[str, Any] | Constraint | ConstraintInitializer,
        ],
    ) -> dict[str, Constraint]:
        """
        Turn a mapping of mixed specifications into callable constraints.

        Each value is handled by the first matching rule: an existing constraint
        is kept, an initializer builds its constraint, a dict is expanded as
        keyword arguments for the keyed initializer, and any other value is
        passed to the keyed initializer as a single positional argument.

        :param initializers: Dictionary mapping constraint keys to specifications
        :return: Dictionary mapping constraint keys to callable functions
        :raises ValueError: If any key is not registered in the factory
        """
        resolved: dict[str, Constraint] = {}

        for name, spec in initializers.items():
            # The Constraint check runs first, so a value that is both callable
            # and an initializer is used as-is rather than re-created.
            if isinstance(spec, Constraint):
                constraint = spec
            elif isinstance(spec, ConstraintInitializer):
                constraint = spec.create_constraint()
            elif isinstance(spec, dict):
                constraint = cls.create_constraint(name, **spec)
            else:
                constraint = cls.create_constraint(name, spec)
            resolved[name] = constraint

        return resolved

    @classmethod
    def resolve_constraints(
        cls,
        constraints: dict[str, Any | dict[str, Any] | Constraint],
    ) -> dict[str, Constraint]:
        """
        Turn a mapping of constraints, dicts, or plain values into constraints.

        Existing constraints are kept; a dict is expanded as keyword arguments
        for the keyed initializer; any other value is passed positionally.

        :param constraints: Dictionary mapping constraint keys to specifications
        :return: Dictionary mapping constraint keys to callable functions
        :raises ValueError: If any constraint key is not registered
        """
        resolved: dict[str, Constraint] = {}

        for name, spec in constraints.items():
            if isinstance(spec, Constraint):
                resolved[name] = spec
                continue

            resolved[name] = (
                cls.create_constraint(name, **spec)
                if isinstance(spec, dict)
                else cls.create_constraint(name, spec)
            )

        return resolved
|
|
271
|
+
|
|
272
|
+
|
|
273
|
+
class PydanticConstraintInitializer(StandardBaseModel, ABC, InfoMixin):
    """
    Abstract Pydantic model shared by the concrete constraint initializers.

    Carries the ``type_`` discriminator, exposes the model's dumped fields as
    ``info``, and declares the two hooks every subclass supplies:
    ``validated_kwargs`` for argument normalization and ``create_constraint``
    for building the constraint itself.
    """

    type_: str = Field(description="Type identifier for the constraint initializer")

    @property
    def info(self) -> dict[str, Any]:
        """
        Expose this initializer's fields as a dictionary.

        :return: Result of ``model_dump()`` for this initializer
        """
        dumped = self.model_dump()
        return dumped

    @classmethod
    @abstractmethod
    def validated_kwargs(cls, *args, **kwargs) -> dict[str, Any]:
        """
        Normalize raw arguments into constructor keyword arguments.

        Abstract: each subclass defines how its own positional values and
        keyword aliases map onto its fields.

        :param args: Positional arguments passed to the constraint
        :param kwargs: Keyword arguments passed to the constraint
        :return: Validated dictionary of parameters for constraint creation
        """
        ...

    @abstractmethod
    def create_constraint(self, **kwargs) -> Constraint:
        """
        Build the constraint represented by this initializer.

        Abstract: each subclass returns its own configured constraint.

        :param kwargs: Additional keyword arguments (usually unused)
        :return: Configured constraint instance
        """
        ...
|
|
322
|
+
|
|
323
|
+
|
|
324
|
+
class UnserializableConstraintInitializer(PydanticConstraintInitializer):
    """
    Stand-in recorded for a constraint initializer that could not be serialized.

    Stores whatever information was captured from the original initializer in
    ``orig_info``. The placeholder is not executable: both ``create_constraint``
    and ``__call__`` always raise ``RuntimeError``.
    """

    type_: Literal["unserializable"] = "unserializable"  # type: ignore[assignment]
    orig_info: dict[str, Any] = Field(
        default_factory=dict,
        description="Original constraint information before serialization failure",
    )

    @classmethod
    def validated_kwargs(
        cls,
        orig_info: dict[str, Any] | None = None,
        **kwargs,  # noqa: ARG003
    ) -> dict[str, Any]:
        """
        Normalize arguments for building the placeholder.

        :param orig_info: Information captured from the original initializer;
            a missing or empty value becomes an empty dict
        :param kwargs: Additional arguments (ignored)
        :return: Dictionary holding only the ``orig_info`` entry
        """
        captured = orig_info if orig_info else {}
        return {"orig_info": captured}

    def create_constraint(
        self,
        **kwargs,  # noqa: ARG002
    ) -> Constraint:
        """
        Refuse to build a constraint from the placeholder.

        :param kwargs: Additional keyword arguments (unused)
        :raises RuntimeError: Always raised since unserializable constraints
            cannot be executed
        """
        message = (
            "Cannot create constraint from unserializable constraint instance. "
            "This constraint cannot be serialized and therefore cannot be executed."
        )
        raise RuntimeError(message)

    def __call__(
        self,
        state: SchedulerState,  # noqa: ARG002
        request: RequestInfo,  # noqa: ARG002
    ) -> SchedulerUpdateAction:
        """
        Refuse to evaluate the placeholder as a constraint.

        :param state: Current scheduler state (unused)
        :param request: Individual request information (unused)
        :raises RuntimeError: Always raised for unserializable constraints
        """
        message = (
            "Cannot invoke unserializable constraint instance. "
            "This constraint was not properly serialized and cannot be executed."
        )
        raise RuntimeError(message)
|
|
386
|
+
|
|
387
|
+
|
|
388
|
+
@ConstraintsInitializerFactory.register(  # type: ignore[arg-type]
    ["max_number", "max_num", "max_requests", "max_req"]
)
class MaxNumberConstraint(PydanticConstraintInitializer):
    """
    Constraint that limits execution based on maximum request counts.

    Stops request queuing when created requests reach the limit and stops local
    request processing when processed requests reach the limit. Reports progress
    as the remaining request count and remaining fraction.

    ``max_num`` may be a single limit or a list of limits. For a list, the limit
    in effect is the entry at ``current_index`` (clamped to the list bounds);
    ``create_constraint`` advances that index each time it is called.
    """

    type_: Literal["max_number"] = "max_number"  # type: ignore[assignment]
    max_num: int | float | list[int | float] = Field(
        description="Maximum number of requests allowed before triggering constraint",
    )
    current_index: int = Field(
        default=-1, description="Current index for list-based max_num values"
    )

    @classmethod
    def validated_kwargs(
        cls, max_num: int | float | list[int | float] | None = None, **kwargs
    ) -> dict[str, Any]:
        """
        Validate and process arguments for MaxNumberConstraint creation.

        ``max_num`` defaults to ``None`` so the limit can be supplied solely
        through one of the alias keywords; the first alias with a non-``None``
        value is used. If no value is found, ``None`` is returned for
        ``max_num`` and rejected later by model validation.

        :param max_num: Maximum number of requests to allow
        :param kwargs: Supports the aliases max_number, max_requests and max_req
            for the limit, plus an optional current_index
        :return: Dictionary with the max_num and current_index fields
        """
        if max_num is None:
            # "max_num" itself can never appear in kwargs (it is a named
            # parameter), so only the remaining registered aliases are checked.
            aliases = ("max_number", "max_requests", "max_req")
            max_num = next(
                (kwargs[alias] for alias in aliases if kwargs.get(alias) is not None),
                None,
            )

        return {"max_num": max_num, "current_index": kwargs.get("current_index", -1)}

    def create_constraint(self, **kwargs) -> Constraint:  # noqa: ARG002
        """
        Advance the list index and return a copy of this model as the constraint.

        The index is incremented on this instance before copying, so successive
        calls yield copies that select successive ``max_num`` entries.

        :param kwargs: Additional keyword arguments (unused)
        :return: Copy of this instance, usable as the constraint callable
        """
        self.current_index += 1

        return self.model_copy()  # type: ignore[return-value]

    def __call__(
        self,
        state: SchedulerState,
        request_info: RequestInfo,  # noqa: ARG002
    ) -> SchedulerUpdateAction:
        """
        Evaluate constraint against current scheduler state and request count.

        :param state: Current scheduler state with request counts
        :param request_info: Individual request information (unused)
        :return: Action indicating whether to continue or stop operations
        """
        # The default index of -1 (create_constraint never called) maps to the
        # first entry; indexes past the end reuse the last entry.
        current_index = max(0, self.current_index)
        max_num = (
            self.max_num
            if isinstance(self.max_num, int | float)
            else self.max_num[min(current_index, len(self.max_num) - 1)]
        )

        create_exceeded = state.created_requests >= max_num
        processed_exceeded = state.processed_requests >= max_num
        remaining_requests = min(max(0, max_num - state.processed_requests), max_num)
        remaining_fraction = remaining_requests / float(max_num)

        return SchedulerUpdateAction(
            request_queuing="stop" if create_exceeded else "continue",
            request_processing="stop_local" if processed_exceeded else "continue",
            metadata={
                "max_number": max_num,
                "create_exceeded": create_exceeded,
                "processed_exceeded": processed_exceeded,
                "created_requests": state.created_requests,
                "processed_requests": state.processed_requests,
                "remaining_fraction": remaining_fraction,
                "remaining_requests": remaining_requests,
            },
            progress=SchedulerUpdateActionProgress(
                remaining_fraction=remaining_fraction,
                remaining_requests=remaining_requests,
            ),
        )

    @field_validator("max_num")
    @classmethod
    def _validate_max_num(
        cls, value: int | float | list[int | float]
    ) -> int | float | list[int | float]:
        """
        Require every limit to be a positive number.

        :param value: Single limit or list of limits
        :return: The single limit when only one was given, otherwise the list
        :raises ValueError: If any entry is falsy, non-numeric, or not positive
        """
        if not isinstance(value, list):
            value = [value]
        for val in value:
            if not val:
                raise ValueError(
                    f"max_num must be set and truthful, received {value} ({val} failed)"
                )
            if not isinstance(val, int | float) or val <= 0:
                raise ValueError(
                    f"max_num must be a positive num, received {value} ({val} failed)"
                )

        # A one-element list is collapsed back to a scalar.
        return value[0] if isinstance(value, list) and len(value) == 1 else value
|
|
498
|
+
|
|
499
|
+
|
|
500
|
+
@ConstraintsInitializerFactory.register(  # type: ignore[arg-type]
    ["max_duration", "max_dur", "max_sec", "max_seconds", "max_min", "max_minutes"]
)
class MaxDurationConstraint(PydanticConstraintInitializer):
    """
    Constraint that limits execution based on maximum time duration.

    Stops both request queuing and local request processing once the time
    elapsed since ``state.start_time`` reaches the maximum duration. Reports
    progress as the remaining duration and remaining fraction.

    ``max_duration`` may be a single value or a list of values in seconds. For a
    list, the value in effect is the entry at ``current_index`` (clamped to the
    list bounds); ``create_constraint`` advances that index each time it is
    called.
    """

    type_: Literal["max_duration"] = "max_duration"  # type: ignore[assignment]
    max_duration: int | float | list[int | float] = Field(
        description="Maximum duration in seconds before triggering constraint"
    )
    current_index: int = Field(default=-1, description="Current index in duration list")

    @classmethod
    def validated_kwargs(
        cls, max_duration: int | float | list[int | float] | None = None, **kwargs
    ) -> dict[str, Any]:
        """
        Validate and process arguments for MaxDurationConstraint creation.

        An explicit ``max_duration`` wins. Otherwise the first non-``None``
        seconds alias is used, and failing that the first non-``None`` minutes
        alias, converted to seconds.

        :param max_duration: Maximum duration in seconds
        :param kwargs: Supports the seconds aliases max_dur, max_sec and
            max_seconds, the minutes aliases max_min and max_minutes, plus an
            optional current_index
        :return: Dictionary with the max_duration and current_index fields
        """
        for alias in ("max_dur", "max_sec", "max_seconds"):
            if max_duration is None:
                max_duration = kwargs.get(alias)

        for alias in ("max_min", "max_minutes"):
            minutes = kwargs.get(alias)
            if minutes is not None and max_duration is None:
                # Scale sequences element-wise: ``sequence * 60`` would repeat
                # the sequence 60 times instead of converting each entry.
                max_duration = (
                    [entry * 60 for entry in minutes]
                    if isinstance(minutes, (list, tuple))
                    else minutes * 60
                )

        return {
            "max_duration": max_duration,
            "current_index": kwargs.get("current_index", -1),
        }

    def create_constraint(self, **kwargs) -> Constraint:  # noqa: ARG002
        """
        Advance the list index and return a copy of this model as the constraint.

        The index is incremented on this instance before copying, so successive
        calls yield copies that select successive ``max_duration`` entries.

        :param kwargs: Additional keyword arguments (unused)
        :return: Copy of this instance, usable as the constraint callable
        """
        self.current_index += 1

        return self.model_copy()  # type: ignore[return-value]

    def __call__(
        self,
        state: SchedulerState,
        request_info: RequestInfo,  # noqa: ARG002
    ) -> SchedulerUpdateAction:
        """
        Evaluate constraint against current scheduler state and elapsed time.

        :param state: Current scheduler state with start time
        :param request_info: Individual request information (unused)
        :return: Action indicating whether to continue or stop operations
        """
        # The default index of -1 (create_constraint never called) maps to the
        # first entry; indexes past the end reuse the last entry.
        current_index = max(0, self.current_index)
        max_duration = (
            self.max_duration
            if isinstance(self.max_duration, int | float)
            else self.max_duration[min(current_index, len(self.max_duration) - 1)]
        )

        current_time = time.time()
        elapsed = current_time - state.start_time
        duration_exceeded = elapsed >= max_duration
        remaining_duration = min(max(0.0, max_duration - elapsed), max_duration)
        remaining_fraction = remaining_duration / float(max_duration)

        return SchedulerUpdateAction(
            request_queuing="stop" if duration_exceeded else "continue",
            request_processing="stop_local" if duration_exceeded else "continue",
            metadata={
                "max_duration": max_duration,
                "elapsed_time": elapsed,
                "duration_exceeded": duration_exceeded,
                "start_time": state.start_time,
                "current_time": current_time,
            },
            progress=SchedulerUpdateActionProgress(
                remaining_fraction=remaining_fraction,
                remaining_duration=remaining_duration,
            ),
        )

    @field_validator("max_duration")
    @classmethod
    def _validate_max_duration(
        cls, value: int | float | list[int | float]
    ) -> int | float | list[int | float]:
        """
        Require every duration to be a positive number.

        :param value: Single duration or list of durations
        :return: The single duration when only one was given, otherwise the list
        :raises ValueError: If any entry is falsy, non-numeric, or not positive
        """
        if not isinstance(value, list):
            value = [value]
        for val in value:
            if not val:
                raise ValueError(
                    "max_duration must be set and truthful, "
                    f"received {value} ({val} failed)"
                )
            if not isinstance(val, int | float) or val <= 0:
                raise ValueError(
                    "max_duration must be a positive num, "
                    f"received {value} ({val} failed)"
                )

        # A one-element list is collapsed back to a scalar.
        return value[0] if isinstance(value, list) and len(value) == 1 else value
|
|
617
|
+
|
|
618
|
+
|
|
619
|
+
@ConstraintsInitializerFactory.register(  # type: ignore[arg-type]
    ["max_errors", "max_err", "max_error", "max_errs"]
)
class MaxErrorsConstraint(PydanticConstraintInitializer):
    """
    Constraint that limits execution based on absolute error count.

    Stops request queuing and all request processing once
    ``state.errored_requests`` reaches the configured maximum. ``max_errors``
    may be a single limit or a list of limits; ``current_index`` selects the
    list entry to use and is clamped to the last entry.
    """

    type_: Literal["max_errors"] = "max_errors"  # type: ignore[assignment]
    max_errors: int | float | list[int | float] = Field(
        description="Maximum number of errors allowed before triggering constraint",
    )
    current_index: int = Field(default=-1, description="Current index in error list")

    @classmethod
    def validated_kwargs(
        cls, max_errors: int | float | list[int | float] | None = None, **kwargs
    ) -> dict[str, Any]:
        """
        Validate and process arguments for MaxErrorsConstraint creation.

        :param max_errors: Maximum number of errors to allow
        :param kwargs: Supports max_errors, max_err, max_error, max_errs,
            and optional current_index
        :return: Dictionary with max_errors and current_index entries
        """
        # Fall back to the first alias that supplies a value when the
        # positional argument was not given.
        aliases = ["max_errors", "max_err", "max_error", "max_errs"]
        for alias in aliases:
            if max_errors is None:
                max_errors = kwargs.get(alias)

        return {
            "max_errors": max_errors,
            "current_index": kwargs.get("current_index", -1),
        }

    def create_constraint(self, **kwargs) -> Constraint:  # noqa: ARG002
        """
        Advance to the next configured limit and return a copy as the constraint.

        Increments ``current_index`` on this initializer, then returns a
        ``model_copy()`` of it, so each call yields a constraint pointing at
        the next entry of a list-valued ``max_errors``.

        :param kwargs: Additional keyword arguments (unused)
        :return: Copy of this instance to be used as the constraint
        """
        self.current_index += 1

        return self.model_copy()  # type: ignore[return-value]

    def __call__(
        self,
        state: SchedulerState,
        request_info: RequestInfo,  # noqa: ARG002
    ) -> SchedulerUpdateAction:
        """
        Evaluate constraint against current error count.

        :param state: Current scheduler state with error counts
        :param request_info: Individual request information (unused)
        :return: Action indicating whether to continue or stop operations
        """
        # current_index starts at -1 before create_constraint is called;
        # treat that as the first entry and clamp to the last one.
        current_index = max(0, self.current_index)
        max_errors = (
            self.max_errors
            if isinstance(self.max_errors, int | float)
            else self.max_errors[min(current_index, len(self.max_errors) - 1)]
        )
        errors_exceeded = state.errored_requests >= max_errors

        return SchedulerUpdateAction(
            request_queuing="stop" if errors_exceeded else "continue",
            request_processing="stop_all" if errors_exceeded else "continue",
            metadata={
                "max_errors": max_errors,
                "errors_exceeded": errors_exceeded,
                "current_errors": state.errored_requests,
            },
        )

    @field_validator("max_errors")
    @classmethod
    def _validate_max_errors(
        cls, value: int | float | list[int | float]
    ) -> int | float | list[int | float]:
        """
        Validate that every configured error limit is a positive number.

        :param value: Candidate limit or list of candidate limits
        :return: The scalar limit, or the list when it holds several values
        :raises ValueError: If the list is empty, or any entry is falsy,
            non-numeric, or not greater than zero
        """
        values = value if isinstance(value, list) else [value]

        if not values:
            # An empty list would pass the per-item checks and then raise
            # IndexError when __call__ indexes into it.
            raise ValueError(
                f"max_errors must be set and truthful, received {values} (empty list)"
            )

        for val in values:
            if not val:
                raise ValueError(
                    "max_errors must be set and truthful, "
                    f"received {values} ({val} failed)"
                )
            if not isinstance(val, int | float) or val <= 0:
                raise ValueError(
                    "max_errors must be a positive num, "
                    f"received {values} ({val} failed)"
                )

        # Collapse a one-element list to its scalar.
        return values[0] if len(values) == 1 else values
|
|
719
|
+
|
|
720
|
+
|
|
721
|
+
@ConstraintsInitializerFactory.register(  # type: ignore[arg-type]
    ["max_error_rate", "max_err_rate", "max_errors_rate"]
)
class MaxErrorRateConstraint(PydanticConstraintInitializer):
    """
    Constraint that limits execution based on sliding window error rate.

    Records whether each finished request ("completed", "errored" or
    "cancelled") errored, keeps at most ``window_size`` of the most recent
    entries, and stops queuing and all processing when the error fraction in
    that window reaches ``max_error_rate``. The constraint only triggers once
    ``state.processed_requests`` is at least ``window_size``.
    """

    type_: Literal["max_error_rate"] = "max_error_rate"  # type: ignore[assignment]
    max_error_rate: int | float | list[int | float] = Field(
        description="Maximum error rate allowed (0.0, 1.0)"
    )
    window_size: int | float = Field(
        default=30,
        gt=0,
        description="Size of sliding window for calculating error rate",
    )
    error_window: list[bool] = Field(
        default_factory=list,
        description="Sliding window tracking error status of recent requests",
    )
    # NOTE: despite the description text, __call__ uses this value to select
    # an entry of a list-valued max_error_rate, not a position in error_window.
    current_index: int = Field(
        default=-1, description="Current index in the error window"
    )

    @classmethod
    def validated_kwargs(
        cls,
        max_error_rate: int | float | list[int | float] | None = None,
        **kwargs,
    ) -> dict[str, Any]:
        """
        Validate and process arguments for MaxErrorRateConstraint creation.

        :param max_error_rate: Maximum error rate to allow; optional so the
            value can be supplied through one of the alias keywords instead
        :param kwargs: Supports max_error_rate, max_err_rate, max_errors_rate,
            optional window_size, error_window, and current_index
        :return: Dictionary with max_error_rate, window_size, error_window,
            and current_index entries
        """
        # Fall back to the first alias that supplies a value when the
        # positional argument was not given.
        aliases = ["max_error_rate", "max_err_rate", "max_errors_rate"]
        for alias in aliases:
            if max_error_rate is None:
                max_error_rate = kwargs.get(alias)

        return {
            "max_error_rate": max_error_rate,
            "window_size": kwargs.get(
                "window_size", settings.constraint_error_window_size
            ),
            "error_window": kwargs.get("error_window", []),
            "current_index": kwargs.get("current_index", -1),
        }

    def create_constraint(self, **kwargs) -> Constraint:  # noqa: ARG002
        """
        Create a new MaxErrorRateConstraint instance with its own window.

        Increments ``current_index`` on this initializer and returns a copy
        whose ``error_window`` is an independent list, so entries recorded by
        one constraint are not visible to the initializer or to other
        constraints created from it.

        :param kwargs: Additional keyword arguments (unused)
        :return: New instance of the constraint
        """
        self.current_index += 1

        # model_copy() is shallow: without an explicit copy the new constraint
        # would share (and mutate) this initializer's error_window list.
        return self.model_copy(  # type: ignore[return-value]
            update={"error_window": list(self.error_window)}
        )

    def __call__(
        self, state: SchedulerState, request_info: RequestInfo
    ) -> SchedulerUpdateAction:
        """
        Evaluate constraint against sliding window error rate.

        :param state: Current scheduler state with request counts
        :param request_info: Individual request with completion status
        :return: Action indicating whether to continue or stop operations
        """
        # current_index starts at -1 before create_constraint is called;
        # treat that as the first entry and clamp to the last one.
        current_index = max(0, self.current_index)
        max_error_rate = (
            self.max_error_rate
            if isinstance(self.max_error_rate, int | float)
            else self.max_error_rate[min(current_index, len(self.max_error_rate) - 1)]
        )

        # Only finished requests contribute to the window; an entry is True
        # when the request errored.
        if request_info.status in ["completed", "errored", "cancelled"]:
            self.error_window.append(request_info.status == "errored")
            if len(self.error_window) > self.window_size:
                self.error_window.pop(0)

        error_count = sum(self.error_window)
        window_requests = len(self.error_window)
        error_rate = (
            error_count / float(window_requests) if window_requests > 0 else 0.0
        )
        exceeded_min_processed = state.processed_requests >= self.window_size
        exceeded_error_rate = error_rate >= max_error_rate
        should_stop = exceeded_min_processed and exceeded_error_rate

        return SchedulerUpdateAction(
            request_queuing="stop" if should_stop else "continue",
            request_processing="stop_all" if should_stop else "continue",
            metadata={
                "max_error_rate": max_error_rate,
                "window_size": self.window_size,
                "error_count": error_count,
                "processed_count": state.processed_requests,
                "current_window_size": len(self.error_window),
                "current_error_rate": error_rate,
                "exceeded_min_processed": exceeded_min_processed,
                "exceeded_error_rate": exceeded_error_rate,
            },
        )

    @field_validator("max_error_rate")
    @classmethod
    def _validate_max_error_rate(
        cls, value: int | float | list[int | float]
    ) -> int | float | list[int | float]:
        """
        Validate that every configured rate lies strictly between 0 and 1.

        :param value: Candidate rate or list of candidate rates
        :return: The scalar rate, or the list when it holds several values
        :raises ValueError: If the list is empty, or any entry is falsy,
            non-numeric, or outside the open interval (0, 1)
        """
        values = value if isinstance(value, list) else [value]

        if not values:
            # An empty list would pass the per-item checks and then raise
            # IndexError when __call__ indexes into it.
            raise ValueError(
                "max_error_rate must be set and truthful, "
                f"received {values} (empty list)"
            )

        for val in values:
            if not val:
                raise ValueError(
                    "max_error_rate must be set and truthful, "
                    f"received {values} ({val} failed)"
                )
            if not isinstance(val, int | float) or val <= 0 or val >= 1:
                raise ValueError(
                    "max_error_rate must be a number between 0 and 1, "
                    f"received {values} ({val} failed)"
                )

        # Collapse a one-element list to its scalar.
        return values[0] if len(values) == 1 else values
|
|
860
|
+
|
|
861
|
+
|
|
862
|
+
@ConstraintsInitializerFactory.register(  # type: ignore[arg-type]
    ["max_global_error_rate", "max_global_err_rate", "max_global_errors_rate"]
)
class MaxGlobalErrorRateConstraint(PydanticConstraintInitializer):
    """
    Constraint that limits execution based on global error rate.

    Computes ``state.errored_requests / state.processed_requests`` and stops
    queuing and all processing when that rate reaches ``max_error_rate``. The
    constraint only triggers once ``state.processed_requests`` is at least
    ``min_processed`` (or immediately when ``min_processed`` is None).
    """

    type_: Literal["max_global_error_rate"] = "max_global_error_rate"  # type: ignore[assignment]
    # List values are accepted to match validated_kwargs, the validator and
    # __call__, which all handle a list indexed by current_index.
    max_error_rate: int | float | list[int | float] = Field(
        description="Maximum error rate allowed (0.0 to 1.0)"
    )
    min_processed: int | float | None = Field(
        default=30,
        gt=0,
        description="Minimum requests processed before applying error rate constraint",
    )
    current_index: int = Field(
        default=-1, description="Current index for list-based max_error_rate values"
    )

    @classmethod
    def validated_kwargs(
        cls,
        max_error_rate: int | float | list[int | float] | None = None,
        **kwargs,
    ) -> dict[str, Any]:
        """
        Validate and process arguments for MaxGlobalErrorRateConstraint creation.

        :param max_error_rate: Maximum error rate to allow; optional so the
            value can be supplied through one of the alias keywords instead
        :param kwargs: Supports max_global_error_rate, max_global_err_rate,
            max_global_errors_rate, optional min_processed, and current_index
        :return: Dictionary with max_error_rate, min_processed,
            and current_index entries
        """
        # Fall back to the first alias that supplies a value when the
        # positional argument was not given.
        for alias in [
            "max_global_error_rate",
            "max_global_err_rate",
            "max_global_errors_rate",
        ]:
            if max_error_rate is None:
                max_error_rate = kwargs.get(alias)

        return {
            "max_error_rate": max_error_rate,
            "min_processed": kwargs.get(
                "min_processed", settings.constraint_error_min_processed
            ),
            "current_index": kwargs.get("current_index", -1),
        }

    def create_constraint(self, **kwargs) -> Constraint:  # noqa: ARG002
        """
        Advance to the next configured rate and return a copy as the constraint.

        Increments ``current_index`` on this initializer, then returns a
        ``model_copy()`` of it.

        :param kwargs: Additional keyword arguments (unused)
        :return: Copy of this instance to be used as the constraint
        """
        self.current_index += 1

        return self.model_copy()  # type: ignore[return-value]

    def __call__(
        self,
        state: SchedulerState,
        request_info: RequestInfo,  # noqa: ARG002
    ) -> SchedulerUpdateAction:
        """
        Evaluate constraint against global error rate.

        :param state: Current scheduler state with global request and error counts
        :param request_info: Individual request information (unused)
        :return: Action indicating whether to continue or stop operations
        """
        # current_index starts at -1 before create_constraint is called;
        # treat that as the first entry and clamp to the last one.
        current_index = max(0, self.current_index)
        max_error_rate = (
            self.max_error_rate
            if isinstance(self.max_error_rate, int | float)
            else self.max_error_rate[min(current_index, len(self.max_error_rate) - 1)]
        )

        exceeded_min_processed = (
            self.min_processed is None or state.processed_requests >= self.min_processed
        )
        # Report a rate of 0.0 until at least one request has been processed.
        error_rate = (
            state.errored_requests / float(state.processed_requests)
            if state.processed_requests > 0
            else 0.0
        )
        exceeded_error_rate = error_rate >= max_error_rate
        should_stop = exceeded_min_processed and exceeded_error_rate

        return SchedulerUpdateAction(
            request_queuing="stop" if should_stop else "continue",
            request_processing="stop_all" if should_stop else "continue",
            metadata={
                "max_error_rate": max_error_rate,
                "min_processed": self.min_processed,
                "processed_requests": state.processed_requests,
                "errored_requests": state.errored_requests,
                "error_rate": error_rate,
                "exceeded_min_processed": exceeded_min_processed,
                "exceeded_error_rate": exceeded_error_rate,
            },
        )

    @field_validator("max_error_rate")
    @classmethod
    def _validate_max_error_rate(
        cls, value: int | float | list[int | float]
    ) -> int | float | list[int | float]:
        """
        Validate that every configured rate lies strictly between 0 and 1.

        :param value: Candidate rate or list of candidate rates
        :return: The scalar rate, or the list when it holds several values
        :raises ValueError: If the list is empty, or any entry is falsy,
            non-numeric, or outside the open interval (0, 1)
        """
        values = value if isinstance(value, list) else [value]

        if not values:
            # An empty list would pass the per-item checks and then raise
            # IndexError when __call__ indexes into it.
            raise ValueError(
                "max_error_rate must be set and truthful, "
                f"received {values} (empty list)"
            )

        for val in values:
            if not val:
                raise ValueError(
                    "max_error_rate must be set and truthful, "
                    f"received {values} ({val} failed)"
                )
            if not isinstance(val, int | float) or val <= 0 or val >= 1:
                raise ValueError(
                    "max_error_rate must be a number between 0 and 1, "
                    f"received {values} ({val} failed)"
                )

        # Collapse a one-element list to its scalar.
        return values[0] if len(values) == 1 else values
|
|
992
|
+
|
|
993
|
+
|
|
994
|
+
class RequestsExhaustedConstraint(StandardBaseModel, InfoMixin):
    """
    Constraint that stops execution once a fixed number of requests is reached.

    Stops request queuing when ``state.created_requests`` reaches
    ``num_requests`` and stops local request processing when
    ``state.processed_requests`` reaches ``num_requests``. Also reports the
    remaining fraction and count of requests as progress.
    """

    type_: Literal["requests_exhausted"] = "requests_exhausted"  # type: ignore[assignment]
    num_requests: int

    @property
    def info(self) -> dict[str, Any]:
        """
        Extract serializable information from this constraint.

        :return: Dictionary produced by ``model_dump()`` for this instance
        """
        return self.model_dump()

    def __call__(
        self,
        state: SchedulerState,
        request_info: RequestInfo,  # noqa: ARG002
    ) -> SchedulerUpdateAction:
        """
        Evaluate constraint against the created and processed request counts.

        :param state: Current scheduler state with created/processed counts
        :param request_info: Individual request information (unused)
        :return: Action indicating whether to continue or stop operations,
            including progress on the remaining requests
        """
        create_exceeded = state.created_requests >= self.num_requests
        processed_exceeded = state.processed_requests >= self.num_requests

        if self.num_requests > 0:
            # Clamp to [0, 1] so over-processing never reports negative progress.
            remaining_fraction = min(
                max(0.0, 1.0 - state.processed_requests / float(self.num_requests)),
                1.0,
            )
        else:
            # Nothing can remain when there are no requests to run; this also
            # avoids dividing by zero.
            remaining_fraction = 0.0
        remaining_requests = max(0, self.num_requests - state.processed_requests)

        return SchedulerUpdateAction(
            request_queuing="stop" if create_exceeded else "continue",
            request_processing="stop_local" if processed_exceeded else "continue",
            metadata={
                "num_requests": self.num_requests,
                "create_exceeded": create_exceeded,
                "processed_exceeded": processed_exceeded,
                "created_requests": state.created_requests,
                "processed_requests": state.processed_requests,
                "remaining_fraction": remaining_fraction,
                "remaining_requests": remaining_requests,
            },
            progress=SchedulerUpdateActionProgress(
                remaining_fraction=remaining_fraction,
                remaining_requests=remaining_requests,
            ),
        )
|