guidellm 0.4.0a18__py3-none-any.whl → 0.4.0a155__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of guidellm might be problematic. Click here for more details.

Files changed (116)
  1. guidellm/__init__.py +5 -2
  2. guidellm/__main__.py +451 -252
  3. guidellm/backends/__init__.py +33 -0
  4. guidellm/backends/backend.py +110 -0
  5. guidellm/backends/openai.py +355 -0
  6. guidellm/backends/response_handlers.py +455 -0
  7. guidellm/benchmark/__init__.py +53 -39
  8. guidellm/benchmark/benchmarker.py +148 -317
  9. guidellm/benchmark/entrypoints.py +466 -128
  10. guidellm/benchmark/output.py +517 -771
  11. guidellm/benchmark/profile.py +580 -280
  12. guidellm/benchmark/progress.py +568 -549
  13. guidellm/benchmark/scenarios/__init__.py +40 -0
  14. guidellm/benchmark/scenarios/chat.json +6 -0
  15. guidellm/benchmark/scenarios/rag.json +6 -0
  16. guidellm/benchmark/schemas.py +2085 -0
  17. guidellm/data/__init__.py +28 -4
  18. guidellm/data/collators.py +16 -0
  19. guidellm/data/deserializers/__init__.py +53 -0
  20. guidellm/data/deserializers/deserializer.py +109 -0
  21. guidellm/data/deserializers/file.py +222 -0
  22. guidellm/data/deserializers/huggingface.py +94 -0
  23. guidellm/data/deserializers/memory.py +192 -0
  24. guidellm/data/deserializers/synthetic.py +346 -0
  25. guidellm/data/loaders.py +145 -0
  26. guidellm/data/preprocessors/__init__.py +25 -0
  27. guidellm/data/preprocessors/formatters.py +412 -0
  28. guidellm/data/preprocessors/mappers.py +198 -0
  29. guidellm/data/preprocessors/preprocessor.py +29 -0
  30. guidellm/data/processor.py +30 -0
  31. guidellm/data/schemas.py +13 -0
  32. guidellm/data/utils/__init__.py +10 -0
  33. guidellm/data/utils/dataset.py +94 -0
  34. guidellm/data/utils/functions.py +18 -0
  35. guidellm/extras/__init__.py +4 -0
  36. guidellm/extras/audio.py +215 -0
  37. guidellm/extras/vision.py +242 -0
  38. guidellm/logger.py +2 -2
  39. guidellm/mock_server/__init__.py +8 -0
  40. guidellm/mock_server/config.py +84 -0
  41. guidellm/mock_server/handlers/__init__.py +17 -0
  42. guidellm/mock_server/handlers/chat_completions.py +280 -0
  43. guidellm/mock_server/handlers/completions.py +280 -0
  44. guidellm/mock_server/handlers/tokenizer.py +142 -0
  45. guidellm/mock_server/models.py +510 -0
  46. guidellm/mock_server/server.py +168 -0
  47. guidellm/mock_server/utils.py +302 -0
  48. guidellm/preprocess/dataset.py +23 -26
  49. guidellm/presentation/builder.py +2 -2
  50. guidellm/presentation/data_models.py +25 -21
  51. guidellm/presentation/injector.py +2 -3
  52. guidellm/scheduler/__init__.py +65 -26
  53. guidellm/scheduler/constraints.py +1035 -0
  54. guidellm/scheduler/environments.py +252 -0
  55. guidellm/scheduler/scheduler.py +140 -368
  56. guidellm/scheduler/schemas.py +272 -0
  57. guidellm/scheduler/strategies.py +519 -0
  58. guidellm/scheduler/worker.py +391 -420
  59. guidellm/scheduler/worker_group.py +707 -0
  60. guidellm/schemas/__init__.py +31 -0
  61. guidellm/schemas/info.py +159 -0
  62. guidellm/schemas/request.py +216 -0
  63. guidellm/schemas/response.py +119 -0
  64. guidellm/schemas/stats.py +228 -0
  65. guidellm/{config.py → settings.py} +32 -21
  66. guidellm/utils/__init__.py +95 -8
  67. guidellm/utils/auto_importer.py +98 -0
  68. guidellm/utils/cli.py +46 -2
  69. guidellm/utils/console.py +183 -0
  70. guidellm/utils/encoding.py +778 -0
  71. guidellm/utils/functions.py +134 -0
  72. guidellm/utils/hf_datasets.py +1 -2
  73. guidellm/utils/hf_transformers.py +4 -4
  74. guidellm/utils/imports.py +9 -0
  75. guidellm/utils/messaging.py +1118 -0
  76. guidellm/utils/mixins.py +115 -0
  77. guidellm/utils/pydantic_utils.py +411 -0
  78. guidellm/utils/random.py +3 -4
  79. guidellm/utils/registry.py +220 -0
  80. guidellm/utils/singleton.py +133 -0
  81. guidellm/{objects → utils}/statistics.py +341 -247
  82. guidellm/utils/synchronous.py +159 -0
  83. guidellm/utils/text.py +163 -50
  84. guidellm/utils/typing.py +41 -0
  85. guidellm/version.py +1 -1
  86. {guidellm-0.4.0a18.dist-info → guidellm-0.4.0a155.dist-info}/METADATA +33 -10
  87. guidellm-0.4.0a155.dist-info/RECORD +96 -0
  88. guidellm/backend/__init__.py +0 -23
  89. guidellm/backend/backend.py +0 -259
  90. guidellm/backend/openai.py +0 -705
  91. guidellm/backend/response.py +0 -136
  92. guidellm/benchmark/aggregator.py +0 -760
  93. guidellm/benchmark/benchmark.py +0 -837
  94. guidellm/benchmark/scenario.py +0 -104
  95. guidellm/data/prideandprejudice.txt.gz +0 -0
  96. guidellm/dataset/__init__.py +0 -22
  97. guidellm/dataset/creator.py +0 -213
  98. guidellm/dataset/entrypoints.py +0 -42
  99. guidellm/dataset/file.py +0 -92
  100. guidellm/dataset/hf_datasets.py +0 -62
  101. guidellm/dataset/in_memory.py +0 -132
  102. guidellm/dataset/synthetic.py +0 -287
  103. guidellm/objects/__init__.py +0 -18
  104. guidellm/objects/pydantic.py +0 -89
  105. guidellm/request/__init__.py +0 -18
  106. guidellm/request/loader.py +0 -284
  107. guidellm/request/request.py +0 -79
  108. guidellm/request/types.py +0 -10
  109. guidellm/scheduler/queues.py +0 -25
  110. guidellm/scheduler/result.py +0 -155
  111. guidellm/scheduler/strategy.py +0 -495
  112. guidellm-0.4.0a18.dist-info/RECORD +0 -62
  113. {guidellm-0.4.0a18.dist-info → guidellm-0.4.0a155.dist-info}/WHEEL +0 -0
  114. {guidellm-0.4.0a18.dist-info → guidellm-0.4.0a155.dist-info}/entry_points.txt +0 -0
  115. {guidellm-0.4.0a18.dist-info → guidellm-0.4.0a155.dist-info}/licenses/LICENSE +0 -0
  116. {guidellm-0.4.0a18.dist-info → guidellm-0.4.0a155.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,1035 @@
1
+ """
2
+ Constraint system for scheduler behavior control and request processing limits.
3
+
4
+ Provides flexible constraints for managing scheduler behavior with configurable
5
+ thresholds based on time, error rates, and request counts. Constraints evaluate
6
+ scheduler state and individual requests to determine whether processing should
7
+ continue or stop based on predefined limits. The constraint system enables
8
+ sophisticated benchmark stopping criteria through composable constraint types.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import time
14
+ from abc import ABC, abstractmethod
15
+ from typing import Any, Literal, Protocol, runtime_checkable
16
+
17
+ from pydantic import Field, field_validator
18
+
19
+ from guidellm.scheduler.schemas import (
20
+ SchedulerState,
21
+ SchedulerUpdateAction,
22
+ SchedulerUpdateActionProgress,
23
+ )
24
+ from guidellm.schemas import RequestInfo
25
+ from guidellm.settings import settings
26
+ from guidellm.utils import InfoMixin, RegistryMixin, StandardBaseModel
27
+
28
+ __all__ = [
29
+ "Constraint",
30
+ "ConstraintInitializer",
31
+ "ConstraintsInitializerFactory",
32
+ "MaxDurationConstraint",
33
+ "MaxErrorRateConstraint",
34
+ "MaxErrorsConstraint",
35
+ "MaxGlobalErrorRateConstraint",
36
+ "MaxNumberConstraint",
37
+ "PydanticConstraintInitializer",
38
+ "RequestsExhaustedConstraint",
39
+ "SerializableConstraintInitializer",
40
+ "UnserializableConstraintInitializer",
41
+ ]
42
+
43
+
44
@runtime_checkable
class Constraint(Protocol):
    """
    Structural type for callables that steer scheduler behavior.

    Any object whose ``__call__`` accepts the scheduler state and a request's
    info and returns a ``SchedulerUpdateAction`` satisfies this protocol.
    """

    def __call__(
        self, state: SchedulerState, request: RequestInfo
    ) -> SchedulerUpdateAction:
        """
        Evaluate the constraint for one request against the scheduler state.

        :param state: Current scheduler state with metrics and timing information
        :param request: Individual request information and metadata
        :return: Action indicating whether to continue or stop scheduler operations
        """
        ...
58
+
59
+
60
@runtime_checkable
class ConstraintInitializer(Protocol):
    """
    Structural type for objects able to build a constraint.

    Satisfied by any object exposing a ``create_constraint`` method.
    """

    def create_constraint(self, **kwargs) -> Constraint:
        """
        Build a constraint from the supplied configuration.

        :param kwargs: Configuration parameters for constraint creation
        :return: Configured constraint evaluation function
        """
        ...
71
+
72
+
73
@runtime_checkable
class SerializableConstraintInitializer(Protocol):
    """
    Structural type for constraint initializers that can be persisted.

    Besides building constraints, conforming objects can validate raw creation
    arguments, be rebuilt from a configuration, and dump themselves to a
    dictionary.
    """

    @classmethod
    def validated_kwargs(cls, *args, **kwargs) -> dict[str, Any]:
        """
        Normalize raw creation arguments into constructor keyword arguments.

        :param args: Positional arguments for constraint configuration
        :param kwargs: Keyword arguments for constraint configuration
        :return: Validated parameter dictionary for constraint creation
        """
        ...

    @classmethod
    def model_validate(cls, **kwargs) -> ConstraintInitializer:
        """
        Build a validated initializer from configuration.

        :param kwargs: Configuration dictionary for initializer creation
        :return: Validated constraint initializer instance
        """
        ...

    def model_dump(self) -> dict[str, Any]:
        """
        Dump this initializer to a dictionary.

        :return: Dictionary representation of constraint initializer
        """
        ...

    def create_constraint(self, **kwargs) -> Constraint:
        """
        Build a constraint from this initializer.

        :param kwargs: Additional configuration parameters
        :return: Configured constraint evaluation function
        """
        ...
110
+
111
+
112
class ConstraintsInitializerFactory(RegistryMixin[ConstraintInitializer]):
    """
    Registry-backed factory for constraint initializers and constraints.

    Looks up registered initializer types by key and builds constraints from
    keyword dictionaries, single positional values, or objects that already
    satisfy the constraint / initializer protocols. Also converts initializers
    to and from their dictionary representation.

    Example:
    ::
        from guidellm.scheduler import ConstraintsInitializerFactory

        # Register new constraint type
        @ConstraintsInitializerFactory.register("new_constraint")
        class NewConstraint:
            def create_constraint(self, **kwargs) -> Constraint:
                return lambda state, request: SchedulerUpdateAction()

        # Create and use constraint
        constraint = ConstraintsInitializerFactory.create_constraint("new_constraint")
    """

    @classmethod
    def create(cls, key: str, *args, **kwargs) -> ConstraintInitializer:
        """
        Instantiate the initializer registered under ``key``.

        Registered classes that satisfy ``SerializableConstraintInitializer``
        have their arguments routed through ``validated_kwargs`` first; any
        other registered callable receives the arguments untouched.

        :param key: Registered constraint initializer key
        :param args: Positional arguments for initializer creation
        :param kwargs: Keyword arguments for initializer creation
        :return: Configured constraint initializer instance
        :raises ValueError: If the key is not registered in the factory
        """
        if cls.registry is None or key not in cls.registry:
            raise ValueError(f"Unknown constraint initializer key: {key}")

        initializer_class = cls.registry[key]
        uses_validated_kwargs = isinstance(initializer_class, type) and issubclass(
            initializer_class, SerializableConstraintInitializer
        )

        if uses_validated_kwargs:
            validated = initializer_class.validated_kwargs(  # type: ignore[misc]
                *args, **kwargs
            )
            return initializer_class(**validated)

        return initializer_class(*args, **kwargs)  # type: ignore[operator]

    @classmethod
    def serialize(cls, initializer: ConstraintInitializer) -> dict[str, Any]:
        """
        Convert an initializer to its dictionary representation.

        Initializers that do not satisfy ``SerializableConstraintInitializer``
        are wrapped in an ``UnserializableConstraintInitializer`` placeholder
        carrying the info extracted from the original object.

        :param initializer: Constraint initializer to serialize
        :return: Dictionary representation or unserializable placeholder
        """
        if not isinstance(initializer, SerializableConstraintInitializer):
            placeholder = UnserializableConstraintInitializer(
                orig_info=InfoMixin.extract_from_obj(initializer)
            )
            return placeholder.model_dump()

        return initializer.model_dump()

    @classmethod
    def deserialize(
        cls, initializer_dict: dict[str, Any]
    ) -> SerializableConstraintInitializer | UnserializableConstraintInitializer:
        """
        Rebuild an initializer from its dictionary representation.

        The ``type_`` entry selects the target: ``"unserializable"`` yields the
        placeholder type, any other registered key yields that registered type.

        :param initializer_dict: Dictionary representation of constraint initializer
        :return: Reconstructed constraint initializer instance
        :raises ValueError: If constraint type is unknown or cannot be deserialized
        """
        type_key = initializer_dict.get("type_")

        if type_key == "unserializable":
            return UnserializableConstraintInitializer.model_validate(initializer_dict)

        if cls.registry is not None and type_key and type_key in cls.registry:
            initializer_class = cls.registry[type_key]
            if hasattr(initializer_class, "model_validate"):
                return initializer_class.model_validate(initializer_dict)  # type: ignore[return-value]
            return initializer_class(**initializer_dict)  # type: ignore[return-value,operator]

        raise ValueError(
            f"Cannot deserialize unknown constraint initializer: "
            f"{initializer_dict.get('type_', 'unknown')}"
        )

    @classmethod
    def create_constraint(cls, key: str, *args, **kwargs) -> Constraint:
        """
        Build a ready-to-use constraint for the initializer registered at ``key``.

        :param key: Registered constraint initializer key
        :param args: Positional arguments for constraint creation
        :param kwargs: Keyword arguments for constraint creation
        :return: Configured constraint function ready for evaluation
        :raises ValueError: If the key is not registered in the factory
        """
        initializer = cls.create(key, *args, **kwargs)
        return initializer.create_constraint()

    @classmethod
    def resolve(
        cls,
        initializers: dict[
            str,
            Any | dict[str, Any] | Constraint | ConstraintInitializer,
        ],
    ) -> dict[str, Constraint]:
        """
        Turn a mapping of mixed specifications into callable constraints.

        Values already satisfying ``Constraint`` are kept as-is, values
        satisfying ``ConstraintInitializer`` are asked to create a constraint,
        dictionaries are expanded as keyword arguments, and anything else is
        passed as a single positional argument.

        :param initializers: Dictionary mapping constraint keys to specifications
        :return: Dictionary mapping constraint keys to callable functions
        :raises ValueError: If any key is not registered in the factory
        """
        resolved = {}

        for key, spec in initializers.items():
            # The Constraint check comes first, so callable specs are never
            # treated as initializers.
            if isinstance(spec, Constraint):
                resolved[key] = spec
                continue
            if isinstance(spec, ConstraintInitializer):
                resolved[key] = spec.create_constraint()
                continue
            resolved[key] = (
                cls.create_constraint(key, **spec)
                if isinstance(spec, dict)
                else cls.create_constraint(key, spec)
            )

        return resolved

    @classmethod
    def resolve_constraints(
        cls,
        constraints: dict[str, Any | dict[str, Any] | Constraint],
    ) -> dict[str, Constraint]:
        """
        Turn a mapping of constraint specifications into callable constraints.

        Values already satisfying ``Constraint`` are kept as-is, dictionaries
        are expanded as keyword arguments, and anything else is passed as a
        single positional argument.

        :param constraints: Dictionary mapping constraint keys to specifications
        :return: Dictionary mapping constraint keys to callable functions
        :raises ValueError: If any constraint key is not registered
        """
        resolved = {}

        for key, spec in constraints.items():
            if isinstance(spec, Constraint):
                resolved[key] = spec
                continue
            resolved[key] = (
                cls.create_constraint(key, **spec)
                if isinstance(spec, dict)
                else cls.create_constraint(key, spec)
            )

        return resolved
271
+
272
+
273
class PydanticConstraintInitializer(StandardBaseModel, ABC, InfoMixin):
    """
    Abstract Pydantic model base shared by the built-in constraint initializers.

    Carries the ``type_`` discriminator, exposes the dumped model as ``info``,
    and declares the two hooks every concrete initializer has to supply:
    argument validation and constraint creation.
    """

    type_: str = Field(description="Type identifier for the constraint initializer")

    @property
    def info(self) -> dict[str, Any]:
        """
        Expose this initializer's dumped model as its info dictionary.

        :return: Dictionary containing constraint configuration and metadata
        """
        return self.model_dump()

    @classmethod
    @abstractmethod
    def validated_kwargs(cls, *args, **kwargs) -> dict[str, Any]:
        """
        Normalize raw creation arguments into constructor keyword arguments.

        Abstract: each subclass handles its own parameter names and aliases.

        :param args: Positional arguments passed to the constraint
        :param kwargs: Keyword arguments passed to the constraint
        :return: Validated dictionary of parameters for constraint creation
        """
        ...

    @abstractmethod
    def create_constraint(self, **kwargs) -> Constraint:
        """
        Build the constraint described by this initializer.

        Abstract: each subclass returns its own constraint type.

        :param kwargs: Additional keyword arguments (usually unused)
        :return: Configured constraint instance
        """
        ...
322
+
323
+
324
class UnserializableConstraintInitializer(PydanticConstraintInitializer):
    """
    Stand-in recorded for initializers that could not be serialized.

    Keeps whatever info was extracted from the original object. It can be
    dumped and validated like any other initializer, but both creating a
    constraint from it and calling it raise ``RuntimeError``.
    """

    type_: Literal["unserializable"] = "unserializable"  # type: ignore[assignment]
    orig_info: dict[str, Any] = Field(
        default_factory=dict,
        description="Original constraint information before serialization failure",
    )

    @classmethod
    def validated_kwargs(
        cls,
        orig_info: dict[str, Any] | None = None,
        **kwargs,  # noqa: ARG003
    ) -> dict[str, Any]:
        """
        Build constructor arguments for the placeholder.

        :param orig_info: Original constraint information before serialization failure
        :param kwargs: Additional arguments (ignored)
        :return: Dictionary holding ``orig_info``, or an empty dict when it is falsy
        """
        info = orig_info or {}
        return {"orig_info": info}

    def create_constraint(
        self,
        **kwargs,  # noqa: ARG002
    ) -> Constraint:
        """
        Refuse to build a constraint from the placeholder.

        :param kwargs: Additional keyword arguments (unused)
        :raises RuntimeError: Always raised since unserializable constraints
            cannot be executed
        """
        message = (
            "Cannot create constraint from unserializable constraint instance. "
            "This constraint cannot be serialized and therefore cannot be executed."
        )
        raise RuntimeError(message)

    def __call__(
        self,
        state: SchedulerState,  # noqa: ARG002
        request: RequestInfo,  # noqa: ARG002
    ) -> SchedulerUpdateAction:
        """
        Refuse to evaluate the placeholder as a constraint.

        :param state: Current scheduler state (unused)
        :param request: Individual request information (unused)
        :raises RuntimeError: Always raised for unserializable constraints
        """
        message = (
            "Cannot invoke unserializable constraint instance. "
            "This constraint was not properly serialized and cannot be executed."
        )
        raise RuntimeError(message)
386
+
387
+
388
@ConstraintsInitializerFactory.register(  # type: ignore[arg-type]
    ["max_number", "max_num", "max_requests", "max_req"]
)
class MaxNumberConstraint(PydanticConstraintInitializer):
    """
    Constraint that limits execution based on maximum request counts.

    Stops request queuing when created requests reach the limit and stops local
    request processing when processed requests reach the limit. Reports progress
    as the remaining request count and remaining fraction.

    ``max_num`` may be a single number or a list of numbers. With a list, the
    entry at ``current_index`` (clamped to the list bounds) is the active limit.
    """

    type_: Literal["max_number"] = "max_number"  # type: ignore[assignment]
    max_num: int | float | list[int | float] = Field(
        description="Maximum number of requests allowed before triggering constraint",
    )
    current_index: int = Field(
        default=-1, description="Current index for list-based max_num values"
    )

    @classmethod
    def validated_kwargs(
        cls, max_num: int | float | list[int | float] | None = None, **kwargs
    ) -> dict[str, Any]:
        """
        Validate and process arguments for MaxNumberConstraint creation.

        ``max_num`` defaults to ``None`` so the limit can be supplied through
        one of the keyword aliases alone; the first alias holding a non-``None``
        value is used.

        :param max_num: Maximum number of requests to allow
        :param kwargs: Supports the aliases max_number, max_requests, max_req,
            and an optional current_index
        :return: Dictionary with max_num and current_index entries
        """
        # "max_num" itself is bound to the named parameter, so only the other
        # registered aliases can appear in kwargs.
        for alias in ("max_number", "max_requests", "max_req"):
            if max_num is None:
                max_num = kwargs.get(alias)

        return {"max_num": max_num, "current_index": kwargs.get("current_index", -1)}

    def create_constraint(self, **kwargs) -> Constraint:  # noqa: ARG002
        """
        Advance the list index and return a copy of this model as the constraint.

        The index is incremented on this instance before copying, so each call
        hands out a copy pointing at the next list entry.

        :param kwargs: Additional keyword arguments (unused)
        :return: Copy of this instance, usable as the constraint
        """
        self.current_index += 1

        return self.model_copy()  # type: ignore[return-value]

    def __call__(
        self,
        state: SchedulerState,
        request_info: RequestInfo,  # noqa: ARG002
    ) -> SchedulerUpdateAction:
        """
        Evaluate constraint against current scheduler state and request count.

        :param state: Current scheduler state with request counts
        :param request_info: Individual request information (unused)
        :return: Action indicating whether to continue or stop operations
        """
        # current_index starts at -1 until create_constraint is called; clamp
        # it into the valid range of the list.
        current_index = max(0, self.current_index)
        max_num = (
            self.max_num
            if isinstance(self.max_num, int | float)
            else self.max_num[min(current_index, len(self.max_num) - 1)]
        )

        create_exceeded = state.created_requests >= max_num
        processed_exceeded = state.processed_requests >= max_num
        remaining_requests = min(max(0, max_num - state.processed_requests), max_num)
        remaining_fraction = remaining_requests / float(max_num)

        return SchedulerUpdateAction(
            request_queuing="stop" if create_exceeded else "continue",
            request_processing="stop_local" if processed_exceeded else "continue",
            metadata={
                "max_number": max_num,
                "create_exceeded": create_exceeded,
                "processed_exceeded": processed_exceeded,
                "created_requests": state.created_requests,
                "processed_requests": state.processed_requests,
                "remaining_fraction": remaining_fraction,
                "remaining_requests": remaining_requests,
            },
            progress=SchedulerUpdateActionProgress(
                remaining_fraction=remaining_fraction,
                remaining_requests=remaining_requests,
            ),
        )

    @field_validator("max_num")
    @classmethod
    def _validate_max_num(
        cls, value: int | float | list[int | float]
    ) -> int | float | list[int | float]:
        """
        Require every max_num entry to be a positive number.

        :param value: Single limit or list of limits
        :return: The single value when one entry was given, otherwise the list
        :raises ValueError: If any entry is falsy, non-numeric, or not positive
        """
        if not isinstance(value, list):
            value = [value]
        for val in value:
            if not val:
                raise ValueError(
                    f"max_num must be set and truthful, received {value} ({val} failed)"
                )
            if not isinstance(val, int | float) or val <= 0:
                raise ValueError(
                    f"max_num must be a positive num, received {value} ({val} failed)"
                )

        # Collapse a one-element list back to a scalar.
        return value[0] if isinstance(value, list) and len(value) == 1 else value
498
+
499
+
500
@ConstraintsInitializerFactory.register(  # type: ignore[arg-type]
    ["max_duration", "max_dur", "max_sec", "max_seconds", "max_min", "max_minutes"]
)
class MaxDurationConstraint(PydanticConstraintInitializer):
    """
    Constraint that limits execution based on maximum time duration.

    Stops both request queuing and local processing once the wall-clock time
    elapsed since ``state.start_time`` reaches the maximum duration. Reports
    progress as the remaining duration and remaining fraction.

    ``max_duration`` may be a single number or a list of numbers. With a list,
    the entry at ``current_index`` (clamped to the list bounds) is active.
    """

    type_: Literal["max_duration"] = "max_duration"  # type: ignore[assignment]
    max_duration: int | float | list[int | float] = Field(
        description="Maximum duration in seconds before triggering constraint"
    )
    current_index: int = Field(default=-1, description="Current index in duration list")

    @classmethod
    def validated_kwargs(
        cls, max_duration: int | float | list[int | float] | None = None, **kwargs
    ) -> dict[str, Any]:
        """
        Validate and process arguments for MaxDurationConstraint creation.

        Second-based aliases take precedence over minute-based ones. Minute
        values are converted to seconds; a list of minutes is converted
        entry by entry.

        :param max_duration: Maximum duration in seconds
        :param kwargs: Supports the aliases max_dur, max_sec, max_seconds,
            max_min, max_minutes, and an optional current_index
        :return: Dictionary with max_duration and current_index entries
        """
        for alias in ("max_dur", "max_sec", "max_seconds"):
            if max_duration is None:
                max_duration = kwargs.get(alias)

        for alias in ("max_min", "max_minutes"):
            minutes = kwargs.get(alias)
            if minutes is not None and max_duration is None:
                # Scale sequences element-wise: ``sequence * 60`` would repeat
                # the sequence 60 times instead of converting to seconds.
                max_duration = (
                    [entry * 60 for entry in minutes]
                    if isinstance(minutes, (list, tuple))
                    else minutes * 60
                )

        return {
            "max_duration": max_duration,
            "current_index": kwargs.get("current_index", -1),
        }

    def create_constraint(self, **kwargs) -> Constraint:  # noqa: ARG002
        """
        Advance the list index and return a copy of this model as the constraint.

        The index is incremented on this instance before copying, so each call
        hands out a copy pointing at the next list entry.

        :param kwargs: Additional keyword arguments (unused)
        :return: Copy of this instance, usable as the constraint
        """
        self.current_index += 1

        return self.model_copy()  # type: ignore[return-value]

    def __call__(
        self,
        state: SchedulerState,
        request_info: RequestInfo,  # noqa: ARG002
    ) -> SchedulerUpdateAction:
        """
        Evaluate constraint against current scheduler state and elapsed time.

        :param state: Current scheduler state with start time
        :param request_info: Individual request information (unused)
        :return: Action indicating whether to continue or stop operations
        """
        # current_index starts at -1 until create_constraint is called; clamp
        # it into the valid range of the list.
        current_index = max(0, self.current_index)
        max_duration = (
            self.max_duration
            if isinstance(self.max_duration, int | float)
            else self.max_duration[min(current_index, len(self.max_duration) - 1)]
        )

        current_time = time.time()
        elapsed = current_time - state.start_time
        duration_exceeded = elapsed >= max_duration
        remaining_duration = min(max(0.0, max_duration - elapsed), max_duration)
        remaining_fraction = remaining_duration / float(max_duration)

        return SchedulerUpdateAction(
            request_queuing="stop" if duration_exceeded else "continue",
            request_processing="stop_local" if duration_exceeded else "continue",
            metadata={
                "max_duration": max_duration,
                "elapsed_time": elapsed,
                "duration_exceeded": duration_exceeded,
                "start_time": state.start_time,
                "current_time": current_time,
            },
            progress=SchedulerUpdateActionProgress(
                remaining_fraction=remaining_fraction,
                remaining_duration=remaining_duration,
            ),
        )

    @field_validator("max_duration")
    @classmethod
    def _validate_max_duration(
        cls, value: int | float | list[int | float]
    ) -> int | float | list[int | float]:
        """
        Require every max_duration entry to be a positive number.

        :param value: Single duration or list of durations
        :return: The single value when one entry was given, otherwise the list
        :raises ValueError: If any entry is falsy, non-numeric, or not positive
        """
        if not isinstance(value, list):
            value = [value]
        for val in value:
            if not val:
                raise ValueError(
                    "max_duration must be set and truthful, "
                    f"received {value} ({val} failed)"
                )
            if not isinstance(val, int | float) or val <= 0:
                raise ValueError(
                    "max_duration must be a positive num, "
                    f"received {value} ({val} failed)"
                )

        # Collapse a one-element list back to a scalar.
        return value[0] if isinstance(value, list) and len(value) == 1 else value
617
+
618
+
619
@ConstraintsInitializerFactory.register(  # type: ignore[arg-type]
    ["max_errors", "max_err", "max_error", "max_errs"]
)
class MaxErrorsConstraint(PydanticConstraintInitializer):
    """
    Constraint that limits execution based on absolute error count.

    Stops request queuing and all request processing once
    ``state.errored_requests`` reaches the maximum threshold.

    ``max_errors`` may be a single number or a list of numbers. With a list,
    the entry at ``current_index`` (clamped to the list bounds) is active.
    """

    type_: Literal["max_errors"] = "max_errors"  # type: ignore[assignment]
    max_errors: int | float | list[int | float] = Field(
        description="Maximum number of errors allowed before triggering constraint",
    )
    current_index: int = Field(default=-1, description="Current index in error list")

    @classmethod
    def validated_kwargs(
        cls, max_errors: int | float | list[int | float] | None = None, **kwargs
    ) -> dict[str, Any]:
        """
        Validate and process arguments for MaxErrorsConstraint creation.

        When ``max_errors`` is not given, the first alias holding a
        non-``None`` value is used.

        :param max_errors: Maximum number of errors to allow
        :param kwargs: Supports the aliases max_err, max_error, max_errs,
            and an optional current_index
        :return: Dictionary with max_errors and current_index entries
        """
        # "max_errors" itself is bound to the named parameter, so only the
        # other registered aliases can appear in kwargs.
        for alias in ("max_err", "max_error", "max_errs"):
            if max_errors is None:
                max_errors = kwargs.get(alias)

        return {
            "max_errors": max_errors,
            "current_index": kwargs.get("current_index", -1),
        }

    def create_constraint(self, **kwargs) -> Constraint:  # noqa: ARG002
        """
        Advance the list index and return a copy of this model as the constraint.

        The index is incremented on this instance before copying, so each call
        hands out a copy pointing at the next list entry.

        :param kwargs: Additional keyword arguments (unused)
        :return: Copy of this instance, usable as the constraint
        """
        self.current_index += 1

        return self.model_copy()  # type: ignore[return-value]

    def __call__(
        self,
        state: SchedulerState,
        request_info: RequestInfo,  # noqa: ARG002
    ) -> SchedulerUpdateAction:
        """
        Evaluate constraint against current error count.

        :param state: Current scheduler state with error counts
        :param request_info: Individual request information (unused)
        :return: Action indicating whether to continue or stop operations
        """
        # current_index starts at -1 until create_constraint is called; clamp
        # it into the valid range of the list.
        current_index = max(0, self.current_index)
        max_errors = (
            self.max_errors
            if isinstance(self.max_errors, int | float)
            else self.max_errors[min(current_index, len(self.max_errors) - 1)]
        )
        errors_exceeded = state.errored_requests >= max_errors

        return SchedulerUpdateAction(
            request_queuing="stop" if errors_exceeded else "continue",
            request_processing="stop_all" if errors_exceeded else "continue",
            metadata={
                "max_errors": max_errors,
                "errors_exceeded": errors_exceeded,
                "current_errors": state.errored_requests,
            },
        )

    @field_validator("max_errors")
    @classmethod
    def _validate_max_errors(
        cls, value: int | float | list[int | float]
    ) -> int | float | list[int | float]:
        """
        Require every max_errors entry to be a positive number.

        :param value: Single threshold or list of thresholds
        :return: The single value when one entry was given, otherwise the list
        :raises ValueError: If any entry is falsy, non-numeric, or not positive
        """
        if not isinstance(value, list):
            value = [value]
        for val in value:
            if not val:
                raise ValueError(
                    "max_errors must be set and truthful, "
                    f"received {value} ({val} failed)"
                )
            if not isinstance(val, int | float) or val <= 0:
                raise ValueError(
                    "max_errors must be a positive num, "
                    f"received {value} ({val} failed)"
                )

        # Collapse a one-element list back to a scalar.
        return value[0] if isinstance(value, list) and len(value) == 1 else value
719
+
720
+
721
@ConstraintsInitializerFactory.register(  # type: ignore[arg-type]
    ["max_error_rate", "max_err_rate", "max_errors_rate"]
)
class MaxErrorRateConstraint(PydanticConstraintInitializer):
    """
    Constraint that limits execution based on sliding window error rate.

    Tracks error status of recent requests in a sliding window and stops all
    processing when the error rate reaches the threshold. Only applies the
    constraint after the scheduler has processed at least ``window_size``
    requests.
    """

    type_: Literal["max_error_rate"] = "max_error_rate"  # type: ignore[assignment]
    max_error_rate: int | float | list[int | float] = Field(
        description="Maximum error rate allowed (0.0, 1.0)"
    )
    window_size: int | float = Field(
        default=30,
        gt=0,
        description="Size of sliding window for calculating error rate",
    )
    error_window: list[bool] = Field(
        default_factory=list,
        description="Sliding window tracking error status of recent requests",
    )
    # Used by __call__ to pick an entry when max_error_rate is a list;
    # advanced by one on every create_constraint() call.
    current_index: int = Field(
        default=-1, description="Current index in the error window"
    )

    @classmethod
    def validated_kwargs(
        cls,
        max_error_rate: int | float | list[int | float] | None = None,
        **kwargs,
    ) -> dict[str, Any]:
        """
        Validate and process arguments for MaxErrorRateConstraint creation.

        :param max_error_rate: Maximum error rate to allow. May be omitted when
            the value is supplied under one of the alias keys in ``kwargs``
        :param kwargs: Supports the aliases max_err_rate and max_errors_rate,
            plus optional window_size, error_window, and current_index
        :return: Dictionary with max_error_rate, window_size, error_window,
            and current_index entries
        """
        # The parameter defaults to None so the alias lookup below is actually
        # reachable; previously a missing positional raised TypeError first.
        aliases = ["max_error_rate", "max_err_rate", "max_errors_rate"]
        for alias in aliases:
            if max_error_rate is None:
                max_error_rate = kwargs.get(alias)

        return {
            "max_error_rate": max_error_rate,
            "window_size": kwargs.get(
                "window_size", settings.constraint_error_window_size
            ),
            "error_window": kwargs.get("error_window", []),
            "current_index": kwargs.get("current_index", -1),
        }

    def create_constraint(self, **kwargs) -> Constraint:  # noqa: ARG002
        """
        Create a new instance of MaxErrorRateConstraint (due to stateful window).

        ``current_index`` is advanced on this instance before copying.

        :param kwargs: Additional keyword arguments (unused)
        :return: New instance of the constraint with its own error window list
        """
        self.current_index += 1

        # model_copy() is shallow by default, which would leave the copy and
        # this initializer appending to the very same list. Give the copy its
        # own list so window state cannot leak between created constraints.
        return self.model_copy(  # type: ignore[return-value]
            update={"error_window": list(self.error_window)}
        )

    def __call__(
        self, state: SchedulerState, request_info: RequestInfo
    ) -> SchedulerUpdateAction:
        """
        Evaluate constraint against sliding window error rate.

        Requests whose status is completed, errored, or cancelled are recorded
        in the window (True when errored); other statuses leave it untouched.

        :param state: Current scheduler state with request counts
        :param request_info: Individual request with completion status
        :return: Action indicating whether to continue or stop operations
        """
        # Clamp the index so list-valued thresholds never go out of range.
        current_index = max(0, self.current_index)
        max_error_rate = (
            self.max_error_rate
            if isinstance(self.max_error_rate, int | float)
            else self.max_error_rate[min(current_index, len(self.max_error_rate) - 1)]
        )

        if request_info.status in ["completed", "errored", "cancelled"]:
            self.error_window.append(request_info.status == "errored")
            if len(self.error_window) > self.window_size:
                # Drop the oldest entry to keep the window bounded.
                self.error_window.pop(0)

        error_count = sum(self.error_window)
        window_requests = len(self.error_window)
        error_rate = (
            error_count / float(window_requests) if window_requests > 0 else 0.0
        )
        exceeded_min_processed = state.processed_requests >= self.window_size
        exceeded_error_rate = error_rate >= max_error_rate
        should_stop = exceeded_min_processed and exceeded_error_rate

        return SchedulerUpdateAction(
            request_queuing="stop" if should_stop else "continue",
            request_processing="stop_all" if should_stop else "continue",
            metadata={
                "max_error_rate": max_error_rate,
                "window_size": self.window_size,
                "error_count": error_count,
                "processed_count": state.processed_requests,
                "current_window_size": len(self.error_window),
                "current_error_rate": error_rate,
                "exceeded_min_processed": exceeded_min_processed,
                "exceeded_error_rate": exceeded_error_rate,
            },
        )

    @field_validator("max_error_rate")
    @classmethod
    def _validate_max_error_rate(
        cls, value: int | float | list[int | float]
    ) -> int | float | list[int | float]:
        """
        Validate that max_error_rate is a rate, or non-empty list of rates,
        strictly between 0 and 1.

        :param value: Scalar rate or list of rates
        :return: The scalar, the sole element of a one-item list, or the list
        :raises ValueError: If the list is empty or any entry is falsy,
            non-numeric, or outside the open interval (0, 1)
        """
        values = value if isinstance(value, list) else [value]

        if not values:
            # An empty list would otherwise pass and crash later in __call__
            # with an IndexError when it is indexed.
            raise ValueError(
                f"max_error_rate must be set and truthful, received {values}"
            )

        for val in values:
            if not val:
                raise ValueError(
                    "max_error_rate must be set and truthful, "
                    f"received {values} ({val} failed)"
                )
            if not isinstance(val, int | float) or val <= 0 or val >= 1:
                raise ValueError(
                    "max_error_rate must be a number between 0 and 1,"
                    f"received {values} ({val} failed)"
                )

        return values[0] if len(values) == 1 else values
860
+
861
+
862
@ConstraintsInitializerFactory.register(  # type: ignore[arg-type]
    ["max_global_error_rate", "max_global_err_rate", "max_global_errors_rate"]
)
class MaxGlobalErrorRateConstraint(PydanticConstraintInitializer):
    """
    Constraint that limits execution based on global error rate.

    Calculates error rate across all processed requests and stops all processing
    when the rate reaches the threshold. Only applies the constraint after
    ``min_processed`` requests have been processed (or immediately when
    ``min_processed`` is None).
    """

    type_: Literal["max_global_error_rate"] = "max_global_error_rate"  # type: ignore[assignment]
    # Widened to accept lists: validated_kwargs, the validator, __call__ and
    # current_index all handle list values, but the former ``int | float``
    # annotation made pydantic reject them before the validator ran.
    max_error_rate: int | float | list[int | float] = Field(
        description="Maximum error rate allowed (0.0 to 1.0)"
    )
    min_processed: int | float | None = Field(
        default=30,
        gt=0,
        description="Minimum requests processed before applying error rate constraint",
    )
    current_index: int = Field(
        default=-1, description="Current index for list-based max_error_rate values"
    )

    @classmethod
    def validated_kwargs(
        cls,
        max_error_rate: int | float | list[int | float] | None = None,
        **kwargs,
    ) -> dict[str, Any]:
        """
        Validate and process arguments for MaxGlobalErrorRateConstraint creation.

        :param max_error_rate: Maximum error rate to allow. May be omitted when
            the value is supplied under one of the alias keys in ``kwargs``
        :param kwargs: Supports max_global_error_rate, max_global_err_rate,
            max_global_errors_rate, optional min_processed, and optional
            current_index
        :return: Dictionary with max_error_rate, min_processed, and
            current_index entries
        """
        # The parameter defaults to None so the alias lookup below is actually
        # reachable; previously a missing positional raised TypeError first.
        for alias in [
            "max_global_error_rate",
            "max_global_err_rate",
            "max_global_errors_rate",
        ]:
            if max_error_rate is None:
                max_error_rate = kwargs.get(alias)

        return {
            "max_error_rate": max_error_rate,
            "min_processed": kwargs.get(
                "min_processed", settings.constraint_error_min_processed
            ),
            "current_index": kwargs.get("current_index", -1),
        }

    def create_constraint(self, **kwargs) -> Constraint:  # noqa: ARG002
        """
        Advance the internal index and return a copy as the constraint instance.

        :param kwargs: Additional keyword arguments (unused)
        :return: Copy of this instance taken after ``current_index`` was
            incremented
        """
        self.current_index += 1

        return self.model_copy()  # type: ignore[return-value]

    def __call__(
        self,
        state: SchedulerState,
        request_info: RequestInfo,  # noqa: ARG002
    ) -> SchedulerUpdateAction:
        """
        Evaluate constraint against global error rate.

        :param state: Current scheduler state with global request and error counts
        :param request_info: Individual request information (unused)
        :return: Action indicating whether to continue or stop operations
        """
        # Clamp the index so list-valued thresholds never go out of range.
        current_index = max(0, self.current_index)
        max_error_rate = (
            self.max_error_rate
            if isinstance(self.max_error_rate, int | float)
            else self.max_error_rate[min(current_index, len(self.max_error_rate) - 1)]
        )

        exceeded_min_processed = (
            self.min_processed is None or state.processed_requests >= self.min_processed
        )
        # Guard the division: nothing processed yet means a rate of zero.
        error_rate = (
            state.errored_requests / float(state.processed_requests)
            if state.processed_requests > 0
            else 0.0
        )
        exceeded_error_rate = error_rate >= max_error_rate
        should_stop = exceeded_min_processed and exceeded_error_rate

        return SchedulerUpdateAction(
            request_queuing="stop" if should_stop else "continue",
            request_processing="stop_all" if should_stop else "continue",
            metadata={
                "max_error_rate": max_error_rate,
                "min_processed": self.min_processed,
                "processed_requests": state.processed_requests,
                "errored_requests": state.errored_requests,
                "error_rate": error_rate,
                "exceeded_min_processed": exceeded_min_processed,
                "exceeded_error_rate": exceeded_error_rate,
            },
        )

    @field_validator("max_error_rate")
    @classmethod
    def _validate_max_error_rate(
        cls, value: int | float | list[int | float]
    ) -> int | float | list[int | float]:
        """
        Validate that max_error_rate is a rate, or non-empty list of rates,
        strictly between 0 and 1.

        :param value: Scalar rate or list of rates
        :return: The scalar, the sole element of a one-item list, or the list
        :raises ValueError: If the list is empty or any entry is falsy,
            non-numeric, or outside the open interval (0, 1)
        """
        values = value if isinstance(value, list) else [value]

        if not values:
            # An empty list would otherwise pass and crash later in __call__
            # with an IndexError when it is indexed.
            raise ValueError(
                f"max_error_rate must be set and truthful, received {values}"
            )

        for val in values:
            if not val:
                raise ValueError(
                    "max_error_rate must be set and truthful, "
                    f"received {values} ({val} failed)"
                )
            if not isinstance(val, int | float) or val <= 0 or val >= 1:
                raise ValueError(
                    "max_error_rate must be a number between 0 and 1,"
                    f"received {values} ({val} failed)"
                )

        return values[0] if len(values) == 1 else values
992
+
993
+
994
class RequestsExhaustedConstraint(StandardBaseModel, InfoMixin):
    """
    Constraint that stops scheduling once a fixed number of requests is used up.

    Queuing stops when the number of created requests reaches ``num_requests``;
    processing is stopped locally when the number of processed requests
    reaches it.
    """

    type_: Literal["requests_exhausted"] = "requests_exhausted"  # type: ignore[assignment]
    num_requests: int

    @property
    def info(self) -> dict[str, Any]:
        """
        Extract serializable information from this constraint.

        :return: Dictionary produced by ``model_dump()`` for this instance
        """
        return self.model_dump()

    def __call__(
        self,
        state: SchedulerState,
        request_info: RequestInfo,  # noqa: ARG002
    ) -> SchedulerUpdateAction:
        """
        Evaluate created and processed request counts against ``num_requests``.

        :param state: Current scheduler state with created and processed counts
        :param request_info: Individual request information (unused)
        :return: Action with stop/continue decisions plus remaining-work
            progress (fraction clamped to [0.0, 1.0], count floored at 0)
        """
        create_exceeded = state.created_requests >= self.num_requests
        processed_exceeded = state.processed_requests >= self.num_requests

        if self.num_requests:
            remaining_fraction = min(
                max(0.0, 1.0 - state.processed_requests / float(self.num_requests)),
                1.0,
            )
        else:
            # num_requests == 0 would divide by zero; with nothing to run,
            # nothing remains.
            remaining_fraction = 0.0
        remaining_requests = max(0, self.num_requests - state.processed_requests)

        return SchedulerUpdateAction(
            request_queuing="stop" if create_exceeded else "continue",
            request_processing="stop_local" if processed_exceeded else "continue",
            metadata={
                "num_requests": self.num_requests,
                "create_exceeded": create_exceeded,
                "processed_exceeded": processed_exceeded,
                "created_requests": state.created_requests,
                "processed_requests": state.processed_requests,
                "remaining_fraction": remaining_fraction,
                "remaining_requests": remaining_requests,
            },
            progress=SchedulerUpdateActionProgress(
                remaining_fraction=remaining_fraction,
                remaining_requests=remaining_requests,
            ),
        )