guidellm 0.3.1__py3-none-any.whl → 0.6.0a5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (141) hide show
  1. guidellm/__init__.py +5 -2
  2. guidellm/__main__.py +524 -255
  3. guidellm/backends/__init__.py +33 -0
  4. guidellm/backends/backend.py +109 -0
  5. guidellm/backends/openai.py +340 -0
  6. guidellm/backends/response_handlers.py +428 -0
  7. guidellm/benchmark/__init__.py +69 -39
  8. guidellm/benchmark/benchmarker.py +160 -316
  9. guidellm/benchmark/entrypoints.py +560 -127
  10. guidellm/benchmark/outputs/__init__.py +24 -0
  11. guidellm/benchmark/outputs/console.py +633 -0
  12. guidellm/benchmark/outputs/csv.py +721 -0
  13. guidellm/benchmark/outputs/html.py +473 -0
  14. guidellm/benchmark/outputs/output.py +169 -0
  15. guidellm/benchmark/outputs/serialized.py +69 -0
  16. guidellm/benchmark/profiles.py +718 -0
  17. guidellm/benchmark/progress.py +553 -556
  18. guidellm/benchmark/scenarios/__init__.py +40 -0
  19. guidellm/benchmark/scenarios/chat.json +6 -0
  20. guidellm/benchmark/scenarios/rag.json +6 -0
  21. guidellm/benchmark/schemas/__init__.py +66 -0
  22. guidellm/benchmark/schemas/base.py +402 -0
  23. guidellm/benchmark/schemas/generative/__init__.py +55 -0
  24. guidellm/benchmark/schemas/generative/accumulator.py +841 -0
  25. guidellm/benchmark/schemas/generative/benchmark.py +163 -0
  26. guidellm/benchmark/schemas/generative/entrypoints.py +381 -0
  27. guidellm/benchmark/schemas/generative/metrics.py +927 -0
  28. guidellm/benchmark/schemas/generative/report.py +158 -0
  29. guidellm/data/__init__.py +34 -4
  30. guidellm/data/builders.py +541 -0
  31. guidellm/data/collators.py +16 -0
  32. guidellm/data/config.py +120 -0
  33. guidellm/data/deserializers/__init__.py +49 -0
  34. guidellm/data/deserializers/deserializer.py +141 -0
  35. guidellm/data/deserializers/file.py +223 -0
  36. guidellm/data/deserializers/huggingface.py +94 -0
  37. guidellm/data/deserializers/memory.py +194 -0
  38. guidellm/data/deserializers/synthetic.py +246 -0
  39. guidellm/data/entrypoints.py +52 -0
  40. guidellm/data/loaders.py +190 -0
  41. guidellm/data/preprocessors/__init__.py +27 -0
  42. guidellm/data/preprocessors/formatters.py +410 -0
  43. guidellm/data/preprocessors/mappers.py +196 -0
  44. guidellm/data/preprocessors/preprocessor.py +30 -0
  45. guidellm/data/processor.py +29 -0
  46. guidellm/data/schemas.py +175 -0
  47. guidellm/data/utils/__init__.py +6 -0
  48. guidellm/data/utils/dataset.py +94 -0
  49. guidellm/extras/__init__.py +4 -0
  50. guidellm/extras/audio.py +220 -0
  51. guidellm/extras/vision.py +242 -0
  52. guidellm/logger.py +2 -2
  53. guidellm/mock_server/__init__.py +8 -0
  54. guidellm/mock_server/config.py +84 -0
  55. guidellm/mock_server/handlers/__init__.py +17 -0
  56. guidellm/mock_server/handlers/chat_completions.py +280 -0
  57. guidellm/mock_server/handlers/completions.py +280 -0
  58. guidellm/mock_server/handlers/tokenizer.py +142 -0
  59. guidellm/mock_server/models.py +510 -0
  60. guidellm/mock_server/server.py +238 -0
  61. guidellm/mock_server/utils.py +302 -0
  62. guidellm/scheduler/__init__.py +69 -26
  63. guidellm/scheduler/constraints/__init__.py +49 -0
  64. guidellm/scheduler/constraints/constraint.py +325 -0
  65. guidellm/scheduler/constraints/error.py +411 -0
  66. guidellm/scheduler/constraints/factory.py +182 -0
  67. guidellm/scheduler/constraints/request.py +312 -0
  68. guidellm/scheduler/constraints/saturation.py +722 -0
  69. guidellm/scheduler/environments.py +252 -0
  70. guidellm/scheduler/scheduler.py +137 -368
  71. guidellm/scheduler/schemas.py +358 -0
  72. guidellm/scheduler/strategies.py +617 -0
  73. guidellm/scheduler/worker.py +413 -419
  74. guidellm/scheduler/worker_group.py +712 -0
  75. guidellm/schemas/__init__.py +65 -0
  76. guidellm/schemas/base.py +417 -0
  77. guidellm/schemas/info.py +188 -0
  78. guidellm/schemas/request.py +235 -0
  79. guidellm/schemas/request_stats.py +349 -0
  80. guidellm/schemas/response.py +124 -0
  81. guidellm/schemas/statistics.py +1018 -0
  82. guidellm/{config.py → settings.py} +31 -24
  83. guidellm/utils/__init__.py +71 -8
  84. guidellm/utils/auto_importer.py +98 -0
  85. guidellm/utils/cli.py +132 -5
  86. guidellm/utils/console.py +566 -0
  87. guidellm/utils/encoding.py +778 -0
  88. guidellm/utils/functions.py +159 -0
  89. guidellm/utils/hf_datasets.py +1 -2
  90. guidellm/utils/hf_transformers.py +4 -4
  91. guidellm/utils/imports.py +9 -0
  92. guidellm/utils/messaging.py +1118 -0
  93. guidellm/utils/mixins.py +115 -0
  94. guidellm/utils/random.py +3 -4
  95. guidellm/utils/registry.py +220 -0
  96. guidellm/utils/singleton.py +133 -0
  97. guidellm/utils/synchronous.py +159 -0
  98. guidellm/utils/text.py +163 -50
  99. guidellm/utils/typing.py +41 -0
  100. guidellm/version.py +2 -2
  101. guidellm-0.6.0a5.dist-info/METADATA +364 -0
  102. guidellm-0.6.0a5.dist-info/RECORD +109 -0
  103. guidellm/backend/__init__.py +0 -23
  104. guidellm/backend/backend.py +0 -259
  105. guidellm/backend/openai.py +0 -708
  106. guidellm/backend/response.py +0 -136
  107. guidellm/benchmark/aggregator.py +0 -760
  108. guidellm/benchmark/benchmark.py +0 -837
  109. guidellm/benchmark/output.py +0 -997
  110. guidellm/benchmark/profile.py +0 -409
  111. guidellm/benchmark/scenario.py +0 -104
  112. guidellm/data/prideandprejudice.txt.gz +0 -0
  113. guidellm/dataset/__init__.py +0 -22
  114. guidellm/dataset/creator.py +0 -213
  115. guidellm/dataset/entrypoints.py +0 -42
  116. guidellm/dataset/file.py +0 -92
  117. guidellm/dataset/hf_datasets.py +0 -62
  118. guidellm/dataset/in_memory.py +0 -132
  119. guidellm/dataset/synthetic.py +0 -287
  120. guidellm/objects/__init__.py +0 -18
  121. guidellm/objects/pydantic.py +0 -89
  122. guidellm/objects/statistics.py +0 -953
  123. guidellm/preprocess/__init__.py +0 -3
  124. guidellm/preprocess/dataset.py +0 -374
  125. guidellm/presentation/__init__.py +0 -28
  126. guidellm/presentation/builder.py +0 -27
  127. guidellm/presentation/data_models.py +0 -232
  128. guidellm/presentation/injector.py +0 -66
  129. guidellm/request/__init__.py +0 -18
  130. guidellm/request/loader.py +0 -284
  131. guidellm/request/request.py +0 -79
  132. guidellm/request/types.py +0 -10
  133. guidellm/scheduler/queues.py +0 -25
  134. guidellm/scheduler/result.py +0 -155
  135. guidellm/scheduler/strategy.py +0 -495
  136. guidellm-0.3.1.dist-info/METADATA +0 -329
  137. guidellm-0.3.1.dist-info/RECORD +0 -62
  138. {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/WHEEL +0 -0
  139. {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/entry_points.txt +0 -0
  140. {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/licenses/LICENSE +0 -0
  141. {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,49 @@
1
+ """
2
+ Constraint system for scheduler behavior control and request processing limits.
3
+
4
+ Provides flexible constraints for managing scheduler behavior with configurable
5
+ thresholds based on time, error rates, and request counts. Constraints evaluate
6
+ scheduler state and individual requests to determine whether processing should
7
+ continue or stop based on predefined limits. The constraint system enables
8
+ sophisticated benchmark stopping criteria through composable constraint types.
9
+ """
10
+
11
+ from .constraint import (
12
+ Constraint,
13
+ ConstraintInitializer,
14
+ PydanticConstraintInitializer,
15
+ SerializableConstraintInitializer,
16
+ UnserializableConstraintInitializer,
17
+ )
18
+ from .error import (
19
+ MaxErrorRateConstraint,
20
+ MaxErrorsConstraint,
21
+ MaxGlobalErrorRateConstraint,
22
+ )
23
+ from .factory import ConstraintsInitializerFactory
24
+ from .request import (
25
+ MaxDurationConstraint,
26
+ MaxNumberConstraint,
27
+ RequestsExhaustedConstraint,
28
+ )
29
+ from .saturation import (
30
+ OverSaturationConstraint,
31
+ OverSaturationConstraintInitializer,
32
+ )
33
+
34
+ __all__ = [
35
+ "Constraint",
36
+ "ConstraintInitializer",
37
+ "ConstraintsInitializerFactory",
38
+ "MaxDurationConstraint",
39
+ "MaxErrorRateConstraint",
40
+ "MaxErrorsConstraint",
41
+ "MaxGlobalErrorRateConstraint",
42
+ "MaxNumberConstraint",
43
+ "OverSaturationConstraint",
44
+ "OverSaturationConstraintInitializer",
45
+ "PydanticConstraintInitializer",
46
+ "RequestsExhaustedConstraint",
47
+ "SerializableConstraintInitializer",
48
+ "UnserializableConstraintInitializer",
49
+ ]
@@ -0,0 +1,325 @@
1
+ """
2
+ Core constraint system protocols and base classes.
3
+
4
+ Defines the fundamental protocols and base classes that form the foundation of the
5
+ constraint system. Constraints control scheduler behavior by evaluating scheduler
6
+ state and individual requests to determine whether processing should continue or
7
+ stop based on predefined limits. The constraint system enables sophisticated
8
+ benchmark stopping criteria through composable constraint types with support for
9
+ serialization, validation, and dynamic instantiation.
10
+
11
+ The module provides:
12
+ - Protocols defining the constraint interface contract
13
+ (Constraint, ConstraintInitializer)
14
+ - Base classes for Pydantic-based constraint initializers with serialization support
15
+ - Placeholder classes for handling unserializable constraint states
16
+
17
+ Example:
18
+ ::
19
+ from guidellm.scheduler.constraints import (
20
+ Constraint,
21
+ PydanticConstraintInitializer,
22
+ )
23
+
24
+ class MyConstraint(PydanticConstraintInitializer):
25
+ type_: str = "my_constraint"
26
+
27
+ def create_constraint(self) -> Constraint:
28
+ def evaluate(state, request):
29
+ return SchedulerUpdateAction(request_queuing="continue")
30
+ return evaluate
31
+ """
32
+
33
+ from __future__ import annotations
34
+
35
+ from abc import ABC, abstractmethod
36
+ from typing import Any, Literal, Protocol, runtime_checkable
37
+
38
+ from pydantic import Field
39
+
40
+ from guidellm.scheduler.schemas import SchedulerState, SchedulerUpdateAction
41
+ from guidellm.schemas import RequestInfo, StandardBaseModel
42
+ from guidellm.utils import InfoMixin
43
+
44
+ __all__ = [
45
+ "Constraint",
46
+ "ConstraintInitializer",
47
+ "PydanticConstraintInitializer",
48
+ "SerializableConstraintInitializer",
49
+ "UnserializableConstraintInitializer",
50
+ ]
51
+
52
+
53
@runtime_checkable
class Constraint(Protocol):
    """
    Structural type for callables that tell the scheduler how to proceed.

    Anything that can be called with the scheduler state and a request's
    information, and that returns a ``SchedulerUpdateAction``, satisfies this
    protocol: plain functions, closures, and instances defining ``__call__``
    all qualify. The protocol is runtime checkable, so ``isinstance`` may be
    used against it; that check only confirms the object is callable and does
    not inspect its signature.

    Example:
    ::
        def my_constraint(
            state: SchedulerState, request: RequestInfo
        ) -> SchedulerUpdateAction:
            if state.processing_requests > 100:
                return SchedulerUpdateAction(request_queuing="stop")
            return SchedulerUpdateAction(request_queuing="continue")
    """

    def __call__(
        self, state: SchedulerState, request: RequestInfo
    ) -> SchedulerUpdateAction:
        """
        Evaluate the constraint for one request against the scheduler state.

        :param state: Scheduler state the constraint inspects
        :param request: Information about the request being evaluated
        :return: Action describing whether scheduling should continue or stop
        """
        ...
84
+
85
+
86
@runtime_checkable
class ConstraintInitializer(Protocol):
    """
    Structural type for factory objects that build constraints.

    An initializer carries whatever configuration it needs and exposes a
    single ``create_constraint`` method returning a ready-to-use
    ``Constraint``. The protocol is runtime checkable; an ``isinstance`` test
    only verifies that a ``create_constraint`` attribute is present.

    Example:
    ::
        class MaxRequestsInitializer:
            def __init__(self, max_requests: int):
                self.max_requests = max_requests

            def create_constraint(self, **kwargs) -> Constraint:
                def evaluate(state, request):
                    if state.total_requests >= self.max_requests:
                        return SchedulerUpdateAction(request_queuing="stop")
                    return SchedulerUpdateAction(request_queuing="continue")
                return evaluate
    """

    def create_constraint(self, **kwargs) -> Constraint:
        """
        Build the constraint described by this initializer.

        :param kwargs: Optional configuration passed through at creation time
        :return: Callable constraint ready to be evaluated by the scheduler
        """
        ...
118
+
119
+
120
@runtime_checkable
class SerializableConstraintInitializer(Protocol):
    """
    Structural type for constraint initializers that can be validated and dumped.

    Declares the same ``create_constraint`` method that ``ConstraintInitializer``
    requires, plus three serialization hooks: ``validated_kwargs`` and
    ``model_validate`` (both class methods) and ``model_dump``. The protocol
    does not inherit from ``ConstraintInitializer``; it restates the method so
    that the two protocols can be checked independently. ``isinstance`` tests
    only verify that all four attributes are present.

    Example:
    ::
        class SerializableInitializer:
            def __init__(self, max_requests: int):
                self.max_requests = max_requests

            @classmethod
            def validated_kwargs(cls, *args, **kwargs) -> dict[str, Any]:
                return {"max_requests": kwargs.get("max_requests", 100)}

            @classmethod
            def model_validate(cls, **kwargs) -> ConstraintInitializer:
                return cls(**cls.validated_kwargs(**kwargs))

            def model_dump(self) -> dict[str, Any]:
                return {"type_": "max_requests", "max_requests": self.max_requests}

            def create_constraint(self, **kwargs) -> Constraint:
                ...  # build and return the constraint callable
    """

    @classmethod
    def validated_kwargs(cls, *args, **kwargs) -> dict[str, Any]:
        """
        Normalize raw arguments into the parameters used to build an initializer.

        :param args: Positional arguments describing the constraint
        :param kwargs: Keyword arguments describing the constraint
        :return: Dictionary of validated parameters
        """
        ...

    # NOTE(review): pydantic's BaseModel.model_validate takes the payload as a
    # single positional object, whereas this declaration accepts keyword
    # arguments only -- confirm which calling form implementers must support.
    @classmethod
    def model_validate(cls, **kwargs) -> ConstraintInitializer:
        """
        Build a validated initializer from configuration values.

        :param kwargs: Configuration values for the initializer
        :return: Validated constraint initializer instance
        """
        ...

    def model_dump(self) -> dict[str, Any]:
        """
        Serialize this initializer to a dictionary.

        :return: Dictionary representation of the initializer
        """
        ...

    def create_constraint(self, **kwargs) -> Constraint:
        """
        Build the constraint described by this initializer.

        :param kwargs: Optional configuration passed through at creation time
        :return: Callable constraint ready to be evaluated by the scheduler
        """
        ...
181
+
182
+
183
class PydanticConstraintInitializer(StandardBaseModel, ABC, InfoMixin):
    """
    Abstract Pydantic model that concrete constraint initializers derive from.

    Combines ``StandardBaseModel``, ``ABC``, and ``InfoMixin``. Subclasses must
    supply ``validated_kwargs`` and ``create_constraint``, both of which are
    declared abstract here, and inherit the ``info`` property, which reports
    the model's dumped fields.

    Example:
    ::
        class MaxDurationConstraintInitializer(PydanticConstraintInitializer):
            type_: str = "max_duration"
            max_seconds: float = Field(description="Maximum duration in seconds")

            @classmethod
            def validated_kwargs(cls, max_seconds: float, **kwargs) -> dict[str, Any]:
                return {"max_seconds": max_seconds}

            def create_constraint(self, **kwargs) -> Constraint:
                def evaluate(state, request):
                    if time.time() - state.start_time > self.max_seconds:
                        return SchedulerUpdateAction(request_queuing="stop")
                    return SchedulerUpdateAction(request_queuing="continue")
                return evaluate

    :ivar type_: Type identifier for the constraint initializer
    """

    # Declared without a default here; subclasses in this file assign one.
    type_: str = Field(description="Type identifier for the constraint initializer")

    @property
    def info(self) -> dict[str, Any]:
        """
        Report this initializer's configuration as a dictionary.

        :return: The result of dumping this model's fields
        """
        dumped: dict[str, Any] = self.model_dump()
        return dumped

    @classmethod
    @abstractmethod
    def validated_kwargs(cls, *args, **kwargs) -> dict[str, Any]:
        """
        Normalize raw arguments into parameters for building the initializer.

        Abstract: every concrete subclass provides its own handling of the
        input forms it accepts and returns the resulting parameter dictionary.

        :param args: Positional arguments passed for the constraint
        :param kwargs: Keyword arguments passed for the constraint
        :return: Dictionary of validated parameters
        """
        ...

    @abstractmethod
    def create_constraint(self, **kwargs) -> Constraint:
        """
        Build the constraint configured by this initializer.

        Abstract: every concrete subclass returns its own constraint callable,
        ready to be evaluated against scheduler state and requests.

        :param kwargs: Additional keyword arguments (usually unused)
        :return: Configured constraint instance
        """
        ...
253
+
254
+
255
class UnserializableConstraintInitializer(PydanticConstraintInitializer):
    """
    Stand-in initializer for a constraint that could not be serialized.

    Keeps whatever information about the original constraint was supplied in
    ``orig_info`` but can never produce or act as a working constraint: both
    ``create_constraint`` and ``__call__`` unconditionally raise
    ``RuntimeError``. Presumably instantiated by the constraint factory when
    serialization fails -- confirm against the factory module.

    Example:
    ::
        unserializable = UnserializableConstraintInitializer(
            orig_info={"type_": "custom", "data": non_serializable_object}
        )

        # Any attempt to use the placeholder fails loudly
        constraint = unserializable.create_constraint()  # Raises RuntimeError

    :ivar type_: Always "unserializable", marking the instance as a placeholder
    :ivar orig_info: Original constraint information before serialization failure
    """

    type_: Literal["unserializable"] = "unserializable"  # type: ignore[assignment]
    orig_info: dict[str, Any] = Field(
        description="Original constraint information before serialization failure",
        default_factory=dict,
    )

    @classmethod
    def validated_kwargs(
        cls, orig_info: dict[str, Any] | None = None, **_kwargs
    ) -> dict[str, Any]:
        """
        Package the original constraint information for model construction.

        :param orig_info: Original constraint information; a missing or empty
            value results in an empty dictionary
        :param _kwargs: Additional arguments (ignored)
        :return: Dictionary holding the single ``orig_info`` entry
        """
        preserved = orig_info if orig_info else {}
        return {"orig_info": preserved}

    def create_constraint(self, **_kwargs) -> Constraint:
        """
        Refuse to build a constraint from this placeholder.

        :param _kwargs: Additional keyword arguments (unused)
        :raises RuntimeError: Always, since a placeholder cannot be executed
        """
        message = (
            "Cannot create constraint from unserializable constraint instance. "
            "This constraint cannot be serialized and therefore cannot be executed."
        )
        raise RuntimeError(message)

    def __call__(
        self, state: SchedulerState, request: RequestInfo
    ) -> SchedulerUpdateAction:
        """
        Refuse to evaluate this placeholder as a constraint.

        :param state: Current scheduler state (unused)
        :param request: Individual request information (unused)
        :raises RuntimeError: Always, since a placeholder cannot be executed
        """
        del state, request  # accepted only to match the constraint call signature
        message = (
            "Cannot invoke unserializable constraint instance. "
            "This constraint was not properly serialized and cannot be executed."
        )
        raise RuntimeError(message)