guidellm 0.3.1__py3-none-any.whl → 0.6.0a5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (141) hide show
  1. guidellm/__init__.py +5 -2
  2. guidellm/__main__.py +524 -255
  3. guidellm/backends/__init__.py +33 -0
  4. guidellm/backends/backend.py +109 -0
  5. guidellm/backends/openai.py +340 -0
  6. guidellm/backends/response_handlers.py +428 -0
  7. guidellm/benchmark/__init__.py +69 -39
  8. guidellm/benchmark/benchmarker.py +160 -316
  9. guidellm/benchmark/entrypoints.py +560 -127
  10. guidellm/benchmark/outputs/__init__.py +24 -0
  11. guidellm/benchmark/outputs/console.py +633 -0
  12. guidellm/benchmark/outputs/csv.py +721 -0
  13. guidellm/benchmark/outputs/html.py +473 -0
  14. guidellm/benchmark/outputs/output.py +169 -0
  15. guidellm/benchmark/outputs/serialized.py +69 -0
  16. guidellm/benchmark/profiles.py +718 -0
  17. guidellm/benchmark/progress.py +553 -556
  18. guidellm/benchmark/scenarios/__init__.py +40 -0
  19. guidellm/benchmark/scenarios/chat.json +6 -0
  20. guidellm/benchmark/scenarios/rag.json +6 -0
  21. guidellm/benchmark/schemas/__init__.py +66 -0
  22. guidellm/benchmark/schemas/base.py +402 -0
  23. guidellm/benchmark/schemas/generative/__init__.py +55 -0
  24. guidellm/benchmark/schemas/generative/accumulator.py +841 -0
  25. guidellm/benchmark/schemas/generative/benchmark.py +163 -0
  26. guidellm/benchmark/schemas/generative/entrypoints.py +381 -0
  27. guidellm/benchmark/schemas/generative/metrics.py +927 -0
  28. guidellm/benchmark/schemas/generative/report.py +158 -0
  29. guidellm/data/__init__.py +34 -4
  30. guidellm/data/builders.py +541 -0
  31. guidellm/data/collators.py +16 -0
  32. guidellm/data/config.py +120 -0
  33. guidellm/data/deserializers/__init__.py +49 -0
  34. guidellm/data/deserializers/deserializer.py +141 -0
  35. guidellm/data/deserializers/file.py +223 -0
  36. guidellm/data/deserializers/huggingface.py +94 -0
  37. guidellm/data/deserializers/memory.py +194 -0
  38. guidellm/data/deserializers/synthetic.py +246 -0
  39. guidellm/data/entrypoints.py +52 -0
  40. guidellm/data/loaders.py +190 -0
  41. guidellm/data/preprocessors/__init__.py +27 -0
  42. guidellm/data/preprocessors/formatters.py +410 -0
  43. guidellm/data/preprocessors/mappers.py +196 -0
  44. guidellm/data/preprocessors/preprocessor.py +30 -0
  45. guidellm/data/processor.py +29 -0
  46. guidellm/data/schemas.py +175 -0
  47. guidellm/data/utils/__init__.py +6 -0
  48. guidellm/data/utils/dataset.py +94 -0
  49. guidellm/extras/__init__.py +4 -0
  50. guidellm/extras/audio.py +220 -0
  51. guidellm/extras/vision.py +242 -0
  52. guidellm/logger.py +2 -2
  53. guidellm/mock_server/__init__.py +8 -0
  54. guidellm/mock_server/config.py +84 -0
  55. guidellm/mock_server/handlers/__init__.py +17 -0
  56. guidellm/mock_server/handlers/chat_completions.py +280 -0
  57. guidellm/mock_server/handlers/completions.py +280 -0
  58. guidellm/mock_server/handlers/tokenizer.py +142 -0
  59. guidellm/mock_server/models.py +510 -0
  60. guidellm/mock_server/server.py +238 -0
  61. guidellm/mock_server/utils.py +302 -0
  62. guidellm/scheduler/__init__.py +69 -26
  63. guidellm/scheduler/constraints/__init__.py +49 -0
  64. guidellm/scheduler/constraints/constraint.py +325 -0
  65. guidellm/scheduler/constraints/error.py +411 -0
  66. guidellm/scheduler/constraints/factory.py +182 -0
  67. guidellm/scheduler/constraints/request.py +312 -0
  68. guidellm/scheduler/constraints/saturation.py +722 -0
  69. guidellm/scheduler/environments.py +252 -0
  70. guidellm/scheduler/scheduler.py +137 -368
  71. guidellm/scheduler/schemas.py +358 -0
  72. guidellm/scheduler/strategies.py +617 -0
  73. guidellm/scheduler/worker.py +413 -419
  74. guidellm/scheduler/worker_group.py +712 -0
  75. guidellm/schemas/__init__.py +65 -0
  76. guidellm/schemas/base.py +417 -0
  77. guidellm/schemas/info.py +188 -0
  78. guidellm/schemas/request.py +235 -0
  79. guidellm/schemas/request_stats.py +349 -0
  80. guidellm/schemas/response.py +124 -0
  81. guidellm/schemas/statistics.py +1018 -0
  82. guidellm/{config.py → settings.py} +31 -24
  83. guidellm/utils/__init__.py +71 -8
  84. guidellm/utils/auto_importer.py +98 -0
  85. guidellm/utils/cli.py +132 -5
  86. guidellm/utils/console.py +566 -0
  87. guidellm/utils/encoding.py +778 -0
  88. guidellm/utils/functions.py +159 -0
  89. guidellm/utils/hf_datasets.py +1 -2
  90. guidellm/utils/hf_transformers.py +4 -4
  91. guidellm/utils/imports.py +9 -0
  92. guidellm/utils/messaging.py +1118 -0
  93. guidellm/utils/mixins.py +115 -0
  94. guidellm/utils/random.py +3 -4
  95. guidellm/utils/registry.py +220 -0
  96. guidellm/utils/singleton.py +133 -0
  97. guidellm/utils/synchronous.py +159 -0
  98. guidellm/utils/text.py +163 -50
  99. guidellm/utils/typing.py +41 -0
  100. guidellm/version.py +2 -2
  101. guidellm-0.6.0a5.dist-info/METADATA +364 -0
  102. guidellm-0.6.0a5.dist-info/RECORD +109 -0
  103. guidellm/backend/__init__.py +0 -23
  104. guidellm/backend/backend.py +0 -259
  105. guidellm/backend/openai.py +0 -708
  106. guidellm/backend/response.py +0 -136
  107. guidellm/benchmark/aggregator.py +0 -760
  108. guidellm/benchmark/benchmark.py +0 -837
  109. guidellm/benchmark/output.py +0 -997
  110. guidellm/benchmark/profile.py +0 -409
  111. guidellm/benchmark/scenario.py +0 -104
  112. guidellm/data/prideandprejudice.txt.gz +0 -0
  113. guidellm/dataset/__init__.py +0 -22
  114. guidellm/dataset/creator.py +0 -213
  115. guidellm/dataset/entrypoints.py +0 -42
  116. guidellm/dataset/file.py +0 -92
  117. guidellm/dataset/hf_datasets.py +0 -62
  118. guidellm/dataset/in_memory.py +0 -132
  119. guidellm/dataset/synthetic.py +0 -287
  120. guidellm/objects/__init__.py +0 -18
  121. guidellm/objects/pydantic.py +0 -89
  122. guidellm/objects/statistics.py +0 -953
  123. guidellm/preprocess/__init__.py +0 -3
  124. guidellm/preprocess/dataset.py +0 -374
  125. guidellm/presentation/__init__.py +0 -28
  126. guidellm/presentation/builder.py +0 -27
  127. guidellm/presentation/data_models.py +0 -232
  128. guidellm/presentation/injector.py +0 -66
  129. guidellm/request/__init__.py +0 -18
  130. guidellm/request/loader.py +0 -284
  131. guidellm/request/request.py +0 -79
  132. guidellm/request/types.py +0 -10
  133. guidellm/scheduler/queues.py +0 -25
  134. guidellm/scheduler/result.py +0 -155
  135. guidellm/scheduler/strategy.py +0 -495
  136. guidellm-0.3.1.dist-info/METADATA +0 -329
  137. guidellm-0.3.1.dist-info/RECORD +0 -62
  138. {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/WHEEL +0 -0
  139. {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/entry_points.txt +0 -0
  140. {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/licenses/LICENSE +0 -0
  141. {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,312 @@
1
+ """
2
+ Request-based constraint implementations.
3
+
4
+ Provides constraint types for limiting benchmark execution based on request counts
5
+ and time duration. These constraints monitor request creation, processing, and
6
+ elapsed time to determine when to stop benchmark execution.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import time
12
+ from typing import Any, Literal, cast
13
+
14
+ from pydantic import Field, field_validator
15
+
16
+ from guidellm.scheduler.constraints.constraint import (
17
+ Constraint,
18
+ PydanticConstraintInitializer,
19
+ )
20
+ from guidellm.scheduler.constraints.factory import ConstraintsInitializerFactory
21
+ from guidellm.scheduler.schemas import (
22
+ SchedulerProgress,
23
+ SchedulerState,
24
+ SchedulerUpdateAction,
25
+ )
26
+ from guidellm.schemas import RequestInfo, StandardBaseModel
27
+ from guidellm.utils import InfoMixin
28
+
29
+ __all__ = [
30
+ "MaxDurationConstraint",
31
+ "MaxNumberConstraint",
32
+ "RequestsExhaustedConstraint",
33
+ ]
34
+
35
+
36
@ConstraintsInitializerFactory.register(  # type: ignore[arg-type]
    ["max_number", "max_num", "max_requests", "max_req"]
)
class MaxNumberConstraint(PydanticConstraintInitializer):
    """
    Constraint that limits execution based on maximum request counts.

    Stops request queuing when created requests reach the limit and stops local
    request processing when processed requests reach the limit. Progress is
    reported as the remaining and total request counts.

    ``max_num`` may be a single number or a list of numbers. For a list, the
    entry at ``current_index`` is used, clamped to the last entry.
    """

    type_: Literal["max_number"] = "max_number"  # type: ignore[assignment]
    max_num: int | float | list[int | float] = Field(
        description="Maximum number of requests allowed before triggering constraint",
    )
    current_index: int = Field(
        default=-1, description="Current index for list-based max_num values"
    )

    @classmethod
    def validated_kwargs(
        cls, max_num: int | float | list[int | float] | None = None, **kwargs
    ) -> dict[str, Any]:
        """
        Validate and process arguments for MaxNumberConstraint creation.

        :param max_num: Maximum number of requests to allow. When None, the first
            alias in kwargs with a non-None value is used instead.
        :param kwargs: Supports the aliases max_number, max_requests and max_req,
            plus an optional current_index
        :return: Dictionary with max_num and current_index fields
        """
        # "max_num" is bound to the named parameter and never reaches kwargs; it
        # stays in the list so it mirrors the registered names.
        aliases = ["max_number", "max_num", "max_requests", "max_req"]
        for alias in aliases:
            if max_num is None:
                max_num = kwargs.get(alias)

        return {"max_num": max_num, "current_index": kwargs.get("current_index", -1)}

    def create_constraint(self, **_kwargs) -> Constraint:
        """
        Advance the list index and return a copy of this instance as the constraint.

        Each call increments ``current_index`` on this instance before copying, so
        the returned copy carries the incremented index.

        :param _kwargs: Additional keyword arguments (unused)
        :return: Copy of this instance, cast to Constraint
        """
        self.current_index += 1

        return cast("Constraint", self.model_copy())

    def __call__(
        self, state: SchedulerState, request_info: RequestInfo
    ) -> SchedulerUpdateAction:
        """
        Evaluate constraint against current scheduler state and request count.

        :param state: Current scheduler state with request counts
        :param request_info: Individual request information; its completed_at
            value is used as the stop time once no requests remain
        :return: Action indicating whether to continue or stop operations
        """
        # current_index starts at -1 until create_constraint is called; treat
        # that as the first entry and clamp past-the-end indexes to the last one.
        current_index = max(0, self.current_index)
        max_num = (
            self.max_num
            if isinstance(self.max_num, int | float)
            else self.max_num[min(current_index, len(self.max_num) - 1)]
        )

        create_exceeded = state.created_requests >= max_num
        processed_exceeded = state.processed_requests >= max_num
        remaining_requests = min(max(0, max_num - state.processed_requests), max_num)
        # Fall back to the current wall-clock time when the request carries no
        # completion timestamp.
        stop_time = (
            None if remaining_requests > 0 else request_info.completed_at or time.time()
        )

        return SchedulerUpdateAction(
            request_queuing="stop" if create_exceeded else "continue",
            request_processing="stop_local" if processed_exceeded else "continue",
            metadata={
                "max_number": max_num,
                "create_exceeded": create_exceeded,
                "processed_exceeded": processed_exceeded,
                "created_requests": state.created_requests,
                "processed_requests": state.processed_requests,
                "remaining_requests": remaining_requests,
                "stop_time": stop_time,
            },
            progress=SchedulerProgress(
                remaining_requests=remaining_requests,
                total_requests=max_num,
                stop_time=stop_time,
            ),
        )

    @field_validator("max_num")
    @classmethod
    def _validate_max_num(
        cls, value: int | float | list[int | float]
    ) -> int | float | list[int | float]:
        """
        Ensure every max_num entry is a positive number.

        :param value: Single number or list of numbers to validate
        :return: The single number when one value was given (including a
            one-element list), otherwise the list
        :raises ValueError: If any entry is falsy, non-numeric, or not positive
        """
        if not isinstance(value, list):
            value = [value]
        for val in value:
            if not val:
                raise ValueError(
                    f"max_num must be set and truthful, received {value} ({val} failed)"
                )
            if not isinstance(val, int | float) or val <= 0:
                raise ValueError(
                    f"max_num must be a positive num, received {value} ({val} failed)"
                )

        return value[0] if isinstance(value, list) and len(value) == 1 else value
148
+
149
+
150
@ConstraintsInitializerFactory.register(
    ["max_duration", "max_dur", "max_sec", "max_seconds", "max_min", "max_minutes"]
)
class MaxDurationConstraint(PydanticConstraintInitializer):
    """
    Constraint that limits execution based on maximum time duration.

    Stops both request queuing and local processing once the elapsed time since
    the scheduler's start reaches the maximum duration. Progress is reported as
    the remaining and total duration in seconds.

    ``max_duration`` may be a single number or a list of numbers. For a list,
    the entry at ``current_index`` is used, clamped to the last entry.
    """

    type_: Literal["max_duration"] = "max_duration"  # type: ignore[assignment]
    max_duration: int | float | list[int | float] = Field(
        description="Maximum duration in seconds before triggering constraint"
    )
    current_index: int = Field(default=-1, description="Current index in duration list")

    @classmethod
    def validated_kwargs(
        cls, max_duration: int | float | list[int | float] | None = None, **kwargs
    ) -> dict[str, Any]:
        """
        Validate and process arguments for MaxDurationConstraint creation.

        Resolution order: the explicit max_duration, then the seconds aliases,
        then the minutes aliases (converted to seconds).

        :param max_duration: Maximum duration in seconds
        :param kwargs: Supports the seconds aliases max_dur, max_sec and
            max_seconds, the minutes aliases max_min and max_minutes, plus an
            optional current_index
        :return: Dictionary with max_duration and current_index fields
        """
        seconds_aliases = ["max_dur", "max_sec", "max_seconds"]
        for alias in seconds_aliases:
            if max_duration is None:
                max_duration = kwargs.get(alias)
        minutes_aliases = ["max_min", "max_minutes"]
        for alias in minutes_aliases:
            minutes = kwargs.get(alias)
            if minutes is not None and max_duration is None:
                # Convert element-wise for sequences: `sequence * 60` would
                # repeat the sequence 60 times instead of scaling its values.
                max_duration = (
                    [val * 60 for val in minutes]
                    if isinstance(minutes, (list, tuple))
                    else minutes * 60
                )

        return {
            "max_duration": max_duration,
            "current_index": kwargs.get("current_index", -1),
        }

    def create_constraint(self, **_kwargs) -> Constraint:
        """
        Advance the list index and return a copy of this instance as the constraint.

        Each call increments ``current_index`` on this instance before copying, so
        the returned copy carries the incremented index.

        :param _kwargs: Additional keyword arguments (unused)
        :return: Copy of this instance, cast to Constraint
        """
        self.current_index += 1

        return cast("Constraint", self.model_copy())

    def __call__(
        self, state: SchedulerState, request_info: RequestInfo
    ) -> SchedulerUpdateAction:
        """
        Evaluate constraint against current scheduler state and elapsed time.

        :param state: Current scheduler state with start time
        :param request_info: Individual request information (unused)
        :return: Action indicating whether to continue or stop operations
        """
        _ = request_info  # Unused parameters
        # current_index starts at -1 until create_constraint is called; treat
        # that as the first entry and clamp past-the-end indexes to the last one.
        current_index = max(0, self.current_index)
        max_duration = (
            self.max_duration
            if isinstance(self.max_duration, int | float)
            else self.max_duration[min(current_index, len(self.max_duration) - 1)]
        )

        # Measure from start_requests_time, falling back to start_time when the
        # former is unset (falsy).
        start_time = state.start_requests_time or state.start_time
        current_time = time.time()
        elapsed = current_time - start_time
        duration_exceeded = elapsed >= max_duration
        remaining_duration = min(max(0.0, max_duration - elapsed), max_duration)
        # Report the nominal deadline rather than the moment it was detected.
        stop_time = None if not duration_exceeded else start_time + max_duration

        return SchedulerUpdateAction(
            request_queuing="stop" if duration_exceeded else "continue",
            request_processing="stop_local" if duration_exceeded else "continue",
            metadata={
                "max_duration": max_duration,
                "elapsed_time": elapsed,
                "duration_exceeded": duration_exceeded,
                "start_time": start_time,
                "current_time": current_time,
                "stop_time": stop_time,
            },
            progress=SchedulerProgress(
                remaining_duration=remaining_duration,
                total_duration=max_duration,
                stop_time=stop_time,
            ),
        )

    @field_validator("max_duration")
    @classmethod
    def _validate_max_duration(
        cls, value: int | float | list[int | float]
    ) -> int | float | list[int | float]:
        """
        Ensure every max_duration entry is a positive number.

        :param value: Single number or list of numbers to validate
        :return: The single number when one value was given (including a
            one-element list), otherwise the list
        :raises ValueError: If any entry is falsy, non-numeric, or not positive
        """
        if not isinstance(value, list):
            value = [value]
        for val in value:
            if not val:
                raise ValueError(
                    "max_duration must be set and truthful, "
                    f"received {value} ({val} failed)"
                )
            if not isinstance(val, int | float) or val <= 0:
                raise ValueError(
                    "max_duration must be a positive num, "
                    f"received {value} ({val} failed)"
                )

        return value[0] if isinstance(value, list) and len(value) == 1 else value
269
+
270
+
271
class RequestsExhaustedConstraint(StandardBaseModel, InfoMixin):
    """
    Constraint that stops execution once a fixed number of requests is used up.

    Request queuing stops when the created request count reaches
    ``num_requests``; local processing stops when the processed request count
    reaches it.
    """

    type_: Literal["requests_exhausted"] = "requests_exhausted"  # type: ignore[assignment]
    num_requests: int

    @property
    def info(self) -> dict[str, Any]:
        """
        Serialize this constraint's fields.

        :return: Dictionary produced by dumping the model
        """
        return self.model_dump()

    def __call__(
        self, state: SchedulerState, request: RequestInfo
    ) -> SchedulerUpdateAction:
        """
        Evaluate the constraint against the scheduler's request counters.

        :param state: Current scheduler state with request counts
        :param request: Request information; its completed_at value is used as
            the stop time once no requests remain
        :return: Action indicating whether to continue or stop operations
        """
        limit = self.num_requests
        queuing_done = state.created_requests >= limit
        processing_done = state.processed_requests >= limit
        left = max(0, limit - state.processed_requests)

        if left > 0:
            finished_at = None
        else:
            # Use the wall clock when the request has no completion timestamp.
            finished_at = request.completed_at or time.time()

        queuing_action = "stop" if queuing_done else "continue"
        processing_action = "stop_local" if processing_done else "continue"
        details = {
            "num_requests": self.num_requests,
            "create_exceeded": queuing_done,
            "processed_exceeded": processing_done,
            "created_requests": state.created_requests,
            "processed_requests": state.processed_requests,
            "remaining_requests": left,
            "stop_time": finished_at,
        }
        progress = SchedulerProgress(
            remaining_requests=left,
            total_requests=self.num_requests,
            stop_time=finished_at,
        )

        return SchedulerUpdateAction(
            request_queuing=queuing_action,
            request_processing=processing_action,
            metadata=details,
            progress=progress,
        )