guidellm 0.1.0__py3-none-any.whl → 0.2.0rc20250418__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of guidellm might be problematic. Click here for more details.

Files changed (69)
  1. guidellm/__init__.py +38 -6
  2. guidellm/__main__.py +294 -0
  3. guidellm/backend/__init__.py +19 -6
  4. guidellm/backend/backend.py +238 -0
  5. guidellm/backend/openai.py +532 -122
  6. guidellm/backend/response.py +132 -0
  7. guidellm/benchmark/__init__.py +73 -0
  8. guidellm/benchmark/aggregator.py +760 -0
  9. guidellm/benchmark/benchmark.py +838 -0
  10. guidellm/benchmark/benchmarker.py +334 -0
  11. guidellm/benchmark/entrypoints.py +141 -0
  12. guidellm/benchmark/output.py +946 -0
  13. guidellm/benchmark/profile.py +409 -0
  14. guidellm/benchmark/progress.py +720 -0
  15. guidellm/config.py +34 -56
  16. guidellm/data/__init__.py +4 -0
  17. guidellm/data/prideandprejudice.txt.gz +0 -0
  18. guidellm/dataset/__init__.py +22 -0
  19. guidellm/dataset/creator.py +213 -0
  20. guidellm/dataset/entrypoints.py +42 -0
  21. guidellm/dataset/file.py +90 -0
  22. guidellm/dataset/hf_datasets.py +62 -0
  23. guidellm/dataset/in_memory.py +132 -0
  24. guidellm/dataset/synthetic.py +262 -0
  25. guidellm/objects/__init__.py +18 -0
  26. guidellm/objects/pydantic.py +60 -0
  27. guidellm/objects/statistics.py +947 -0
  28. guidellm/request/__init__.py +12 -10
  29. guidellm/request/loader.py +281 -0
  30. guidellm/request/request.py +79 -0
  31. guidellm/scheduler/__init__.py +51 -3
  32. guidellm/scheduler/result.py +137 -0
  33. guidellm/scheduler/scheduler.py +382 -0
  34. guidellm/scheduler/strategy.py +493 -0
  35. guidellm/scheduler/types.py +7 -0
  36. guidellm/scheduler/worker.py +511 -0
  37. guidellm/utils/__init__.py +16 -29
  38. guidellm/utils/colors.py +8 -0
  39. guidellm/utils/hf_transformers.py +35 -0
  40. guidellm/utils/random.py +43 -0
  41. guidellm/utils/text.py +118 -357
  42. {guidellm-0.1.0.dist-info → guidellm-0.2.0rc20250418.dist-info}/METADATA +96 -79
  43. guidellm-0.2.0rc20250418.dist-info/RECORD +48 -0
  44. {guidellm-0.1.0.dist-info → guidellm-0.2.0rc20250418.dist-info}/WHEEL +1 -1
  45. guidellm-0.2.0rc20250418.dist-info/entry_points.txt +2 -0
  46. guidellm/backend/base.py +0 -320
  47. guidellm/core/__init__.py +0 -24
  48. guidellm/core/distribution.py +0 -190
  49. guidellm/core/report.py +0 -321
  50. guidellm/core/request.py +0 -44
  51. guidellm/core/result.py +0 -545
  52. guidellm/core/serializable.py +0 -169
  53. guidellm/executor/__init__.py +0 -10
  54. guidellm/executor/base.py +0 -213
  55. guidellm/executor/profile_generator.py +0 -343
  56. guidellm/main.py +0 -336
  57. guidellm/request/base.py +0 -194
  58. guidellm/request/emulated.py +0 -391
  59. guidellm/request/file.py +0 -76
  60. guidellm/request/transformers.py +0 -100
  61. guidellm/scheduler/base.py +0 -374
  62. guidellm/scheduler/load_generator.py +0 -196
  63. guidellm/utils/injector.py +0 -70
  64. guidellm/utils/progress.py +0 -196
  65. guidellm/utils/transformers.py +0 -151
  66. guidellm-0.1.0.dist-info/RECORD +0 -35
  67. guidellm-0.1.0.dist-info/entry_points.txt +0 -3
  68. {guidellm-0.1.0.dist-info → guidellm-0.2.0rc20250418.dist-info/licenses}/LICENSE +0 -0
  69. {guidellm-0.1.0.dist-info → guidellm-0.2.0rc20250418.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,409 @@
1
+ from collections.abc import Sequence
2
+ from typing import Literal, Optional, Union
3
+
4
+ import numpy as np
5
+ from pydantic import Field, computed_field
6
+
7
+ from guidellm.config import settings
8
+ from guidellm.objects import StandardBaseModel
9
+ from guidellm.scheduler import (
10
+ AsyncConstantStrategy,
11
+ AsyncPoissonStrategy,
12
+ ConcurrentStrategy,
13
+ SchedulingStrategy,
14
+ StrategyType,
15
+ SynchronousStrategy,
16
+ ThroughputStrategy,
17
+ )
18
+
19
# Public API of this module: the profile type alias, the profile classes,
# and the factory function that builds a profile from standard arguments.
__all__ = [
    "ProfileType",
    "Profile",
    "SynchronousProfile",
    "ConcurrentProfile",
    "ThroughputProfile",
    "AsyncProfile",
    "SweepProfile",
    "create_profile",
]

# The profile identifiers; each value matches the ``type_`` literal of one of
# the concrete profile classes defined in this module.
ProfileType = Literal["synchronous", "concurrent", "throughput", "async", "sweep"]
31
+
32
+
33
class Profile(StandardBaseModel):
    """
    Base benchmarking profile holding the bookkeeping shared by all profiles.

    It counts the strategies that have completed and records the average rate
    and average concurrency reported for each of them. The base implementation
    advertises no strategy types and never yields a strategy; the concrete
    profiles in this module override both members.
    """

    type_: Literal["profile"] = Field(
        description="The type of benchmarking profile to use.",
    )
    completed_strategies: int = Field(
        description="The number of scheduling strategies generated so far.",
        default=0,
    )
    measured_rates: list[float] = Field(
        description=("The average rates measured for the strategies that have run."),
        default_factory=list,
    )
    measured_concurrencies: list[float] = Field(
        description=(
            "The average concurrency measured for the strategies that have run."
        ),
        default_factory=list,
    )

    def completed_strategy(self, average_rate: float, average_concurrency: float):
        """
        Record the measurements of a finished strategy and advance the counter.

        :param average_rate: Average rate measured for the finished strategy.
        :param average_concurrency: Average concurrency measured for the
            finished strategy.
        """
        self.completed_strategies += 1
        self.measured_concurrencies.append(average_concurrency)
        self.measured_rates.append(average_rate)

    @computed_field  # type: ignore[misc]
    @property
    def strategy_types(self) -> list[StrategyType]:
        """Return the strategy types of this profile; empty for the base class."""
        return []

    def next_strategy(self) -> Optional[SchedulingStrategy]:
        """Return the next strategy to run; the base class always returns None."""
        return None
64
+
65
+
66
class SynchronousProfile(Profile):
    """Profile that yields a single ``SynchronousStrategy`` and then stops."""

    type_: Literal["synchronous"] = "synchronous"  # type: ignore[assignment]

    @property
    def strategy_types(self) -> list[StrategyType]:
        """Return a one-element list containing this profile's own type."""
        return [self.type_]

    def next_strategy(self) -> Optional[SchedulingStrategy]:
        """Return a ``SynchronousStrategy`` until one strategy has completed."""
        already_ran = self.completed_strategies >= 1
        return None if already_ran else SynchronousStrategy()

    @staticmethod
    def from_standard_args(
        rate_type: Union[StrategyType, ProfileType],
        rate: Optional[Union[float, Sequence[float]]],
        **kwargs,
    ) -> "SynchronousProfile":
        """
        Build a synchronous profile from the standard profile arguments.

        :param rate_type: Must be ``"synchronous"``.
        :param rate: Must be ``None``; a rate does not apply to this profile.
        :param kwargs: Must be empty.
        :raises ValueError: If any of the constraints above is violated.
        :return: A new ``SynchronousProfile``.
        """
        # Checked in order; the first violated rule decides the error raised.
        violations = (
            (
                rate_type != "synchronous",
                "Rate type must be 'synchronous' for synchronous profile.",
            ),
            (
                rate is not None,
                "Rate does not apply to synchronous profile, it must be set to None.",
            ),
            (
                bool(kwargs),
                "No additional arguments are allowed for synchronous profile.",
            ),
        )
        for violated, message in violations:
            if violated:
                raise ValueError(message)

        return SynchronousProfile()
99
+
100
+
101
class ConcurrentProfile(Profile):
    """
    Profile that runs one ``ConcurrentStrategy`` per configured stream count.

    ``streams`` may be a single integer or a sequence of integers; a single
    value is treated as a one-element sequence.
    """

    type_: Literal["concurrent"] = "concurrent"  # type: ignore[assignment]
    streams: Union[int, Sequence[int]] = Field(
        description="The number of concurrent streams to use.",
    )

    @property
    def strategy_types(self) -> list[StrategyType]:
        """Return this profile's type repeated once per configured stream count."""
        num_strategies = len(self.streams) if isinstance(self.streams, Sequence) else 1

        return [self.type_] * num_strategies

    def next_strategy(self) -> Optional[SchedulingStrategy]:
        """
        Return the strategy for the next stream count.

        :return: A ``ConcurrentStrategy`` for the entry of ``streams`` indexed
            by ``completed_strategies``, or ``None`` once every entry has run.
        """
        streams = self.streams if isinstance(self.streams, Sequence) else [self.streams]

        if self.completed_strategies >= len(streams):
            return None

        return ConcurrentStrategy(
            streams=streams[self.completed_strategies],
        )

    @staticmethod
    def from_standard_args(
        rate_type: Union[StrategyType, ProfileType],
        rate: Optional[Union[float, Sequence[float]]],
        **kwargs,
    ) -> "ConcurrentProfile":
        """
        Build a concurrent profile from the standard profile arguments.

        :param rate_type: Must be ``"concurrent"``.
        :param rate: The stream count(s): one value or a sequence of values,
            each a positive whole number (``2`` and ``2.0`` are both accepted).
        :param kwargs: Must be empty.
        :raises ValueError: If ``rate_type`` is wrong, ``rate`` is missing,
            any value is not a positive whole number, or extra arguments
            are supplied.
        :return: A new ``ConcurrentProfile`` whose ``streams`` are ints.
        """
        if rate_type != "concurrent":
            raise ValueError("Rate type must be 'concurrent' for concurrent profile.")

        if not rate:
            raise ValueError("Rate (streams) must be provided for concurrent profile.")

        if not isinstance(rate, Sequence):
            rate = [rate]

        # int.is_integer() only exists on Python >= 3.12, so go through float()
        # to accept plain ints as well as whole-number floats on older versions.
        if not all(float(stream).is_integer() and stream > 0 for stream in rate):
            raise ValueError(
                f"All rate values (streams) must be positive integers, received {rate}"
            )

        if kwargs:
            raise ValueError(
                "No additional arguments are allowed for concurrent profile."
            )

        return ConcurrentProfile(streams=[int(stream) for stream in rate])
149
+
150
+
151
class ThroughputProfile(Profile):
    """Profile that yields a single ``ThroughputStrategy`` and then stops."""

    type_: Literal["throughput"] = "throughput"  # type: ignore[assignment]
    max_concurrency: Optional[int] = Field(
        description="The maximum number of concurrent requests that can be scheduled.",
        default=None,
    )

    @property
    def strategy_types(self) -> list[StrategyType]:
        """Return a one-element list containing this profile's own type."""
        return [self.type_]

    def next_strategy(self) -> Optional[SchedulingStrategy]:
        """
        Return a ``ThroughputStrategy`` until one strategy has completed.

        The strategy receives this profile's ``max_concurrency``.
        """
        if self.completed_strategies < 1:
            return ThroughputStrategy(max_concurrency=self.max_concurrency)

        return None

    @staticmethod
    def from_standard_args(
        rate_type: Union[StrategyType, ProfileType],
        rate: Optional[Union[float, Sequence[float]]],
        **kwargs,
    ) -> "ThroughputProfile":
        """
        Build a throughput profile from the standard profile arguments.

        :param rate_type: Must be ``"throughput"``.
        :param rate: Must be ``None``; a rate does not apply to this profile.
        :param kwargs: Forwarded unchanged to the ``ThroughputProfile``
            constructor.
        :raises ValueError: If ``rate_type`` is wrong or ``rate`` is given.
        :return: A new ``ThroughputProfile``.
        """
        # Checked in order; the first violated rule decides the error raised.
        violations = (
            (
                rate_type != "throughput",
                "Rate type must be 'throughput' for throughput profile.",
            ),
            (
                rate is not None,
                "Rate does not apply to throughput profile, it must be set to None.",
            ),
        )
        for violated, message in violations:
            if violated:
                raise ValueError(message)

        return ThroughputProfile(**kwargs)
185
+
186
+
187
class AsyncProfile(ThroughputProfile):
    """
    Profile that runs one asynchronous strategy per configured rate.

    ``strategy_type`` selects between ``AsyncConstantStrategy`` and
    ``AsyncPoissonStrategy``; ``rate`` may be a single value or a sequence, a
    single value being treated as a one-element sequence.
    """

    type_: Literal["async"] = "async"  # type: ignore[assignment]
    strategy_type: Literal["constant", "poisson"] = Field(
        description="The type of asynchronous strategy to use.",
    )
    rate: Union[float, Sequence[float]] = Field(
        description="The rate of requests per second to use.",
    )
    initial_burst: bool = Field(
        description=(
            "True to send an initial burst of requests (math.floor(self.rate)) "
            "to reach target rate. False to not send an initial burst."
        ),
        default=True,
    )
    random_seed: int = Field(
        description=(
            "The random seed to use for the asynchronous strategy. "
            "This is used to generate random numbers for the Poisson strategy."
        ),
        default=42,
    )

    @property
    def strategy_types(self) -> list[StrategyType]:
        """Return ``strategy_type`` repeated once per configured rate."""
        if isinstance(self.rate, Sequence):
            count = len(self.rate)
        else:
            count = 1

        return [self.strategy_type] * count

    def next_strategy(self) -> Optional[SchedulingStrategy]:
        """
        Return the asynchronous strategy for the next configured rate.

        :raises ValueError: If ``strategy_type`` is neither ``"constant"`` nor
            ``"poisson"``.
        :return: The strategy for the rate indexed by ``completed_strategies``,
            or ``None`` once every rate has run.
        """
        all_rates = self.rate if isinstance(self.rate, Sequence) else [self.rate]
        position = self.completed_strategies

        if position >= len(all_rates):
            return None

        if self.strategy_type == "constant":
            return AsyncConstantStrategy(
                rate=all_rates[position],
                initial_burst=self.initial_burst,
                max_concurrency=self.max_concurrency,
            )

        if self.strategy_type == "poisson":
            return AsyncPoissonStrategy(
                rate=all_rates[position],
                initial_burst=self.initial_burst,
                max_concurrency=self.max_concurrency,
                random_seed=self.random_seed,
            )

        raise ValueError(f"Invalid strategy type: {self.strategy_type}")

    @staticmethod
    def from_standard_args(  # type: ignore[override]
        rate_type: Union[StrategyType, ProfileType],
        rate: Optional[Union[float, Sequence[float]]],
        random_seed: int,
        **kwargs,
    ) -> "AsyncProfile":
        """
        Build an async profile from the standard profile arguments.

        :param rate_type: One of ``"async"``, ``"constant"`` or ``"poisson"``;
            ``"async"`` is mapped to ``"constant"``.
        :param rate: One positive number or a sequence of positive numbers.
        :param random_seed: Seed stored on the profile.
        :param kwargs: Forwarded unchanged to the ``AsyncProfile`` constructor.
        :raises ValueError: If ``rate_type`` is not accepted, ``rate`` is
            missing, or any rate value is not a positive number.
        :return: A new ``AsyncProfile``.
        """
        accepted_types = ("async", "constant", "poisson")
        if rate_type not in accepted_types:
            raise ValueError(
                "Rate type must be in ('async', 'constant', 'poisson') "
                f"for async profile. Received: {rate_type}"
            )

        if not rate:
            raise ValueError("Rate must be provided for async profile.")

        if not isinstance(rate, Sequence):
            rate = [rate]

        # Kept as ``not (... and r > 0)`` so NaN values are still rejected.
        if any(not (isinstance(r, (float, int)) and r > 0) for r in rate):
            raise ValueError(
                f"All rate values must be positive numbers, received {rate}"
            )

        # A bare "async" request defaults to the constant-rate strategy.
        chosen_type = "constant" if rate_type == "async" else rate_type

        return AsyncProfile(
            strategy_type=chosen_type,  # type: ignore[arg-type]
            rate=rate,
            random_seed=random_seed,
            **kwargs,
        )
271
+
272
+
273
class SweepProfile(AsyncProfile):
    """
    Profile that sweeps from synchronous to throughput performance.

    The first strategy is synchronous and the second is throughput. Each
    remaining strategy is an asynchronous one whose rate is taken from an even
    spacing between the first two measured rates. ``next_strategy`` selects
    the asynchronous strategy class from ``rate_type``; the inherited ``rate``
    field is not read by it.
    """

    type_: Literal["sweep"] = "sweep"  # type: ignore[assignment]
    sweep_size: int = Field(
        description="The number of strategies to generate for the sweep.",
    )
    rate: float = -1
    rate_type: Literal["constant", "poisson"] = "constant"

    @property
    def strategy_types(self) -> list[StrategyType]:
        """Return synchronous, throughput, then ``rate_type`` for the remainder."""
        return (
            ["synchronous"] + ["throughput"] + [self.rate_type] * (self.sweep_size - 2)  # type: ignore[return-value]
        )

    def next_strategy(self) -> Optional[SchedulingStrategy]:
        """
        Return the next strategy of the sweep.

        :raises ValueError: If ``rate_type`` is neither ``"constant"`` nor
            ``"poisson"``.
        :return: ``SynchronousStrategy`` first, ``ThroughputStrategy`` second,
            then one asynchronous strategy per interpolated rate; ``None``
            once ``sweep_size`` strategies have completed.
        """
        if self.completed_strategies >= self.sweep_size:
            return None

        if self.completed_strategies == 0:
            return SynchronousStrategy()

        if self.completed_strategies == 1:
            return ThroughputStrategy(
                max_concurrency=self.max_concurrency,
            )

        # The first two measurements bound the sweep. The lower bound is
        # dropped ([1:]) because the synchronous run already covered it.
        min_rate = self.measured_rates[0]
        max_rate = self.measured_rates[1]
        rates = np.linspace(min_rate, max_rate, self.sweep_size - 1)[1:]
        rate = rates[self.completed_strategies - 2]

        if self.rate_type == "constant":
            return AsyncConstantStrategy(
                rate=rate,
                initial_burst=self.initial_burst,
                max_concurrency=self.max_concurrency,
            )

        if self.rate_type == "poisson":
            # Pass the seed through, as AsyncProfile does, so the configured
            # random_seed is honored rather than silently ignored.
            return AsyncPoissonStrategy(
                rate=rate,
                initial_burst=self.initial_burst,
                max_concurrency=self.max_concurrency,
                random_seed=self.random_seed,
            )

        raise ValueError(f"Invalid strategy type: {self.rate_type}")

    @staticmethod
    def from_standard_args(  # type: ignore[override]
        rate_type: Union[StrategyType, ProfileType],
        rate: Optional[Union[float, Sequence[float]]],
        random_seed: int,
        **kwargs,
    ) -> "SweepProfile":
        """
        Build a sweep profile from the standard profile arguments.

        For this profile ``rate`` carries the sweep size. When it is missing,
        ``settings.default_sweep_number`` is used instead.

        :param rate_type: Must be ``"sweep"``.
        :param rate: The sweep size: a whole number greater than 1, given as a
            single value or a one-element sequence.
        :param random_seed: Seed stored on the profile.
        :param kwargs: Forwarded to the ``SweepProfile`` constructor; must not
            contain ``sweep_size``. ``strategy_type`` defaults to
            ``"constant"`` when absent.
        :raises ValueError: If ``rate_type`` is wrong, ``sweep_size`` is passed
            directly, ``rate`` holds more than one value, or the resulting
            sweep size is missing or not a whole number greater than 1.
        :return: A new ``SweepProfile``.
        """
        if rate_type != "sweep":
            raise ValueError("Rate type must be 'sweep' for sweep profile.")

        if "sweep_size" in kwargs:
            raise ValueError("Sweep size must not be provided, use rate instead.")

        if isinstance(rate, Sequence):
            if len(rate) != 1:
                raise ValueError(
                    "Rate must be a single value for sweep profile, received "
                    f"{len(rate)} values."
                )
            rate = rate[0]

        if not rate:
            rate = settings.default_sweep_number

        if not rate:
            raise ValueError("Rate (sweep_size) must be provided for sweep profile.")

        if (
            not isinstance(rate, (int, float))
            or (isinstance(rate, float) and not rate.is_integer())
            or rate <= 1
        ):
            raise ValueError(
                f"Rate (sweep_size) must be a positive integer > 1, received {rate} "
                f"with type {type(rate)}"
            )

        # strategy_type is a required inherited field; fill it in when absent.
        kwargs.setdefault("strategy_type", "constant")

        return SweepProfile(sweep_size=int(rate), random_seed=random_seed, **kwargs)
364
+
365
+
366
def create_profile(
    rate_type: Union[StrategyType, ProfileType],
    rate: Optional[Union[float, Sequence[float]]],
    random_seed: int = 42,
    **kwargs,
) -> "Profile":
    """
    Create the profile matching ``rate_type`` from the standard arguments.

    :param rate_type: The profile or strategy type: ``"synchronous"``,
        ``"concurrent"``, ``"throughput"``, ``"async"``, ``"constant"``,
        ``"poisson"`` or ``"sweep"``.
    :param rate: Passed to the selected profile's ``from_standard_args``.
    :param random_seed: Passed on only for the async and sweep profiles.
    :param kwargs: Passed to the selected profile's ``from_standard_args``.
    :raises ValueError: If ``rate_type`` matches no known profile.
    :return: The profile built by the matching class.
    """
    # Profiles whose factory does not take a random seed.
    unseeded = (
        ("synchronous", SynchronousProfile),
        ("concurrent", ConcurrentProfile),
        ("throughput", ThroughputProfile),
    )
    for type_name, profile_cls in unseeded:
        if rate_type == type_name:
            return profile_cls.from_standard_args(
                rate_type=rate_type,
                rate=rate,
                **kwargs,
            )

    # Profiles whose factory requires the random seed.
    if rate_type in ("async", "constant", "poisson"):
        seeded_cls = AsyncProfile
    elif rate_type == "sweep":
        seeded_cls = SweepProfile
    else:
        raise ValueError(f"Invalid profile type: {rate_type}")

    return seeded_cls.from_standard_args(
        rate_type=rate_type,
        rate=rate,
        random_seed=random_seed,
        **kwargs,
    )