guidellm 0.1.0__py3-none-any.whl → 0.2.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of guidellm might be problematic.

Files changed (69)
  1. guidellm/__init__.py +38 -6
  2. guidellm/__main__.py +294 -0
  3. guidellm/backend/__init__.py +19 -6
  4. guidellm/backend/backend.py +238 -0
  5. guidellm/backend/openai.py +532 -122
  6. guidellm/backend/response.py +132 -0
  7. guidellm/benchmark/__init__.py +73 -0
  8. guidellm/benchmark/aggregator.py +760 -0
  9. guidellm/benchmark/benchmark.py +838 -0
  10. guidellm/benchmark/benchmarker.py +334 -0
  11. guidellm/benchmark/entrypoints.py +141 -0
  12. guidellm/benchmark/output.py +946 -0
  13. guidellm/benchmark/profile.py +409 -0
  14. guidellm/benchmark/progress.py +720 -0
  15. guidellm/config.py +34 -56
  16. guidellm/data/__init__.py +4 -0
  17. guidellm/data/prideandprejudice.txt.gz +0 -0
  18. guidellm/dataset/__init__.py +22 -0
  19. guidellm/dataset/creator.py +213 -0
  20. guidellm/dataset/entrypoints.py +42 -0
  21. guidellm/dataset/file.py +90 -0
  22. guidellm/dataset/hf_datasets.py +62 -0
  23. guidellm/dataset/in_memory.py +132 -0
  24. guidellm/dataset/synthetic.py +262 -0
  25. guidellm/objects/__init__.py +18 -0
  26. guidellm/objects/pydantic.py +60 -0
  27. guidellm/objects/statistics.py +947 -0
  28. guidellm/request/__init__.py +12 -10
  29. guidellm/request/loader.py +281 -0
  30. guidellm/request/request.py +79 -0
  31. guidellm/scheduler/__init__.py +51 -3
  32. guidellm/scheduler/result.py +137 -0
  33. guidellm/scheduler/scheduler.py +382 -0
  34. guidellm/scheduler/strategy.py +493 -0
  35. guidellm/scheduler/types.py +7 -0
  36. guidellm/scheduler/worker.py +511 -0
  37. guidellm/utils/__init__.py +16 -29
  38. guidellm/utils/colors.py +8 -0
  39. guidellm/utils/hf_transformers.py +35 -0
  40. guidellm/utils/random.py +43 -0
  41. guidellm/utils/text.py +118 -357
  42. {guidellm-0.1.0.dist-info → guidellm-0.2.0.dev0.dist-info}/METADATA +96 -79
  43. guidellm-0.2.0.dev0.dist-info/RECORD +48 -0
  44. {guidellm-0.1.0.dist-info → guidellm-0.2.0.dev0.dist-info}/WHEEL +1 -1
  45. guidellm-0.2.0.dev0.dist-info/entry_points.txt +2 -0
  46. guidellm/backend/base.py +0 -320
  47. guidellm/core/__init__.py +0 -24
  48. guidellm/core/distribution.py +0 -190
  49. guidellm/core/report.py +0 -321
  50. guidellm/core/request.py +0 -44
  51. guidellm/core/result.py +0 -545
  52. guidellm/core/serializable.py +0 -169
  53. guidellm/executor/__init__.py +0 -10
  54. guidellm/executor/base.py +0 -213
  55. guidellm/executor/profile_generator.py +0 -343
  56. guidellm/main.py +0 -336
  57. guidellm/request/base.py +0 -194
  58. guidellm/request/emulated.py +0 -391
  59. guidellm/request/file.py +0 -76
  60. guidellm/request/transformers.py +0 -100
  61. guidellm/scheduler/base.py +0 -374
  62. guidellm/scheduler/load_generator.py +0 -196
  63. guidellm/utils/injector.py +0 -70
  64. guidellm/utils/progress.py +0 -196
  65. guidellm/utils/transformers.py +0 -151
  66. guidellm-0.1.0.dist-info/RECORD +0 -35
  67. guidellm-0.1.0.dist-info/entry_points.txt +0 -3
  68. {guidellm-0.1.0.dist-info → guidellm-0.2.0.dev0.dist-info/licenses}/LICENSE +0 -0
  69. {guidellm-0.1.0.dist-info → guidellm-0.2.0.dev0.dist-info}/top_level.txt +0 -0
guidellm/scheduler/strategy.py
@@ -0,0 +1,493 @@
+import math
+import os
+import random
+import time
+from collections.abc import Generator
+from typing import (
+    Literal,
+    Optional,
+    Union,
+)
+
+from pydantic import Field
+
+from guidellm.config import settings
+from guidellm.objects import StandardBaseModel
+
+__all__ = [
+    "StrategyType",
+    "SchedulingStrategy",
+    "SynchronousStrategy",
+    "ConcurrentStrategy",
+    "ThroughputStrategy",
+    "AsyncConstantStrategy",
+    "AsyncPoissonStrategy",
+    "strategy_display_str",
+]
+
+
+StrategyType = Literal["synchronous", "concurrent", "throughput", "constant", "poisson"]
+
+
+class SchedulingStrategy(StandardBaseModel):
+    """
+    An abstract base class for scheduling strategies.
+    This class defines the interface for scheduling requests and provides
+    a common structure for all scheduling strategies.
+    Subclasses should implement the `request_times` method to provide
+    specific scheduling behavior.
+
+    :param type_: The type of scheduling strategy to use.
+        This should be one of the predefined strategy types.
+    """
+
+    type_: Literal["strategy"] = Field(
+        description="The type of scheduling strategy to schedule requests with.",
+    )
+
+    @property
+    def processing_mode(self) -> Literal["sync", "async"]:
+        """
+        The processing mode for the scheduling strategy, either 'sync' or 'async'.
+        This property determines how the worker processes are set up:
+        either to run synchronously with one request at a time or asynchronously.
+        Subclasses should override this property to return
+        the appropriate processing mode.
+
+        :return: The processing mode for the scheduling strategy,
+            either 'sync' or 'async'.
+        """
+        return "async"
+
+    @property
+    def processes_limit(self) -> int:
+        """
+        The limit on the number of worker processes for the scheduling strategy.
+        It determines how many worker processes are created
+        for the scheduling strategy and can be overridden by subclasses.
+
+        :return: The number of processes for the scheduling strategy.
+        """
+        cpu_cores = os.cpu_count() or 1
+
+        return min(max(1, cpu_cores - 1), settings.max_worker_processes)
+
+    @property
+    def queued_requests_limit(self) -> Optional[int]:
+        """
+        The maximum number of queued requests for the scheduling strategy.
+        It determines how many requests can be queued at one time
+        for the scheduling strategy and can be overridden by subclasses.
+
+        :return: The maximum number of queued requests for the scheduling strategy.
+        """
+        return settings.max_concurrency
+
+    @property
+    def processing_requests_limit(self) -> int:
+        """
+        The maximum number of processing requests for the scheduling strategy.
+        It determines how many requests can be processed at one time
+        for the scheduling strategy and can be overridden by subclasses.
+
+        :return: The maximum number of processing requests for the scheduling strategy.
+        """
+        return settings.max_concurrency
+
+    def request_times(self) -> Generator[float, None, None]:
+        """
+        A generator that yields timestamps for when requests should be sent.
+        This method should be implemented by subclasses to provide specific
+        scheduling behavior.
+
+        :return: A generator that yields timestamps for request scheduling
+            or -1 for requests that should be sent immediately.
+        """
+        raise NotImplementedError("Subclasses must implement request_times() method.")
+
+
+class SynchronousStrategy(SchedulingStrategy):
+    """
+    A class representing a synchronous scheduling strategy.
+    This strategy schedules requests synchronously, one at a time,
+    at the maximum rate possible.
+    It inherits from the `SchedulingStrategy` base class and
+    implements the `request_times` method to provide the specific
+    behavior for synchronous scheduling.
+
+    :param type_: The synchronous StrategyType to schedule requests synchronously.
+    """
+
+    type_: Literal["synchronous"] = "synchronous"  # type: ignore[assignment]
+
+    @property
+    def processing_mode(self) -> Literal["sync"]:
+        """
+        The processing mode for the scheduling strategy, either 'sync' or 'async'.
+        This property determines how the worker processes are set up:
+        either to run synchronously with one request at a time or asynchronously.
+
+        :return: 'sync' for the synchronous scheduling strategy,
+            which uses a single worker process.
+        """
+        return "sync"
+
+    @property
+    def processes_limit(self) -> int:
+        """
+        The limit on the number of worker processes for the scheduling strategy.
+        It determines how many worker processes are created
+        for the scheduling strategy.
+
+        :return: 1 for the synchronous scheduling strategy to limit
+            the worker processes to one.
+        """
+        return 1
+
+    @property
+    def queued_requests_limit(self) -> int:
+        """
+        The maximum number of queued requests for the scheduling strategy.
+        It determines how many requests can be queued at one time
+        for the scheduling strategy.
+
+        :return: 1 for the synchronous scheduling strategy to limit
+            the queued requests to one that is ready to be processed.
+        """
+        return 1
+
+    @property
+    def processing_requests_limit(self) -> int:
+        """
+        The maximum number of processing requests for the scheduling strategy.
+        It determines how many requests can be processed at one time
+        for the scheduling strategy.
+
+        :return: 1 for the synchronous scheduling strategy to limit
+            the processing requests to one that is ready to be processed.
+        """
+        return 1
+
+    def request_times(self) -> Generator[float, None, None]:
+        """
+        A generator that yields time.time() so requests are sent immediately,
+        while scheduling them synchronously.
+
+        :return: A generator that yields time.time() for immediate request scheduling.
+        """
+        while True:
+            yield time.time()
+
+
+class ConcurrentStrategy(SchedulingStrategy):
+    """
+    A class representing a concurrent scheduling strategy.
+    This strategy schedules requests concurrently with the specified
+    number of streams.
+    It inherits from the `SchedulingStrategy` base class and
+    implements the `request_times` method to provide the specific
+    behavior for concurrent scheduling.
+
+    :param type_: The concurrent StrategyType to schedule requests concurrently.
+    :param streams: The number of concurrent streams to use for scheduling requests.
+        Each stream runs synchronously at the maximum rate possible.
+        This must be a positive integer.
+    """
+
+    type_: Literal["concurrent"] = "concurrent"  # type: ignore[assignment]
+    streams: int = Field(
+        description=(
+            "The number of concurrent streams to use for scheduling requests. "
+            "Each stream runs synchronously at the maximum rate possible. "
+            "This must be a positive integer."
+        ),
+        gt=0,
+    )
+
+    @property
+    def processing_mode(self) -> Literal["sync"]:
+        """
+        The processing mode for the scheduling strategy, either 'sync' or 'async'.
+        This property determines how the worker processes are set up:
+        either to run synchronously with one request at a time or asynchronously.
+
+        :return: 'sync' for the concurrent scheduling strategy,
+            which uses one worker process per stream.
+        """
+        return "sync"
+
+    @property
+    def processes_limit(self) -> int:
+        """
+        The limit on the number of worker processes for the scheduling strategy.
+        It determines how many worker processes are created
+        for the scheduling strategy.
+
+        :return: {self.streams} for the concurrent scheduling strategy to limit
+            the worker processes to the number of streams.
+        """
+        return self.streams
+
+    @property
+    def queued_requests_limit(self) -> int:
+        """
+        The maximum number of queued requests for the scheduling strategy.
+        It determines how many requests can be queued at one time
+        for the scheduling strategy.
+
+        :return: {self.streams} for the concurrent scheduling strategy to limit
+            the queued requests to the number of streams that are ready
+            to be processed.
+        """
+        return self.streams
+
+    @property
+    def processing_requests_limit(self) -> int:
+        """
+        The maximum number of processing requests for the scheduling strategy.
+        It determines how many requests can be processed at one time
+        for the scheduling strategy.
+
+        :return: {self.streams} for the concurrent scheduling strategy to limit
+            the processing requests to the number of streams that are ready
+            to be processed.
+        """
+        return self.streams
+
+    def request_times(self) -> Generator[float, None, None]:
+        """
+        A generator that yields time.time() so requests are sent
+        immediately, while scheduling them concurrently with the specified
+        number of streams.
+
+        :return: A generator that yields time.time() for immediate request scheduling.
+        """
+        while True:
+            yield time.time()
+
+
+class ThroughputStrategy(SchedulingStrategy):
+    """
+    A class representing a throughput scheduling strategy.
+    This strategy schedules as many requests asynchronously as possible,
+    at the maximum rate possible.
+    It inherits from the `SchedulingStrategy` base class and
+    implements the `request_times` method to provide the specific
+    behavior for throughput scheduling.
+
+    :param type_: The throughput StrategyType to schedule requests asynchronously.
+    """
+
+    type_: Literal["throughput"] = "throughput"  # type: ignore[assignment]
+    max_concurrency: Optional[int] = Field(
+        default=None,
+        description=(
+            "The maximum number of concurrent requests to schedule. "
+            "If set to None, the concurrency value from settings will be used. "
+            "This must be a positive integer greater than 0."
+        ),
+        gt=0,
+    )
+
+    @property
+    def processing_mode(self) -> Literal["async"]:
+        """
+        The processing mode for the scheduling strategy, either 'sync' or 'async'.
+        This property determines how the worker processes are set up:
+        either to run synchronously with one request at a time or asynchronously.
+
+        :return: 'async' for the throughput scheduling strategy,
+            which uses multiple worker processes to handle requests.
+        """
+        return "async"
+
+    @property
+    def queued_requests_limit(self) -> int:
+        """
+        The maximum number of queued requests for the scheduling strategy.
+        It determines how many requests can be queued at one time
+        for the scheduling strategy.
+
+        :return: The processing requests limit to ensure that there are enough
+            requests even for the worst case scenario where the max concurrent
+            requests are pulled at once for processing.
+        """
+        return self.processing_requests_limit
+
+    @property
+    def processing_requests_limit(self) -> int:
+        """
+        The maximum number of processing requests for the scheduling strategy.
+        It determines how many requests can be processed at one time
+        for the scheduling strategy.
+
+        :return: {self.max_concurrency} for the throughput scheduling strategy to limit
+            the processing requests to the maximum concurrency.
+            If max_concurrency is None, then the default processing requests limit
+            will be used.
+        """
+        return self.max_concurrency or super().processing_requests_limit
+
+    def request_times(self) -> Generator[float, None, None]:
+        """
+        A generator that yields the start time.time() so requests are sent
+        immediately, while scheduling as many asynchronously as possible.
+
+        :return: A generator that yields the start time.time()
+            for immediate request scheduling.
+        """
+        start_time = time.time()
+
+        while True:
+            yield start_time
+
+
+class AsyncConstantStrategy(ThroughputStrategy):
+    """
+    A class representing an asynchronous constant scheduling strategy.
+    This strategy schedules requests asynchronously at a constant request rate
+    in requests per second.
+    If initial_burst is set, it will send an initial burst of math.floor(rate)
+    requests to reach the target rate.
+    This is useful to ensure that the target rate is reached quickly
+    and then maintained.
+    It inherits from the `ThroughputStrategy` base class and
+    implements the `request_times` method to provide the specific
+    behavior for asynchronous constant scheduling.
+
+    :param type_: The constant StrategyType to schedule requests asynchronously.
+    :param rate: The rate at which to schedule requests asynchronously in
+        requests per second. This must be a positive float.
+    :param initial_burst: True to send an initial burst of requests
+        (math.floor(self.rate)) to reach the target rate.
+        False to not send an initial burst.
+    """
+
+    type_: Literal["constant"] = "constant"  # type: ignore[assignment]
+    rate: float = Field(
+        description=(
+            "The rate at which to schedule requests asynchronously in "
+            "requests per second. This must be a positive float."
+        ),
+        gt=0,
+    )
+    initial_burst: bool = Field(
+        default=True,
+        description=(
+            "True to send an initial burst of requests (math.floor(self.rate)) "
+            "to reach the target rate. False to not send an initial burst."
+        ),
+    )
+
+    def request_times(self) -> Generator[float, None, None]:
+        """
+        A generator that yields timestamps for when requests should be sent.
+        This method schedules requests asynchronously at a constant rate
+        in requests per second.
+        If initial_burst is set, it will send an initial burst of requests
+        to reach the target rate.
+        This is useful to ensure that the target rate is reached quickly
+        and then maintained.
+
+        :return: A generator that yields timestamps for request scheduling.
+        """
+        start_time = time.time()
+        constant_increment = 1.0 / self.rate
+
+        # handle the burst first to get to the desired rate
+        if self.initial_burst:
+            # send an initial burst equal to the rate
+            # to reach the target rate
+            burst_count = math.floor(self.rate)
+            for _ in range(burst_count):
+                yield start_time
+
+            start_time += constant_increment
+
+        counter = 0
+
+        # continue with the constant rate after bursting
+        while True:
+            yield start_time + constant_increment * counter
+            counter += 1
+
+
+class AsyncPoissonStrategy(ThroughputStrategy):
+    """
+    A class representing an asynchronous Poisson scheduling strategy.
+    This strategy schedules requests asynchronously at a Poisson request rate
+    in requests per second.
+    If initial_burst is set, it will send an initial burst of math.floor(rate)
+    requests to reach the target rate.
+    It inherits from the `ThroughputStrategy` base class and
+    implements the `request_times` method to provide the specific
+    behavior for asynchronous Poisson scheduling.
+
+    :param type_: The Poisson StrategyType to schedule requests asynchronously.
+    :param rate: The rate at which to schedule requests asynchronously in
+        requests per second. This must be a positive float.
+    :param initial_burst: True to send an initial burst of requests
+        (math.floor(self.rate)) to reach the target rate.
+        False to not send an initial burst.
+    """
+
+    type_: Literal["poisson"] = "poisson"  # type: ignore[assignment]
+    rate: float = Field(
+        description=(
+            "The rate at which to schedule requests asynchronously in "
+            "requests per second. This must be a positive float."
+        ),
+        gt=0,
+    )
+    initial_burst: bool = Field(
+        default=True,
+        description=(
+            "True to send an initial burst of requests (math.floor(self.rate)) "
+            "to reach the target rate. False to not send an initial burst."
+        ),
+    )
+    random_seed: int = Field(
+        default=42,
+        description="The random seed to use for the Poisson distribution.",
+    )
+
+    def request_times(self) -> Generator[float, None, None]:
+        """
+        A generator that yields timestamps for when requests should be sent.
+        This method schedules requests asynchronously at a Poisson rate
+        in requests per second.
+        The inter-arrival time between requests is exponentially distributed
+        based on the rate.
+
+        :return: A generator that yields timestamps for request scheduling.
+        """
+        start_time = time.time()
+
+        if self.initial_burst:
+            # send an initial burst equal to the rate
+            # to reach the target rate
+            burst_count = math.floor(self.rate)
+            for _ in range(burst_count):
+                yield start_time
+        else:
+            yield start_time
+
+        # set the random seed for reproducibility
+        rand = random.Random(self.random_seed)  # noqa: S311
+
+        while True:
+            inter_arrival_time = rand.expovariate(self.rate)
+            start_time += inter_arrival_time
+            yield start_time
+
+
+def strategy_display_str(strategy: Union[StrategyType, SchedulingStrategy]) -> str:
+    strategy_type = strategy if isinstance(strategy, str) else strategy.type_
+    strategy_instance = strategy if isinstance(strategy, SchedulingStrategy) else None
+
+    if strategy_type == "concurrent":
+        rate = f"@{strategy_instance.streams}" if strategy_instance else "@##"  # type: ignore[attr-defined]
+    elif strategy_type in ("constant", "poisson"):
+        rate = f"@{strategy_instance.rate:.2f}" if strategy_instance else "@#.##"  # type: ignore[attr-defined]
+    else:
+        rate = ""
+
+    return f"{strategy_type}{rate}"
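Editorial note (not part of the diff): the sketch below shows how the request_times() generators defined above might be driven. It assumes guidellm 0.2.0.dev0 is installed and that the module is importable at the path shown in the file list (guidellm.scheduler.strategy); inside the package the actual consumers are the scheduler and worker modules listed above.

from itertools import islice

from guidellm.scheduler.strategy import AsyncConstantStrategy, strategy_display_str

# 2 requests per second with the default initial burst enabled.
strategy = AsyncConstantStrategy(rate=2.0)
print(strategy_display_str(strategy))  # "constant@2.00"

# With initial_burst=True, the first math.floor(rate) timestamps coincide,
# after which consecutive timestamps are spaced 1 / rate seconds apart.
for timestamp in islice(strategy.request_times(), 5):
    print(round(timestamp, 3))
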
guidellm/scheduler/types.py
@@ -0,0 +1,7 @@
+from typing import TypeVar
+
+__all__ = ["RequestT", "ResponseT"]
+
+
+RequestT = TypeVar("RequestT")
+ResponseT = TypeVar("ResponseT")
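Editorial note (not part of the diff): RequestT and ResponseT are plain TypeVars. The hypothetical class below only illustrates how such type variables are typically used to parameterize a generic component; the package's real generic scheduler and worker classes live in the scheduler modules listed above and are not shown here.

from typing import Generic

from guidellm.scheduler.types import RequestT, ResponseT


class EchoWorker(Generic[RequestT, ResponseT]):
    """Toy illustration: a worker generic over its request and response types."""

    def handle(self, request: RequestT) -> ResponseT:
        # A real worker would forward the request to a backend and return its response.
        raise NotImplementedError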