guidellm 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of guidellm might be problematic. Click here for more details.

Files changed (69) hide show
  1. guidellm/__init__.py +38 -6
  2. guidellm/__main__.py +294 -0
  3. guidellm/backend/__init__.py +19 -6
  4. guidellm/backend/backend.py +238 -0
  5. guidellm/backend/openai.py +532 -122
  6. guidellm/backend/response.py +132 -0
  7. guidellm/benchmark/__init__.py +73 -0
  8. guidellm/benchmark/aggregator.py +760 -0
  9. guidellm/benchmark/benchmark.py +838 -0
  10. guidellm/benchmark/benchmarker.py +334 -0
  11. guidellm/benchmark/entrypoints.py +141 -0
  12. guidellm/benchmark/output.py +946 -0
  13. guidellm/benchmark/profile.py +409 -0
  14. guidellm/benchmark/progress.py +720 -0
  15. guidellm/config.py +34 -56
  16. guidellm/data/__init__.py +4 -0
  17. guidellm/data/prideandprejudice.txt.gz +0 -0
  18. guidellm/dataset/__init__.py +22 -0
  19. guidellm/dataset/creator.py +213 -0
  20. guidellm/dataset/entrypoints.py +42 -0
  21. guidellm/dataset/file.py +90 -0
  22. guidellm/dataset/hf_datasets.py +62 -0
  23. guidellm/dataset/in_memory.py +132 -0
  24. guidellm/dataset/synthetic.py +262 -0
  25. guidellm/objects/__init__.py +18 -0
  26. guidellm/objects/pydantic.py +60 -0
  27. guidellm/objects/statistics.py +947 -0
  28. guidellm/request/__init__.py +12 -10
  29. guidellm/request/loader.py +281 -0
  30. guidellm/request/request.py +79 -0
  31. guidellm/scheduler/__init__.py +51 -3
  32. guidellm/scheduler/result.py +137 -0
  33. guidellm/scheduler/scheduler.py +382 -0
  34. guidellm/scheduler/strategy.py +493 -0
  35. guidellm/scheduler/types.py +7 -0
  36. guidellm/scheduler/worker.py +511 -0
  37. guidellm/utils/__init__.py +16 -29
  38. guidellm/utils/colors.py +8 -0
  39. guidellm/utils/hf_transformers.py +35 -0
  40. guidellm/utils/random.py +43 -0
  41. guidellm/utils/text.py +118 -357
  42. {guidellm-0.1.0.dist-info → guidellm-0.2.0.dist-info}/METADATA +96 -79
  43. guidellm-0.2.0.dist-info/RECORD +48 -0
  44. {guidellm-0.1.0.dist-info → guidellm-0.2.0.dist-info}/WHEEL +1 -1
  45. guidellm-0.2.0.dist-info/entry_points.txt +2 -0
  46. guidellm/backend/base.py +0 -320
  47. guidellm/core/__init__.py +0 -24
  48. guidellm/core/distribution.py +0 -190
  49. guidellm/core/report.py +0 -321
  50. guidellm/core/request.py +0 -44
  51. guidellm/core/result.py +0 -545
  52. guidellm/core/serializable.py +0 -169
  53. guidellm/executor/__init__.py +0 -10
  54. guidellm/executor/base.py +0 -213
  55. guidellm/executor/profile_generator.py +0 -343
  56. guidellm/main.py +0 -336
  57. guidellm/request/base.py +0 -194
  58. guidellm/request/emulated.py +0 -391
  59. guidellm/request/file.py +0 -76
  60. guidellm/request/transformers.py +0 -100
  61. guidellm/scheduler/base.py +0 -374
  62. guidellm/scheduler/load_generator.py +0 -196
  63. guidellm/utils/injector.py +0 -70
  64. guidellm/utils/progress.py +0 -196
  65. guidellm/utils/transformers.py +0 -151
  66. guidellm-0.1.0.dist-info/RECORD +0 -35
  67. guidellm-0.1.0.dist-info/entry_points.txt +0 -3
  68. {guidellm-0.1.0.dist-info → guidellm-0.2.0.dist-info/licenses}/LICENSE +0 -0
  69. {guidellm-0.1.0.dist-info → guidellm-0.2.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,382 @@
1
+ import asyncio
2
+ import math
3
+ import multiprocessing
4
+ import multiprocessing.queues
5
+ import time
6
+ from collections.abc import AsyncGenerator, Iterable, Iterator
7
+ from concurrent.futures import ProcessPoolExecutor
8
+ from typing import (
9
+ Any,
10
+ Generic,
11
+ Optional,
12
+ Union,
13
+ )
14
+
15
+ from loguru import logger
16
+
17
+ from guidellm.config import settings
18
+ from guidellm.scheduler.result import (
19
+ SchedulerRequestResult,
20
+ SchedulerResult,
21
+ SchedulerRunInfo,
22
+ )
23
+ from guidellm.scheduler.strategy import SchedulingStrategy
24
+ from guidellm.scheduler.types import RequestT, ResponseT
25
+ from guidellm.scheduler.worker import (
26
+ RequestsWorker,
27
+ WorkerProcessRequest,
28
+ WorkerProcessResult,
29
+ )
30
+
31
+ __all__ = ["Scheduler"]
32
+
33
+
34
class Scheduler(Generic[RequestT, ResponseT]):
    """
    Schedule requests from a loader onto a pool of worker processes.

    The scheduler owns the lifecycle of a run: it starts the worker processes,
    feeds them timestamped requests through a shared queue according to a
    scheduling strategy, relays the workers' progress messages back to the
    caller, and shuts the workers down when the run ends.

    :param worker: The worker that resolves requests.
        Must be an instance of RequestsWorker.
    :param request_loader: An iterable that produces the requests to run.
        This can be a list, generator, or any other iterable.
    :raises ValueError: If worker is not a RequestsWorker or request_loader
        is not iterable.
    """

    def __init__(
        self,
        worker: RequestsWorker[RequestT, ResponseT],
        request_loader: Iterable[RequestT],
    ):
        if not isinstance(worker, RequestsWorker):
            raise ValueError(f"Invalid worker: {worker}")

        if not isinstance(request_loader, Iterable):
            raise ValueError(f"Invalid request_loader: {request_loader}")

        self.worker = worker
        self.request_loader = request_loader

    async def run(
        self,
        scheduling_strategy: SchedulingStrategy,
        max_number: Optional[int] = None,
        max_duration: Optional[float] = None,
    ) -> AsyncGenerator[
        Union[SchedulerResult, SchedulerRequestResult[RequestT, ResponseT]], None
    ]:
        """
        Run the scheduler and stream its progress as an async generator.

        Yields a SchedulerResult with type_ "run_start" first and one with
        type_ "run_complete" last. In between, it yields one
        SchedulerRequestResult per worker message ("request_scheduled",
        "request_start", "request_complete").

        The worker processes are always asked to stop when the generator
        finishes, including when the run fails or the consumer closes the
        generator early.

        :param scheduling_strategy: The scheduling strategy to use.
            Must be an instance of SchedulingStrategy. It supplies the request
            send times as well as how many worker processes are used and
            whether requests are processed in "sync" or "async" mode.
        :param max_number: The maximum number of requests to process.
            If None, then no limit is set and either the iterator must be
            exhaustible or the max_duration must be set.
        :param max_duration: The maximum duration for the scheduling run.
            If None, then no limit is set and either the iterator must be
            exhaustible or the max_number must be set.
        :return: An asynchronous generator of SchedulerResult and
            SchedulerRequestResult objects, each carrying the current run info.
        :raises ValueError: If any argument is invalid.
        :raises RuntimeError: If a worker process or the scheduling loop fails;
            the original error is chained as the cause.
        """
        # isinstance already rejects None, so no separate None check is needed
        if not isinstance(scheduling_strategy, SchedulingStrategy):
            raise ValueError(f"Invalid scheduling strategy: {scheduling_strategy}")

        if max_number is not None and max_number < 1:
            raise ValueError(f"Invalid max_number: {max_number}")

        if max_duration is not None and max_duration < 0:
            raise ValueError(f"Invalid max_duration: {max_duration}")

        with (
            multiprocessing.Manager() as manager,
            ProcessPoolExecutor(
                max_workers=scheduling_strategy.processes_limit
            ) as executor,
        ):
            futures, requests_queue, responses_queue = await self._start_processes(
                manager, executor, scheduling_strategy
            )
            finished_cleanly = False

            try:
                run_info, loader_iter, times_iter = self._run_setup(
                    futures, scheduling_strategy, max_number, max_duration
                )
                # becomes None once no further requests will be queued
                requests_iter: Optional[Iterator[Any]] = loader_iter

                yield SchedulerResult(
                    type_="run_start",
                    run_info=run_info,
                )

                try:
                    while True:
                        # check errors and raise them
                        for future in futures:
                            if future.done() and (err := future.exception()) is not None:
                                raise err

                        if (
                            requests_iter is None
                            and run_info.completed_requests >= run_info.created_requests
                        ):
                            # we've exhausted all requests we've wanted to run
                            # and yielded all responses
                            break

                        requests_iter = self._add_requests(
                            requests_iter,
                            times_iter,
                            requests_queue,
                            run_info,
                        )
                        await asyncio.sleep(0)  # enable requests to start

                        iter_result = self._check_result_ready(
                            responses_queue,
                            run_info,
                        )
                        if iter_result is not None:
                            yield iter_result

                        # yield control to the event loop
                        await asyncio.sleep(settings.default_async_loop_sleep)
                except Exception as err:
                    raise RuntimeError(f"Scheduler run failed: {err}") from err

                yield SchedulerResult(
                    type_="run_complete",
                    run_info=run_info,
                )
                finished_cleanly = True
            finally:
                # Without a stop signal the executor's context exit waits on
                # workers that were never told to finish, so signal them on
                # every exit path (error, early close, or normal completion).
                if finished_cleanly:
                    await self._stop_processes(futures, requests_queue)
                else:
                    await self._abort_processes(futures, requests_queue)

    async def _start_processes(
        self,
        manager,
        executor: ProcessPoolExecutor,
        scheduling_strategy: SchedulingStrategy,
    ) -> tuple[
        list[asyncio.Future],
        multiprocessing.Queue,
        multiprocessing.Queue,
    ]:
        """
        Create the shared queues and launch the worker process loops.

        :param manager: The multiprocessing manager used to create the queues.
        :param executor: The process pool the worker loops are submitted to.
        :param scheduling_strategy: Supplies the process count, the queue size,
            the per-run concurrency limit, and the processing mode.
        :return: The worker futures, the requests queue, and the responses queue.
        :raises ValueError: If the strategy's processing mode is neither
            "sync" nor "async".
        """
        await self.worker.prepare_multiprocessing()
        requests_queue = manager.Queue(
            maxsize=scheduling_strategy.queued_requests_limit
        )
        responses_queue = manager.Queue()

        processes_limit = scheduling_strategy.processes_limit
        processing_limit = scheduling_strategy.processing_requests_limit
        num_processes = min(processes_limit, processing_limit)
        # Spread the concurrency limit across processes; the first
        # `limit_remain` processes each take one extra slot.
        limit_split, limit_remain = divmod(processing_limit, processes_limit)

        futures = []
        # we are inside a coroutine, so a running loop is guaranteed
        loop = asyncio.get_running_loop()
        for id_ in range(num_processes):
            requests_limit = limit_split + 1 if id_ < limit_remain else limit_split

            if scheduling_strategy.processing_mode == "sync":
                futures.append(
                    loop.run_in_executor(
                        executor,
                        self.worker.process_loop_synchronous,
                        requests_queue,
                        responses_queue,
                        id_,
                    )
                )
            elif scheduling_strategy.processing_mode == "async":
                futures.append(
                    loop.run_in_executor(
                        executor,
                        self.worker.process_loop_asynchronous,
                        requests_queue,
                        responses_queue,
                        requests_limit,
                        id_,
                    )
                )
            else:
                raise ValueError(
                    f"Invalid processing mode: {scheduling_strategy.processing_mode} "
                    f"for strategy: {scheduling_strategy}"
                )

        await asyncio.sleep(0.1)  # give time for processes to start

        return futures, requests_queue, responses_queue

    def _run_setup(
        self,
        processes: list[asyncio.Future],
        scheduling_strategy: SchedulingStrategy,
        max_number: Optional[int],
        max_duration: Optional[float],
    ) -> tuple[SchedulerRunInfo, Iterator[Any], Iterator[float]]:
        """
        Build the run info and the request / request-time iterators for a run.

        :param processes: The worker futures; only their count is recorded.
        :param scheduling_strategy: Supplies the request send times.
        :param max_number: Optional cap on the number of requests.
        :param max_duration: Optional cap on the run duration, added to the
            current time to obtain the end time.
        :return: The run info, the requests iterator, and the times iterator.
        """
        requests_iter = iter(self.request_loader)
        start_time = time.time()
        times_iter = iter(scheduling_strategy.request_times())
        # a missing (or zero) limit is represented as infinity, never as None
        end_time = time.time() + (max_duration or math.inf)
        end_number = max_number or math.inf

        try:
            # update end number if the request loader is finite and less than max
            iter_length = len(self.request_loader)  # type: ignore[arg-type]
            if 0 < iter_length < end_number:
                end_number = iter_length
        except Exception:  # noqa: BLE001, S110
            # best effort: loaders without a usable length keep the limit as is
            pass

        # end_time is always a float here, so "no time limit" must be detected
        # as infinity; an `is None` check could never be true.
        if end_number == math.inf and math.isinf(end_time):
            logger.warning(
                "No end number or end time set, "
                "scheduler will run indefinitely until the request loader is exhausted."
            )

        info = SchedulerRunInfo(
            start_time=start_time,
            end_time=end_time,
            end_number=end_number,
            processes=len(processes),
            strategy=scheduling_strategy,
        )

        return info, requests_iter, times_iter

    def _add_requests(
        self,
        requests_iter: Optional[Iterator[Any]],
        times_iter: Iterator[float],
        requests_queue: multiprocessing.Queue,
        run_info: SchedulerRunInfo,
    ) -> Optional[Iterator[Any]]:
        """
        Queue as many new requests as the queue and per-loop budget allow.

        Updates the created and queued counters on run_info for every request
        that is queued.

        :param requests_iter: The remaining requests, or None if none are left.
        :param times_iter: The send time for each successive request.
        :param requests_queue: The queue the worker requests are put on.
        :param run_info: The run state holding the limits and counters.
        :return: The requests iterator, or None once the number limit, the
            time limit, or the end of either iterator has been reached.
        """
        if requests_iter is not None:
            try:
                added_count = 0

                while (
                    not requests_queue.full()
                    and added_count < settings.max_add_requests_per_loop
                ):
                    if run_info.created_requests >= run_info.end_number:
                        raise StopIteration

                    if (
                        request_time := next(times_iter)
                    ) >= run_info.end_time or time.time() >= run_info.end_time:
                        raise StopIteration

                    request = next(requests_iter)
                    work_req: WorkerProcessRequest[RequestT] = WorkerProcessRequest(
                        request=request,
                        start_time=request_time,
                        timeout_time=run_info.end_time,
                        queued_time=time.time(),
                    )
                    requests_queue.put(work_req)

                    run_info.created_requests += 1
                    run_info.queued_requests += 1
                    added_count += 1
            except StopIteration:
                # we've reached the limit number, limit time, or exhausted the requests
                # set to None to stop adding more and tell the loop no more requests
                requests_iter = None

        return requests_iter

    def _check_result_ready(
        self,
        responses_queue: multiprocessing.Queue,
        run_info: SchedulerRunInfo,
    ) -> Optional[SchedulerRequestResult[RequestT, ResponseT]]:
        """
        Take at most one worker message off the responses queue without blocking.

        Moves the request between the queued / scheduled / processing /
        completed counters on run_info according to the message type.

        :param responses_queue: The queue the workers report progress on.
        :param run_info: The run state whose counters are updated.
        :return: The result to yield to the caller, or None if the queue
            is currently empty.
        :raises ValueError: If the message type is not recognized.
        """
        try:
            process_response: WorkerProcessResult[RequestT, ResponseT] = (
                responses_queue.get_nowait()
            )
        except multiprocessing.queues.Empty:  # type: ignore[attr-defined]
            return None

        if process_response.type_ == "request_scheduled":
            run_info.queued_requests -= 1
            run_info.scheduled_requests += 1

            return SchedulerRequestResult(
                type_="request_scheduled",
                run_info=run_info,
                request=process_response.request,
                request_info=process_response.info,
                response=None,
            )

        if process_response.type_ == "request_start":
            run_info.scheduled_requests -= 1
            run_info.processing_requests += 1

            return SchedulerRequestResult(
                type_="request_start",
                run_info=run_info,
                request=process_response.request,
                request_info=process_response.info,
                response=None,
            )

        if process_response.type_ == "request_complete":
            run_info.processing_requests -= 1
            run_info.completed_requests += 1

            return SchedulerRequestResult(
                type_="request_complete",
                run_info=run_info,
                request=process_response.request,
                request_info=process_response.info,
                response=process_response.response,
            )

        raise ValueError(f"Invalid process response type: {process_response}")

    async def _stop_processes(
        self,
        futures: list[asyncio.Future],
        requests_queue: multiprocessing.Queue,
    ):
        """
        Stop the workers after a successful run and wait for them to exit.

        Puts one None on the requests queue per worker future as a stop
        signal, then awaits all futures. Any worker error propagates.

        :param futures: The worker futures to wait for.
        :param requests_queue: The queue the stop signals are put on.
        """
        for _ in futures:
            requests_queue.put(None)

        await asyncio.gather(*futures)

    async def _abort_processes(
        self,
        futures: list[asyncio.Future],
        requests_queue: multiprocessing.Queue,
    ) -> None:
        """
        Best-effort worker shutdown for a run that did not finish cleanly.

        Unlike _stop_processes this never blocks on the queue and never
        raises, so it cannot mask the error that ended the run.

        :param futures: The worker futures to wait for.
        :param requests_queue: The queue the stop signals are put on.
        """
        try:
            # Discard requests nobody is waiting for any more; this also
            # makes room for the stop signals in a bounded queue.
            while True:
                try:
                    requests_queue.get_nowait()
                except multiprocessing.queues.Empty:  # type: ignore[attr-defined]
                    break

            for future in futures:
                if future.done():
                    # this worker has already exited and needs no signal
                    continue
                try:
                    requests_queue.put_nowait(None)
                except multiprocessing.queues.Full:  # type: ignore[attr-defined]
                    break
        except Exception as err:  # noqa: BLE001
            logger.warning("Failed to signal scheduler workers to stop: {}", err)

        # collect worker outcomes without re-raising their errors
        await asyncio.gather(*futures, return_exceptions=True)