guidellm 0.1.0__py3-none-any.whl → 0.2.0rc20250418__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of guidellm might be problematic. Click here for more details.
- guidellm/__init__.py +38 -6
- guidellm/__main__.py +294 -0
- guidellm/backend/__init__.py +19 -6
- guidellm/backend/backend.py +238 -0
- guidellm/backend/openai.py +532 -122
- guidellm/backend/response.py +132 -0
- guidellm/benchmark/__init__.py +73 -0
- guidellm/benchmark/aggregator.py +760 -0
- guidellm/benchmark/benchmark.py +838 -0
- guidellm/benchmark/benchmarker.py +334 -0
- guidellm/benchmark/entrypoints.py +141 -0
- guidellm/benchmark/output.py +946 -0
- guidellm/benchmark/profile.py +409 -0
- guidellm/benchmark/progress.py +720 -0
- guidellm/config.py +34 -56
- guidellm/data/__init__.py +4 -0
- guidellm/data/prideandprejudice.txt.gz +0 -0
- guidellm/dataset/__init__.py +22 -0
- guidellm/dataset/creator.py +213 -0
- guidellm/dataset/entrypoints.py +42 -0
- guidellm/dataset/file.py +90 -0
- guidellm/dataset/hf_datasets.py +62 -0
- guidellm/dataset/in_memory.py +132 -0
- guidellm/dataset/synthetic.py +262 -0
- guidellm/objects/__init__.py +18 -0
- guidellm/objects/pydantic.py +60 -0
- guidellm/objects/statistics.py +947 -0
- guidellm/request/__init__.py +12 -10
- guidellm/request/loader.py +281 -0
- guidellm/request/request.py +79 -0
- guidellm/scheduler/__init__.py +51 -3
- guidellm/scheduler/result.py +137 -0
- guidellm/scheduler/scheduler.py +382 -0
- guidellm/scheduler/strategy.py +493 -0
- guidellm/scheduler/types.py +7 -0
- guidellm/scheduler/worker.py +511 -0
- guidellm/utils/__init__.py +16 -29
- guidellm/utils/colors.py +8 -0
- guidellm/utils/hf_transformers.py +35 -0
- guidellm/utils/random.py +43 -0
- guidellm/utils/text.py +118 -357
- {guidellm-0.1.0.dist-info → guidellm-0.2.0rc20250418.dist-info}/METADATA +96 -79
- guidellm-0.2.0rc20250418.dist-info/RECORD +48 -0
- {guidellm-0.1.0.dist-info → guidellm-0.2.0rc20250418.dist-info}/WHEEL +1 -1
- guidellm-0.2.0rc20250418.dist-info/entry_points.txt +2 -0
- guidellm/backend/base.py +0 -320
- guidellm/core/__init__.py +0 -24
- guidellm/core/distribution.py +0 -190
- guidellm/core/report.py +0 -321
- guidellm/core/request.py +0 -44
- guidellm/core/result.py +0 -545
- guidellm/core/serializable.py +0 -169
- guidellm/executor/__init__.py +0 -10
- guidellm/executor/base.py +0 -213
- guidellm/executor/profile_generator.py +0 -343
- guidellm/main.py +0 -336
- guidellm/request/base.py +0 -194
- guidellm/request/emulated.py +0 -391
- guidellm/request/file.py +0 -76
- guidellm/request/transformers.py +0 -100
- guidellm/scheduler/base.py +0 -374
- guidellm/scheduler/load_generator.py +0 -196
- guidellm/utils/injector.py +0 -70
- guidellm/utils/progress.py +0 -196
- guidellm/utils/transformers.py +0 -151
- guidellm-0.1.0.dist-info/RECORD +0 -35
- guidellm-0.1.0.dist-info/entry_points.txt +0 -3
- {guidellm-0.1.0.dist-info → guidellm-0.2.0rc20250418.dist-info/licenses}/LICENSE +0 -0
- {guidellm-0.1.0.dist-info → guidellm-0.2.0rc20250418.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,382 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import math
|
|
3
|
+
import multiprocessing
|
|
4
|
+
import multiprocessing.queues
|
|
5
|
+
import time
|
|
6
|
+
from collections.abc import AsyncGenerator, Iterable, Iterator
|
|
7
|
+
from concurrent.futures import ProcessPoolExecutor
|
|
8
|
+
from typing import (
|
|
9
|
+
Any,
|
|
10
|
+
Generic,
|
|
11
|
+
Optional,
|
|
12
|
+
Union,
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
from loguru import logger
|
|
16
|
+
|
|
17
|
+
from guidellm.config import settings
|
|
18
|
+
from guidellm.scheduler.result import (
|
|
19
|
+
SchedulerRequestResult,
|
|
20
|
+
SchedulerResult,
|
|
21
|
+
SchedulerRunInfo,
|
|
22
|
+
)
|
|
23
|
+
from guidellm.scheduler.strategy import SchedulingStrategy
|
|
24
|
+
from guidellm.scheduler.types import RequestT, ResponseT
|
|
25
|
+
from guidellm.scheduler.worker import (
|
|
26
|
+
RequestsWorker,
|
|
27
|
+
WorkerProcessRequest,
|
|
28
|
+
WorkerProcessResult,
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
__all__ = ["Scheduler"]
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class Scheduler(Generic[RequestT, ResponseT]):
    """
    Schedules requests from a request loader onto a pool of worker processes.

    This class is responsible for managing the lifecycle of the requests,
    including their creation, queuing, and processing.
    It uses a multiprocessing approach to handle requests concurrently
    and efficiently, based on the specified scheduling strategy.
    The Scheduler class is designed to work with a RequestsWorker,
    which is an abstract base class that defines the interface for a worker
    that can resolve requests asynchronously or synchronously.
    The Scheduler class also supports different scheduling strategies,
    including synchronous, throughput, and concurrent strategies.

    :param worker: The worker that will process the requests.
        This should be an instance of RequestsWorker.
    :param request_loader: An iterable that generates requests.
        This can be a list, generator, or any other iterable.
        The requests will be processed by the worker.
    :raises ValueError: If worker is not a RequestsWorker or request_loader
        is not iterable.
    """

    def __init__(
        self,
        worker: RequestsWorker[RequestT, ResponseT],
        request_loader: Iterable[RequestT],
    ):
        if not isinstance(worker, RequestsWorker):
            raise ValueError(f"Invalid worker: {worker}")

        if not isinstance(request_loader, Iterable):
            raise ValueError(f"Invalid request_loader: {request_loader}")

        self.worker = worker
        self.request_loader = request_loader

    async def run(
        self,
        scheduling_strategy: SchedulingStrategy,
        max_number: Optional[int] = None,
        max_duration: Optional[float] = None,
    ) -> AsyncGenerator[
        Union[SchedulerResult, SchedulerRequestResult[RequestT, ResponseT]], None
    ]:
        """
        The main method that runs the scheduler.

        This method is an asynchronous generator that yields SchedulerResult
        objects at the start and end of the run, as well as
        SchedulerRequestResult objects for each lifecycle event of each
        request (scheduled, started, completed).
        It uses multiprocessing to handle requests concurrently
        and efficiently, based on the specified scheduling strategy.

        :param scheduling_strategy: The scheduling strategy to use.
            Specifies the times at which requests will be sent as well how many
            worker processes are used and if requests are scheduled sync or async.
            This can be one of the following:
            - "synchronous": Requests are sent synchronously.
            - "throughput": Requests are sent at the maximum rate possible.
            - An instance of SchedulingStrategy.
        :param max_number: The maximum number of requests to process.
            If None, then no limit is set and either the iterator must be exhaustible
            or the max_duration must be set.
        :param max_duration: The maximum duration for the scheduling run.
            If None, then no limit is set and either the iterator must be exhaustible
            or the max_number must be set.
        :return: An asynchronous generator that yields SchedulerResult objects.
            Each SchedulerResult object contains information about the request,
            the response, and the run information.
        :raises ValueError: For an invalid strategy, max_number < 1, or
            negative max_duration.
        :raises RuntimeError: If the run fails for any reason; the original
            exception is chained as the cause.
        """
        if scheduling_strategy is None or not isinstance(
            scheduling_strategy, SchedulingStrategy
        ):
            raise ValueError(f"Invalid scheduling strategy: {scheduling_strategy}")

        if max_number is not None and max_number < 1:
            raise ValueError(f"Invalid max_number: {max_number}")

        if max_duration is not None and max_duration < 0:
            raise ValueError(f"Invalid max_duration: {max_duration}")

        with (
            multiprocessing.Manager() as manager,
            ProcessPoolExecutor(
                max_workers=scheduling_strategy.processes_limit
            ) as executor,
        ):
            requests_iter: Optional[Iterator[Any]] = None
            futures, requests_queue, responses_queue = await self._start_processes(
                manager, executor, scheduling_strategy
            )
            run_info, requests_iter, times_iter = self._run_setup(
                futures, scheduling_strategy, max_number, max_duration
            )
            yield SchedulerResult(
                type_="run_start",
                run_info=run_info,
            )

            try:
                while True:
                    # surface worker-process failures immediately instead of
                    # silently stalling on an empty responses queue
                    for future in futures:
                        if future.done() and (err := future.exception()) is not None:
                            raise err

                    if (
                        requests_iter is None
                        and run_info.completed_requests >= run_info.created_requests
                    ):
                        # we've exhausted all requests we've wanted to run
                        # and yielded all responses
                        break

                    requests_iter = self._add_requests(
                        requests_iter,
                        times_iter,
                        requests_queue,
                        run_info,
                    )
                    await asyncio.sleep(0)  # enable requests to start

                    iter_result = self._check_result_ready(
                        responses_queue,
                        run_info,
                    )
                    if iter_result is not None:
                        yield iter_result

                    # yield control to the event loop
                    await asyncio.sleep(settings.default_async_loop_sleep)
            except Exception as err:
                raise RuntimeError(f"Scheduler run failed: {err}") from err

            yield SchedulerResult(
                type_="run_complete",
                run_info=run_info,
            )

            await self._stop_processes(futures, requests_queue)

    async def _start_processes(
        self,
        manager,
        executor: ProcessPoolExecutor,
        scheduling_strategy: SchedulingStrategy,
    ) -> tuple[
        list[asyncio.Future],
        multiprocessing.Queue,
        multiprocessing.Queue,
    ]:
        """
        Create the shared queues and launch one worker loop per process.

        :param manager: A multiprocessing.Manager used to create the shared
            request/response queues.
        :param executor: The process pool the worker loops are submitted to.
        :param scheduling_strategy: Supplies the process and request limits
            and the processing mode ("sync" or "async").
        :return: A tuple of (worker futures, requests queue, responses queue).
        :raises ValueError: If the strategy reports an unknown processing mode.
        """
        await self.worker.prepare_multiprocessing()
        requests_queue = manager.Queue(
            maxsize=scheduling_strategy.queued_requests_limit
        )
        responses_queue = manager.Queue()

        num_processes = min(
            scheduling_strategy.processes_limit,
            scheduling_strategy.processing_requests_limit,
        )
        # distribute the concurrent-request budget as evenly as possible
        # across processes; the first `requests_limit_remain` processes
        # each receive one extra slot
        requests_limit_split = (
            scheduling_strategy.processing_requests_limit
            // scheduling_strategy.processes_limit
        )
        requests_limit_remain = (
            scheduling_strategy.processing_requests_limit
            % scheduling_strategy.processes_limit
        )
        process_requests_limits = (
            requests_limit_split + 1
            if i < requests_limit_remain
            else requests_limit_split
            for i in range(num_processes)
        )

        futures = []
        loop = asyncio.get_event_loop()
        for id_, requests_limit in zip(range(num_processes), process_requests_limits):
            if scheduling_strategy.processing_mode == "sync":
                futures.append(
                    loop.run_in_executor(
                        executor,
                        self.worker.process_loop_synchronous,
                        requests_queue,
                        responses_queue,
                        id_,
                    )
                )
            elif scheduling_strategy.processing_mode == "async":
                futures.append(
                    loop.run_in_executor(
                        executor,
                        self.worker.process_loop_asynchronous,
                        requests_queue,
                        responses_queue,
                        requests_limit,
                        id_,
                    )
                )
            else:
                raise ValueError(
                    f"Invalid processing mode: {scheduling_strategy.processing_mode} "
                    f"for strategy: {scheduling_strategy}"
                )

        await asyncio.sleep(0.1)  # give time for processes to start

        return futures, requests_queue, responses_queue

    def _run_setup(
        self,
        processes: list[asyncio.Future],
        scheduling_strategy: SchedulingStrategy,
        max_number: Optional[int],
        max_duration: Optional[float],
    ) -> tuple[SchedulerRunInfo, Iterator[Any], Iterator[float]]:
        """
        Build the run bookkeeping object and the request/time iterators.

        :param processes: The worker futures (only their count is recorded).
        :param scheduling_strategy: Strategy providing the request-time stream.
        :param max_number: Optional cap on the number of requests.
        :param max_duration: Optional cap on the run duration in seconds.
        :return: A tuple of (run info, requests iterator, request-times iterator).
        """
        requests_iter = iter(self.request_loader)
        start_time = time.time()
        times_iter = iter(scheduling_strategy.request_times())
        end_time = time.time() + (max_duration or math.inf)
        end_number = max_number or math.inf

        try:
            # update end number if the request loader is finite and less than max
            iter_length = len(self.request_loader)  # type: ignore[arg-type]
            if 0 < iter_length < end_number:
                end_number = iter_length
        except Exception:  # noqa: BLE001, S110
            # request loader has no usable __len__; treat it as unbounded
            pass

        # NOTE: end_time is always a float (math.inf when max_duration is
        # None), so the unbounded-run warning must key off max_duration itself
        if end_number == math.inf and max_duration is None:
            logger.warning(
                "No end number or end time set, "
                "scheduler will run indefinitely until the request loader is exhausted."
            )

        info = SchedulerRunInfo(
            start_time=start_time,
            end_time=end_time,
            end_number=end_number,
            processes=len(processes),
            strategy=scheduling_strategy,
        )

        return info, requests_iter, times_iter

    def _add_requests(
        self,
        requests_iter: Optional[Iterator[Any]],
        times_iter: Iterator[float],
        requests_queue: multiprocessing.Queue,
        run_info: SchedulerRunInfo,
    ) -> Optional[Iterator[Any]]:
        """
        Enqueue up to `settings.max_add_requests_per_loop` pending requests.

        Stops early when the queue is full, the run's number or time limit is
        reached, or the request iterator is exhausted.

        :param requests_iter: The current request iterator, or None if
            request creation has already finished.
        :param times_iter: Iterator of scheduled start times for requests.
        :param requests_queue: Queue feeding the worker processes.
        :param run_info: Run bookkeeping; created/queued counters are updated.
        :return: The (possibly same) request iterator, or None once no more
            requests should be created.
        """
        if requests_iter is not None:
            try:
                added_count = 0

                while (
                    not requests_queue.full()
                    and added_count < settings.max_add_requests_per_loop
                ):
                    if run_info.created_requests >= run_info.end_number:
                        raise StopIteration

                    if (
                        request_time := next(times_iter)
                    ) >= run_info.end_time or time.time() >= run_info.end_time:
                        raise StopIteration

                    request = next(requests_iter)
                    work_req: WorkerProcessRequest[RequestT] = WorkerProcessRequest(
                        request=request,
                        start_time=request_time,
                        timeout_time=run_info.end_time,
                        queued_time=time.time(),
                    )
                    requests_queue.put(work_req)

                    run_info.created_requests += 1
                    run_info.queued_requests += 1
                    added_count += 1
            except StopIteration:
                # we've reached the limit number, limit time, or exhausted the requests
                # set to None to stop adding more and tell the loop no more requests
                requests_iter = None

        return requests_iter

    def _check_result_ready(
        self,
        responses_queue: multiprocessing.Queue,
        run_info: SchedulerRunInfo,
    ) -> Optional[SchedulerRequestResult[RequestT, ResponseT]]:
        """
        Poll the responses queue and convert one worker message, if present.

        Updates the run_info counters for the request's lifecycle transition.

        :param responses_queue: Queue the worker processes publish results to.
        :param run_info: Run bookkeeping; lifecycle counters are updated.
        :return: A SchedulerRequestResult for the dequeued message, or None
            if the queue was empty.
        :raises ValueError: If the worker message has an unknown type.
        """
        try:
            process_response: WorkerProcessResult[RequestT, ResponseT] = (
                responses_queue.get_nowait()
            )
        # multiprocessing.queues re-exports queue.Empty, which is what
        # manager-queue proxies raise on get_nowait()
        except multiprocessing.queues.Empty:  # type: ignore[attr-defined]
            return None

        if process_response.type_ == "request_scheduled":
            run_info.queued_requests -= 1
            run_info.scheduled_requests += 1

            return SchedulerRequestResult(
                type_="request_scheduled",
                run_info=run_info,
                request=process_response.request,
                request_info=process_response.info,
                response=None,
            )

        if process_response.type_ == "request_start":
            run_info.scheduled_requests -= 1
            run_info.processing_requests += 1

            return SchedulerRequestResult(
                type_="request_start",
                run_info=run_info,
                request=process_response.request,
                request_info=process_response.info,
                response=None,
            )

        if process_response.type_ == "request_complete":
            run_info.processing_requests -= 1
            run_info.completed_requests += 1

            return SchedulerRequestResult(
                type_="request_complete",
                run_info=run_info,
                request=process_response.request,
                request_info=process_response.info,
                response=process_response.response,
            )

        raise ValueError(f"Invalid process response type: {process_response}")

    async def _stop_processes(
        self,
        futures: list[asyncio.Future],
        requests_queue: multiprocessing.Queue,
    ):
        """
        Signal each worker loop to exit and wait for all of them to finish.

        :param futures: The worker futures returned by _start_processes.
        :param requests_queue: Queue to push one None sentinel per worker.
        """
        # one None sentinel per worker tells its process loop to shut down
        for _ in futures:
            requests_queue.put(None)

        await asyncio.gather(*futures)
|