guidellm 0.3.0rc20250429__py3-none-any.whl → 0.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of guidellm might be problematic. Click here for more details.
- guidellm/__init__.py +8 -13
- guidellm/__main__.py +290 -69
- guidellm/backend/__init__.py +6 -6
- guidellm/backend/backend.py +25 -4
- guidellm/backend/openai.py +153 -30
- guidellm/backend/response.py +6 -2
- guidellm/benchmark/__init__.py +16 -22
- guidellm/benchmark/aggregator.py +3 -3
- guidellm/benchmark/benchmark.py +11 -12
- guidellm/benchmark/benchmarker.py +2 -2
- guidellm/benchmark/entrypoints.py +34 -10
- guidellm/benchmark/output.py +59 -8
- guidellm/benchmark/profile.py +4 -4
- guidellm/benchmark/progress.py +2 -2
- guidellm/benchmark/scenario.py +104 -0
- guidellm/benchmark/scenarios/__init__.py +0 -0
- guidellm/config.py +32 -7
- guidellm/dataset/__init__.py +4 -4
- guidellm/dataset/creator.py +1 -1
- guidellm/dataset/synthetic.py +36 -11
- guidellm/logger.py +8 -4
- guidellm/objects/__init__.py +2 -2
- guidellm/objects/pydantic.py +30 -1
- guidellm/objects/statistics.py +20 -14
- guidellm/preprocess/__init__.py +3 -0
- guidellm/preprocess/dataset.py +374 -0
- guidellm/presentation/__init__.py +28 -0
- guidellm/presentation/builder.py +27 -0
- guidellm/presentation/data_models.py +232 -0
- guidellm/presentation/injector.py +66 -0
- guidellm/request/__init__.py +6 -3
- guidellm/request/loader.py +5 -5
- guidellm/{scheduler → request}/types.py +4 -1
- guidellm/scheduler/__init__.py +10 -15
- guidellm/scheduler/queues.py +25 -0
- guidellm/scheduler/result.py +21 -3
- guidellm/scheduler/scheduler.py +68 -60
- guidellm/scheduler/strategy.py +26 -24
- guidellm/scheduler/worker.py +64 -103
- guidellm/utils/__init__.py +17 -5
- guidellm/utils/cli.py +62 -0
- guidellm/utils/default_group.py +105 -0
- guidellm/utils/dict.py +23 -0
- guidellm/utils/hf_datasets.py +36 -0
- guidellm/utils/random.py +1 -1
- guidellm/utils/text.py +14 -15
- guidellm/version.py +6 -0
- guidellm-0.3.1.dist-info/METADATA +329 -0
- guidellm-0.3.1.dist-info/RECORD +62 -0
- {guidellm-0.3.0rc20250429.dist-info → guidellm-0.3.1.dist-info}/WHEEL +1 -1
- guidellm-0.3.0rc20250429.dist-info/METADATA +0 -453
- guidellm-0.3.0rc20250429.dist-info/RECORD +0 -48
- {guidellm-0.3.0rc20250429.dist-info → guidellm-0.3.1.dist-info}/entry_points.txt +0 -0
- {guidellm-0.3.0rc20250429.dist-info → guidellm-0.3.1.dist-info}/licenses/LICENSE +0 -0
- {guidellm-0.3.0rc20250429.dist-info → guidellm-0.3.1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
import re
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
from typing import Union
|
|
4
|
+
|
|
5
|
+
from loguru import logger
|
|
6
|
+
|
|
7
|
+
from guidellm.config import settings
|
|
8
|
+
from guidellm.utils.text import load_text
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def create_report(js_data: dict, output_path: Union[str, Path]) -> Path:
    """
    Creates a report from the dictionary and saves it to the output path.

    The HTML template is loaded from ``settings.report_generation.source``,
    the placeholders are filled in via ``inject_data``, and the result is
    written to ``output_path`` (parent directories are created as needed).

    :param js_data: dict mapping placeholder strings to the data to inject
    :type js_data: dict
    :param output_path: the file to save the report to.
    :type output_path: Union[str, Path]
    :return: the path to the saved report
    :rtype: Path
    """
    if not isinstance(output_path, Path):
        output_path = Path(output_path)

    html_content = load_text(settings.report_generation.source)
    report_content = inject_data(js_data, html_content)

    output_path.parent.mkdir(parents=True, exist_ok=True)
    # Write with an explicit encoding so the report does not depend on the
    # platform's locale default (which can fail on non-ASCII benchmark data).
    output_path.write_text(report_content, encoding="utf-8")
    return output_path
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def inject_data(
    js_data: dict,
    html: str,
) -> str:
    """
    Injects the json data into the HTML,
    replacing placeholders only within the <head> section.

    The opening ``<head ...>`` tag (including any attributes and its original
    casing) and everything outside the head are left untouched. If no head
    section is found, a warning is logged and the HTML is returned unchanged.

    :param js_data: mapping of placeholder string to the replacement text
    :type js_data: dict
    :param html: the html to inject the data into
    :type html: str
    :return: the html with the json data injected
    :rtype: str
    """
    # \b keeps tags such as <header> from being mistaken for <head>.
    head_match = re.search(
        r"<head\b[^>]*>(.*?)</head>", html, re.DOTALL | re.IGNORECASE
    )
    if not head_match:
        logger.warning("<head> section missing, returning original HTML.")
        return html

    head_content = head_match.group(1)

    # Replace placeholders only inside the <head> content
    for placeholder, script in js_data.items():
        head_content = head_content.replace(placeholder, script)

    # Splice only the inner content back in so the original opening and
    # closing tags (attributes, casing) are preserved verbatim.
    content_start, content_end = head_match.span(1)
    return html[:content_start] + head_content + html[content_end:]
|
guidellm/request/__init__.py
CHANGED
|
@@ -5,11 +5,14 @@ from .loader import (
|
|
|
5
5
|
RequestLoaderDescription,
|
|
6
6
|
)
|
|
7
7
|
from .request import GenerationRequest
|
|
8
|
+
from .types import RequestT, ResponseT
|
|
8
9
|
|
|
9
10
|
__all__ = [
|
|
11
|
+
"GenerationRequest",
|
|
12
|
+
"GenerativeRequestLoader",
|
|
13
|
+
"GenerativeRequestLoaderDescription",
|
|
10
14
|
"RequestLoader",
|
|
11
15
|
"RequestLoaderDescription",
|
|
12
|
-
"
|
|
13
|
-
"
|
|
14
|
-
"GenerationRequest",
|
|
16
|
+
"RequestT",
|
|
17
|
+
"ResponseT",
|
|
15
18
|
]
|
guidellm/request/loader.py
CHANGED
|
@@ -17,10 +17,10 @@ from guidellm.objects import StandardBaseModel
|
|
|
17
17
|
from guidellm.request.request import GenerationRequest
|
|
18
18
|
|
|
19
19
|
__all__ = [
|
|
20
|
-
"RequestLoaderDescription",
|
|
21
|
-
"RequestLoader",
|
|
22
|
-
"GenerativeRequestLoaderDescription",
|
|
23
20
|
"GenerativeRequestLoader",
|
|
21
|
+
"GenerativeRequestLoaderDescription",
|
|
22
|
+
"RequestLoader",
|
|
23
|
+
"RequestLoaderDescription",
|
|
24
24
|
]
|
|
25
25
|
|
|
26
26
|
|
|
@@ -30,10 +30,10 @@ class RequestLoaderDescription(StandardBaseModel):
|
|
|
30
30
|
|
|
31
31
|
class RequestLoader(Iterable):
|
|
32
32
|
@abstractmethod
|
|
33
|
-
def __iter__(self): ...
|
|
33
|
+
def __iter__(self) -> Iterator: ...
|
|
34
34
|
|
|
35
35
|
@abstractmethod
|
|
36
|
-
def __len__(self): ...
|
|
36
|
+
def __len__(self) -> int: ...
|
|
37
37
|
|
|
38
38
|
@property
|
|
39
39
|
@abstractmethod
|
guidellm/scheduler/__init__.py
CHANGED
|
@@ -15,38 +15,33 @@ from .strategy import (
|
|
|
15
15
|
ThroughputStrategy,
|
|
16
16
|
strategy_display_str,
|
|
17
17
|
)
|
|
18
|
-
from .types import RequestT, ResponseT
|
|
19
18
|
from .worker import (
|
|
20
19
|
GenerativeRequestsWorker,
|
|
21
20
|
GenerativeRequestsWorkerDescription,
|
|
22
21
|
RequestsWorker,
|
|
23
22
|
ResolveStatus,
|
|
24
23
|
WorkerDescription,
|
|
25
|
-
WorkerProcessRequest,
|
|
26
24
|
WorkerProcessResult,
|
|
27
25
|
)
|
|
28
26
|
|
|
29
27
|
__all__ = [
|
|
28
|
+
"AsyncConstantStrategy",
|
|
29
|
+
"AsyncPoissonStrategy",
|
|
30
|
+
"ConcurrentStrategy",
|
|
31
|
+
"GenerativeRequestsWorker",
|
|
32
|
+
"GenerativeRequestsWorkerDescription",
|
|
33
|
+
"RequestsWorker",
|
|
34
|
+
"ResolveStatus",
|
|
35
|
+
"Scheduler",
|
|
30
36
|
"SchedulerRequestInfo",
|
|
31
37
|
"SchedulerRequestResult",
|
|
32
38
|
"SchedulerResult",
|
|
33
39
|
"SchedulerRunInfo",
|
|
34
|
-
"Scheduler",
|
|
35
|
-
"AsyncConstantStrategy",
|
|
36
|
-
"AsyncPoissonStrategy",
|
|
37
|
-
"ConcurrentStrategy",
|
|
38
40
|
"SchedulingStrategy",
|
|
39
41
|
"StrategyType",
|
|
40
42
|
"SynchronousStrategy",
|
|
41
43
|
"ThroughputStrategy",
|
|
42
|
-
"strategy_display_str",
|
|
43
|
-
"RequestT",
|
|
44
|
-
"ResponseT",
|
|
45
|
-
"WorkerProcessRequest",
|
|
46
|
-
"WorkerProcessResult",
|
|
47
|
-
"ResolveStatus",
|
|
48
44
|
"WorkerDescription",
|
|
49
|
-
"
|
|
50
|
-
"
|
|
51
|
-
"GenerativeRequestsWorker",
|
|
45
|
+
"WorkerProcessResult",
|
|
46
|
+
"strategy_display_str",
|
|
52
47
|
]
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Helper module for importing the correct queue types.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from queue import Empty as QueueEmpty
|
|
7
|
+
from queue import Full as QueueFull
|
|
8
|
+
from queue import Queue
|
|
9
|
+
from typing import Generic
|
|
10
|
+
|
|
11
|
+
from guidellm.request.types import RequestT, ResponseT
|
|
12
|
+
from guidellm.scheduler.result import WorkerProcessRequest, WorkerProcessResult
|
|
13
|
+
|
|
14
|
+
__all__ = [
|
|
15
|
+
"MPQueues",
|
|
16
|
+
"Queue",
|
|
17
|
+
"QueueEmpty",
|
|
18
|
+
"QueueFull",
|
|
19
|
+
]
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@dataclass
class MPQueues(Generic[RequestT, ResponseT]):
    """
    Pair of queues shared between the scheduler and its worker processes.

    The scheduler builds one instance from manager-backed queues and hands it
    to every worker loop it starts.
    """

    # Work items put on by the scheduler; created with a maxsize equal to the
    # strategy's processing_requests_limit.
    requests: Queue[WorkerProcessRequest[RequestT, ResponseT]]
    # Status updates and results that the scheduler polls with get_nowait().
    responses: Queue[WorkerProcessResult[RequestT, ResponseT]]
|
guidellm/scheduler/result.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
1
2
|
from typing import (
|
|
2
3
|
Generic,
|
|
3
4
|
Literal,
|
|
@@ -5,14 +6,16 @@ from typing import (
|
|
|
5
6
|
)
|
|
6
7
|
|
|
7
8
|
from guidellm.objects import StandardBaseModel
|
|
9
|
+
from guidellm.request.types import RequestT, ResponseT
|
|
8
10
|
from guidellm.scheduler.strategy import SchedulingStrategy
|
|
9
|
-
from guidellm.scheduler.types import RequestT, ResponseT
|
|
10
11
|
|
|
11
12
|
__all__ = [
|
|
12
|
-
"
|
|
13
|
+
"SchedulerRequestInfo",
|
|
13
14
|
"SchedulerRequestResult",
|
|
15
|
+
"SchedulerResult",
|
|
14
16
|
"SchedulerRunInfo",
|
|
15
|
-
"
|
|
17
|
+
"WorkerProcessRequest",
|
|
18
|
+
"WorkerProcessResult",
|
|
16
19
|
]
|
|
17
20
|
|
|
18
21
|
|
|
@@ -135,3 +138,18 @@ class SchedulerRequestResult(
|
|
|
135
138
|
request: RequestT
|
|
136
139
|
request_info: SchedulerRequestInfo
|
|
137
140
|
response: Optional[ResponseT] = None
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
@dataclass
class WorkerProcessRequest(Generic[RequestT, ResponseT]):
    """
    A single unit of work placed on the requests queue by the scheduler.
    """

    # The request payload taken from the request loader.
    request: RequestT
    # Timestamp after which the request should no longer be processed; the
    # scheduler sets this to the run's end time.
    timeout_time: float
    # time.time() at the moment the scheduler built this work item for queuing.
    queued_time: float
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
@dataclass
class WorkerProcessResult(Generic[RequestT, ResponseT]):
    """
    A status update read by the scheduler from the responses queue.
    """

    # Which stage of the request's lifecycle this update reports.
    type_: Literal["request_scheduled", "request_start", "request_complete"]
    # The request this update refers to.
    request: RequestT
    # The response, if one is available; may be None.
    response: Optional[ResponseT]
    # Scheduler bookkeeping information associated with the request.
    info: SchedulerRequestInfo
|
guidellm/scheduler/scheduler.py
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
import asyncio
|
|
2
2
|
import math
|
|
3
|
-
import multiprocessing
|
|
4
|
-
import multiprocessing.queues
|
|
5
3
|
import time
|
|
6
4
|
from collections.abc import AsyncGenerator, Iterable, Iterator
|
|
7
5
|
from concurrent.futures import ProcessPoolExecutor
|
|
6
|
+
from multiprocessing import Manager
|
|
7
|
+
from threading import Event
|
|
8
8
|
from typing import (
|
|
9
9
|
Any,
|
|
10
10
|
Generic,
|
|
@@ -15,17 +15,21 @@ from typing import (
|
|
|
15
15
|
from loguru import logger
|
|
16
16
|
|
|
17
17
|
from guidellm.config import settings
|
|
18
|
+
from guidellm.request.types import (
|
|
19
|
+
RequestT,
|
|
20
|
+
ResponseT,
|
|
21
|
+
)
|
|
22
|
+
from guidellm.scheduler.queues import MPQueues, Queue, QueueEmpty
|
|
18
23
|
from guidellm.scheduler.result import (
|
|
19
24
|
SchedulerRequestResult,
|
|
20
25
|
SchedulerResult,
|
|
21
26
|
SchedulerRunInfo,
|
|
27
|
+
WorkerProcessRequest,
|
|
28
|
+
WorkerProcessResult,
|
|
22
29
|
)
|
|
23
30
|
from guidellm.scheduler.strategy import SchedulingStrategy
|
|
24
|
-
from guidellm.scheduler.types import RequestT, ResponseT
|
|
25
31
|
from guidellm.scheduler.worker import (
|
|
26
32
|
RequestsWorker,
|
|
27
|
-
WorkerProcessRequest,
|
|
28
|
-
WorkerProcessResult,
|
|
29
33
|
)
|
|
30
34
|
|
|
31
35
|
__all__ = ["Scheduler"]
|
|
@@ -114,18 +118,31 @@ class Scheduler(Generic[RequestT, ResponseT]):
|
|
|
114
118
|
raise ValueError(f"Invalid max_duration: {max_duration}")
|
|
115
119
|
|
|
116
120
|
with (
|
|
117
|
-
|
|
121
|
+
Manager() as manager,
|
|
118
122
|
ProcessPoolExecutor(
|
|
119
123
|
max_workers=scheduling_strategy.processes_limit
|
|
120
124
|
) as executor,
|
|
121
125
|
):
|
|
122
126
|
requests_iter: Optional[Iterator[Any]] = None
|
|
123
|
-
|
|
127
|
+
scheduling_strategy.start_time = (
|
|
128
|
+
time.time() + settings.scheduler_start_delay
|
|
129
|
+
) # Add a small delay to allow processes to start
|
|
130
|
+
futures, queues, stop_event = await self._start_processes(
|
|
124
131
|
manager, executor, scheduling_strategy
|
|
125
132
|
)
|
|
126
133
|
run_info, requests_iter, times_iter = self._run_setup(
|
|
127
134
|
futures, scheduling_strategy, max_number, max_duration
|
|
128
135
|
)
|
|
136
|
+
|
|
137
|
+
# Add some initial requests to the queue
|
|
138
|
+
requests_iter = self._add_requests(
|
|
139
|
+
requests_iter,
|
|
140
|
+
queues.requests,
|
|
141
|
+
times_iter,
|
|
142
|
+
run_info,
|
|
143
|
+
)
|
|
144
|
+
# Wait for the test to start
|
|
145
|
+
await asyncio.sleep(time.time() - scheduling_strategy.start_time)
|
|
129
146
|
yield SchedulerResult(
|
|
130
147
|
type_="run_start",
|
|
131
148
|
run_info=run_info,
|
|
@@ -140,7 +157,11 @@ class Scheduler(Generic[RequestT, ResponseT]):
|
|
|
140
157
|
|
|
141
158
|
if (
|
|
142
159
|
requests_iter is None
|
|
143
|
-
and run_info.
|
|
160
|
+
and run_info.processing_requests <= 0
|
|
161
|
+
and ( # Ensure we have met one of the end conditions
|
|
162
|
+
time.time() >= run_info.end_time
|
|
163
|
+
or run_info.completed_requests >= run_info.end_number
|
|
164
|
+
)
|
|
144
165
|
):
|
|
145
166
|
# we've exhausted all requests we've wanted to run
|
|
146
167
|
# and yielded all responses
|
|
@@ -148,14 +169,14 @@ class Scheduler(Generic[RequestT, ResponseT]):
|
|
|
148
169
|
|
|
149
170
|
requests_iter = self._add_requests(
|
|
150
171
|
requests_iter,
|
|
172
|
+
queues.requests,
|
|
151
173
|
times_iter,
|
|
152
|
-
requests_queue,
|
|
153
174
|
run_info,
|
|
154
175
|
)
|
|
155
176
|
await asyncio.sleep(0) # enable requests to start
|
|
156
177
|
|
|
157
178
|
iter_result = self._check_result_ready(
|
|
158
|
-
|
|
179
|
+
queues.responses,
|
|
159
180
|
run_info,
|
|
160
181
|
)
|
|
161
182
|
if iter_result is not None:
|
|
@@ -171,7 +192,7 @@ class Scheduler(Generic[RequestT, ResponseT]):
|
|
|
171
192
|
run_info=run_info,
|
|
172
193
|
)
|
|
173
194
|
|
|
174
|
-
await self._stop_processes(futures,
|
|
195
|
+
await self._stop_processes(futures, stop_event)
|
|
175
196
|
|
|
176
197
|
async def _start_processes(
|
|
177
198
|
self,
|
|
@@ -180,14 +201,17 @@ class Scheduler(Generic[RequestT, ResponseT]):
|
|
|
180
201
|
scheduling_strategy: SchedulingStrategy,
|
|
181
202
|
) -> tuple[
|
|
182
203
|
list[asyncio.Future],
|
|
183
|
-
|
|
184
|
-
|
|
204
|
+
MPQueues[RequestT, ResponseT],
|
|
205
|
+
Event,
|
|
185
206
|
]:
|
|
186
207
|
await self.worker.prepare_multiprocessing()
|
|
187
|
-
|
|
188
|
-
|
|
208
|
+
queues: MPQueues[RequestT, ResponseT] = MPQueues(
|
|
209
|
+
requests=manager.Queue(
|
|
210
|
+
maxsize=scheduling_strategy.processing_requests_limit
|
|
211
|
+
),
|
|
212
|
+
responses=manager.Queue(),
|
|
189
213
|
)
|
|
190
|
-
|
|
214
|
+
stop_event = manager.Event()
|
|
191
215
|
|
|
192
216
|
num_processes = min(
|
|
193
217
|
scheduling_strategy.processes_limit,
|
|
@@ -212,36 +236,22 @@ class Scheduler(Generic[RequestT, ResponseT]):
|
|
|
212
236
|
futures = []
|
|
213
237
|
loop = asyncio.get_event_loop()
|
|
214
238
|
for id_, requests_limit in zip(process_ids, process_requests_limits):
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
elif scheduling_strategy.processing_mode == "async":
|
|
226
|
-
futures.append(
|
|
227
|
-
loop.run_in_executor(
|
|
228
|
-
executor,
|
|
229
|
-
self.worker.process_loop_asynchronous,
|
|
230
|
-
requests_queue,
|
|
231
|
-
responses_queue,
|
|
232
|
-
requests_limit,
|
|
233
|
-
id_,
|
|
234
|
-
)
|
|
235
|
-
)
|
|
236
|
-
else:
|
|
237
|
-
raise ValueError(
|
|
238
|
-
f"Invalid processing mode: {scheduling_strategy.processing_mode} "
|
|
239
|
-
f"for strategy: {scheduling_strategy}"
|
|
239
|
+
futures.append(
|
|
240
|
+
loop.run_in_executor(
|
|
241
|
+
executor,
|
|
242
|
+
self.worker.process_loop_asynchronous,
|
|
243
|
+
queues,
|
|
244
|
+
scheduling_strategy,
|
|
245
|
+
stop_event,
|
|
246
|
+
requests_limit,
|
|
247
|
+
id_,
|
|
248
|
+
num_processes,
|
|
240
249
|
)
|
|
250
|
+
)
|
|
241
251
|
|
|
242
252
|
await asyncio.sleep(0.1) # give time for processes to start
|
|
243
253
|
|
|
244
|
-
return futures,
|
|
254
|
+
return futures, queues, stop_event
|
|
245
255
|
|
|
246
256
|
def _run_setup(
|
|
247
257
|
self,
|
|
@@ -251,9 +261,8 @@ class Scheduler(Generic[RequestT, ResponseT]):
|
|
|
251
261
|
max_duration: Optional[float],
|
|
252
262
|
) -> tuple[SchedulerRunInfo, Iterator[Any], Iterator[float]]:
|
|
253
263
|
requests_iter = iter(self.request_loader)
|
|
254
|
-
start_time = time.time()
|
|
255
264
|
times_iter = iter(scheduling_strategy.request_times())
|
|
256
|
-
end_time =
|
|
265
|
+
end_time = scheduling_strategy.start_time + (max_duration or math.inf)
|
|
257
266
|
end_number = max_number or math.inf
|
|
258
267
|
|
|
259
268
|
try:
|
|
@@ -271,7 +280,7 @@ class Scheduler(Generic[RequestT, ResponseT]):
|
|
|
271
280
|
)
|
|
272
281
|
|
|
273
282
|
info = SchedulerRunInfo(
|
|
274
|
-
start_time=start_time,
|
|
283
|
+
start_time=scheduling_strategy.start_time,
|
|
275
284
|
end_time=end_time,
|
|
276
285
|
end_number=end_number,
|
|
277
286
|
processes=len(processes),
|
|
@@ -283,30 +292,29 @@ class Scheduler(Generic[RequestT, ResponseT]):
|
|
|
283
292
|
def _add_requests(
|
|
284
293
|
self,
|
|
285
294
|
requests_iter: Optional[Iterator[Any]],
|
|
295
|
+
requests_queue: Queue[WorkerProcessRequest[RequestT, ResponseT]],
|
|
286
296
|
times_iter: Iterator[float],
|
|
287
|
-
requests_queue: multiprocessing.Queue,
|
|
288
297
|
run_info: SchedulerRunInfo,
|
|
289
298
|
) -> Optional[Iterator[Any]]:
|
|
290
299
|
if requests_iter is not None:
|
|
291
300
|
try:
|
|
292
301
|
added_count = 0
|
|
293
302
|
|
|
294
|
-
while (
|
|
295
|
-
|
|
296
|
-
|
|
303
|
+
while not requests_queue.full() and added_count < (
|
|
304
|
+
run_info.strategy.queued_requests_limit
|
|
305
|
+
or settings.min_queued_requests
|
|
297
306
|
):
|
|
298
307
|
if run_info.created_requests >= run_info.end_number:
|
|
299
308
|
raise StopIteration
|
|
300
309
|
|
|
301
310
|
if (
|
|
302
|
-
|
|
303
|
-
|
|
311
|
+
next(times_iter) >= run_info.end_time
|
|
312
|
+
or time.time() >= run_info.end_time
|
|
313
|
+
):
|
|
304
314
|
raise StopIteration
|
|
305
315
|
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
request=request,
|
|
309
|
-
start_time=request_time,
|
|
316
|
+
work_req = WorkerProcessRequest[RequestT, ResponseT](
|
|
317
|
+
request=next(requests_iter),
|
|
310
318
|
timeout_time=run_info.end_time,
|
|
311
319
|
queued_time=time.time(),
|
|
312
320
|
)
|
|
@@ -324,14 +332,14 @@ class Scheduler(Generic[RequestT, ResponseT]):
|
|
|
324
332
|
|
|
325
333
|
def _check_result_ready(
|
|
326
334
|
self,
|
|
327
|
-
responses_queue:
|
|
335
|
+
responses_queue: Queue[WorkerProcessResult[RequestT, ResponseT]],
|
|
328
336
|
run_info: SchedulerRunInfo,
|
|
329
337
|
) -> Optional[SchedulerRequestResult[RequestT, ResponseT]]:
|
|
330
338
|
try:
|
|
331
339
|
process_response: WorkerProcessResult[RequestT, ResponseT] = (
|
|
332
340
|
responses_queue.get_nowait()
|
|
333
341
|
)
|
|
334
|
-
except
|
|
342
|
+
except QueueEmpty:
|
|
335
343
|
return None
|
|
336
344
|
|
|
337
345
|
if process_response.type_ == "request_scheduled":
|
|
@@ -374,9 +382,9 @@ class Scheduler(Generic[RequestT, ResponseT]):
|
|
|
374
382
|
async def _stop_processes(
|
|
375
383
|
self,
|
|
376
384
|
futures: list[asyncio.Future],
|
|
377
|
-
|
|
385
|
+
stop_event: Event,
|
|
378
386
|
):
|
|
379
|
-
|
|
380
|
-
|
|
387
|
+
# stop all processes
|
|
388
|
+
stop_event.set()
|
|
381
389
|
|
|
382
390
|
await asyncio.gather(*futures)
|
guidellm/scheduler/strategy.py
CHANGED
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
import math
|
|
2
|
-
import os
|
|
3
2
|
import random
|
|
4
3
|
import time
|
|
5
4
|
from collections.abc import Generator
|
|
@@ -15,13 +14,13 @@ from guidellm.config import settings
|
|
|
15
14
|
from guidellm.objects import StandardBaseModel
|
|
16
15
|
|
|
17
16
|
__all__ = [
|
|
18
|
-
"
|
|
17
|
+
"AsyncConstantStrategy",
|
|
18
|
+
"AsyncPoissonStrategy",
|
|
19
|
+
"ConcurrentStrategy",
|
|
19
20
|
"SchedulingStrategy",
|
|
21
|
+
"StrategyType",
|
|
20
22
|
"SynchronousStrategy",
|
|
21
|
-
"ConcurrentStrategy",
|
|
22
23
|
"ThroughputStrategy",
|
|
23
|
-
"AsyncConstantStrategy",
|
|
24
|
-
"AsyncPoissonStrategy",
|
|
25
24
|
"strategy_display_str",
|
|
26
25
|
]
|
|
27
26
|
|
|
@@ -44,6 +43,10 @@ class SchedulingStrategy(StandardBaseModel):
|
|
|
44
43
|
type_: Literal["strategy"] = Field(
|
|
45
44
|
description="The type of scheduling strategy schedule requests with.",
|
|
46
45
|
)
|
|
46
|
+
start_time: float = Field(
|
|
47
|
+
default_factory=time.time,
|
|
48
|
+
description="The start time for the scheduling strategy.",
|
|
49
|
+
)
|
|
47
50
|
|
|
48
51
|
@property
|
|
49
52
|
def processing_mode(self) -> Literal["sync", "async"]:
|
|
@@ -68,9 +71,7 @@ class SchedulingStrategy(StandardBaseModel):
|
|
|
68
71
|
|
|
69
72
|
:return: The number of processes for the scheduling strategy.
|
|
70
73
|
"""
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
return min(max(1, cpu_cores - 1), settings.max_worker_processes)
|
|
74
|
+
return settings.max_worker_processes
|
|
74
75
|
|
|
75
76
|
@property
|
|
76
77
|
def queued_requests_limit(self) -> Optional[int]:
|
|
@@ -175,8 +176,9 @@ class SynchronousStrategy(SchedulingStrategy):
|
|
|
175
176
|
|
|
176
177
|
:return: A generator that yields time.time() for immediate request scheduling.
|
|
177
178
|
"""
|
|
179
|
+
init_time = self.start_time
|
|
178
180
|
while True:
|
|
179
|
-
yield time.time()
|
|
181
|
+
yield max(init_time, time.time())
|
|
180
182
|
|
|
181
183
|
|
|
182
184
|
class ConcurrentStrategy(SchedulingStrategy):
|
|
@@ -226,7 +228,8 @@ class ConcurrentStrategy(SchedulingStrategy):
|
|
|
226
228
|
:return: {self.streams} for the concurrent scheduling strategy to limit
|
|
227
229
|
the worker processes to the number of streams.
|
|
228
230
|
"""
|
|
229
|
-
|
|
231
|
+
|
|
232
|
+
return min(self.streams, settings.max_worker_processes)
|
|
230
233
|
|
|
231
234
|
@property
|
|
232
235
|
def queued_requests_limit(self) -> int:
|
|
@@ -260,8 +263,9 @@ class ConcurrentStrategy(SchedulingStrategy):
|
|
|
260
263
|
|
|
261
264
|
:return: A generator that yields time.time() for immediate request scheduling.
|
|
262
265
|
"""
|
|
266
|
+
init_time = self.start_time
|
|
263
267
|
while True:
|
|
264
|
-
yield time.time()
|
|
268
|
+
yield max(init_time, time.time())
|
|
265
269
|
|
|
266
270
|
|
|
267
271
|
class ThroughputStrategy(SchedulingStrategy):
|
|
@@ -334,10 +338,9 @@ class ThroughputStrategy(SchedulingStrategy):
|
|
|
334
338
|
:return: A generator that yields the start time.time()
|
|
335
339
|
for immediate request scheduling.
|
|
336
340
|
"""
|
|
337
|
-
|
|
338
|
-
|
|
341
|
+
init_time = self.start_time
|
|
339
342
|
while True:
|
|
340
|
-
yield
|
|
343
|
+
yield init_time
|
|
341
344
|
|
|
342
345
|
|
|
343
346
|
class AsyncConstantStrategy(ThroughputStrategy):
|
|
@@ -389,24 +392,24 @@ class AsyncConstantStrategy(ThroughputStrategy):
|
|
|
389
392
|
|
|
390
393
|
:return: A generator that yields timestamps for request scheduling.
|
|
391
394
|
"""
|
|
392
|
-
start_time = time.time()
|
|
393
395
|
constant_increment = 1.0 / self.rate
|
|
394
396
|
|
|
397
|
+
init_time = self.start_time
|
|
395
398
|
# handle bursts first to get to the desired rate
|
|
396
399
|
if self.initial_burst is not None:
|
|
397
400
|
# send an initial burst equal to the rate
|
|
398
401
|
# to reach the target rate
|
|
399
402
|
burst_count = math.floor(self.rate)
|
|
400
403
|
for _ in range(burst_count):
|
|
401
|
-
yield
|
|
404
|
+
yield init_time
|
|
402
405
|
|
|
403
|
-
|
|
406
|
+
init_time += constant_increment
|
|
404
407
|
|
|
405
408
|
counter = 0
|
|
406
409
|
|
|
407
410
|
# continue with constant rate after bursting
|
|
408
411
|
while True:
|
|
409
|
-
yield
|
|
412
|
+
yield init_time + constant_increment * counter
|
|
410
413
|
counter += 1
|
|
411
414
|
|
|
412
415
|
|
|
@@ -459,24 +462,23 @@ class AsyncPoissonStrategy(ThroughputStrategy):
|
|
|
459
462
|
|
|
460
463
|
:return: A generator that yields timestamps for request scheduling.
|
|
461
464
|
"""
|
|
462
|
-
|
|
463
|
-
|
|
465
|
+
init_time = self.start_time
|
|
464
466
|
if self.initial_burst is not None:
|
|
465
467
|
# send an initial burst equal to the rate
|
|
466
468
|
# to reach the target rate
|
|
467
469
|
burst_count = math.floor(self.rate)
|
|
468
470
|
for _ in range(burst_count):
|
|
469
|
-
yield
|
|
471
|
+
yield init_time
|
|
470
472
|
else:
|
|
471
|
-
yield
|
|
473
|
+
yield init_time
|
|
472
474
|
|
|
473
475
|
# set the random seed for reproducibility
|
|
474
476
|
rand = random.Random(self.random_seed) # noqa: S311
|
|
475
477
|
|
|
476
478
|
while True:
|
|
477
479
|
inter_arrival_time = rand.expovariate(self.rate)
|
|
478
|
-
|
|
479
|
-
yield
|
|
480
|
+
init_time += inter_arrival_time
|
|
481
|
+
yield init_time
|
|
480
482
|
|
|
481
483
|
|
|
482
484
|
def strategy_display_str(strategy: Union[StrategyType, SchedulingStrategy]) -> str:
|