guidellm 0.4.0a21__py3-none-any.whl → 0.4.0a155__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of guidellm might be problematic. Click here for more details.
- guidellm/__init__.py +5 -2
- guidellm/__main__.py +451 -252
- guidellm/backends/__init__.py +33 -0
- guidellm/backends/backend.py +110 -0
- guidellm/backends/openai.py +355 -0
- guidellm/backends/response_handlers.py +455 -0
- guidellm/benchmark/__init__.py +53 -39
- guidellm/benchmark/benchmarker.py +148 -317
- guidellm/benchmark/entrypoints.py +466 -128
- guidellm/benchmark/output.py +517 -771
- guidellm/benchmark/profile.py +580 -280
- guidellm/benchmark/progress.py +568 -549
- guidellm/benchmark/scenarios/__init__.py +40 -0
- guidellm/benchmark/scenarios/chat.json +6 -0
- guidellm/benchmark/scenarios/rag.json +6 -0
- guidellm/benchmark/schemas.py +2085 -0
- guidellm/data/__init__.py +28 -4
- guidellm/data/collators.py +16 -0
- guidellm/data/deserializers/__init__.py +53 -0
- guidellm/data/deserializers/deserializer.py +109 -0
- guidellm/data/deserializers/file.py +222 -0
- guidellm/data/deserializers/huggingface.py +94 -0
- guidellm/data/deserializers/memory.py +192 -0
- guidellm/data/deserializers/synthetic.py +346 -0
- guidellm/data/loaders.py +145 -0
- guidellm/data/preprocessors/__init__.py +25 -0
- guidellm/data/preprocessors/formatters.py +412 -0
- guidellm/data/preprocessors/mappers.py +198 -0
- guidellm/data/preprocessors/preprocessor.py +29 -0
- guidellm/data/processor.py +30 -0
- guidellm/data/schemas.py +13 -0
- guidellm/data/utils/__init__.py +10 -0
- guidellm/data/utils/dataset.py +94 -0
- guidellm/data/utils/functions.py +18 -0
- guidellm/extras/__init__.py +4 -0
- guidellm/extras/audio.py +215 -0
- guidellm/extras/vision.py +242 -0
- guidellm/logger.py +2 -2
- guidellm/mock_server/__init__.py +8 -0
- guidellm/mock_server/config.py +84 -0
- guidellm/mock_server/handlers/__init__.py +17 -0
- guidellm/mock_server/handlers/chat_completions.py +280 -0
- guidellm/mock_server/handlers/completions.py +280 -0
- guidellm/mock_server/handlers/tokenizer.py +142 -0
- guidellm/mock_server/models.py +510 -0
- guidellm/mock_server/server.py +168 -0
- guidellm/mock_server/utils.py +302 -0
- guidellm/preprocess/dataset.py +23 -26
- guidellm/presentation/builder.py +2 -2
- guidellm/presentation/data_models.py +25 -21
- guidellm/presentation/injector.py +2 -3
- guidellm/scheduler/__init__.py +65 -26
- guidellm/scheduler/constraints.py +1035 -0
- guidellm/scheduler/environments.py +252 -0
- guidellm/scheduler/scheduler.py +140 -368
- guidellm/scheduler/schemas.py +272 -0
- guidellm/scheduler/strategies.py +519 -0
- guidellm/scheduler/worker.py +391 -420
- guidellm/scheduler/worker_group.py +707 -0
- guidellm/schemas/__init__.py +31 -0
- guidellm/schemas/info.py +159 -0
- guidellm/schemas/request.py +216 -0
- guidellm/schemas/response.py +119 -0
- guidellm/schemas/stats.py +228 -0
- guidellm/{config.py → settings.py} +32 -21
- guidellm/utils/__init__.py +95 -8
- guidellm/utils/auto_importer.py +98 -0
- guidellm/utils/cli.py +46 -2
- guidellm/utils/console.py +183 -0
- guidellm/utils/encoding.py +778 -0
- guidellm/utils/functions.py +134 -0
- guidellm/utils/hf_datasets.py +1 -2
- guidellm/utils/hf_transformers.py +4 -4
- guidellm/utils/imports.py +9 -0
- guidellm/utils/messaging.py +1118 -0
- guidellm/utils/mixins.py +115 -0
- guidellm/utils/pydantic_utils.py +411 -0
- guidellm/utils/random.py +3 -4
- guidellm/utils/registry.py +220 -0
- guidellm/utils/singleton.py +133 -0
- guidellm/{objects → utils}/statistics.py +341 -247
- guidellm/utils/synchronous.py +159 -0
- guidellm/utils/text.py +163 -50
- guidellm/utils/typing.py +41 -0
- guidellm/version.py +1 -1
- {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a155.dist-info}/METADATA +33 -10
- guidellm-0.4.0a155.dist-info/RECORD +96 -0
- guidellm/backend/__init__.py +0 -23
- guidellm/backend/backend.py +0 -259
- guidellm/backend/openai.py +0 -705
- guidellm/backend/response.py +0 -136
- guidellm/benchmark/aggregator.py +0 -760
- guidellm/benchmark/benchmark.py +0 -837
- guidellm/benchmark/scenario.py +0 -104
- guidellm/data/prideandprejudice.txt.gz +0 -0
- guidellm/dataset/__init__.py +0 -22
- guidellm/dataset/creator.py +0 -213
- guidellm/dataset/entrypoints.py +0 -42
- guidellm/dataset/file.py +0 -92
- guidellm/dataset/hf_datasets.py +0 -62
- guidellm/dataset/in_memory.py +0 -132
- guidellm/dataset/synthetic.py +0 -287
- guidellm/objects/__init__.py +0 -18
- guidellm/objects/pydantic.py +0 -89
- guidellm/request/__init__.py +0 -18
- guidellm/request/loader.py +0 -284
- guidellm/request/request.py +0 -79
- guidellm/request/types.py +0 -10
- guidellm/scheduler/queues.py +0 -25
- guidellm/scheduler/result.py +0 -155
- guidellm/scheduler/strategy.py +0 -495
- guidellm-0.4.0a21.dist-info/RECORD +0 -62
- {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a155.dist-info}/WHEEL +0 -0
- {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a155.dist-info}/entry_points.txt +0 -0
- {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a155.dist-info}/licenses/LICENSE +0 -0
- {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a155.dist-info}/top_level.txt +0 -0
guidellm/scheduler/scheduler.py
CHANGED
|
@@ -1,390 +1,162 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
Any,
|
|
10
|
-
Generic,
|
|
11
|
-
Optional,
|
|
12
|
-
Union,
|
|
13
|
-
)
|
|
1
|
+
"""
|
|
2
|
+
Thread-safe singleton scheduler for distributed benchmarking workload coordination.
|
|
3
|
+
|
|
4
|
+
Orchestrates request processing across worker processes with distributed timing
|
|
5
|
+
coordination, constraint enforcement, and result aggregation. Integrates with
|
|
6
|
+
backends, environments, and strategies to enable scalable load testing across
|
|
7
|
+
various scenarios including LLM inference benchmarking.
|
|
8
|
+
"""
|
|
14
9
|
|
|
15
|
-
from
|
|
10
|
+
from __future__ import annotations
|
|
16
11
|
|
|
17
|
-
from
|
|
18
|
-
from
|
|
12
|
+
from collections.abc import AsyncIterator, Iterable
|
|
13
|
+
from typing import Any, Generic
|
|
14
|
+
|
|
15
|
+
from guidellm.scheduler.constraints import Constraint, ConstraintsInitializerFactory
|
|
16
|
+
from guidellm.scheduler.environments import Environment, NonDistributedEnvironment
|
|
17
|
+
from guidellm.scheduler.schemas import (
|
|
18
|
+
BackendInterface,
|
|
19
|
+
MultiTurnRequestT,
|
|
19
20
|
RequestT,
|
|
20
21
|
ResponseT,
|
|
22
|
+
SchedulerState,
|
|
21
23
|
)
|
|
22
|
-
from guidellm.scheduler.
|
|
23
|
-
from guidellm.scheduler.
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
SchedulerRunInfo,
|
|
27
|
-
WorkerProcessRequest,
|
|
28
|
-
WorkerProcessResult,
|
|
29
|
-
)
|
|
30
|
-
from guidellm.scheduler.strategy import SchedulingStrategy
|
|
31
|
-
from guidellm.scheduler.worker import (
|
|
32
|
-
RequestsWorker,
|
|
33
|
-
)
|
|
24
|
+
from guidellm.scheduler.strategies import SchedulingStrategy
|
|
25
|
+
from guidellm.scheduler.worker_group import WorkerProcessGroup
|
|
26
|
+
from guidellm.schemas import RequestInfo
|
|
27
|
+
from guidellm.utils.singleton import ThreadSafeSingletonMixin
|
|
34
28
|
|
|
35
29
|
__all__ = ["Scheduler"]
|
|
36
30
|
|
|
37
31
|
|
|
38
|
-
class Scheduler(
|
|
32
|
+
class Scheduler(
|
|
33
|
+
Generic[RequestT, ResponseT],
|
|
34
|
+
ThreadSafeSingletonMixin,
|
|
35
|
+
):
|
|
39
36
|
"""
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
37
|
+
Thread-safe singleton scheduler for distributed benchmarking workload coordination.
|
|
38
|
+
|
|
39
|
+
Orchestrates request processing across worker processes with distributed timing
|
|
40
|
+
coordination, constraint enforcement, and result aggregation. Abstracts the
|
|
41
|
+
complexity of multi-process coordination, environment synchronization, and
|
|
42
|
+
resource management while providing a unified interface for executing benchmarking
|
|
43
|
+
operations. Implements singleton pattern to ensure consistent execution state.
|
|
44
|
+
|
|
45
|
+
Example:
|
|
46
|
+
::
|
|
47
|
+
from guidellm.scheduler import Scheduler
|
|
48
|
+
from guidellm.scheduler import NonDistributedEnvironment, SynchronousStrategy
|
|
49
|
+
|
|
50
|
+
scheduler = Scheduler()
|
|
51
|
+
async for response, request, info, state in scheduler.run(
|
|
52
|
+
requests=request_list,
|
|
53
|
+
backend=backend,
|
|
54
|
+
strategy=SynchronousStrategy(), startup_duration=0.0,
|
|
55
|
+
env=NonDistributedEnvironment(),
|
|
56
|
+
max_requests=1000
|
|
57
|
+
):
|
|
58
|
+
print(f"Processed: {request}")
|
|
56
59
|
"""
|
|
57
60
|
|
|
58
|
-
def __init__(
|
|
59
|
-
self,
|
|
60
|
-
worker: RequestsWorker[RequestT, ResponseT],
|
|
61
|
-
request_loader: Iterable[RequestT],
|
|
62
|
-
):
|
|
63
|
-
if not isinstance(worker, RequestsWorker):
|
|
64
|
-
raise ValueError(f"Invalid worker: {worker}")
|
|
65
|
-
|
|
66
|
-
if not isinstance(request_loader, Iterable):
|
|
67
|
-
raise ValueError(f"Invalid request_loader: {request_loader}")
|
|
68
|
-
|
|
69
|
-
self.worker = worker
|
|
70
|
-
self.request_loader = request_loader
|
|
71
|
-
|
|
72
61
|
async def run(
|
|
73
62
|
self,
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
63
|
+
requests: Iterable[RequestT | MultiTurnRequestT[RequestT]],
|
|
64
|
+
backend: BackendInterface[RequestT, ResponseT],
|
|
65
|
+
strategy: SchedulingStrategy,
|
|
66
|
+
startup_duration: float,
|
|
67
|
+
env: Environment[RequestT, ResponseT] | None,
|
|
68
|
+
**constraints: Any | dict[str, Any] | Constraint,
|
|
69
|
+
) -> AsyncIterator[
|
|
70
|
+
tuple[
|
|
71
|
+
ResponseT | None,
|
|
72
|
+
RequestT,
|
|
73
|
+
RequestInfo,
|
|
74
|
+
SchedulerState,
|
|
75
|
+
]
|
|
79
76
|
]:
|
|
80
77
|
"""
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
:param
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
If None, then no limit is set and either the iterator must be exhaustible
|
|
104
|
-
or the max_number must be set.
|
|
105
|
-
:return: An asynchronous generator that yields SchedulerResult objects.
|
|
106
|
-
Each SchedulerResult object contains information about the request,
|
|
107
|
-
the response, and the run information.
|
|
78
|
+
Execute distributed request processing with coordinated timing and constraints.
|
|
79
|
+
|
|
80
|
+
Orchestrates the complete benchmarking workflow across worker processes with
|
|
81
|
+
environment synchronization, constraint enforcement, and error handling. Manages
|
|
82
|
+
resource lifecycle from initialization through cleanup while yielding real-time
|
|
83
|
+
processing updates for monitoring and aggregation.
|
|
84
|
+
|
|
85
|
+
:param requests: Request collection to process, supporting single requests or
|
|
86
|
+
multi-turn sequences with optional inter-request delays
|
|
87
|
+
:param backend: Backend interface for request processing and response generation
|
|
88
|
+
:param strategy: Scheduling strategy controlling request timing and distribution
|
|
89
|
+
:param startup_duration: Duration in seconds for requests to ramp up
|
|
90
|
+
:param env: Environment interface for distributed coordination and
|
|
91
|
+
synchronization. Defaults to NonDistributedEnvironment if None
|
|
92
|
+
:param constraints: Runtime constraints for execution control (max_requests,
|
|
93
|
+
max_duration, max_error_rate, etc.) as primitives, dictionaries, or
|
|
94
|
+
constraint instances
|
|
95
|
+
:yields: Request updates as (response, request, request_info, scheduler_state)
|
|
96
|
+
tuples. Each request generates three ordered updates: queued, in_progress,
|
|
97
|
+
completed | errored | cancelled
|
|
98
|
+
:raises Exception: Worker process errors, environment synchronization failures,
|
|
99
|
+
or constraint evaluation errors are propagated after cleanup
|
|
108
100
|
"""
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
raise ValueError(f"Invalid scheduling strategy: {scheduling_strategy}")
|
|
113
|
-
|
|
114
|
-
if max_number is not None and max_number < 1:
|
|
115
|
-
raise ValueError(f"Invalid max_number: {max_number}")
|
|
101
|
+
with self.thread_lock:
|
|
102
|
+
if env is None:
|
|
103
|
+
env = NonDistributedEnvironment[RequestT, ResponseT]()
|
|
116
104
|
|
|
117
|
-
|
|
118
|
-
raise ValueError(f"Invalid max_duration: {max_duration}")
|
|
119
|
-
|
|
120
|
-
with (
|
|
121
|
-
Manager() as manager,
|
|
122
|
-
ProcessPoolExecutor(
|
|
123
|
-
max_workers=scheduling_strategy.processes_limit
|
|
124
|
-
) as executor,
|
|
125
|
-
):
|
|
126
|
-
requests_iter: Optional[Iterator[Any]] = None
|
|
127
|
-
scheduling_strategy.start_time = (
|
|
128
|
-
time.time() + settings.scheduler_start_delay
|
|
129
|
-
) # Add a small delay to allow processes to start
|
|
130
|
-
futures, queues, stop_event = await self._start_processes(
|
|
131
|
-
manager, executor, scheduling_strategy
|
|
132
|
-
)
|
|
133
|
-
run_info, requests_iter, times_iter = self._run_setup(
|
|
134
|
-
futures, scheduling_strategy, max_number, max_duration
|
|
135
|
-
)
|
|
136
|
-
|
|
137
|
-
# Add some initial requests to the queue
|
|
138
|
-
requests_iter = self._add_requests(
|
|
139
|
-
requests_iter,
|
|
140
|
-
queues.requests,
|
|
141
|
-
times_iter,
|
|
142
|
-
run_info,
|
|
143
|
-
)
|
|
144
|
-
# Wait for the test to start
|
|
145
|
-
await asyncio.sleep(time.time() - scheduling_strategy.start_time)
|
|
146
|
-
yield SchedulerResult(
|
|
147
|
-
type_="run_start",
|
|
148
|
-
run_info=run_info,
|
|
149
|
-
)
|
|
105
|
+
worker_group: WorkerProcessGroup[RequestT, ResponseT] | None = None
|
|
150
106
|
|
|
107
|
+
# Any error raised during the run (local or remote) is caught,
|
|
108
|
+
# reported to the environment, and then re-raised;
|
|
109
|
+
# worker cleanup always runs before the error propagates.
|
|
151
110
|
try:
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
if future.done() and (err := future.exception()) is not None:
|
|
156
|
-
raise err
|
|
157
|
-
|
|
158
|
-
if (
|
|
159
|
-
requests_iter is None
|
|
160
|
-
and run_info.processing_requests <= 0
|
|
161
|
-
and ( # Ensure we have met one of the end conditions
|
|
162
|
-
time.time() >= run_info.end_time
|
|
163
|
-
or run_info.completed_requests >= run_info.end_number
|
|
164
|
-
)
|
|
165
|
-
):
|
|
166
|
-
# we've exhausted all requests we've wanted to run
|
|
167
|
-
# and yielded all responses
|
|
168
|
-
break
|
|
169
|
-
|
|
170
|
-
requests_iter = self._add_requests(
|
|
171
|
-
requests_iter,
|
|
172
|
-
queues.requests,
|
|
173
|
-
times_iter,
|
|
174
|
-
run_info,
|
|
175
|
-
)
|
|
176
|
-
await asyncio.sleep(0) # enable requests to start
|
|
177
|
-
|
|
178
|
-
iter_result = self._check_result_ready(
|
|
179
|
-
queues.responses,
|
|
180
|
-
run_info,
|
|
181
|
-
)
|
|
182
|
-
if iter_result is not None:
|
|
183
|
-
yield iter_result
|
|
184
|
-
|
|
185
|
-
# yield control to the event loop
|
|
186
|
-
await asyncio.sleep(settings.default_async_loop_sleep)
|
|
187
|
-
except Exception as err:
|
|
188
|
-
raise RuntimeError(f"Scheduler run failed: {err}") from err
|
|
189
|
-
|
|
190
|
-
yield SchedulerResult(
|
|
191
|
-
type_="run_complete",
|
|
192
|
-
run_info=run_info,
|
|
193
|
-
)
|
|
194
|
-
|
|
195
|
-
await self._stop_processes(futures, stop_event)
|
|
196
|
-
|
|
197
|
-
async def _start_processes(
|
|
198
|
-
self,
|
|
199
|
-
manager,
|
|
200
|
-
executor: ProcessPoolExecutor,
|
|
201
|
-
scheduling_strategy: SchedulingStrategy,
|
|
202
|
-
) -> tuple[
|
|
203
|
-
list[asyncio.Future],
|
|
204
|
-
MPQueues[RequestT, ResponseT],
|
|
205
|
-
Event,
|
|
206
|
-
]:
|
|
207
|
-
await self.worker.prepare_multiprocessing()
|
|
208
|
-
queues: MPQueues[RequestT, ResponseT] = MPQueues(
|
|
209
|
-
requests=manager.Queue(
|
|
210
|
-
maxsize=scheduling_strategy.processing_requests_limit
|
|
211
|
-
),
|
|
212
|
-
responses=manager.Queue(),
|
|
213
|
-
)
|
|
214
|
-
stop_event = manager.Event()
|
|
215
|
-
|
|
216
|
-
num_processes = min(
|
|
217
|
-
scheduling_strategy.processes_limit,
|
|
218
|
-
scheduling_strategy.processing_requests_limit,
|
|
219
|
-
)
|
|
220
|
-
requests_limit_split = (
|
|
221
|
-
scheduling_strategy.processing_requests_limit
|
|
222
|
-
// scheduling_strategy.processes_limit
|
|
223
|
-
)
|
|
224
|
-
requests_limit_remain = (
|
|
225
|
-
scheduling_strategy.processing_requests_limit
|
|
226
|
-
% scheduling_strategy.processes_limit
|
|
227
|
-
)
|
|
228
|
-
process_ids = (id_ for id_ in range(num_processes))
|
|
229
|
-
process_requests_limits = (
|
|
230
|
-
requests_limit_split + 1
|
|
231
|
-
if i < requests_limit_remain
|
|
232
|
-
else requests_limit_split
|
|
233
|
-
for i in range(num_processes)
|
|
234
|
-
)
|
|
235
|
-
|
|
236
|
-
futures = []
|
|
237
|
-
loop = asyncio.get_event_loop()
|
|
238
|
-
for id_, requests_limit in zip(process_ids, process_requests_limits):
|
|
239
|
-
futures.append(
|
|
240
|
-
loop.run_in_executor(
|
|
241
|
-
executor,
|
|
242
|
-
self.worker.process_loop_asynchronous,
|
|
243
|
-
queues,
|
|
244
|
-
scheduling_strategy,
|
|
245
|
-
stop_event,
|
|
246
|
-
requests_limit,
|
|
247
|
-
id_,
|
|
248
|
-
num_processes,
|
|
111
|
+
# Setup local run parameters, sync with the environment
|
|
112
|
+
resolved_constraints = (
|
|
113
|
+
ConstraintsInitializerFactory.resolve_constraints(constraints)
|
|
249
114
|
)
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
logger.warning(
|
|
278
|
-
"No end number or end time set, "
|
|
279
|
-
"scheduler will run indefinitely until the request loader is exhausted."
|
|
280
|
-
)
|
|
281
|
-
|
|
282
|
-
info = SchedulerRunInfo(
|
|
283
|
-
start_time=scheduling_strategy.start_time,
|
|
284
|
-
end_time=end_time,
|
|
285
|
-
end_number=end_number,
|
|
286
|
-
processes=len(processes),
|
|
287
|
-
strategy=scheduling_strategy,
|
|
288
|
-
)
|
|
289
|
-
|
|
290
|
-
return info, requests_iter, times_iter
|
|
291
|
-
|
|
292
|
-
def _add_requests(
|
|
293
|
-
self,
|
|
294
|
-
requests_iter: Optional[Iterator[Any]],
|
|
295
|
-
requests_queue: Queue[WorkerProcessRequest[RequestT, ResponseT]],
|
|
296
|
-
times_iter: Iterator[float],
|
|
297
|
-
run_info: SchedulerRunInfo,
|
|
298
|
-
) -> Optional[Iterator[Any]]:
|
|
299
|
-
if requests_iter is not None:
|
|
300
|
-
try:
|
|
301
|
-
added_count = 0
|
|
302
|
-
|
|
303
|
-
while not requests_queue.full() and added_count < (
|
|
304
|
-
run_info.strategy.queued_requests_limit
|
|
305
|
-
or settings.min_queued_requests
|
|
306
|
-
):
|
|
307
|
-
if run_info.created_requests >= run_info.end_number:
|
|
308
|
-
raise StopIteration
|
|
309
|
-
|
|
310
|
-
if (
|
|
311
|
-
next(times_iter) >= run_info.end_time
|
|
312
|
-
or time.time() >= run_info.end_time
|
|
313
|
-
):
|
|
314
|
-
raise StopIteration
|
|
315
|
-
|
|
316
|
-
work_req = WorkerProcessRequest[RequestT, ResponseT](
|
|
317
|
-
request=next(requests_iter),
|
|
318
|
-
timeout_time=run_info.end_time,
|
|
319
|
-
queued_time=time.time(),
|
|
115
|
+
(
|
|
116
|
+
local_requests,
|
|
117
|
+
local_strategy,
|
|
118
|
+
local_constraints,
|
|
119
|
+
) = await env.sync_run_params(requests, strategy, resolved_constraints)
|
|
120
|
+
|
|
121
|
+
# Setup the worker group, sync start with the environment
|
|
122
|
+
worker_group = WorkerProcessGroup[RequestT, ResponseT](
|
|
123
|
+
requests=local_requests,
|
|
124
|
+
backend=backend,
|
|
125
|
+
strategy=local_strategy,
|
|
126
|
+
startup_duration=startup_duration,
|
|
127
|
+
**local_constraints,
|
|
128
|
+
)
|
|
129
|
+
await worker_group.create_processes()
|
|
130
|
+
local_start_time = await env.sync_run_start()
|
|
131
|
+
await worker_group.start(local_start_time)
|
|
132
|
+
|
|
133
|
+
# Yield any updates and sync with the environment for non-local updates
|
|
134
|
+
async for (
|
|
135
|
+
response,
|
|
136
|
+
request,
|
|
137
|
+
request_info,
|
|
138
|
+
state,
|
|
139
|
+
) in worker_group.request_updates():
|
|
140
|
+
await env.update_run_iteration(
|
|
141
|
+
response, request, request_info, state
|
|
320
142
|
)
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
)
|
|
342
|
-
except QueueEmpty:
|
|
343
|
-
return None
|
|
344
|
-
|
|
345
|
-
if process_response.type_ == "request_scheduled":
|
|
346
|
-
run_info.queued_requests -= 1
|
|
347
|
-
run_info.scheduled_requests += 1
|
|
348
|
-
|
|
349
|
-
return SchedulerRequestResult(
|
|
350
|
-
type_="request_scheduled",
|
|
351
|
-
run_info=run_info,
|
|
352
|
-
request=process_response.request,
|
|
353
|
-
request_info=process_response.info,
|
|
354
|
-
response=None,
|
|
355
|
-
)
|
|
356
|
-
|
|
357
|
-
if process_response.type_ == "request_start":
|
|
358
|
-
run_info.scheduled_requests -= 1
|
|
359
|
-
run_info.processing_requests += 1
|
|
360
|
-
|
|
361
|
-
return SchedulerRequestResult(
|
|
362
|
-
type_="request_start",
|
|
363
|
-
run_info=run_info,
|
|
364
|
-
request=process_response.request,
|
|
365
|
-
request_info=process_response.info,
|
|
366
|
-
response=None,
|
|
367
|
-
)
|
|
368
|
-
|
|
369
|
-
if process_response.type_ == "request_complete":
|
|
370
|
-
run_info.processing_requests -= 1
|
|
371
|
-
run_info.completed_requests += 1
|
|
372
|
-
|
|
373
|
-
return SchedulerRequestResult(
|
|
374
|
-
type_="request_complete",
|
|
375
|
-
run_info=run_info,
|
|
376
|
-
request=process_response.request,
|
|
377
|
-
request_info=process_response.info,
|
|
378
|
-
response=process_response.response,
|
|
379
|
-
)
|
|
380
|
-
raise ValueError(f"Invalid process response type: {process_response}")
|
|
381
|
-
|
|
382
|
-
async def _stop_processes(
|
|
383
|
-
self,
|
|
384
|
-
futures: list[asyncio.Future],
|
|
385
|
-
stop_event: Event,
|
|
386
|
-
):
|
|
387
|
-
# stop all processes
|
|
388
|
-
stop_event.set()
|
|
389
|
-
|
|
390
|
-
await asyncio.gather(*futures)
|
|
143
|
+
yield response, request, request_info, state
|
|
144
|
+
except Exception as err: # noqa: BLE001
|
|
145
|
+
await env.sync_run_error(err)
|
|
146
|
+
raise err
|
|
147
|
+
finally:
|
|
148
|
+
# Ensure all worker processes are cleaned up for error or completion
|
|
149
|
+
if worker_group is not None:
|
|
150
|
+
err = await worker_group.shutdown() # type: ignore[misc]
|
|
151
|
+
if err is not None:
|
|
152
|
+
await env.sync_run_error(err)
|
|
153
|
+
|
|
154
|
+
# Ensure any errors are raised and all responses
|
|
155
|
+
# are yielded for aggregation on the primary node
|
|
156
|
+
async for (
|
|
157
|
+
dist_response,
|
|
158
|
+
dist_request,
|
|
159
|
+
dist_request_info,
|
|
160
|
+
dist_state,
|
|
161
|
+
) in env.sync_run_end():
|
|
162
|
+
yield dist_response, dist_request, dist_request_info, dist_state
|