guidellm 0.4.0a21__py3-none-any.whl → 0.4.0a169__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.


This version of guidellm might be problematic. Click here for more details.

Files changed (115)
  1. guidellm/__init__.py +5 -2
  2. guidellm/__main__.py +452 -252
  3. guidellm/backends/__init__.py +33 -0
  4. guidellm/backends/backend.py +110 -0
  5. guidellm/backends/openai.py +355 -0
  6. guidellm/backends/response_handlers.py +455 -0
  7. guidellm/benchmark/__init__.py +53 -39
  8. guidellm/benchmark/benchmarker.py +150 -317
  9. guidellm/benchmark/entrypoints.py +467 -128
  10. guidellm/benchmark/output.py +519 -771
  11. guidellm/benchmark/profile.py +580 -280
  12. guidellm/benchmark/progress.py +568 -549
  13. guidellm/benchmark/scenarios/__init__.py +40 -0
  14. guidellm/benchmark/scenarios/chat.json +6 -0
  15. guidellm/benchmark/scenarios/rag.json +6 -0
  16. guidellm/benchmark/schemas.py +2086 -0
  17. guidellm/data/__init__.py +28 -4
  18. guidellm/data/collators.py +16 -0
  19. guidellm/data/deserializers/__init__.py +53 -0
  20. guidellm/data/deserializers/deserializer.py +144 -0
  21. guidellm/data/deserializers/file.py +222 -0
  22. guidellm/data/deserializers/huggingface.py +94 -0
  23. guidellm/data/deserializers/memory.py +194 -0
  24. guidellm/data/deserializers/synthetic.py +348 -0
  25. guidellm/data/loaders.py +149 -0
  26. guidellm/data/preprocessors/__init__.py +25 -0
  27. guidellm/data/preprocessors/formatters.py +404 -0
  28. guidellm/data/preprocessors/mappers.py +198 -0
  29. guidellm/data/preprocessors/preprocessor.py +31 -0
  30. guidellm/data/processor.py +31 -0
  31. guidellm/data/schemas.py +13 -0
  32. guidellm/data/utils/__init__.py +6 -0
  33. guidellm/data/utils/dataset.py +94 -0
  34. guidellm/extras/__init__.py +4 -0
  35. guidellm/extras/audio.py +215 -0
  36. guidellm/extras/vision.py +242 -0
  37. guidellm/logger.py +2 -2
  38. guidellm/mock_server/__init__.py +8 -0
  39. guidellm/mock_server/config.py +84 -0
  40. guidellm/mock_server/handlers/__init__.py +17 -0
  41. guidellm/mock_server/handlers/chat_completions.py +280 -0
  42. guidellm/mock_server/handlers/completions.py +280 -0
  43. guidellm/mock_server/handlers/tokenizer.py +142 -0
  44. guidellm/mock_server/models.py +510 -0
  45. guidellm/mock_server/server.py +168 -0
  46. guidellm/mock_server/utils.py +302 -0
  47. guidellm/preprocess/dataset.py +23 -26
  48. guidellm/presentation/builder.py +2 -2
  49. guidellm/presentation/data_models.py +25 -21
  50. guidellm/presentation/injector.py +2 -3
  51. guidellm/scheduler/__init__.py +65 -26
  52. guidellm/scheduler/constraints.py +1035 -0
  53. guidellm/scheduler/environments.py +252 -0
  54. guidellm/scheduler/scheduler.py +140 -368
  55. guidellm/scheduler/schemas.py +272 -0
  56. guidellm/scheduler/strategies.py +519 -0
  57. guidellm/scheduler/worker.py +391 -420
  58. guidellm/scheduler/worker_group.py +707 -0
  59. guidellm/schemas/__init__.py +31 -0
  60. guidellm/schemas/info.py +159 -0
  61. guidellm/schemas/request.py +226 -0
  62. guidellm/schemas/response.py +119 -0
  63. guidellm/schemas/stats.py +228 -0
  64. guidellm/{config.py → settings.py} +32 -21
  65. guidellm/utils/__init__.py +95 -8
  66. guidellm/utils/auto_importer.py +98 -0
  67. guidellm/utils/cli.py +71 -2
  68. guidellm/utils/console.py +183 -0
  69. guidellm/utils/encoding.py +778 -0
  70. guidellm/utils/functions.py +134 -0
  71. guidellm/utils/hf_datasets.py +1 -2
  72. guidellm/utils/hf_transformers.py +4 -4
  73. guidellm/utils/imports.py +9 -0
  74. guidellm/utils/messaging.py +1118 -0
  75. guidellm/utils/mixins.py +115 -0
  76. guidellm/utils/pydantic_utils.py +411 -0
  77. guidellm/utils/random.py +3 -4
  78. guidellm/utils/registry.py +220 -0
  79. guidellm/utils/singleton.py +133 -0
  80. guidellm/{objects → utils}/statistics.py +341 -247
  81. guidellm/utils/synchronous.py +159 -0
  82. guidellm/utils/text.py +163 -50
  83. guidellm/utils/typing.py +41 -0
  84. guidellm/version.py +1 -1
  85. {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a169.dist-info}/METADATA +33 -10
  86. guidellm-0.4.0a169.dist-info/RECORD +95 -0
  87. guidellm/backend/__init__.py +0 -23
  88. guidellm/backend/backend.py +0 -259
  89. guidellm/backend/openai.py +0 -705
  90. guidellm/backend/response.py +0 -136
  91. guidellm/benchmark/aggregator.py +0 -760
  92. guidellm/benchmark/benchmark.py +0 -837
  93. guidellm/benchmark/scenario.py +0 -104
  94. guidellm/data/prideandprejudice.txt.gz +0 -0
  95. guidellm/dataset/__init__.py +0 -22
  96. guidellm/dataset/creator.py +0 -213
  97. guidellm/dataset/entrypoints.py +0 -42
  98. guidellm/dataset/file.py +0 -92
  99. guidellm/dataset/hf_datasets.py +0 -62
  100. guidellm/dataset/in_memory.py +0 -132
  101. guidellm/dataset/synthetic.py +0 -287
  102. guidellm/objects/__init__.py +0 -18
  103. guidellm/objects/pydantic.py +0 -89
  104. guidellm/request/__init__.py +0 -18
  105. guidellm/request/loader.py +0 -284
  106. guidellm/request/request.py +0 -79
  107. guidellm/request/types.py +0 -10
  108. guidellm/scheduler/queues.py +0 -25
  109. guidellm/scheduler/result.py +0 -155
  110. guidellm/scheduler/strategy.py +0 -495
  111. guidellm-0.4.0a21.dist-info/RECORD +0 -62
  112. {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a169.dist-info}/WHEEL +0 -0
  113. {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a169.dist-info}/entry_points.txt +0 -0
  114. {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a169.dist-info}/licenses/LICENSE +0 -0
  115. {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a169.dist-info}/top_level.txt +0 -0
@@ -1,390 +1,162 @@
1
- import asyncio
2
- import math
3
- import time
4
- from collections.abc import AsyncGenerator, Iterable, Iterator
5
- from concurrent.futures import ProcessPoolExecutor
6
- from multiprocessing import Manager
7
- from threading import Event
8
- from typing import (
9
- Any,
10
- Generic,
11
- Optional,
12
- Union,
13
- )
1
+ """
2
+ Thread-safe singleton scheduler for distributed benchmarking workload coordination.
3
+
4
+ Orchestrates request processing across worker processes with distributed timing
5
+ coordination, constraint enforcement, and result aggregation. Integrates with
6
+ backends, environments, and strategies to enable scalable load testing across
7
+ various scenarios including LLM inference benchmarking.
8
+ """
14
9
 
15
- from loguru import logger
10
+ from __future__ import annotations
16
11
 
17
- from guidellm.config import settings
18
- from guidellm.request.types import (
12
+ from collections.abc import AsyncIterator, Iterable
13
+ from typing import Any, Generic
14
+
15
+ from guidellm.scheduler.constraints import Constraint, ConstraintsInitializerFactory
16
+ from guidellm.scheduler.environments import Environment, NonDistributedEnvironment
17
+ from guidellm.scheduler.schemas import (
18
+ BackendInterface,
19
+ MultiTurnRequestT,
19
20
  RequestT,
20
21
  ResponseT,
22
+ SchedulerState,
21
23
  )
22
- from guidellm.scheduler.queues import MPQueues, Queue, QueueEmpty
23
- from guidellm.scheduler.result import (
24
- SchedulerRequestResult,
25
- SchedulerResult,
26
- SchedulerRunInfo,
27
- WorkerProcessRequest,
28
- WorkerProcessResult,
29
- )
30
- from guidellm.scheduler.strategy import SchedulingStrategy
31
- from guidellm.scheduler.worker import (
32
- RequestsWorker,
33
- )
24
+ from guidellm.scheduler.strategies import SchedulingStrategy
25
+ from guidellm.scheduler.worker_group import WorkerProcessGroup
26
+ from guidellm.schemas import RequestInfo
27
+ from guidellm.utils.singleton import ThreadSafeSingletonMixin
34
28
 
35
29
  __all__ = ["Scheduler"]
36
30
 
37
31
 
38
- class Scheduler(Generic[RequestT, ResponseT]):
32
+ class Scheduler(
33
+ Generic[RequestT, ResponseT],
34
+ ThreadSafeSingletonMixin,
35
+ ):
39
36
  """
40
- A class that handles the scheduling of requests to a worker.
41
- This class is responsible for managing the lifecycle of the requests,
42
- including their creation, queuing, and processing.
43
- It uses a multiprocessing approach to handle requests concurrently
44
- and efficiently, based on the specified scheduling strategy.
45
- The Scheduler class is designed to work with a RequestsWorker,
46
- which is an abstract base class that defines the interface for a worker
47
- that can resolve requests asynchronously or synchronously.
48
- The Scheduler class also supports different scheduling strategies,
49
- including synchronous, throughput, and concurrent strategies.
50
-
51
- :param worker: The worker that will process the requests.
52
- This should be an instance of RequestsWorker.
53
- :param request_loader: An iterable that generates requests.
54
- This can be a list, generator, or any other iterable.
55
- The requests will be processed by the worker.
37
+ Thread-safe singleton scheduler for distributed benchmarking workload coordination.
38
+
39
+ Orchestrates request processing across worker processes with distributed timing
40
+ coordination, constraint enforcement, and result aggregation. Abstracts the
41
+ complexity of multi-process coordination, environment synchronization, and
42
+ resource management while providing a unified interface for executing benchmarking
43
+ operations. Implements singleton pattern to ensure consistent execution state.
44
+
45
+ Example:
46
+ ::
47
+ from guidellm.scheduler import Scheduler
48
+ from guidellm.scheduler import NonDistributedEnvironment, SynchronousStrategy
49
+
50
+ scheduler = Scheduler()
51
+ async for response, request, info, state in scheduler.run(
52
+ requests=request_list,
53
+ backend=backend,
54
+ strategy=SynchronousStrategy(),
55
+ env=NonDistributedEnvironment(),
56
+ max_requests=1000
57
+ ):
58
+ print(f"Processed: {request}")
56
59
  """
57
60
 
58
- def __init__(
59
- self,
60
- worker: RequestsWorker[RequestT, ResponseT],
61
- request_loader: Iterable[RequestT],
62
- ):
63
- if not isinstance(worker, RequestsWorker):
64
- raise ValueError(f"Invalid worker: {worker}")
65
-
66
- if not isinstance(request_loader, Iterable):
67
- raise ValueError(f"Invalid request_loader: {request_loader}")
68
-
69
- self.worker = worker
70
- self.request_loader = request_loader
71
-
72
61
  async def run(
73
62
  self,
74
- scheduling_strategy: SchedulingStrategy,
75
- max_number: Optional[int] = None,
76
- max_duration: Optional[float] = None,
77
- ) -> AsyncGenerator[
78
- Union[SchedulerResult, SchedulerRequestResult[RequestT, ResponseT]], None
63
+ requests: Iterable[RequestT | MultiTurnRequestT[RequestT]],
64
+ backend: BackendInterface[RequestT, ResponseT],
65
+ strategy: SchedulingStrategy,
66
+ startup_duration: float,
67
+ env: Environment[RequestT, ResponseT] | None,
68
+ **constraints: Any | dict[str, Any] | Constraint,
69
+ ) -> AsyncIterator[
70
+ tuple[
71
+ ResponseT | None,
72
+ RequestT,
73
+ RequestInfo,
74
+ SchedulerState,
75
+ ]
79
76
  ]:
80
77
  """
81
- The main method that runs the scheduler.
82
- This method is a generator that yields SchedulerResult objects
83
- at the start and end of the run, as well as at the start and end
84
- of each request.
85
- It uses multiprocessing to handle requests concurrently
86
- and efficiently, based on the specified scheduling strategy.
87
- The method also handles the lifecycle of the requests,
88
- including their creation, queuing, and processing.
89
- The method is designed to be used as an asynchronous generator,
90
- allowing it to be used with asyncio and other asynchronous frameworks.
91
-
92
- :param scheduling_strategy: The scheduling strategy to use.
93
- Specifies the times at which requests will be sent as well how many
94
- worker processes are used and if requests are scheduled sync or async.
95
- This can be one of the following:
96
- - "synchronous": Requests are sent synchronously.
97
- - "throughput": Requests are sent at the maximum rate possible.
98
- - An instance of SchedulingStrategy.
99
- :param max_number: The maximum number of requests to process.
100
- If None, then no limit is set and either the iterator must be exhaustible
101
- or the max_duration must be set.
102
- :param max_duration: The maximum duration for the scheduling run.
103
- If None, then no limit is set and either the iterator must be exhaustible
104
- or the max_number must be set.
105
- :return: An asynchronous generator that yields SchedulerResult objects.
106
- Each SchedulerResult object contains information about the request,
107
- the response, and the run information.
78
+ Execute distributed request processing with coordinated timing and constraints.
79
+
80
+ Orchestrates the complete benchmarking workflow across worker processes with
81
+ environment synchronization, constraint enforcement, and error handling. Manages
82
+ resource lifecycle from initialization through cleanup while yielding real-time
83
+ processing updates for monitoring and aggregation.
84
+
85
+ :param requests: Request collection to process, supporting single requests or
86
+ multi-turn sequences with optional inter-request delays
87
+ :param backend: Backend interface for request processing and response generation
88
+ :param strategy: Scheduling strategy controlling request timing and distribution
89
+ :param startup_duration: Duration in seconds for requests to ramp up
90
+ :param env: Environment interface for distributed coordination and
91
+ synchronization. Defaults to NonDistributedEnvironment if None
92
+ :param constraints: Runtime constraints for execution control (max_requests,
93
+ max_duration, max_error_rate, etc.) as primitives, dictionaries, or
94
+ constraint instances
95
+ :yields: Request updates as (response, request, request_info, scheduler_state)
96
+ tuples. Each request generates three ordered updates: queued, in_progress,
97
+ completed | errored | cancelled
98
+ :raises Exception: Worker process errors, environment synchronization failures,
99
+ or constraint evaluation errors are propagated after cleanup
108
100
  """
109
- if scheduling_strategy is None or not isinstance(
110
- scheduling_strategy, SchedulingStrategy
111
- ):
112
- raise ValueError(f"Invalid scheduling strategy: {scheduling_strategy}")
113
-
114
- if max_number is not None and max_number < 1:
115
- raise ValueError(f"Invalid max_number: {max_number}")
101
+ with self.thread_lock:
102
+ if env is None:
103
+ env = NonDistributedEnvironment[RequestT, ResponseT]()
116
104
 
117
- if max_duration is not None and max_duration < 0:
118
- raise ValueError(f"Invalid max_duration: {max_duration}")
119
-
120
- with (
121
- Manager() as manager,
122
- ProcessPoolExecutor(
123
- max_workers=scheduling_strategy.processes_limit
124
- ) as executor,
125
- ):
126
- requests_iter: Optional[Iterator[Any]] = None
127
- scheduling_strategy.start_time = (
128
- time.time() + settings.scheduler_start_delay
129
- ) # Add a small delay to allow processes to start
130
- futures, queues, stop_event = await self._start_processes(
131
- manager, executor, scheduling_strategy
132
- )
133
- run_info, requests_iter, times_iter = self._run_setup(
134
- futures, scheduling_strategy, max_number, max_duration
135
- )
136
-
137
- # Add some initial requests to the queue
138
- requests_iter = self._add_requests(
139
- requests_iter,
140
- queues.requests,
141
- times_iter,
142
- run_info,
143
- )
144
- # Wait for the test to start
145
- await asyncio.sleep(time.time() - scheduling_strategy.start_time)
146
- yield SchedulerResult(
147
- type_="run_start",
148
- run_info=run_info,
149
- )
105
+ worker_group: WorkerProcessGroup[RequestT, ResponseT] | None = None
150
106
 
107
+ # Any issues during the run will raise an error (local or remote),
108
+ # be caught and passed to the environment,
109
+ # and will ensure clean up before raising the error.
151
110
  try:
152
- while True:
153
- # check errors and raise them
154
- for future in futures:
155
- if future.done() and (err := future.exception()) is not None:
156
- raise err
157
-
158
- if (
159
- requests_iter is None
160
- and run_info.processing_requests <= 0
161
- and ( # Ensure we have met one of the end conditions
162
- time.time() >= run_info.end_time
163
- or run_info.completed_requests >= run_info.end_number
164
- )
165
- ):
166
- # we've exhausted all requests we've wanted to run
167
- # and yielded all responses
168
- break
169
-
170
- requests_iter = self._add_requests(
171
- requests_iter,
172
- queues.requests,
173
- times_iter,
174
- run_info,
175
- )
176
- await asyncio.sleep(0) # enable requests to start
177
-
178
- iter_result = self._check_result_ready(
179
- queues.responses,
180
- run_info,
181
- )
182
- if iter_result is not None:
183
- yield iter_result
184
-
185
- # yield control to the event loop
186
- await asyncio.sleep(settings.default_async_loop_sleep)
187
- except Exception as err:
188
- raise RuntimeError(f"Scheduler run failed: {err}") from err
189
-
190
- yield SchedulerResult(
191
- type_="run_complete",
192
- run_info=run_info,
193
- )
194
-
195
- await self._stop_processes(futures, stop_event)
196
-
197
- async def _start_processes(
198
- self,
199
- manager,
200
- executor: ProcessPoolExecutor,
201
- scheduling_strategy: SchedulingStrategy,
202
- ) -> tuple[
203
- list[asyncio.Future],
204
- MPQueues[RequestT, ResponseT],
205
- Event,
206
- ]:
207
- await self.worker.prepare_multiprocessing()
208
- queues: MPQueues[RequestT, ResponseT] = MPQueues(
209
- requests=manager.Queue(
210
- maxsize=scheduling_strategy.processing_requests_limit
211
- ),
212
- responses=manager.Queue(),
213
- )
214
- stop_event = manager.Event()
215
-
216
- num_processes = min(
217
- scheduling_strategy.processes_limit,
218
- scheduling_strategy.processing_requests_limit,
219
- )
220
- requests_limit_split = (
221
- scheduling_strategy.processing_requests_limit
222
- // scheduling_strategy.processes_limit
223
- )
224
- requests_limit_remain = (
225
- scheduling_strategy.processing_requests_limit
226
- % scheduling_strategy.processes_limit
227
- )
228
- process_ids = (id_ for id_ in range(num_processes))
229
- process_requests_limits = (
230
- requests_limit_split + 1
231
- if i < requests_limit_remain
232
- else requests_limit_split
233
- for i in range(num_processes)
234
- )
235
-
236
- futures = []
237
- loop = asyncio.get_event_loop()
238
- for id_, requests_limit in zip(process_ids, process_requests_limits):
239
- futures.append(
240
- loop.run_in_executor(
241
- executor,
242
- self.worker.process_loop_asynchronous,
243
- queues,
244
- scheduling_strategy,
245
- stop_event,
246
- requests_limit,
247
- id_,
248
- num_processes,
111
+ # Setup local run parameters, sync with the environment
112
+ resolved_constraints = (
113
+ ConstraintsInitializerFactory.resolve_constraints(constraints)
249
114
  )
250
- )
251
-
252
- await asyncio.sleep(0.1) # give time for processes to start
253
-
254
- return futures, queues, stop_event
255
-
256
- def _run_setup(
257
- self,
258
- processes: list[asyncio.Future],
259
- scheduling_strategy: SchedulingStrategy,
260
- max_number: Optional[int],
261
- max_duration: Optional[float],
262
- ) -> tuple[SchedulerRunInfo, Iterator[Any], Iterator[float]]:
263
- requests_iter = iter(self.request_loader)
264
- times_iter = iter(scheduling_strategy.request_times())
265
- end_time = scheduling_strategy.start_time + (max_duration or math.inf)
266
- end_number = max_number or math.inf
267
-
268
- try:
269
- # update end number if the request loader is finite and less than max
270
- iter_length = len(self.request_loader) # type: ignore[arg-type]
271
- if 0 < iter_length < end_number:
272
- end_number = iter_length
273
- except Exception: # noqa: BLE001, S110
274
- pass
275
-
276
- if end_number == math.inf and end_time is None:
277
- logger.warning(
278
- "No end number or end time set, "
279
- "scheduler will run indefinitely until the request loader is exhausted."
280
- )
281
-
282
- info = SchedulerRunInfo(
283
- start_time=scheduling_strategy.start_time,
284
- end_time=end_time,
285
- end_number=end_number,
286
- processes=len(processes),
287
- strategy=scheduling_strategy,
288
- )
289
-
290
- return info, requests_iter, times_iter
291
-
292
- def _add_requests(
293
- self,
294
- requests_iter: Optional[Iterator[Any]],
295
- requests_queue: Queue[WorkerProcessRequest[RequestT, ResponseT]],
296
- times_iter: Iterator[float],
297
- run_info: SchedulerRunInfo,
298
- ) -> Optional[Iterator[Any]]:
299
- if requests_iter is not None:
300
- try:
301
- added_count = 0
302
-
303
- while not requests_queue.full() and added_count < (
304
- run_info.strategy.queued_requests_limit
305
- or settings.min_queued_requests
306
- ):
307
- if run_info.created_requests >= run_info.end_number:
308
- raise StopIteration
309
-
310
- if (
311
- next(times_iter) >= run_info.end_time
312
- or time.time() >= run_info.end_time
313
- ):
314
- raise StopIteration
315
-
316
- work_req = WorkerProcessRequest[RequestT, ResponseT](
317
- request=next(requests_iter),
318
- timeout_time=run_info.end_time,
319
- queued_time=time.time(),
115
+ (
116
+ local_requests,
117
+ local_strategy,
118
+ local_constraints,
119
+ ) = await env.sync_run_params(requests, strategy, resolved_constraints)
120
+
121
+ # Setup the worker group, sync start with the environment
122
+ worker_group = WorkerProcessGroup[RequestT, ResponseT](
123
+ requests=local_requests,
124
+ backend=backend,
125
+ strategy=local_strategy,
126
+ startup_duration=startup_duration,
127
+ **local_constraints,
128
+ )
129
+ await worker_group.create_processes()
130
+ local_start_time = await env.sync_run_start()
131
+ await worker_group.start(local_start_time)
132
+
133
+ # Yield any updates and sync with the environment for non-local updates
134
+ async for (
135
+ response,
136
+ request,
137
+ request_info,
138
+ state,
139
+ ) in worker_group.request_updates():
140
+ await env.update_run_iteration(
141
+ response, request, request_info, state
320
142
  )
321
- requests_queue.put(work_req)
322
-
323
- run_info.created_requests += 1
324
- run_info.queued_requests += 1
325
- added_count += 1
326
- except StopIteration:
327
- # we've reached the limit number, limit time, or exhausted the requests
328
- # set to None to stop adding more and tell the loop no more requests
329
- requests_iter = None
330
-
331
- return requests_iter
332
-
333
- def _check_result_ready(
334
- self,
335
- responses_queue: Queue[WorkerProcessResult[RequestT, ResponseT]],
336
- run_info: SchedulerRunInfo,
337
- ) -> Optional[SchedulerRequestResult[RequestT, ResponseT]]:
338
- try:
339
- process_response: WorkerProcessResult[RequestT, ResponseT] = (
340
- responses_queue.get_nowait()
341
- )
342
- except QueueEmpty:
343
- return None
344
-
345
- if process_response.type_ == "request_scheduled":
346
- run_info.queued_requests -= 1
347
- run_info.scheduled_requests += 1
348
-
349
- return SchedulerRequestResult(
350
- type_="request_scheduled",
351
- run_info=run_info,
352
- request=process_response.request,
353
- request_info=process_response.info,
354
- response=None,
355
- )
356
-
357
- if process_response.type_ == "request_start":
358
- run_info.scheduled_requests -= 1
359
- run_info.processing_requests += 1
360
-
361
- return SchedulerRequestResult(
362
- type_="request_start",
363
- run_info=run_info,
364
- request=process_response.request,
365
- request_info=process_response.info,
366
- response=None,
367
- )
368
-
369
- if process_response.type_ == "request_complete":
370
- run_info.processing_requests -= 1
371
- run_info.completed_requests += 1
372
-
373
- return SchedulerRequestResult(
374
- type_="request_complete",
375
- run_info=run_info,
376
- request=process_response.request,
377
- request_info=process_response.info,
378
- response=process_response.response,
379
- )
380
- raise ValueError(f"Invalid process response type: {process_response}")
381
-
382
- async def _stop_processes(
383
- self,
384
- futures: list[asyncio.Future],
385
- stop_event: Event,
386
- ):
387
- # stop all processes
388
- stop_event.set()
389
-
390
- await asyncio.gather(*futures)
143
+ yield response, request, request_info, state
144
+ except Exception as err: # noqa: BLE001
145
+ await env.sync_run_error(err)
146
+ raise err
147
+ finally:
148
+ # Ensure all worker processes are cleaned up for error or completion
149
+ if worker_group is not None:
150
+ err = await worker_group.shutdown() # type: ignore[misc]
151
+ if err is not None:
152
+ await env.sync_run_error(err)
153
+
154
+ # Ensure any errors are raised and all responses
155
+ # are yielded for aggregation on the primary node
156
+ async for (
157
+ dist_response,
158
+ dist_request,
159
+ dist_request_info,
160
+ dist_state,
161
+ ) in env.sync_run_end():
162
+ yield dist_response, dist_request, dist_request_info, dist_state