guidellm 0.4.0a21__py3-none-any.whl → 0.4.0a169__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (115)
  1. guidellm/__init__.py +5 -2
  2. guidellm/__main__.py +452 -252
  3. guidellm/backends/__init__.py +33 -0
  4. guidellm/backends/backend.py +110 -0
  5. guidellm/backends/openai.py +355 -0
  6. guidellm/backends/response_handlers.py +455 -0
  7. guidellm/benchmark/__init__.py +53 -39
  8. guidellm/benchmark/benchmarker.py +150 -317
  9. guidellm/benchmark/entrypoints.py +467 -128
  10. guidellm/benchmark/output.py +519 -771
  11. guidellm/benchmark/profile.py +580 -280
  12. guidellm/benchmark/progress.py +568 -549
  13. guidellm/benchmark/scenarios/__init__.py +40 -0
  14. guidellm/benchmark/scenarios/chat.json +6 -0
  15. guidellm/benchmark/scenarios/rag.json +6 -0
  16. guidellm/benchmark/schemas.py +2086 -0
  17. guidellm/data/__init__.py +28 -4
  18. guidellm/data/collators.py +16 -0
  19. guidellm/data/deserializers/__init__.py +53 -0
  20. guidellm/data/deserializers/deserializer.py +144 -0
  21. guidellm/data/deserializers/file.py +222 -0
  22. guidellm/data/deserializers/huggingface.py +94 -0
  23. guidellm/data/deserializers/memory.py +194 -0
  24. guidellm/data/deserializers/synthetic.py +348 -0
  25. guidellm/data/loaders.py +149 -0
  26. guidellm/data/preprocessors/__init__.py +25 -0
  27. guidellm/data/preprocessors/formatters.py +404 -0
  28. guidellm/data/preprocessors/mappers.py +198 -0
  29. guidellm/data/preprocessors/preprocessor.py +31 -0
  30. guidellm/data/processor.py +31 -0
  31. guidellm/data/schemas.py +13 -0
  32. guidellm/data/utils/__init__.py +6 -0
  33. guidellm/data/utils/dataset.py +94 -0
  34. guidellm/extras/__init__.py +4 -0
  35. guidellm/extras/audio.py +215 -0
  36. guidellm/extras/vision.py +242 -0
  37. guidellm/logger.py +2 -2
  38. guidellm/mock_server/__init__.py +8 -0
  39. guidellm/mock_server/config.py +84 -0
  40. guidellm/mock_server/handlers/__init__.py +17 -0
  41. guidellm/mock_server/handlers/chat_completions.py +280 -0
  42. guidellm/mock_server/handlers/completions.py +280 -0
  43. guidellm/mock_server/handlers/tokenizer.py +142 -0
  44. guidellm/mock_server/models.py +510 -0
  45. guidellm/mock_server/server.py +168 -0
  46. guidellm/mock_server/utils.py +302 -0
  47. guidellm/preprocess/dataset.py +23 -26
  48. guidellm/presentation/builder.py +2 -2
  49. guidellm/presentation/data_models.py +25 -21
  50. guidellm/presentation/injector.py +2 -3
  51. guidellm/scheduler/__init__.py +65 -26
  52. guidellm/scheduler/constraints.py +1035 -0
  53. guidellm/scheduler/environments.py +252 -0
  54. guidellm/scheduler/scheduler.py +140 -368
  55. guidellm/scheduler/schemas.py +272 -0
  56. guidellm/scheduler/strategies.py +519 -0
  57. guidellm/scheduler/worker.py +391 -420
  58. guidellm/scheduler/worker_group.py +707 -0
  59. guidellm/schemas/__init__.py +31 -0
  60. guidellm/schemas/info.py +159 -0
  61. guidellm/schemas/request.py +226 -0
  62. guidellm/schemas/response.py +119 -0
  63. guidellm/schemas/stats.py +228 -0
  64. guidellm/{config.py → settings.py} +32 -21
  65. guidellm/utils/__init__.py +95 -8
  66. guidellm/utils/auto_importer.py +98 -0
  67. guidellm/utils/cli.py +71 -2
  68. guidellm/utils/console.py +183 -0
  69. guidellm/utils/encoding.py +778 -0
  70. guidellm/utils/functions.py +134 -0
  71. guidellm/utils/hf_datasets.py +1 -2
  72. guidellm/utils/hf_transformers.py +4 -4
  73. guidellm/utils/imports.py +9 -0
  74. guidellm/utils/messaging.py +1118 -0
  75. guidellm/utils/mixins.py +115 -0
  76. guidellm/utils/pydantic_utils.py +411 -0
  77. guidellm/utils/random.py +3 -4
  78. guidellm/utils/registry.py +220 -0
  79. guidellm/utils/singleton.py +133 -0
  80. guidellm/{objects → utils}/statistics.py +341 -247
  81. guidellm/utils/synchronous.py +159 -0
  82. guidellm/utils/text.py +163 -50
  83. guidellm/utils/typing.py +41 -0
  84. guidellm/version.py +1 -1
  85. {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a169.dist-info}/METADATA +33 -10
  86. guidellm-0.4.0a169.dist-info/RECORD +95 -0
  87. guidellm/backend/__init__.py +0 -23
  88. guidellm/backend/backend.py +0 -259
  89. guidellm/backend/openai.py +0 -705
  90. guidellm/backend/response.py +0 -136
  91. guidellm/benchmark/aggregator.py +0 -760
  92. guidellm/benchmark/benchmark.py +0 -837
  93. guidellm/benchmark/scenario.py +0 -104
  94. guidellm/data/prideandprejudice.txt.gz +0 -0
  95. guidellm/dataset/__init__.py +0 -22
  96. guidellm/dataset/creator.py +0 -213
  97. guidellm/dataset/entrypoints.py +0 -42
  98. guidellm/dataset/file.py +0 -92
  99. guidellm/dataset/hf_datasets.py +0 -62
  100. guidellm/dataset/in_memory.py +0 -132
  101. guidellm/dataset/synthetic.py +0 -287
  102. guidellm/objects/__init__.py +0 -18
  103. guidellm/objects/pydantic.py +0 -89
  104. guidellm/request/__init__.py +0 -18
  105. guidellm/request/loader.py +0 -284
  106. guidellm/request/request.py +0 -79
  107. guidellm/request/types.py +0 -10
  108. guidellm/scheduler/queues.py +0 -25
  109. guidellm/scheduler/result.py +0 -155
  110. guidellm/scheduler/strategy.py +0 -495
  111. guidellm-0.4.0a21.dist-info/RECORD +0 -62
  112. {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a169.dist-info}/WHEEL +0 -0
  113. {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a169.dist-info}/entry_points.txt +0 -0
  114. {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a169.dist-info}/licenses/LICENSE +0 -0
  115. {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a169.dist-info}/top_level.txt +0 -0
--- /dev/null
+++ b/guidellm/scheduler/worker_group.py
@@ -0,0 +1,707 @@
+"""
+Multi-process worker group orchestration for distributed request scheduling.
+
+Provides infrastructure for coordinating worker processes with shared state
+management, inter-process communication, and lifecycle coordination. Handles
+dynamic scaling, load balancing, constraint evaluation, and graceful shutdown
+across distributed workers processing concurrent requests.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import math
+import threading
+import time
+import uuid
+from collections.abc import AsyncIterator, Generator, Iterable
+from multiprocessing import get_context
+from multiprocessing.context import BaseContext
+from multiprocessing.managers import BaseManager
+from multiprocessing.process import BaseProcess
+from multiprocessing.synchronize import Barrier, Event
+from typing import Generic, NamedTuple
+
+from guidellm.logger import logger
+from guidellm.scheduler.constraints import Constraint, RequestsExhaustedConstraint
+from guidellm.scheduler.schemas import (
+    BackendInterface,
+    MultiTurnRequestT,
+    RequestT,
+    ResponseT,
+    SchedulerState,
+    SchedulerUpdateAction,
+)
+from guidellm.scheduler.strategies import SchedulingStrategy
+from guidellm.scheduler.worker import WorkerProcess
+from guidellm.schemas import RequestInfo
+from guidellm.settings import settings
+from guidellm.utils import (
+    InterProcessMessaging,
+    InterProcessMessagingManagerQueue,
+    InterProcessMessagingPipe,
+    InterProcessMessagingQueue,
+    wait_for_sync_objects,
+)
+
+__all__ = ["WorkerGroupState", "WorkerProcessGroup"]
+
+
+class WorkerProcessGroup(Generic[RequestT, ResponseT]):
+    """
+    Orchestrates multiple worker processes for distributed request processing.
+
+    Manages process lifecycle, request distribution, response collection, and state
+    synchronization across workers. Handles dynamic scaling, load balancing, and
+    constraint evaluation with graceful shutdown coordination for high-throughput
+    request processing workloads.
+
+    Example:
+    ::
+
+        from guidellm.scheduler.worker_group import WorkerProcessGroup
+
+        group = WorkerProcessGroup(
+            requests=request_iterable,
+            backend=backend_instance,
+            strategy=scheduling_strategy,
+            startup_duration=5.0,
+            max_time=time_constraint,
+        )
+
+        await group.create_processes()
+        await group.start(time.time())
+
+        async for response, request, info, state in group.request_updates():
+            if response is not None:
+                # Process the completed request
+                handle_response(response)
+
+        await group.shutdown()
+    """
+
+    def __init__(
+        self,
+        requests: Iterable[RequestT | MultiTurnRequestT[RequestT]],
+        backend: BackendInterface[RequestT, ResponseT],
+        strategy: SchedulingStrategy,
+        startup_duration: float,
+        **constraints: Constraint,
+    ):
+        """
+        Initialize a worker process group for distributed request processing.
+
+        :param requests: Finite iterable of requests to process sequentially
+        :param backend: Backend interface for processing requests
+        :param strategy: Scheduling strategy for request timing and distribution
+        :param startup_duration: Duration in seconds for request startup ramping
+        :param constraints: Named constraints for controlling execution behavior
+        """
+        self.requests = requests
+        self.backend = backend
+        self.strategy = strategy
+        self.startup_duration = startup_duration
+        self.constraints = constraints
+
+        # Multiprocessing context and primitives, created in create_processes
+        self.mp_context: BaseContext | None = None
+        self.mp_manager: BaseManager | None = None
+        self.processes: list[BaseProcess] | None = None
+        self.startup_barrier: Barrier | None = None
+        self.requests_generated_event: Event | None = None
+        self.constraint_reached_event: Event | None = None
+        self.shutdown_event: Event | None = None
+        self.error_event: Event | None = None
+
+        # Scheduler and messaging state, created in start
+        self.state: WorkerGroupState[RequestT, ResponseT] | None = None
+        self.messaging: (
+            InterProcessMessaging[
+                tuple[
+                    RequestT | MultiTurnRequestT[RequestT],
+                    RequestInfo,
+                ],
+                tuple[
+                    ResponseT | None,
+                    RequestT | MultiTurnRequestT[RequestT],
+                    RequestInfo,
+                    SchedulerState,
+                ],
+            ]
+            | None
+        ) = None
+
+    async def create_processes(self):
+        """
+        Create and initialize worker processes for distributed request processing.
+
+        Sets up multiprocessing infrastructure and worker processes based on
+        strategy constraints, backend capabilities, and system configuration.
+        Determines the process count and concurrency limits, then spawns
+        worker processes with distributed request handling capabilities.
+
+        :raises RuntimeError: If process initialization or startup fails
+        """
+        # Process limits and params
+        max_conc: int
+        if (
+            requests_limit := min(
+                self.strategy.requests_limit or math.inf,
+                self.backend.requests_limit or math.inf,
+            )
+        ) != math.inf:
+            max_conc = int(requests_limit)
+        else:
+            # If concurrency is not specified, fall back to settings
+            max_conc = settings.max_concurrency
+        if max_conc <= 0:
+            raise RuntimeError("max_concurrency resolved to 0; increase limits/config")
+
+        # Calculate the number of processes, ensuring it doesn't exceed the max
+        # concurrency or the limits from the backend, strategy, or user settings
+        num_processes: int = int(
+            min(
+                max_conc,
+                self.strategy.processes_limit or math.inf,
+                self.backend.processes_limit or math.inf,
+                settings.max_worker_processes,
+            )
+        )
+        if num_processes <= 0:
+            raise RuntimeError("num_processes resolved to 0; increase limits/config")
+
+        per_proc_max_conc = max_conc // num_processes
+        max_pending_size = max(
+            1, math.floor(max_conc * settings.mp_max_pending_buffer_percent)
+        )
+        per_proc_max_buffer_size = 1
+
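+        # Illustrative sizing (assumed values, not defaults): max_conc = 64 and
+        # settings.max_worker_processes = 6 give num_processes = 6 and
+        # per_proc_max_conc = 64 // 6 = 10; a pending-buffer percent of 0.1
+        # gives max_pending_size = floor(64 * 0.1) = 6.
+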
+        # Initialize multiprocessing components
+        self.mp_context = get_context(settings.mp_context_type)
+        self.mp_manager = self.mp_context.Manager()
+        self.startup_barrier = self.mp_context.Barrier(num_processes + 1)
+        self.requests_generated_event = self.mp_context.Event()
+        self.constraint_reached_event = self.mp_context.Event()
+        self.shutdown_event = self.mp_context.Event()
+        self.error_event = self.mp_context.Event()
+
+        if settings.mp_messaging_object == "queue":
+            self.messaging = InterProcessMessagingQueue(
+                mp_context=self.mp_context,
+                serialization=settings.mp_serialization,
+                encoding=settings.mp_encoding,
+                max_pending_size=max_pending_size,
+                max_buffer_send_size=settings.mp_requests_send_buffer_size,
+                poll_interval=settings.mp_poll_interval,
+            )
+        elif settings.mp_messaging_object == "manager_queue":
+            self.messaging = InterProcessMessagingManagerQueue(
+                manager=self.mp_manager,
+                mp_context=self.mp_context,
+                serialization=settings.mp_serialization,
+                encoding=settings.mp_encoding,
+                max_pending_size=max_pending_size,
+                max_buffer_send_size=settings.mp_requests_send_buffer_size,
+                poll_interval=settings.mp_poll_interval,
+            )
+        elif settings.mp_messaging_object == "pipe":
+            self.messaging = InterProcessMessagingPipe(
+                num_workers=num_processes,
+                mp_context=self.mp_context,
+                serialization=settings.mp_serialization,
+                encoding=settings.mp_encoding,
+                max_pending_size=max_pending_size,
+                max_buffer_send_size=settings.mp_requests_send_buffer_size,
+                poll_interval=settings.mp_poll_interval,
+            )
+
+        # Initialize worker processes
+        self.processes = []
+        self.strategy.init_processes_timings(
+            worker_count=num_processes,
+            max_concurrency=max_conc,
+            startup_duration=self.startup_duration,
+        )
+        for rank in range(num_processes):
+            # Distribute any remainder across the first N ranks
+            async_limit = per_proc_max_conc + (
+                1 if rank < (max_conc % num_processes) else 0
+            )
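+            # e.g. max_conc = 10, num_processes = 4 -> async limits [3, 3, 2, 2]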
+
+            worker = WorkerProcess[RequestT, ResponseT](
+                worker_index=rank,
+                messaging=self.messaging.create_worker_copy(
+                    worker_index=rank,
+                    max_buffer_send_size=None,
+                    max_buffer_receive_size=per_proc_max_buffer_size,
+                ),
+                backend=self.backend,
+                strategy=self.strategy,
+                async_limit=async_limit,
+                fut_scheduling_time_limit=0.0,
+                startup_barrier=self.startup_barrier,
+                requests_generated_event=self.requests_generated_event,
+                constraint_reached_event=self.constraint_reached_event,
+                shutdown_event=self.shutdown_event,
+                error_event=self.error_event,
+            )
+            proc = self.mp_context.Process(target=worker.run, daemon=False)
+            proc.start()
+            self.processes.append(proc)
+
+        wait_key = await wait_for_sync_objects(
+            {
+                "startup_barrier": self.startup_barrier,
+                "shutdown_event": self.shutdown_event,
+                "error_event": self.error_event,
+            },
+            poll_interval=settings.mp_poll_interval,
+        )
+
+        if wait_key == "error_event":
+            raise RuntimeError(
+                "Worker process group startup failed: error_event is set"
+            )
+
+    async def start(self, start_time: float):
+        """
+        Begin request processing at the specified start time.
+
+        Initializes scheduler state and background tasks, then waits until the
+        specified start time before beginning operations. Sets up inter-process
+        communication and coordinates synchronized startup across all workers.
+
+        :param start_time: Unix timestamp when processing should begin
+        :raises RuntimeError: If workers encounter errors during startup or
+            if create_processes() was not called first
+        """
+        if (
+            not self.processes
+            or not self.requests_generated_event
+            or not self.constraint_reached_event
+            or not self.shutdown_event
+            or not self.error_event
+            or not self.messaging
+        ):
+            raise RuntimeError("create_processes() must be called before start()")
+
+        self.strategy.init_processes_start(start_time=start_time)
+        stop_send_requests_event = threading.Event()
+        send_requests_stopped_event = threading.Event()
+        self.state = WorkerGroupState[RequestT, ResponseT](
+            start_time=start_time,
+            processes=self.processes,
+            constraints=self.constraints,
+            stop_send_requests_event=stop_send_requests_event,
+            send_requests_stopped_event=send_requests_stopped_event,
+            requests_generated_event=self.requests_generated_event,
+            constraint_reached_event=self.constraint_reached_event,
+            shutdown_event=self.shutdown_event,
+            error_event=self.error_event,
+            messaging=self.messaging,
+        )
+        await self.messaging.start(
+            send_items=self.state.requests_generator(self.requests),
+            receive_callback=self.state.received_callback,
+            send_stopped_event=send_requests_stopped_event,
+            send_stop_criteria=[stop_send_requests_event],
+            receive_stop_criteria=[self.shutdown_event],
+        )
+
+        if (wait_time := start_time - time.time()) > 0:
+            await asyncio.sleep(wait_time)
+        if self.error_event.is_set():
+            raise RuntimeError(
+                "error_event is set in WorkerProcessGroup, "
+                "indicating an error occurred in one of the worker processes."
+            )
+
+    async def request_updates(
+        self,
+    ) -> AsyncIterator[
+        tuple[
+            ResponseT | None,
+            RequestT | MultiTurnRequestT[RequestT],
+            RequestInfo,
+            SchedulerState,
+        ]
+    ]:
+        """
+        Yield request processing updates as they become available.
+
+        Returns an async iterator of request updates including the response,
+        request, request scheduling info, and scheduler state. Updates occur
+        when a request is queued, starts processing, and completes; the
+        response is None until processing completes.
+
+        :return: Async iterator yielding (response, request, request_info, state)
+            tuples where response is None until processing is complete
+        :raises RuntimeError: If workers encounter unrecoverable errors
+        """
+        while True:
+            if self.error_event.is_set():  # type: ignore[union-attr]
+                logger.error("Error event set in WorkerProcessGroup")
+                raise RuntimeError(
+                    "error_event is set in WorkerProcessGroup, "
+                    "indicating an error occurred in one of the worker processes."
+                )
+
+            try:
+                (
+                    response,
+                    request,
+                    request_info,
+                    scheduler_state,
+                ) = await self.messaging.get(timeout=settings.mp_poll_interval)  # type: ignore[union-attr]
+
+                yield response, request, request_info, scheduler_state
+            except asyncio.TimeoutError:
+                if self.shutdown_event.is_set():  # type: ignore[union-attr]
+                    # Everything has been yielded; exit
+                    break
+
+    async def shutdown(self) -> list[Exception]:  # noqa: C901
+        """
+        Gracefully shut down the worker process group and clean up resources.
+
+        Performs a safe shutdown of worker processes, background tasks, and
+        multiprocessing resources. Coordinates orderly termination across
+        all workers and collects any exceptions encountered during shutdown.
+
+        :return: List of exceptions encountered during shutdown; empty if no errors
+        """
+        exceptions: list[Exception] = []
+        if self.shutdown_event is not None:
+            self.shutdown_event.set()
+
+        # Clear out start values
+        if self.messaging is not None:
+            try:
+                await asyncio.wait_for(self.messaging.stop(), timeout=5.0)
+            except Exception as err:  # noqa: BLE001
+                exceptions.append(err)
+        self.messaging = None
+        self.state = None
+
+        # Clear out create_processes values
+        if self.processes is not None:
+            for proc in self.processes:
+                try:
+                    await asyncio.to_thread(proc.join, timeout=5.0)
+                    if proc.exitcode is not None and proc.exitcode > 0:
+                        exceptions.append(
+                            RuntimeError(
+                                f"Worker {proc.pid} exited with code {proc.exitcode}"
+                            )
+                        )
+                except Exception as err:  # noqa: BLE001
+                    exceptions.append(err)
+        self.processes = None
+        self.startup_barrier = None
+        self.requests_generated_event = None
+        self.constraint_reached_event = None
+        self.shutdown_event = None
+        self.error_event = None
+        if self.mp_manager is not None:
+            try:
+                self.mp_manager.shutdown()
+            except Exception as err:  # noqa: BLE001
+                exceptions.append(err)
+        self.mp_manager = None
+        self.mp_context = None
+
+        return exceptions
+
+
+class _StateUpdate(NamedTuple):
+    """Internal state update result with control flags."""
+
+    state: SchedulerState
+    stop_queueing: bool
+    stop_processing: bool
+
+
+class WorkerGroupState(Generic[RequestT, ResponseT]):
+    """
+    Manages scheduler state and synchronization for worker process groups.
+
+    Handles request generation, state updates, constraint evaluation, and
+    coordination between worker processes. Provides thread-safe state management
+    with request lifecycle tracking and constraint-based termination logic.
+    """
+
+    def __init__(
+        self,
+        start_time: float,
+        processes: list[BaseProcess],
+        constraints: dict[str, Constraint],
+        stop_send_requests_event: threading.Event,
+        send_requests_stopped_event: threading.Event,
+        requests_generated_event: Event,
+        constraint_reached_event: Event,
+        shutdown_event: Event,
+        error_event: Event,
+        messaging: InterProcessMessaging[
+            tuple[RequestT | MultiTurnRequestT[RequestT], RequestInfo],
+            tuple[
+                ResponseT | None,
+                RequestT | MultiTurnRequestT[RequestT],
+                RequestInfo,
+                SchedulerState,
+            ],
+        ],
+    ):
+        """
+        Initialize worker group state management.
+
+        :param start_time: Unix timestamp when processing should begin
+        :param processes: List of worker process instances
+        :param constraints: Named constraints for controlling execution behavior
+        :param stop_send_requests_event: Threading event for stopping request generation
+        :param send_requests_stopped_event: Threading event for request coordination
+        :param requests_generated_event: Multiprocessing event for generation completion
+        :param constraint_reached_event: Multiprocessing event for constraint stopping
+        :param shutdown_event: Multiprocessing event for coordinated shutdown
+        :param error_event: Multiprocessing event for error condition signaling
+        :param messaging: Inter-process messaging channel for exchanging requests
+            and responses with the workers
+        """
+        self.start_time = start_time
+        self.processes = processes
+        self.constraints = constraints
+        self.stop_send_requests_event = stop_send_requests_event
+        self.send_requests_stopped_event = send_requests_stopped_event
+        self.requests_generated_event = requests_generated_event
+        self.constraint_reached_event = constraint_reached_event
+        self.shutdown_event = shutdown_event
+        self.error_event = error_event
+        self.messaging = messaging
+
+        self._update_lock: threading.Lock = threading.Lock()
+        self._state: SchedulerState = SchedulerState(
+            node_id=0,
+            num_processes=len(processes),
+            start_time=start_time,
+        )
+        self._queued_requests: set[str] = set()
+        self._pending_requests: set[str] = set()
+        self._processing_requests: set[str] = set()
+
+    def requests_generator(
+        self, requests: Iterable[RequestT | MultiTurnRequestT[RequestT]]
+    ) -> Generator[
+        tuple[RequestT | MultiTurnRequestT[RequestT], RequestInfo], None, None
+    ]:
+        """
+        Generate request-info pairs for worker processing with constraint evaluation.
+
+        Processes the finite request iterable sequentially, creating scheduling
+        metadata for each request and evaluating constraints to determine when
+        to stop request generation.
+
+        :param requests: Finite iterable of requests to process sequentially
+        :return: Generator yielding (request, request_info) tuples
+        """
+
+        try:
+            count = 0
+            for request in iter(requests):
+                count += 1
+
+                if hasattr(request, "request_id"):
+                    request_id = request.request_id
+                elif hasattr(request, "id"):
+                    request_id = request.id
+                else:
+                    request_id = str(uuid.uuid4())
+                request_info: RequestInfo = RequestInfo(
+                    request_id=request_id,
+                    status="queued",
+                    scheduler_process_id=0,
+                    scheduler_start_time=self.start_time,
+                )
+                state_update = self._locked_update(request_info)
+                request_info.timings.queued = time.time()
+                self.messaging.buffer_receive_queue.sync_put(
+                    (None, request, request_info, state_update.state)
+                )
+
+                yield (request, request_info)
+
+                if state_update.stop_queueing:
+                    self.stop_send_requests_event.set()
+                    return
+
+            # Reached the end; inject a RequestsExhaustedConstraint to record it
+            self._locked_update(
+                info=None,
+                requests_exhausted=RequestsExhaustedConstraint(num_requests=count),
+            )
+            self.stop_send_requests_event.set()
+        except Exception as err:
+            logger.error(f"Error generating requests: {err}")
+            self.error_event.set()
+            raise err
+
+    def received_callback(
+        self,
+        update: tuple[
+            ResponseT | None,
+            RequestT | MultiTurnRequestT,
+            RequestInfo,
+        ],
+    ) -> tuple[
+        ResponseT | None,
+        RequestT | MultiTurnRequestT,
+        RequestInfo,
+        SchedulerState,
+    ]:
+        """
+        Process received request updates and inject the current scheduler state.
+
+        Updates internal state tracking based on request status changes and
+        evaluates constraints to determine if processing should be terminated.
+        Triggers shutdown when stop conditions are met.
+
+        :param update: Tuple containing response, request, and request info
+        :return: Updated tuple with the scheduler state injected
+        """
+        try:
+            response, request, request_info = update
+            state_update = self._locked_update(info=request_info)
+
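+            # Shutdown handshake: requests_generated -> constraint_reached ->
+            # shutdown, each gated by the checks below.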
+            # Check if we need to tell workers to stop pulling new requests:
+            # no more requests will be sent and the queue has drained
+            if (
+                state_update.state.queued_requests == 0
+                and self.stop_send_requests_event.is_set()
+                and not self.requests_generated_event.is_set()
+            ):
+                self.requests_generated_event.set()
+
+            # Check if we need to tell workers to stop processing requests
+            # (constraints)
+            if (
+                state_update.stop_processing
+                and not self.constraint_reached_event.is_set()
+            ):
+                self.constraint_reached_event.set()
+
+            # Check if all requests have been processed so we can shut down
+            if (
+                state_update.state.processed_requests
+                == state_update.state.created_requests
+                and self.stop_send_requests_event.is_set()
+                and self.requests_generated_event.is_set()
+                and self.constraint_reached_event.is_set()
+                and not self.shutdown_event.is_set()
+            ):
+                self.shutdown_event.set()
+        except Exception as err:
+            logger.error(f"Error processing received update: {err}")
+            self.error_event.set()
+            raise err
+
+        return (
+            response,
+            request,
+            request_info,
+            state_update.state,  # inject state for updates to be yielded back
+        )
+
+    def _locked_update(
+        self,
+        info: RequestInfo | None = None,
+        **add_constraints: Constraint,
+    ) -> _StateUpdate:
+        with self._update_lock:
+            if add_constraints:
+                self.constraints.update(add_constraints)
+
+            if info is not None:
+                self._state.end_time = time.time()  # Always update; may be the last
+                self._update_state_request_counts(info)
+                self._update_with_constraints(info)
+
+            state_copy: SchedulerState = self._state.model_copy()
+
+        return _StateUpdate(
+            state_copy,
+            state_copy.end_queuing_time is not None,
+            state_copy.end_processing_time is not None,
+        )
+
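+    # Request lifecycle: queued -> pending -> in_progress -> terminal
+    # (completed | errored | cancelled); each transition moves the request ID
+    # between the tracking sets and refreshes the corresponding counters.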
+    def _update_state_request_counts(self, info: RequestInfo):
+        if info.status == "queued":
+            self._queued_requests.add(info.request_id)
+            self._state.queued_requests = len(self._queued_requests)
+            self._state.created_requests += 1
+        elif info.status == "pending":
+            self._queued_requests.remove(info.request_id)
+            self._state.queued_requests = len(self._queued_requests)
+            self._pending_requests.add(info.request_id)
+            self._state.pending_requests = len(self._pending_requests)
+        elif info.status == "in_progress":
+            self._pending_requests.remove(info.request_id)
+            self._state.pending_requests = len(self._pending_requests)
+            self._processing_requests.add(info.request_id)
+            self._state.processing_requests = len(self._processing_requests)
+        elif info.status == "completed":
+            self._processing_requests.remove(info.request_id)
+            self._state.processing_requests = len(self._processing_requests)
+            self._state.processed_requests += 1
+            self._state.successful_requests += 1
+        elif info.status in ("errored", "cancelled"):
+            if info.request_id in self._queued_requests:
+                self._queued_requests.remove(info.request_id)
+                self._state.queued_requests = len(self._queued_requests)
+            elif info.request_id in self._pending_requests:
+                self._pending_requests.remove(info.request_id)
+                self._state.pending_requests = len(self._pending_requests)
+            elif info.request_id in self._processing_requests:
+                self._processing_requests.remove(info.request_id)
+                self._state.processing_requests = len(self._processing_requests)
+
+            self._state.processed_requests += 1
+            self._state.errored_requests += 1 if info.status == "errored" else 0
+            self._state.cancelled_requests += 1 if info.status == "cancelled" else 0
+        else:
+            raise ValueError(f"Unknown request_info status {info.status} for {info}")
+
+    def _update_with_constraints(self, info: RequestInfo):
+        actions: dict[str, SchedulerUpdateAction] = {
+            name: const(self._state, info) for name, const in self.constraints.items()
+        }
+        self._state.scheduler_constraints = actions
+        stop_queuing_actions = {}
+        stop_processing_actions = {}
+
+        for key, action in actions.items():
+            # Record the first constraints that ask to stop queuing or processing
+            if (
+                self._state.end_queuing_time is None
+                and action.request_queuing == "stop"
+            ):
+                stop_queuing_actions[key] = action
+            if (
+                self._state.end_processing_time is None
+                and action.request_processing in ("stop_local", "stop_all")
+            ):
+                stop_processing_actions[key] = action
+
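+            # Merge progress: the most restrictive (smallest) remaining value
+            # reported by any constraint wins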
+            for progress_key in (
+                "remaining_fraction",
+                "remaining_requests",
+                "remaining_duration",
+            ):
+                if (new_val := action.progress.get(progress_key)) is not None and (
+                    getattr(self._state, progress_key) is None
+                    or new_val < getattr(self._state, progress_key)
+                ):
+                    setattr(self._state, progress_key, new_val)
+
+        if stop_queuing_actions:
+            self._state.end_queuing_constraints = stop_queuing_actions
+            self._state.end_queuing_time = time.time()
+
+        if stop_processing_actions:
+            self._state.end_processing_constraints = stop_processing_actions
+            self._state.end_processing_time = time.time()
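
Note on the progress merge in _update_with_constraints above: each constraint reports optional remaining_fraction, remaining_requests, and remaining_duration values, and the scheduler keeps the most restrictive (smallest) one reported so far. A minimal standalone sketch of that rule, with illustrative values only (merge_progress is a hypothetical helper, not part of guidellm):

    def merge_progress(current: float | None, new: float | None) -> float | None:
        """Keep the smallest (most restrictive) remaining value seen so far."""
        if new is None:
            return current  # this constraint reported nothing for the key
        if current is None or new < current:
            return new  # first report, or a tighter constraint overrides
        return current  # a looser constraint is ignored

    assert merge_progress(None, 0.8) == 0.8
    assert merge_progress(0.8, 0.5) == 0.5
    assert merge_progress(0.5, 0.9) == 0.5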