guidellm 0.3.1__py3-none-any.whl → 0.6.0a5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (141) hide show
  1. guidellm/__init__.py +5 -2
  2. guidellm/__main__.py +524 -255
  3. guidellm/backends/__init__.py +33 -0
  4. guidellm/backends/backend.py +109 -0
  5. guidellm/backends/openai.py +340 -0
  6. guidellm/backends/response_handlers.py +428 -0
  7. guidellm/benchmark/__init__.py +69 -39
  8. guidellm/benchmark/benchmarker.py +160 -316
  9. guidellm/benchmark/entrypoints.py +560 -127
  10. guidellm/benchmark/outputs/__init__.py +24 -0
  11. guidellm/benchmark/outputs/console.py +633 -0
  12. guidellm/benchmark/outputs/csv.py +721 -0
  13. guidellm/benchmark/outputs/html.py +473 -0
  14. guidellm/benchmark/outputs/output.py +169 -0
  15. guidellm/benchmark/outputs/serialized.py +69 -0
  16. guidellm/benchmark/profiles.py +718 -0
  17. guidellm/benchmark/progress.py +553 -556
  18. guidellm/benchmark/scenarios/__init__.py +40 -0
  19. guidellm/benchmark/scenarios/chat.json +6 -0
  20. guidellm/benchmark/scenarios/rag.json +6 -0
  21. guidellm/benchmark/schemas/__init__.py +66 -0
  22. guidellm/benchmark/schemas/base.py +402 -0
  23. guidellm/benchmark/schemas/generative/__init__.py +55 -0
  24. guidellm/benchmark/schemas/generative/accumulator.py +841 -0
  25. guidellm/benchmark/schemas/generative/benchmark.py +163 -0
  26. guidellm/benchmark/schemas/generative/entrypoints.py +381 -0
  27. guidellm/benchmark/schemas/generative/metrics.py +927 -0
  28. guidellm/benchmark/schemas/generative/report.py +158 -0
  29. guidellm/data/__init__.py +34 -4
  30. guidellm/data/builders.py +541 -0
  31. guidellm/data/collators.py +16 -0
  32. guidellm/data/config.py +120 -0
  33. guidellm/data/deserializers/__init__.py +49 -0
  34. guidellm/data/deserializers/deserializer.py +141 -0
  35. guidellm/data/deserializers/file.py +223 -0
  36. guidellm/data/deserializers/huggingface.py +94 -0
  37. guidellm/data/deserializers/memory.py +194 -0
  38. guidellm/data/deserializers/synthetic.py +246 -0
  39. guidellm/data/entrypoints.py +52 -0
  40. guidellm/data/loaders.py +190 -0
  41. guidellm/data/preprocessors/__init__.py +27 -0
  42. guidellm/data/preprocessors/formatters.py +410 -0
  43. guidellm/data/preprocessors/mappers.py +196 -0
  44. guidellm/data/preprocessors/preprocessor.py +30 -0
  45. guidellm/data/processor.py +29 -0
  46. guidellm/data/schemas.py +175 -0
  47. guidellm/data/utils/__init__.py +6 -0
  48. guidellm/data/utils/dataset.py +94 -0
  49. guidellm/extras/__init__.py +4 -0
  50. guidellm/extras/audio.py +220 -0
  51. guidellm/extras/vision.py +242 -0
  52. guidellm/logger.py +2 -2
  53. guidellm/mock_server/__init__.py +8 -0
  54. guidellm/mock_server/config.py +84 -0
  55. guidellm/mock_server/handlers/__init__.py +17 -0
  56. guidellm/mock_server/handlers/chat_completions.py +280 -0
  57. guidellm/mock_server/handlers/completions.py +280 -0
  58. guidellm/mock_server/handlers/tokenizer.py +142 -0
  59. guidellm/mock_server/models.py +510 -0
  60. guidellm/mock_server/server.py +238 -0
  61. guidellm/mock_server/utils.py +302 -0
  62. guidellm/scheduler/__init__.py +69 -26
  63. guidellm/scheduler/constraints/__init__.py +49 -0
  64. guidellm/scheduler/constraints/constraint.py +325 -0
  65. guidellm/scheduler/constraints/error.py +411 -0
  66. guidellm/scheduler/constraints/factory.py +182 -0
  67. guidellm/scheduler/constraints/request.py +312 -0
  68. guidellm/scheduler/constraints/saturation.py +722 -0
  69. guidellm/scheduler/environments.py +252 -0
  70. guidellm/scheduler/scheduler.py +137 -368
  71. guidellm/scheduler/schemas.py +358 -0
  72. guidellm/scheduler/strategies.py +617 -0
  73. guidellm/scheduler/worker.py +413 -419
  74. guidellm/scheduler/worker_group.py +712 -0
  75. guidellm/schemas/__init__.py +65 -0
  76. guidellm/schemas/base.py +417 -0
  77. guidellm/schemas/info.py +188 -0
  78. guidellm/schemas/request.py +235 -0
  79. guidellm/schemas/request_stats.py +349 -0
  80. guidellm/schemas/response.py +124 -0
  81. guidellm/schemas/statistics.py +1018 -0
  82. guidellm/{config.py → settings.py} +31 -24
  83. guidellm/utils/__init__.py +71 -8
  84. guidellm/utils/auto_importer.py +98 -0
  85. guidellm/utils/cli.py +132 -5
  86. guidellm/utils/console.py +566 -0
  87. guidellm/utils/encoding.py +778 -0
  88. guidellm/utils/functions.py +159 -0
  89. guidellm/utils/hf_datasets.py +1 -2
  90. guidellm/utils/hf_transformers.py +4 -4
  91. guidellm/utils/imports.py +9 -0
  92. guidellm/utils/messaging.py +1118 -0
  93. guidellm/utils/mixins.py +115 -0
  94. guidellm/utils/random.py +3 -4
  95. guidellm/utils/registry.py +220 -0
  96. guidellm/utils/singleton.py +133 -0
  97. guidellm/utils/synchronous.py +159 -0
  98. guidellm/utils/text.py +163 -50
  99. guidellm/utils/typing.py +41 -0
  100. guidellm/version.py +2 -2
  101. guidellm-0.6.0a5.dist-info/METADATA +364 -0
  102. guidellm-0.6.0a5.dist-info/RECORD +109 -0
  103. guidellm/backend/__init__.py +0 -23
  104. guidellm/backend/backend.py +0 -259
  105. guidellm/backend/openai.py +0 -708
  106. guidellm/backend/response.py +0 -136
  107. guidellm/benchmark/aggregator.py +0 -760
  108. guidellm/benchmark/benchmark.py +0 -837
  109. guidellm/benchmark/output.py +0 -997
  110. guidellm/benchmark/profile.py +0 -409
  111. guidellm/benchmark/scenario.py +0 -104
  112. guidellm/data/prideandprejudice.txt.gz +0 -0
  113. guidellm/dataset/__init__.py +0 -22
  114. guidellm/dataset/creator.py +0 -213
  115. guidellm/dataset/entrypoints.py +0 -42
  116. guidellm/dataset/file.py +0 -92
  117. guidellm/dataset/hf_datasets.py +0 -62
  118. guidellm/dataset/in_memory.py +0 -132
  119. guidellm/dataset/synthetic.py +0 -287
  120. guidellm/objects/__init__.py +0 -18
  121. guidellm/objects/pydantic.py +0 -89
  122. guidellm/objects/statistics.py +0 -953
  123. guidellm/preprocess/__init__.py +0 -3
  124. guidellm/preprocess/dataset.py +0 -374
  125. guidellm/presentation/__init__.py +0 -28
  126. guidellm/presentation/builder.py +0 -27
  127. guidellm/presentation/data_models.py +0 -232
  128. guidellm/presentation/injector.py +0 -66
  129. guidellm/request/__init__.py +0 -18
  130. guidellm/request/loader.py +0 -284
  131. guidellm/request/request.py +0 -79
  132. guidellm/request/types.py +0 -10
  133. guidellm/scheduler/queues.py +0 -25
  134. guidellm/scheduler/result.py +0 -155
  135. guidellm/scheduler/strategy.py +0 -495
  136. guidellm-0.3.1.dist-info/METADATA +0 -329
  137. guidellm-0.3.1.dist-info/RECORD +0 -62
  138. {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/WHEEL +0 -0
  139. {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/entry_points.txt +0 -0
  140. {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/licenses/LICENSE +0 -0
  141. {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,712 @@
1
+ """
2
+ Multi-process worker group orchestration for distributed request scheduling.
3
+
4
+ Provides infrastructure for coordinating worker processes with shared state
5
+ management, inter-process communication, and lifecycle coordination. Handles
6
+ dynamic scaling, load balancing, constraint evaluation, and graceful shutdown
7
+ across distributed workers processing concurrent requests.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import asyncio
13
+ import math
14
+ import threading
15
+ import time
16
+ import uuid
17
+ from collections.abc import AsyncIterator, Generator, Iterable
18
+ from multiprocessing import get_context
19
+ from multiprocessing.context import BaseContext
20
+ from multiprocessing.managers import BaseManager
21
+ from multiprocessing.process import BaseProcess
22
+ from multiprocessing.synchronize import Barrier, Event
23
+ from typing import Generic, NamedTuple
24
+
25
+ from guidellm.logger import logger
26
+ from guidellm.scheduler.constraints import Constraint, RequestsExhaustedConstraint
27
+ from guidellm.scheduler.schemas import (
28
+ BackendInterface,
29
+ MultiTurnRequestT,
30
+ RequestT,
31
+ ResponseT,
32
+ SchedulerState,
33
+ SchedulerUpdateAction,
34
+ )
35
+ from guidellm.scheduler.strategies import SchedulingStrategy
36
+ from guidellm.scheduler.worker import WorkerProcess
37
+ from guidellm.schemas import RequestInfo
38
+ from guidellm.settings import settings
39
+ from guidellm.utils import (
40
+ InterProcessMessaging,
41
+ InterProcessMessagingManagerQueue,
42
+ InterProcessMessagingPipe,
43
+ InterProcessMessagingQueue,
44
+ wait_for_sync_objects,
45
+ )
46
+
47
+ __all__ = ["WorkerGroupState", "WorkerProcessGroup"]
48
+
49
+
50
+ class WorkerProcessGroup(Generic[RequestT, ResponseT]):
51
+ """
52
+ Orchestrates multiple worker processes for distributed request processing.
53
+
54
+ Manages process lifecycle, request distribution, response collection, and state
55
+ synchronization across workers. Handles dynamic scaling, load balancing, and
56
+ constraint evaluation with graceful shutdown coordination for high-throughput
57
+ request processing workloads.
58
+
59
+ Example:
60
+ ::
61
+ from guidellm.scheduler.worker_group import WorkerProcessGroup
62
+
63
+ group = WorkerProcessGroup(
64
+ requests=request_iterable,
65
+ backend=backend_instance,
66
+ strategy=scheduling_strategy,
67
+ max_time=time_constraint,
68
+ )
69
+
70
+ await group.create_processes()
71
+ await group.start(time.time())
72
+
73
+ async for response, request, info, state in group.request_updates():
74
+ if response is not None:
75
+ # Process completed request
76
+ handle_response(response)
77
+
78
+ await group.shutdown()
79
+ """
80
+
81
def __init__(
    self,
    requests: Iterable[RequestT | MultiTurnRequestT[RequestT]],
    backend: BackendInterface[RequestT, ResponseT],
    strategy: SchedulingStrategy,
    **constraints: Constraint,
):
    """
    Set up a worker process group without starting any processes.

    Only stores the inputs and creates empty placeholders; the multiprocessing
    primitives are populated by ``create_processes`` and the scheduler state by
    ``start``.

    :param requests: Iterable of requests; wrapped in an iterator immediately
    :param backend: Backend interface handed to every worker process
    :param strategy: Scheduling strategy handed to every worker process
    :param constraints: Named constraints, stored as a dict keyed by name
    """
    # Wrap first so a non-iterable input fails before any other attribute is set
    self.requests = iter(requests)
    self.backend = backend
    self.strategy = strategy
    self.constraints = constraints

    # Placeholders filled in by create_processes()
    self.mp_context: BaseContext | None = None
    self.mp_manager: BaseManager | None = None
    self.processes: list[BaseProcess] | None = None
    self.startup_barrier: Barrier | None = None

    # Cross-process signalling events, also filled in by create_processes()
    self.requests_generated_event: Event | None = None
    self.constraint_reached_event: Event | None = None
    self.shutdown_event: Event | None = None
    self.error_event: Event | None = None

    # Placeholders for the scheduler state (set by start()) and the
    # messaging channel (set by create_processes())
    self.state: WorkerGroupState[RequestT, ResponseT] | None = None
    self.messaging: (
        InterProcessMessaging[
            tuple[
                RequestT | MultiTurnRequestT[RequestT],
                RequestInfo,
            ],
            tuple[
                ResponseT | None,
                RequestT | MultiTurnRequestT[RequestT],
                RequestInfo,
                SchedulerState,
            ],
        ]
        | None
    ) = None
128
+
129
async def create_processes(self):
    """
    Create and start the worker processes for this group.

    Resolves the concurrency ceiling and process count from the strategy,
    backend, and settings limits, builds the multiprocessing primitives and the
    configured inter-process messaging object, starts one ``WorkerProcess`` per
    rank, and then waits on the startup barrier, shutdown event, or error event.

    :raises RuntimeError: If the resolved concurrency or process count is not
        positive, if ``settings.mp_messaging_object`` is not a supported value,
        or if the error event is set while waiting for startup
    """
    # Processes limits and params
    max_conc: int
    if (
        requests_limit := min(
            self.strategy.requests_limit or math.inf,
            self.backend.requests_limit or math.inf,
        )
    ) != math.inf:
        max_conc = int(requests_limit)
    else:
        # If concurrency not specified, use settings
        max_conc = settings.max_concurrency
    if max_conc <= 0:
        raise RuntimeError("max_concurrency resolved to 0; increase limits/config")

    # Calculate number of processes, ensure we don't exceed the max concurrency,
    # or limits from the backend, strategy, or user settings
    num_processes: int = int(
        min(
            max_conc,
            self.strategy.processes_limit or math.inf,
            self.backend.processes_limit or math.inf,
            settings.max_worker_processes,
        )
    )
    if num_processes <= 0:
        raise RuntimeError("num_processes resolved to 0; increase limits/config")

    # Fail fast on an unknown messaging kind, before the Manager is created or
    # any worker is spawned; otherwise self.messaging would stay None and the
    # worker loop below would die with an opaque AttributeError.
    messaging_kind = settings.mp_messaging_object
    if messaging_kind not in ("queue", "manager_queue", "pipe"):
        raise RuntimeError(
            f"Unsupported mp_messaging_object {messaging_kind!r}; "
            "expected 'queue', 'manager_queue', or 'pipe'"
        )

    per_proc_max_conc = max_conc // num_processes
    max_pending_size = max(
        1, math.floor(max_conc * settings.mp_max_pending_buffer_percent)
    )
    per_proc_max_buffer_size = 1

    # Initialize multiprocessing components
    self.mp_context = get_context(settings.mp_context_type)
    self.mp_manager = self.mp_context.Manager()
    # +1 party on the barrier: wait_for_sync_objects below waits on it as well
    self.startup_barrier = self.mp_context.Barrier(num_processes + 1)
    self.requests_generated_event = self.mp_context.Event()
    self.constraint_reached_event = self.mp_context.Event()
    self.shutdown_event = self.mp_context.Event()
    self.error_event = self.mp_context.Event()

    if messaging_kind == "queue":
        self.messaging = InterProcessMessagingQueue(
            mp_context=self.mp_context,
            serialization=settings.mp_serialization,
            encoding=settings.mp_encoding,
            max_pending_size=max_pending_size,
            max_buffer_send_size=settings.mp_requests_send_buffer_size,
            poll_interval=settings.mp_poll_interval,
        )
    elif messaging_kind == "manager_queue":
        self.messaging = InterProcessMessagingManagerQueue(
            manager=self.mp_manager,
            mp_context=self.mp_context,
            serialization=settings.mp_serialization,
            encoding=settings.mp_encoding,
            max_pending_size=max_pending_size,
            max_buffer_send_size=settings.mp_requests_send_buffer_size,
            poll_interval=settings.mp_poll_interval,
        )
    else:  # "pipe", guaranteed by the validation above
        self.messaging = InterProcessMessagingPipe(
            num_workers=num_processes,
            mp_context=self.mp_context,
            serialization=settings.mp_serialization,
            encoding=settings.mp_encoding,
            max_pending_size=max_pending_size,
            max_buffer_send_size=settings.mp_requests_send_buffer_size,
            poll_interval=settings.mp_poll_interval,
        )

    # Initialize worker processes
    self.processes = []
    self.strategy.init_processes_timings(
        worker_count=num_processes, max_concurrency=max_conc
    )
    for rank in range(num_processes):
        # Distribute any remainder across the first N ranks
        async_limit = per_proc_max_conc + (
            1 if rank < (max_conc % num_processes) else 0
        )

        worker = WorkerProcess[RequestT, ResponseT](
            worker_index=rank,
            messaging=self.messaging.create_worker_copy(  # type: ignore[arg-type]
                worker_index=rank,
                max_buffer_send_size=None,
                max_buffer_receive_size=per_proc_max_buffer_size,
            ),  # The non-group worker lacks the SchedulerState type. Type err.
            backend=self.backend,
            strategy=self.strategy,
            async_limit=async_limit,
            fut_scheduling_time_limit=0.0,
            startup_barrier=self.startup_barrier,
            requests_generated_event=self.requests_generated_event,
            constraint_reached_event=self.constraint_reached_event,
            shutdown_event=self.shutdown_event,
            error_event=self.error_event,
        )
        proc = self.mp_context.Process(target=worker.run, daemon=False)
        proc.start()
        self.processes.append(proc)

    # Returns the key of whichever sync object fired first
    wait_key = await wait_for_sync_objects(
        {
            "startup_barrier": self.startup_barrier,
            "shutdown_event": self.shutdown_event,
            "error_event": self.error_event,
        },
        poll_interval=settings.mp_poll_interval,
    )

    if wait_key == "error_event":
        raise RuntimeError(
            "Worker process group startup failed: error_event is set"
        )
258
+
259
async def start(self, start_time: float):
    """
    Start request processing, returning once ``start_time`` has been reached.

    Builds the ``WorkerGroupState``, starts the messaging layer with the state's
    request generator and receive callback, sleeps until ``start_time`` if it is
    still in the future, and finally checks the error event.

    :param start_time: Unix timestamp when processing should begin
    :raises RuntimeError: If create_processes() was not called first, or if the
        error event is set once the start time has been reached
    """
    prerequisites = (
        self.processes,
        self.requests_generated_event,
        self.constraint_reached_event,
        self.shutdown_event,
        self.error_event,
        self.messaging,
    )
    if not all(prerequisites):
        raise RuntimeError("create_processes() must be called before start()")

    self.strategy.init_processes_start(start_time=start_time)

    # Thread-level events shared between the state object and the messaging layer
    stop_sending = threading.Event()
    sending_stopped = threading.Event()

    self.state = WorkerGroupState[RequestT, ResponseT](
        start_time=start_time,
        processes=self.processes,
        constraints=self.constraints,
        stop_send_requests_event=stop_sending,
        send_requests_stopped_event=sending_stopped,
        requests_generated_event=self.requests_generated_event,
        constraint_reached_event=self.constraint_reached_event,
        shutdown_event=self.shutdown_event,
        error_event=self.error_event,
        messaging=self.messaging,
    )
    await self.messaging.start(
        send_items=self.state.requests_generator(self.requests),
        receive_callback=self.state.received_callback,
        send_stopped_event=sending_stopped,
        send_stop_criteria=[stop_sending],
        receive_stop_criteria=[self.shutdown_event],
    )

    # Hold here until the agreed start time, if it has not passed already
    remaining = start_time - time.time()
    if remaining > 0:
        await asyncio.sleep(remaining)

    if self.error_event.is_set():
        raise RuntimeError(
            "error_event is set in WorkerProcessGroup, "
            "indicating an error occurred in one of the worker processes."
        )
311
+
312
async def request_updates(
    self,
) -> AsyncIterator[
    tuple[
        ResponseT | None,
        RequestT | MultiTurnRequestT[RequestT],
        RequestInfo,
        SchedulerState,
    ]
]:
    """
    Stream request updates from the messaging layer until shutdown.

    Polls the messaging object with a timeout of ``settings.mp_poll_interval``
    and yields every received update. The error event is checked before each
    poll. The stream ends when a poll times out while the shutdown event is set.

    :return: Async iterator yielding (response, request, request_info, state)
        tuples
    :raises RuntimeError: If the error event is set
    """
    while True:
        # Surface worker failures before waiting on the next update
        if self.error_event.is_set():  # type: ignore[union-attr]
            logger.error("Error event set in WorkerProcessGroup")
            raise RuntimeError(
                "error_event is set in WorkerProcessGroup, "
                "indicating an error occurred in one of the worker processes."
            )

        try:
            update = await self.messaging.get(  # type: ignore[union-attr]
                timeout=settings.mp_poll_interval
            )
            response, request, request_info, scheduler_state = update
            yield response, request, request_info, scheduler_state
        except asyncio.TimeoutError:
            # A timeout after shutdown means nothing is left to yield
            if self.shutdown_event.is_set():  # type: ignore[union-attr]
                return
354
+
355
async def shutdown(self) -> list[Exception]:  # noqa: C901
    """
    Shut down the worker process group and release its resources.

    Sets the shutdown event, stops the messaging layer, joins every worker
    process, shuts down the multiprocessing manager, and resets all attributes
    populated by ``create_processes`` and ``start`` back to None. Errors raised
    along the way are collected rather than propagated.

    A worker that is still alive after the join timeout is terminated (and
    killed if termination does not take effect) so that a stuck non-daemon
    process cannot outlive the group; this is reported in the returned list.

    :return: List of exceptions encountered during shutdown; empty if no errors
    """
    exceptions: list[Exception] = []
    if self.shutdown_event is not None:
        self.shutdown_event.set()

    # Clear out start values
    if self.messaging is not None:
        try:
            await asyncio.wait_for(self.messaging.stop(), timeout=5.0)
        except Exception as err:  # noqa: BLE001
            exceptions.append(err)
    self.messaging = None
    self.state = None

    # Clear out create processes values
    if self.processes is not None:
        for proc in self.processes:
            try:
                # join() blocks, so run it off the event loop thread
                await asyncio.to_thread(proc.join, timeout=5.0)
                if proc.is_alive():
                    # join() timed out: escalate instead of abandoning the worker
                    proc.terminate()
                    await asyncio.to_thread(proc.join, timeout=5.0)
                    if proc.is_alive():
                        proc.kill()
                        await asyncio.to_thread(proc.join, timeout=5.0)
                    exceptions.append(
                        RuntimeError(
                            f"Worker {proc.pid} did not exit within the "
                            "shutdown timeout and was terminated"
                        )
                    )
                elif proc.exitcode is not None and proc.exitcode > 0:
                    exceptions.append(
                        RuntimeError(
                            f"Worker {proc.pid} exited with code {proc.exitcode}"
                        )
                    )
            except Exception as err:  # noqa: BLE001
                exceptions.append(err)
    self.processes = None
    self.startup_barrier = None
    self.requests_generated_event = None
    self.constraint_reached_event = None
    self.shutdown_event = None
    self.error_event = None
    if self.mp_manager is not None:
        try:
            self.mp_manager.shutdown()
        except Exception as err:  # noqa: BLE001
            exceptions.append(err)
    self.mp_manager = None
    self.mp_context = None

    return exceptions
406
+
407
+
408
class _StateUpdate(NamedTuple):
    """Internal state update result with control flags."""

    # Copy of the scheduler state, taken by _locked_update after applying the update
    state: SchedulerState
    # True when the copied state's end_queuing_time is set (stop sending requests)
    stop_queueing: bool
    # True when the copied state's end_processing_time is set (stop processing)
    stop_processing: bool
414
+
415
+
416
+ class WorkerGroupState(Generic[RequestT, ResponseT]):
417
+ """
418
+ Manages scheduler state and synchronization for worker process groups.
419
+
420
+ Handles request generation, state updates, constraint evaluation, and
421
+ coordination between worker processes. Provides thread-safe state management
422
+ with request lifecycle tracking and constraint-based termination logic.
423
+ """
424
+
425
def __init__(
    self,
    start_time: float,
    processes: list[BaseProcess],
    constraints: dict[str, Constraint],
    stop_send_requests_event: threading.Event,
    send_requests_stopped_event: threading.Event,
    requests_generated_event: Event,
    constraint_reached_event: Event,
    shutdown_event: Event,
    error_event: Event,
    messaging: InterProcessMessaging[
        tuple[RequestT | MultiTurnRequestT[RequestT], RequestInfo],
        tuple[
            ResponseT | None,
            RequestT | MultiTurnRequestT[RequestT],
            RequestInfo,
            SchedulerState,
        ],
    ],
):
    """
    Store the shared synchronization objects and create the tracked state.

    :param start_time: Unix timestamp when processing should begin
    :param processes: Worker processes; only their count is used here
    :param constraints: Named constraints evaluated on every request update
    :param stop_send_requests_event: Threading event set when request
        generation should stop
    :param send_requests_stopped_event: Threading event shared with the
        messaging layer; stored but not otherwise used in this class
    :param requests_generated_event: Multiprocessing event for generation
        completion
    :param constraint_reached_event: Multiprocessing event for constraint
        stopping
    :param shutdown_event: Multiprocessing event for coordinated shutdown
    :param error_event: Multiprocessing event for error condition signaling
    :param messaging: Inter-process messaging object; the request generator
        publishes "queued" updates through its ``buffer_receive_queue``
    """
    # Run configuration
    self.start_time = start_time
    self.processes = processes
    self.constraints = constraints
    self.messaging = messaging

    # Thread-level events
    self.stop_send_requests_event = stop_send_requests_event
    self.send_requests_stopped_event = send_requests_stopped_event

    # Process-level events
    self.requests_generated_event = requests_generated_event
    self.constraint_reached_event = constraint_reached_event
    self.shutdown_event = shutdown_event
    self.error_event = error_event

    # Mutable scheduler state, guarded by the lock in _locked_update
    self._update_lock: threading.Lock = threading.Lock()
    self._state: SchedulerState = SchedulerState(
        node_id=0,
        num_processes=len(processes),
        start_time=start_time,
    )

    # Request ids grouped by lifecycle stage; the state counters mirror their sizes
    self._queued_request_ids: set[str] = set()
    self._pending_request_ids: set[str] = set()
    self._processing_request_ids: set[str] = set()
479
+
480
def requests_generator(
    self, requests: Iterable[RequestT | MultiTurnRequestT[RequestT]]
) -> Generator[
    tuple[RequestT | MultiTurnRequestT[RequestT], RequestInfo], None, None
]:
    """
    Yield (request, request_info) pairs for the workers, one per input request.

    For every request a ``RequestInfo`` with status "queued" is created, the
    scheduler state is updated, and a "queued" update is pushed onto the
    messaging receive buffer before the pair is yielded. Generation stops early
    once the state reports that queueing has ended. When the iterable runs out,
    a ``RequestsExhaustedConstraint`` with the number of generated requests is
    added. In both cases the stop-send event is set.

    :param requests: Iterable of requests, consumed once in order
    :return: Generator yielding (request, request_info) tuples
    :raises RuntimeError: If the messaging receive buffer queue is None
    """

    try:
        count = 0
        for count, request in enumerate(requests, start=1):
            # Reuse the request's own identifier when it exposes one
            for id_attr in ("request_id", "id"):
                if hasattr(request, id_attr):
                    request_id = getattr(request, id_attr)
                    break
            else:
                request_id = str(uuid.uuid4())

            request_info: RequestInfo = RequestInfo(
                request_id=request_id,
                status="queued",
                scheduler_process_id=0,
                scheduler_start_time=self.start_time,
            )
            state_update = self._locked_update(request_info)
            request_info.timings.queued = time.time()

            receive_queue = self.messaging.buffer_receive_queue
            if receive_queue is None:
                raise RuntimeError("buffer receive queue is None")
            # Publish the "queued" update; no response exists yet, hence None
            receive_queue.sync_put(
                (None, request, request_info, state_update.state)
            )

            yield request, request_info

            if state_update.stop_queueing:
                self.stop_send_requests_event.set()
                return

        # Reached the end, inject a RequestsExhaustedConstraint to record
        self._locked_update(
            info=None,
            add_constraints={
                "requests_exhausted": RequestsExhaustedConstraint(  # type: ignore[dict-item]
                    num_requests=count
                )
            },
        )
        self.stop_send_requests_event.set()
    except Exception as err:
        logger.error(f"Error generating requests: {err}")
        self.error_event.set()
        raise
541
+
542
def received_callback(
    self,
    update: tuple[
        ResponseT | None,
        RequestT | MultiTurnRequestT,
        RequestInfo,
    ],
) -> tuple[
    ResponseT | None,
    RequestT | MultiTurnRequestT,
    RequestInfo,
    SchedulerState,
]:
    """
    Apply a received update to the scheduler state and attach a state copy.

    After updating the state, sets the requests-generated event once sending
    has stopped and no requests remain queued, sets the constraint-reached
    event once processing should stop, and sets the shutdown event once every
    created request has been processed and the other events are all set.

    :param update: Tuple containing response, request, and request info
    :return: The same tuple extended with the scheduler state copy
    """
    sending_stopped = self.stop_send_requests_event
    generated = self.requests_generated_event
    constrained = self.constraint_reached_event
    shutdown = self.shutdown_event

    try:
        response, request, request_info = update
        state_update = self._locked_update(info=request_info)
        state = state_update.state

        # Workers can stop pulling new requests: nothing queued, nothing more sent
        if state.queued_requests == 0 and sending_stopped.is_set():
            if not generated.is_set():
                generated.set()

        # Workers should stop processing requests (a constraint was reached)
        if state_update.stop_processing:
            if not constrained.is_set():
                constrained.set()

        # Every created request is accounted for: safe to shut down
        all_processed = state.processed_requests == state.created_requests
        if (
            all_processed
            and sending_stopped.is_set()
            and generated.is_set()
            and constrained.is_set()
            and not shutdown.is_set()
        ):
            shutdown.set()
    except Exception as err:
        logger.error(f"Error processing received update: {err}")
        self.error_event.set()
        raise

    # inject state for updates to be yielded back
    return (response, request, request_info, state_update.state)
606
+
607
def _locked_update(
    self,
    info: RequestInfo | None = None,
    add_constraints: dict[str, Constraint] | None = None,
) -> _StateUpdate:
    """
    Apply an update to the scheduler state while holding the update lock.

    :param info: Request info to fold into the counters and constraints; when
        None, only ``add_constraints`` (if any) is applied
    :param add_constraints: Extra named constraints merged into the active set
    :return: Copy of the state plus flags telling whether queueing and
        processing end times have been set
    """
    with self._update_lock:
        if add_constraints is not None:
            self.constraints.update(add_constraints)

        if info is not None:
            # Always update in case this turns out to be the last update
            self._state.end_time = time.time()
            self._update_state_request_counts(info)
            self._update_with_constraints(info)

        # Copy taken under the lock so the caller sees a consistent view
        snapshot: SchedulerState = self._state.model_copy()

    return _StateUpdate(
        state=snapshot,
        stop_queueing=snapshot.end_queuing_time is not None,
        stop_processing=snapshot.end_processing_time is not None,
    )
628
+
629
+ def _update_state_request_counts(self, info: RequestInfo):
630
+ finalized = time.time()
631
+
632
+ if info.status == "queued":
633
+ self._queued_request_ids.add(info.request_id)
634
+ self._state.queued_requests = len(self._queued_request_ids)
635
+ self._state.created_requests += 1
636
+ elif info.status == "pending":
637
+ self._queued_request_ids.remove(info.request_id)
638
+ self._state.queued_requests = len(self._queued_request_ids)
639
+ self._pending_request_ids.add(info.request_id)
640
+ self._state.pending_requests = len(self._pending_request_ids)
641
+ elif info.status == "in_progress":
642
+ self._pending_request_ids.remove(info.request_id)
643
+ self._state.pending_requests = len(self._pending_request_ids)
644
+ self._processing_request_ids.add(info.request_id)
645
+ self._state.processing_requests = len(self._processing_request_ids)
646
+ elif info.status == "completed":
647
+ info.timings.finalized = finalized
648
+ self._processing_request_ids.remove(info.request_id)
649
+ self._state.processing_requests = len(self._processing_request_ids)
650
+ self._state.processed_requests += 1
651
+ self._state.successful_requests += 1
652
+ elif info.status in ("errored", "cancelled"):
653
+ info.timings.finalized = finalized
654
+ if info.request_id in self._queued_request_ids:
655
+ self._queued_request_ids.remove(info.request_id)
656
+ self._state.queued_requests = len(self._queued_request_ids)
657
+ elif info.request_id in self._pending_request_ids:
658
+ self._pending_request_ids.remove(info.request_id)
659
+ self._state.pending_requests = len(self._pending_request_ids)
660
+ elif info.request_id in self._processing_request_ids:
661
+ self._processing_request_ids.remove(info.request_id)
662
+ self._state.processing_requests = len(self._processing_request_ids)
663
+
664
+ self._state.processed_requests += 1
665
+ self._state.errored_requests += 1 if info.status == "errored" else 0
666
+ self._state.cancelled_requests += 1 if info.status == "cancelled" else 0
667
+ else:
668
+ raise ValueError(f"Unknown request_info status {info.status} for {info}")
669
+
670
+ # Keep global count of the earliest start and latest end
671
+ self._state.start_requests_time = min(
672
+ info.timings.request_start or float("inf"),
673
+ self._state.start_requests_time or float("inf"),
674
+ )
675
+ self._state.end_requests_time = max(
676
+ info.timings.request_end or float("-inf"),
677
+ self._state.end_requests_time or float("-inf"),
678
+ finalized,
679
+ )
680
+
681
+ def _update_with_constraints(self, info: RequestInfo):
682
+ actions: dict[str, SchedulerUpdateAction] = {
683
+ name: const(self._state, info) for name, const in self.constraints.items()
684
+ }
685
+ self._state.scheduler_constraints = actions
686
+ stop_queuing_actions = {}
687
+ stop_processing_actions = {}
688
+
689
+ for key, action in actions.items():
690
+ # Action updates
691
+ if (
692
+ self._state.end_queuing_time is None
693
+ and action.request_queuing == "stop"
694
+ ):
695
+ stop_queuing_actions[key] = action
696
+ if (
697
+ self._state.end_processing_time is None
698
+ and action.request_processing in ("stop_local", "stop_all")
699
+ ):
700
+ stop_processing_actions[key] = action
701
+
702
+ self._state.progress.combine(action.progress)
703
+
704
+ if stop_queuing_actions:
705
+ self._state.end_queuing_constraints = stop_queuing_actions
706
+ self._state.end_queuing_time = time.time()
707
+
708
+ if stop_processing_actions:
709
+ self._state.end_processing_constraints = stop_processing_actions
710
+ self._state.end_processing_time = time.time()
711
+ if self._state.progress.stop_time is None:
712
+ self._state.progress.stop_time = self._state.end_processing_time