guidellm 0.4.0a18__py3-none-any.whl → 0.4.0a155__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of guidellm might be problematic. Click here for more details.

Files changed (116)
  1. guidellm/__init__.py +5 -2
  2. guidellm/__main__.py +451 -252
  3. guidellm/backends/__init__.py +33 -0
  4. guidellm/backends/backend.py +110 -0
  5. guidellm/backends/openai.py +355 -0
  6. guidellm/backends/response_handlers.py +455 -0
  7. guidellm/benchmark/__init__.py +53 -39
  8. guidellm/benchmark/benchmarker.py +148 -317
  9. guidellm/benchmark/entrypoints.py +466 -128
  10. guidellm/benchmark/output.py +517 -771
  11. guidellm/benchmark/profile.py +580 -280
  12. guidellm/benchmark/progress.py +568 -549
  13. guidellm/benchmark/scenarios/__init__.py +40 -0
  14. guidellm/benchmark/scenarios/chat.json +6 -0
  15. guidellm/benchmark/scenarios/rag.json +6 -0
  16. guidellm/benchmark/schemas.py +2085 -0
  17. guidellm/data/__init__.py +28 -4
  18. guidellm/data/collators.py +16 -0
  19. guidellm/data/deserializers/__init__.py +53 -0
  20. guidellm/data/deserializers/deserializer.py +109 -0
  21. guidellm/data/deserializers/file.py +222 -0
  22. guidellm/data/deserializers/huggingface.py +94 -0
  23. guidellm/data/deserializers/memory.py +192 -0
  24. guidellm/data/deserializers/synthetic.py +346 -0
  25. guidellm/data/loaders.py +145 -0
  26. guidellm/data/preprocessors/__init__.py +25 -0
  27. guidellm/data/preprocessors/formatters.py +412 -0
  28. guidellm/data/preprocessors/mappers.py +198 -0
  29. guidellm/data/preprocessors/preprocessor.py +29 -0
  30. guidellm/data/processor.py +30 -0
  31. guidellm/data/schemas.py +13 -0
  32. guidellm/data/utils/__init__.py +10 -0
  33. guidellm/data/utils/dataset.py +94 -0
  34. guidellm/data/utils/functions.py +18 -0
  35. guidellm/extras/__init__.py +4 -0
  36. guidellm/extras/audio.py +215 -0
  37. guidellm/extras/vision.py +242 -0
  38. guidellm/logger.py +2 -2
  39. guidellm/mock_server/__init__.py +8 -0
  40. guidellm/mock_server/config.py +84 -0
  41. guidellm/mock_server/handlers/__init__.py +17 -0
  42. guidellm/mock_server/handlers/chat_completions.py +280 -0
  43. guidellm/mock_server/handlers/completions.py +280 -0
  44. guidellm/mock_server/handlers/tokenizer.py +142 -0
  45. guidellm/mock_server/models.py +510 -0
  46. guidellm/mock_server/server.py +168 -0
  47. guidellm/mock_server/utils.py +302 -0
  48. guidellm/preprocess/dataset.py +23 -26
  49. guidellm/presentation/builder.py +2 -2
  50. guidellm/presentation/data_models.py +25 -21
  51. guidellm/presentation/injector.py +2 -3
  52. guidellm/scheduler/__init__.py +65 -26
  53. guidellm/scheduler/constraints.py +1035 -0
  54. guidellm/scheduler/environments.py +252 -0
  55. guidellm/scheduler/scheduler.py +140 -368
  56. guidellm/scheduler/schemas.py +272 -0
  57. guidellm/scheduler/strategies.py +519 -0
  58. guidellm/scheduler/worker.py +391 -420
  59. guidellm/scheduler/worker_group.py +707 -0
  60. guidellm/schemas/__init__.py +31 -0
  61. guidellm/schemas/info.py +159 -0
  62. guidellm/schemas/request.py +216 -0
  63. guidellm/schemas/response.py +119 -0
  64. guidellm/schemas/stats.py +228 -0
  65. guidellm/{config.py → settings.py} +32 -21
  66. guidellm/utils/__init__.py +95 -8
  67. guidellm/utils/auto_importer.py +98 -0
  68. guidellm/utils/cli.py +46 -2
  69. guidellm/utils/console.py +183 -0
  70. guidellm/utils/encoding.py +778 -0
  71. guidellm/utils/functions.py +134 -0
  72. guidellm/utils/hf_datasets.py +1 -2
  73. guidellm/utils/hf_transformers.py +4 -4
  74. guidellm/utils/imports.py +9 -0
  75. guidellm/utils/messaging.py +1118 -0
  76. guidellm/utils/mixins.py +115 -0
  77. guidellm/utils/pydantic_utils.py +411 -0
  78. guidellm/utils/random.py +3 -4
  79. guidellm/utils/registry.py +220 -0
  80. guidellm/utils/singleton.py +133 -0
  81. guidellm/{objects → utils}/statistics.py +341 -247
  82. guidellm/utils/synchronous.py +159 -0
  83. guidellm/utils/text.py +163 -50
  84. guidellm/utils/typing.py +41 -0
  85. guidellm/version.py +1 -1
  86. {guidellm-0.4.0a18.dist-info → guidellm-0.4.0a155.dist-info}/METADATA +33 -10
  87. guidellm-0.4.0a155.dist-info/RECORD +96 -0
  88. guidellm/backend/__init__.py +0 -23
  89. guidellm/backend/backend.py +0 -259
  90. guidellm/backend/openai.py +0 -705
  91. guidellm/backend/response.py +0 -136
  92. guidellm/benchmark/aggregator.py +0 -760
  93. guidellm/benchmark/benchmark.py +0 -837
  94. guidellm/benchmark/scenario.py +0 -104
  95. guidellm/data/prideandprejudice.txt.gz +0 -0
  96. guidellm/dataset/__init__.py +0 -22
  97. guidellm/dataset/creator.py +0 -213
  98. guidellm/dataset/entrypoints.py +0 -42
  99. guidellm/dataset/file.py +0 -92
  100. guidellm/dataset/hf_datasets.py +0 -62
  101. guidellm/dataset/in_memory.py +0 -132
  102. guidellm/dataset/synthetic.py +0 -287
  103. guidellm/objects/__init__.py +0 -18
  104. guidellm/objects/pydantic.py +0 -89
  105. guidellm/request/__init__.py +0 -18
  106. guidellm/request/loader.py +0 -284
  107. guidellm/request/request.py +0 -79
  108. guidellm/request/types.py +0 -10
  109. guidellm/scheduler/queues.py +0 -25
  110. guidellm/scheduler/result.py +0 -155
  111. guidellm/scheduler/strategy.py +0 -495
  112. guidellm-0.4.0a18.dist-info/RECORD +0 -62
  113. {guidellm-0.4.0a18.dist-info → guidellm-0.4.0a155.dist-info}/WHEEL +0 -0
  114. {guidellm-0.4.0a18.dist-info → guidellm-0.4.0a155.dist-info}/entry_points.txt +0 -0
  115. {guidellm-0.4.0a18.dist-info → guidellm-0.4.0a155.dist-info}/licenses/LICENSE +0 -0
  116. {guidellm-0.4.0a18.dist-info → guidellm-0.4.0a155.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,519 @@
1
+ """
2
+ Request scheduling strategies for controlling benchmark request processing patterns.
3
+
4
+ Provides timing implementations and concrete strategies that control request
5
+ concurrency, timing patterns, and throughput characteristics to simulate real-world
6
+ usage scenarios. Strategies define how requests are distributed across worker processes,
7
+ when they should be scheduled, and what constraints apply to concurrent processing.
8
+ The scheduling system separates timing logic from strategy constraints, enabling
9
+ flexible combination of timing behaviors with process and concurrency limits.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import asyncio
15
+ import random
16
+ import time
17
+ from abc import abstractmethod
18
+ from multiprocessing import Lock, Value
19
+ from typing import Annotated, ClassVar, Literal, TypeVar
20
+
21
+ from pydantic import Field, PrivateAttr
22
+
23
+ from guidellm.schemas import RequestInfo
24
+ from guidellm.utils import InfoMixin, PydanticClassRegistryMixin
25
+
26
+ __all__ = [
27
+ "AsyncConstantStrategy",
28
+ "AsyncPoissonStrategy",
29
+ "ConcurrentStrategy",
30
+ "SchedulingStrategy",
31
+ "StrategyT",
32
+ "StrategyType",
33
+ "SynchronousStrategy",
34
+ "ThroughputStrategy",
35
+ ]
36
+
37
+
38
+ StrategyType = Annotated[
39
+ Literal["synchronous", "concurrent", "throughput", "constant", "poisson"],
40
+ "Valid strategy type identifiers for scheduling request patterns",
41
+ ]
42
+
43
+
44
+ class SchedulingStrategy(PydanticClassRegistryMixin["SchedulingStrategy"], InfoMixin):
45
+ """
46
+ Base class for scheduling strategies controlling request processing patterns.
47
+
48
+ Defines the interface for strategies that combine timing implementations with
49
+ process and concurrency constraints to enable various benchmark scenarios.
50
+ Strategies manage request timing, worker process coordination, and concurrency
51
+ limits across distributed execution environments.
52
+
53
+ :cvar schema_discriminator: Field name used for polymorphic deserialization
54
+ """
55
+
56
+ schema_discriminator: ClassVar[str] = "type_"
57
+
58
+ @classmethod
59
+ def __pydantic_schema_base_type__(cls) -> type[SchedulingStrategy]:
60
+ if cls.__name__ == "SchedulingStrategy":
61
+ return cls
62
+
63
+ return SchedulingStrategy
64
+
65
+ type_: Literal["strategy"] = Field(
66
+ description="The type of scheduling strategy to schedule requests with",
67
+ )
68
+ worker_count: int = Field(
69
+ default=0,
70
+ description="Number of worker processes to use for this strategy",
71
+ ge=0,
72
+ )
73
+ max_concurrency: int = Field(
74
+ default=0,
75
+ description="Maximum number of concurrent requests to allow",
76
+ ge=0,
77
+ )
78
+ startup_duration: float = Field(
79
+ default=0.0,
80
+ description="Duration in seconds for startup request distribution",
81
+ ge=0,
82
+ )
83
+
84
+ _processes_lock = PrivateAttr(None)
85
+ _processes_request_index = PrivateAttr(None)
86
+ _processes_start_time = PrivateAttr(None)
87
+ _cached_processes_start_time: float | None = PrivateAttr(None)
88
+
89
+ @property
90
+ def processes_limit(self) -> int | None:
91
+ """
92
+ Get the maximum number of worker processes supported by this strategy.
93
+
94
+ :return: Maximum number of worker processes, None if unlimited
95
+ """
96
+ return None
97
+
98
+ @property
99
+ def requests_limit(self) -> int | None:
100
+ """
101
+ Get the maximum number of concurrent requests supported by this strategy.
102
+
103
+ :return: Maximum number of concurrent requests, None if unlimited
104
+ """
105
+ return None
106
+
107
+ def init_processes_timings(
108
+ self,
109
+ worker_count: int,
110
+ max_concurrency: int,
111
+ startup_duration: float,
112
+ ):
113
+ """
114
+ Initialize shared timing state for multi-process coordination.
115
+
116
+ :param worker_count: Number of worker processes to coordinate
117
+ :param max_concurrency: Maximum number of concurrent requests allowed
118
+ :param startup_duration: Duration in seconds for request startup ramping
119
+ """
120
+ self.worker_count = worker_count
121
+ self.max_concurrency = max_concurrency
122
+ self.startup_duration = startup_duration
123
+
124
+ self._processes_request_index = Value("i", 0)
125
+ self._processes_lock = Lock()
126
+ self._processes_start_time = Value("d", -1.0)
127
+
128
+ def init_processes_start(self, start_time: float):
129
+ """
130
+ Set the synchronized start time for all worker processes.
131
+
132
+ :param start_time: Unix timestamp when request processing should begin
133
+ :raises RuntimeError: If called before init_processes_timings
134
+ """
135
+ if self._processes_lock is None:
136
+ raise RuntimeError(
137
+ "SchedulingStrategy init_processes_start called before "
138
+ "init_processes_timings"
139
+ )
140
+
141
+ with self._processes_lock:
142
+ self._processes_start_time.value = start_time
143
+
144
+ async def get_processes_start_time(self) -> float:
145
+ """
146
+ Get the synchronized start time, waiting if not yet set.
147
+
148
+ :return: Unix timestamp when request processing began
149
+ :raises RuntimeError: If called before init_processes_timings
150
+ """
151
+ if self._processes_lock is None:
152
+ raise RuntimeError(
153
+ "SchedulingStrategy get_processes_start_time called before "
154
+ "init_processes_timings"
155
+ )
156
+
157
+ while self._cached_processes_start_time is None:
158
+ with self._processes_lock:
159
+ if self._processes_start_time.value != -1.0:
160
+ self._cached_processes_start_time = self._processes_start_time.value
161
+ else:
162
+ await asyncio.sleep(0.01) # wait for start time to be set by main
163
+
164
+ return self._cached_processes_start_time
165
+
166
+ def next_request_index(self) -> int:
167
+ """
168
+ Get the next sequential request index across all worker processes.
169
+
170
+ :return: Globally unique request index for timing calculations
171
+ :raises RuntimeError: If called before init_processes_timings
172
+ """
173
+ if self._processes_lock is None:
174
+ raise RuntimeError(
175
+ "SchedulingStrategy next_request_index called before "
176
+ "init_processes_timings"
177
+ )
178
+
179
+ with self._processes_lock:
180
+ self._processes_request_index.value += 1
181
+ return self._processes_request_index.value
182
+
183
+ @abstractmethod
184
+ async def next_request_time(self, offset: int) -> float:
185
+ """
186
+ Calculate the scheduled start time for the next request.
187
+
188
+ :param offset: Worker process offset for distributing request timing
189
+ :return: Unix timestamp when the request should be processed
190
+ """
191
+
192
+ @abstractmethod
193
+ def request_completed(self, request_info: RequestInfo):
194
+ """
195
+ Handle request completion and update internal timing state.
196
+
197
+ :param request_info: Information about the completed request including
198
+ timing details and completion status
199
+ """
200
+
201
+
202
+ StrategyT = TypeVar("StrategyT", bound=SchedulingStrategy)
203
+
204
+
205
+ @SchedulingStrategy.register("synchronous")
206
+ class SynchronousStrategy(SchedulingStrategy):
207
+ """
208
+ Sequential request processing with strict single-request-at-a-time execution.
209
+
210
+ Processes requests one at a time in strict sequential order, providing predictable
211
+ timing behavior ideal for measuring maximum sequential throughput and ensuring
212
+ complete request isolation. Each request completes before the next begins.
213
+ """
214
+
215
+ type_: Literal["synchronous"] = "synchronous" # type: ignore[assignment]
216
+ _process_last_request_time: float | None = PrivateAttr(None)
217
+
218
+ def __str__(self) -> str:
219
+ """
220
+ :return: String identifier for synchronous strategy
221
+ """
222
+ return "synchronous"
223
+
224
+ @property
225
+ def processes_limit(self) -> int | None:
226
+ """
227
+ :return: Always 1 to enforce single-process constraint
228
+ """
229
+ return 1
230
+
231
+ @property
232
+ def requests_limit(self) -> int | None:
233
+ """
234
+ :return: Always 1 to enforce single-request constraint
235
+ """
236
+ return 1
237
+
238
+ async def next_request_time(self, offset: int) -> float:
239
+ """
240
+ Calculate next request time based on previous completion.
241
+
242
+ :param offset: Unused for synchronous strategy
243
+ :return: Time of last completion or start time if first request
244
+ """
245
+ _ = offset # offset unused for synchronous strategy
246
+
247
+ if self._process_last_request_time is not None:
248
+ return self._process_last_request_time
249
+
250
+ return await self.get_processes_start_time()
251
+
252
+ def request_completed(self, request_info: RequestInfo):
253
+ """
254
+ Update timing state with completed request information.
255
+
256
+ :param request_info: Completed request metadata including timing
257
+ """
258
+ if request_info.completed_at is not None:
259
+ self._process_last_request_time = request_info.completed_at
260
+
261
+
262
+ @SchedulingStrategy.register("concurrent")
263
+ class ConcurrentStrategy(SchedulingStrategy):
264
+ """
265
+ Parallel request processing with fixed concurrency limits.
266
+
267
+ Enables concurrent request processing up to a specified number of streams,
268
+ providing balanced throughput while maintaining predictable resource usage.
269
+ Requests are distributed across streams with completion-based timing coordination.
270
+ """
271
+
272
+ type_: Literal["concurrent"] = "concurrent" # type: ignore[assignment]
273
+ streams: int = Field(
274
+ description="Number of concurrent streams for scheduling requests",
275
+ gt=0,
276
+ )
277
+
278
+ _process_last_request_time: float | None = PrivateAttr(None)
279
+
280
+ def __str__(self) -> str:
281
+ """
282
+ :return: String identifier with stream count
283
+ """
284
+ return f"concurrent@{self.streams}"
285
+
286
+ @property
287
+ def processes_limit(self) -> int:
288
+ """
289
+ :return: Number of streams as maximum worker processes
290
+ """
291
+ return self.streams
292
+
293
+ @property
294
+ def requests_limit(self) -> int:
295
+ """
296
+ :return: Number of streams as maximum concurrent requests
297
+ """
298
+ return self.streams
299
+
300
+ async def next_request_time(self, offset: int) -> float:
301
+ """
302
+ Calculate next request time with stream-based distribution.
303
+
304
+ :param offset: Worker process offset for distributing initial requests
305
+ :return: Time of last completion or staggered start time if first request
306
+ """
307
+ if self._process_last_request_time is not None:
308
+ return self._process_last_request_time
309
+
310
+ start_time = await self.get_processes_start_time()
311
+
312
+ return start_time + (offset / self.worker_count)
313
+
314
+ def request_completed(self, request_info: RequestInfo):
315
+ """
316
+ Update timing state with completed request information.
317
+
318
+ :param request_info: Completed request metadata including timing
319
+ """
320
+ if request_info.completed_at is not None:
321
+ self._process_last_request_time = request_info.completed_at
322
+
323
+
324
+ @SchedulingStrategy.register("throughput")
325
+ class ThroughputStrategy(SchedulingStrategy):
326
+ """
327
+ Maximum throughput scheduling with optional concurrency limits.
328
+
329
+ Schedules requests to maximize system throughput by allowing unlimited concurrent
330
+ processing with optional constraints. Supports startup ramping to gradually
331
+ distribute initial requests for controlled system ramp-up.
332
+ """
333
+
334
+ type_: Literal["throughput"] = "throughput" # type: ignore[assignment]
335
+ max_concurrency: int | None = Field(
336
+ default=None,
337
+ description="Maximum number of concurrent requests to schedule",
338
+ gt=0,
339
+ )
340
+
341
+ def __str__(self) -> str:
342
+ """
343
+ :return: String identifier for throughput strategy
344
+ """
345
+ return "throughput"
346
+
347
+ @property
348
+ def processes_limit(self) -> int | None:
349
+ """
350
+ :return: Max concurrency if set, otherwise None for unlimited
351
+ """
352
+ return self.max_concurrency
353
+
354
+ @property
355
+ def requests_limit(self) -> int | None:
356
+ """
357
+ :return: Max concurrency if set, otherwise None for unlimited
358
+ """
359
+ return self.max_concurrency
360
+
361
+ async def next_request_time(self, offset: int) -> float:
362
+ """
363
+ Calculate next request time with optional startup ramping.
364
+
365
+ :param offset: Unused for throughput strategy
366
+ :return: Immediate start or ramped start time during startup period
367
+ """
368
+ _ = offset # offset unused for throughput strategy
369
+ start_time = await self.get_processes_start_time()
370
+
371
+ if (
372
+ self.startup_duration > 0
373
+ and (time.time() - start_time) < self.startup_duration
374
+ and (current_index := self.next_request_index()) <= self.max_concurrency
375
+ ):
376
+ # linearly ramp start times to spread max_concurrency requests evenly
377
+ # over startup_duration
378
+ return start_time + self.startup_duration * (
379
+ current_index / self.max_concurrency
380
+ )
381
+
382
+ return start_time + self.startup_duration
383
+
384
+ def request_completed(self, request_info: RequestInfo):
385
+ """
386
+ Handle request completion (no-op for throughput strategy).
387
+
388
+ :param request_info: Completed request metadata (unused)
389
+ """
390
+ _ = request_info # request_info unused for throughput strategy
391
+
392
+
393
+ @SchedulingStrategy.register("constant")
394
+ class AsyncConstantStrategy(ThroughputStrategy):
395
+ """
396
+ Constant-rate scheduling for predictable load patterns.
397
+
398
+ Schedules requests at a fixed rate distributed evenly across worker processes,
399
+ providing predictable timing behavior for steady-state load simulation and
400
+ consistent system performance measurement. Requests arrive at uniform intervals.
401
+ """
402
+
403
+ type_: Literal["constant"] = "constant" # type: ignore[assignment]
404
+ rate: float = Field(
405
+ description="Rate for scheduling requests asynchronously in requests/second",
406
+ gt=0,
407
+ )
408
+
409
+ def __str__(self) -> str:
410
+ """
411
+ :return: String identifier with rate value
412
+ """
413
+ return f"constant@{self.rate:.2f}"
414
+
415
+ async def next_request_time(self, offset: int) -> float:
416
+ """
417
+ Calculate next request time at fixed intervals.
418
+
419
+ :param offset: Unused for constant strategy
420
+ :return: Start time plus constant interval based on request index
421
+ """
422
+ _ = offset # offset unused for throughput strategy
423
+ current_index = self.next_request_index()
424
+ start_time = await self.get_processes_start_time()
425
+
426
+ return start_time + current_index / self.rate
427
+
428
+ def request_completed(self, request_info: RequestInfo):
429
+ """
430
+ Handle request completion (no-op for constant strategy).
431
+
432
+ :param request_info: Completed request metadata (unused)
433
+ """
434
+ _ = request_info # request_info unused for async constant strategy
435
+
436
+
437
+ @SchedulingStrategy.register("poisson")
438
+ class AsyncPoissonStrategy(ThroughputStrategy):
439
+ """
440
+ Poisson-distributed scheduling for realistic load simulation.
441
+
442
+ Schedules requests following a Poisson process with exponentially distributed
443
+ inter-arrival times, providing realistic simulation of user behavior and network
444
+ traffic patterns. Request arrivals have random variance around the target rate.
445
+ """
446
+
447
+ type_: Literal["poisson"] = "poisson" # type: ignore[assignment]
448
+ rate: float = Field(
449
+ description="Rate for scheduling requests asynchronously in requests/second",
450
+ gt=0,
451
+ )
452
+ random_seed: int = Field(
453
+ default=42,
454
+ description="Random seed to use for Poisson distribution",
455
+ )
456
+
457
+ _random: random.Random | None = PrivateAttr(None)
458
+ _offset = PrivateAttr(None)
459
+
460
+ def __str__(self) -> str:
461
+ """
462
+ :return: String identifier with rate value
463
+ """
464
+ return f"poisson@{self.rate:.2f}"
465
+
466
+ def init_processes_timings(
467
+ self,
468
+ worker_count: int,
469
+ max_concurrency: int,
470
+ startup_duration: float,
471
+ ):
472
+ """
473
+ Initialize Poisson-specific timing state.
474
+
475
+ :param worker_count: Number of worker processes to coordinate
476
+ :param max_concurrency: Maximum number of concurrent requests allowed
477
+ :param startup_duration: Duration in seconds for request startup ramping
478
+ """
479
+ super().init_processes_timings(worker_count, max_concurrency, startup_duration)
480
+ with self._processes_lock:
481
+ self._offset = Value("d", -1.0)
482
+
483
+ def init_processes_start(self, start_time: float):
484
+ """
485
+ Initialize the offset time for Poisson timing calculations.
486
+
487
+ :param start_time: Unix timestamp when request processing should begin
488
+ """
489
+ ThroughputStrategy.init_processes_start(self, start_time)
490
+ with self._processes_lock:
491
+ self._offset.value = start_time
492
+
493
+ async def next_request_time(self, offset: int) -> float:
494
+ """
495
+ Calculate next request time using exponential distribution.
496
+
497
+ :param offset: Unused for Poisson strategy
498
+ :return: Next arrival time based on Poisson process
499
+ """
500
+ _ = offset # offset unused for throughput strategy
501
+ _ = await self.get_processes_start_time() # ensure offset is initialized
502
+
503
+ if self._random is None:
504
+ self._random = random.Random(self.random_seed)
505
+
506
+ next_delay = self._random.expovariate(self.rate)
507
+
508
+ with self._processes_lock:
509
+ self._offset.value += next_delay
510
+
511
+ return self._offset.value
512
+
513
+ def request_completed(self, request_info: RequestInfo):
514
+ """
515
+ Handle request completion (no-op for Poisson strategy).
516
+
517
+ :param request_info: Completed request metadata (unused)
518
+ """
519
+ _ = request_info # request_info unused for async poisson strategy