guidellm 0.3.1__py3-none-any.whl → 0.6.0a5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- guidellm/__init__.py +5 -2
- guidellm/__main__.py +524 -255
- guidellm/backends/__init__.py +33 -0
- guidellm/backends/backend.py +109 -0
- guidellm/backends/openai.py +340 -0
- guidellm/backends/response_handlers.py +428 -0
- guidellm/benchmark/__init__.py +69 -39
- guidellm/benchmark/benchmarker.py +160 -316
- guidellm/benchmark/entrypoints.py +560 -127
- guidellm/benchmark/outputs/__init__.py +24 -0
- guidellm/benchmark/outputs/console.py +633 -0
- guidellm/benchmark/outputs/csv.py +721 -0
- guidellm/benchmark/outputs/html.py +473 -0
- guidellm/benchmark/outputs/output.py +169 -0
- guidellm/benchmark/outputs/serialized.py +69 -0
- guidellm/benchmark/profiles.py +718 -0
- guidellm/benchmark/progress.py +553 -556
- guidellm/benchmark/scenarios/__init__.py +40 -0
- guidellm/benchmark/scenarios/chat.json +6 -0
- guidellm/benchmark/scenarios/rag.json +6 -0
- guidellm/benchmark/schemas/__init__.py +66 -0
- guidellm/benchmark/schemas/base.py +402 -0
- guidellm/benchmark/schemas/generative/__init__.py +55 -0
- guidellm/benchmark/schemas/generative/accumulator.py +841 -0
- guidellm/benchmark/schemas/generative/benchmark.py +163 -0
- guidellm/benchmark/schemas/generative/entrypoints.py +381 -0
- guidellm/benchmark/schemas/generative/metrics.py +927 -0
- guidellm/benchmark/schemas/generative/report.py +158 -0
- guidellm/data/__init__.py +34 -4
- guidellm/data/builders.py +541 -0
- guidellm/data/collators.py +16 -0
- guidellm/data/config.py +120 -0
- guidellm/data/deserializers/__init__.py +49 -0
- guidellm/data/deserializers/deserializer.py +141 -0
- guidellm/data/deserializers/file.py +223 -0
- guidellm/data/deserializers/huggingface.py +94 -0
- guidellm/data/deserializers/memory.py +194 -0
- guidellm/data/deserializers/synthetic.py +246 -0
- guidellm/data/entrypoints.py +52 -0
- guidellm/data/loaders.py +190 -0
- guidellm/data/preprocessors/__init__.py +27 -0
- guidellm/data/preprocessors/formatters.py +410 -0
- guidellm/data/preprocessors/mappers.py +196 -0
- guidellm/data/preprocessors/preprocessor.py +30 -0
- guidellm/data/processor.py +29 -0
- guidellm/data/schemas.py +175 -0
- guidellm/data/utils/__init__.py +6 -0
- guidellm/data/utils/dataset.py +94 -0
- guidellm/extras/__init__.py +4 -0
- guidellm/extras/audio.py +220 -0
- guidellm/extras/vision.py +242 -0
- guidellm/logger.py +2 -2
- guidellm/mock_server/__init__.py +8 -0
- guidellm/mock_server/config.py +84 -0
- guidellm/mock_server/handlers/__init__.py +17 -0
- guidellm/mock_server/handlers/chat_completions.py +280 -0
- guidellm/mock_server/handlers/completions.py +280 -0
- guidellm/mock_server/handlers/tokenizer.py +142 -0
- guidellm/mock_server/models.py +510 -0
- guidellm/mock_server/server.py +238 -0
- guidellm/mock_server/utils.py +302 -0
- guidellm/scheduler/__init__.py +69 -26
- guidellm/scheduler/constraints/__init__.py +49 -0
- guidellm/scheduler/constraints/constraint.py +325 -0
- guidellm/scheduler/constraints/error.py +411 -0
- guidellm/scheduler/constraints/factory.py +182 -0
- guidellm/scheduler/constraints/request.py +312 -0
- guidellm/scheduler/constraints/saturation.py +722 -0
- guidellm/scheduler/environments.py +252 -0
- guidellm/scheduler/scheduler.py +137 -368
- guidellm/scheduler/schemas.py +358 -0
- guidellm/scheduler/strategies.py +617 -0
- guidellm/scheduler/worker.py +413 -419
- guidellm/scheduler/worker_group.py +712 -0
- guidellm/schemas/__init__.py +65 -0
- guidellm/schemas/base.py +417 -0
- guidellm/schemas/info.py +188 -0
- guidellm/schemas/request.py +235 -0
- guidellm/schemas/request_stats.py +349 -0
- guidellm/schemas/response.py +124 -0
- guidellm/schemas/statistics.py +1018 -0
- guidellm/{config.py → settings.py} +31 -24
- guidellm/utils/__init__.py +71 -8
- guidellm/utils/auto_importer.py +98 -0
- guidellm/utils/cli.py +132 -5
- guidellm/utils/console.py +566 -0
- guidellm/utils/encoding.py +778 -0
- guidellm/utils/functions.py +159 -0
- guidellm/utils/hf_datasets.py +1 -2
- guidellm/utils/hf_transformers.py +4 -4
- guidellm/utils/imports.py +9 -0
- guidellm/utils/messaging.py +1118 -0
- guidellm/utils/mixins.py +115 -0
- guidellm/utils/random.py +3 -4
- guidellm/utils/registry.py +220 -0
- guidellm/utils/singleton.py +133 -0
- guidellm/utils/synchronous.py +159 -0
- guidellm/utils/text.py +163 -50
- guidellm/utils/typing.py +41 -0
- guidellm/version.py +2 -2
- guidellm-0.6.0a5.dist-info/METADATA +364 -0
- guidellm-0.6.0a5.dist-info/RECORD +109 -0
- guidellm/backend/__init__.py +0 -23
- guidellm/backend/backend.py +0 -259
- guidellm/backend/openai.py +0 -708
- guidellm/backend/response.py +0 -136
- guidellm/benchmark/aggregator.py +0 -760
- guidellm/benchmark/benchmark.py +0 -837
- guidellm/benchmark/output.py +0 -997
- guidellm/benchmark/profile.py +0 -409
- guidellm/benchmark/scenario.py +0 -104
- guidellm/data/prideandprejudice.txt.gz +0 -0
- guidellm/dataset/__init__.py +0 -22
- guidellm/dataset/creator.py +0 -213
- guidellm/dataset/entrypoints.py +0 -42
- guidellm/dataset/file.py +0 -92
- guidellm/dataset/hf_datasets.py +0 -62
- guidellm/dataset/in_memory.py +0 -132
- guidellm/dataset/synthetic.py +0 -287
- guidellm/objects/__init__.py +0 -18
- guidellm/objects/pydantic.py +0 -89
- guidellm/objects/statistics.py +0 -953
- guidellm/preprocess/__init__.py +0 -3
- guidellm/preprocess/dataset.py +0 -374
- guidellm/presentation/__init__.py +0 -28
- guidellm/presentation/builder.py +0 -27
- guidellm/presentation/data_models.py +0 -232
- guidellm/presentation/injector.py +0 -66
- guidellm/request/__init__.py +0 -18
- guidellm/request/loader.py +0 -284
- guidellm/request/request.py +0 -79
- guidellm/request/types.py +0 -10
- guidellm/scheduler/queues.py +0 -25
- guidellm/scheduler/result.py +0 -155
- guidellm/scheduler/strategy.py +0 -495
- guidellm-0.3.1.dist-info/METADATA +0 -329
- guidellm-0.3.1.dist-info/RECORD +0 -62
- {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/WHEEL +0 -0
- {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/entry_points.txt +0 -0
- {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/licenses/LICENSE +0 -0
- {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,617 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Request scheduling strategies for controlling benchmark request processing patterns.
|
|
3
|
+
|
|
4
|
+
Provides timing implementations and concrete strategies that control request
|
|
5
|
+
concurrency, timing patterns, and throughput characteristics to simulate real-world
|
|
6
|
+
usage scenarios. Strategies define how requests are distributed across worker processes,
|
|
7
|
+
when they should be scheduled, and what constraints apply to concurrent processing.
|
|
8
|
+
The scheduling system separates timing logic from strategy constraints, enabling
|
|
9
|
+
flexible combination of timing behaviors with process and concurrency limits.
|
|
10
|
+
|
|
11
|
+
Available strategies include synchronous (sequential), concurrent (fixed streams),
|
|
12
|
+
throughput (maximum load), constant-rate (steady intervals), and Poisson-distributed
|
|
13
|
+
(realistic variance) patterns for comprehensive benchmarking scenarios.
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
import asyncio
|
|
19
|
+
import random
|
|
20
|
+
from abc import abstractmethod
|
|
21
|
+
from multiprocessing import Event, Value, synchronize
|
|
22
|
+
from multiprocessing.sharedctypes import Synchronized
|
|
23
|
+
from typing import Annotated, ClassVar, Literal, TypeVar
|
|
24
|
+
|
|
25
|
+
from pydantic import Field, NonNegativeFloat, NonNegativeInt, PositiveInt, PrivateAttr
|
|
26
|
+
|
|
27
|
+
from guidellm.schemas import PydanticClassRegistryMixin, RequestInfo
|
|
28
|
+
from guidellm.utils import InfoMixin
|
|
29
|
+
|
|
30
|
+
# Public names exported by this module (kept in alphabetical order).
__all__ = [
    "AsyncConstantStrategy",
    "AsyncPoissonStrategy",
    "ConcurrentStrategy",
    "SchedulingStrategy",
    "StrategyT",
    "StrategyType",
    "SynchronousStrategy",
    "ThroughputStrategy",
]
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
# Closed set of strategy identifiers; each value matches the name a concrete
# strategy class in this module is registered under.
StrategyType = Annotated[
    Literal[
        "synchronous",
        "concurrent",
        "throughput",
        "constant",
        "poisson",
    ],
    "Valid strategy type identifiers for scheduling request patterns",
]
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class SchedulingStrategy(PydanticClassRegistryMixin["SchedulingStrategy"], InfoMixin):
    """
    Base class for scheduling strategies controlling request processing patterns.

    Defines the interface for strategies that combine timing implementations with
    process and concurrency constraints to enable various benchmark scenarios.
    Strategies manage request timing, worker process coordination, and concurrency
    limits across distributed execution environments.

    The shared timing state (a start event, a request counter and a start
    timestamp) is created by ``init_processes_timings``; the other timing methods
    raise ``RuntimeError`` when they are called before it.

    :cvar schema_discriminator: Field name used for polymorphic deserialization
    """

    schema_discriminator: ClassVar[str] = "type_"

    @classmethod
    def __pydantic_schema_base_type__(cls) -> type[SchedulingStrategy]:
        """
        Resolve the root class of the strategy hierarchy.

        Both branches yield ``SchedulingStrategy``; presumably this hook is read
        by ``PydanticClassRegistryMixin`` -- confirm against the mixin.

        :return: The ``SchedulingStrategy`` base class
        """
        if cls.__name__ == "SchedulingStrategy":
            return cls

        return SchedulingStrategy

    type_: Literal["strategy"] = Field(
        description="Scheduling strategy type identifier for polymorphic dispatch",
    )
    worker_count: PositiveInt | None = Field(
        default=None,
        description="Number of worker processes to use for this strategy",
    )
    max_concurrency: PositiveInt | None = Field(
        default=None,
        description="Maximum number of concurrent requests to allow",
    )

    # Multiprocessing primitives created by init_processes_timings.
    _processes_init_event: synchronize.Event | None = PrivateAttr(None)
    _processes_request_index: Synchronized[int] | None = PrivateAttr(None)
    _processes_start_time: Synchronized[float] | None = PrivateAttr(None)
    # Local copy of the shared start time, filled on first successful read.
    _cached_processes_start_time: float | None = PrivateAttr(None)

    @property
    def processes_limit(self) -> PositiveInt | None:
        """
        Get the maximum number of worker processes supported by this strategy.

        :return: Maximum number of worker processes, None if unlimited
        """
        return None

    @property
    def requests_limit(self) -> PositiveInt | None:
        """
        Get the maximum number of concurrent requests supported by this strategy.

        :return: Maximum number of concurrent requests, None if unlimited
        """
        return None

    def init_processes_timings(
        self, worker_count: PositiveInt, max_concurrency: PositiveInt
    ):
        """
        Initialize shared timing state for multi-process coordination.

        Stores the worker and concurrency settings and creates the start event,
        the request index counter (starting at 0) and the start time value
        (starting at the placeholder -1.0).

        :param worker_count: Number of worker processes to coordinate
        :param max_concurrency: Maximum number of concurrent requests allowed
        """
        self.worker_count = worker_count
        self.max_concurrency = max_concurrency

        self._processes_init_event = Event()
        self._processes_request_index = Value("i", 0)
        self._processes_start_time = Value("d", -1.0)

    def init_processes_start(self, start_time: float):
        """
        Set the synchronized start time for all worker processes.

        Writes the start time into the shared value and then sets the init event,
        so anything waiting on the event sees the final start time.

        :param start_time: Unix timestamp when request processing should begin
        :raises RuntimeError: If called before init_processes_timings
        """
        if self._processes_init_event is None:
            raise RuntimeError(
                "SchedulingStrategy init_processes_start called before "
                "init_processes_timings"
            )
        if self._processes_start_time is None:
            raise RuntimeError(
                "_processes_init_event is not None but _processes_start_time is None"
            )

        with self._processes_start_time.get_lock():
            self._processes_start_time.value = start_time
        # Signal only after the value is written.
        self._processes_init_event.set()

    async def get_processes_start_time(self) -> float:
        """
        Get the synchronized start time, waiting if not yet set.

        Waits (in a helper thread, so the event loop is not blocked) until the
        init event is set via init_processes_start, then caches the shared start
        time so later calls return immediately.

        :return: Unix timestamp when request processing began
        :raises RuntimeError: If called before init_processes_timings
        """
        if self._processes_init_event is None:
            raise RuntimeError(
                "SchedulingStrategy get_processes_start_time called before "
                "init_processes_timings"
            )
        if self._processes_start_time is None:
            raise RuntimeError(
                "_processes_init_event is not None but _processes_start_time is None"
            )

        if self._cached_processes_start_time is None:
            # Event.wait blocks, so run it off the event loop thread.
            await asyncio.to_thread(self._processes_init_event.wait)
            self._cached_processes_start_time = self._processes_start_time.value

        return self._cached_processes_start_time

    def next_request_index(self) -> PositiveInt:
        """
        Get the next sequential request index from the shared counter.

        The counter is incremented and read while holding its lock, so the first
        call returns 1 and every call returns a distinct value.

        :return: 1-based request index for timing calculations
        :raises RuntimeError: If called before init_processes_timings
        """
        if self._processes_request_index is None:
            raise RuntimeError(
                "SchedulingStrategy next_request_index called before "
                "init_processes_timings"
            )

        with self._processes_request_index.get_lock():
            self._processes_request_index.value += 1
            return self._processes_request_index.value

    @abstractmethod
    async def next_request_time(self, worker_index: NonNegativeInt) -> float:
        """
        Calculate the scheduled start time for the next request.

        Strategy-specific implementation determining when requests should be
        processed based on timing patterns and worker distribution.

        :param worker_index: Worker process index for distributing request timing
        :return: Unix timestamp when the request should be processed
        """

    @abstractmethod
    def request_completed(self, request_info: RequestInfo):
        """
        Handle request completion and update internal timing state.

        Strategy-specific handling of completed requests to maintain timing
        coordination and schedule subsequent requests.

        :param request_info: Completed request metadata including timing details
            and completion status
        """
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
# Generic parameter standing in for any SchedulingStrategy subclass.
StrategyT = TypeVar(
    "StrategyT",
    bound=SchedulingStrategy,
)
"Type variable bound to SchedulingStrategy for generic strategy operations"
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
@SchedulingStrategy.register("synchronous")
class SynchronousStrategy(SchedulingStrategy):
    """
    Strictly sequential scheduling: one process, one request in flight.

    The next request is scheduled at the completion time of the previous one
    (or at the shared start time for the very first request), which gives
    predictable timing for measuring sequential throughput.
    """

    type_: Literal["synchronous"] = "synchronous"  # type: ignore[assignment]
    # Completion timestamp of the most recent request seen by this instance.
    _process_last_request_time: float | None = PrivateAttr(None)

    def __str__(self) -> str:
        """
        :return: The fixed identifier "synchronous"
        """
        return "synchronous"

    @property
    def processes_limit(self) -> PositiveInt:
        """
        :return: Always 1, a single worker process
        """
        return 1

    @property
    def requests_limit(self) -> PositiveInt:
        """
        :return: Always 1, a single concurrent request
        """
        return 1

    async def next_request_time(self, worker_index: NonNegativeInt) -> float:
        """
        Schedule the next request right after the previous completion.

        :param worker_index: Ignored by this strategy
        :return: Last recorded completion time, or the shared start time when no
            completion has been recorded yet
        """
        del worker_index  # not needed for sequential scheduling

        previous = self._process_last_request_time
        if previous is None:
            return await self.get_processes_start_time()

        return previous

    def request_completed(self, request_info: RequestInfo):
        """
        Record the completion time of a finished request.

        :param request_info: Completed request metadata; ignored when its
            ``completed_at`` is None
        """
        finished_at = request_info.completed_at
        if finished_at is None:
            return

        self._process_last_request_time = finished_at
|
|
278
|
+
|
|
279
|
+
|
|
280
|
+
@SchedulingStrategy.register("concurrent")
class ConcurrentStrategy(SchedulingStrategy):
    """
    Parallel request processing with fixed concurrency limits.

    Enables concurrent request processing up to a specified number of streams,
    providing balanced throughput while maintaining predictable resource usage.
    The first ``streams`` requests can be spread over a rampup window; later
    requests are scheduled at the last completion time recorded on this instance.
    """

    type_: Literal["concurrent"] = "concurrent"  # type: ignore[assignment]
    streams: PositiveInt = Field(
        description="Number of concurrent streams for scheduling requests",
    )
    rampup_duration: NonNegativeFloat = Field(
        default=0.0,
        description=(
            "Duration in seconds to spread initial requests up to max_concurrency "
            "at the beginning of each strategy run"
        ),
    )

    # Completion timestamp of the most recent request seen by this instance.
    _process_last_request_time: float | None = PrivateAttr(None)

    def __str__(self) -> str:
        """
        :return: String identifier with stream count
        """
        return f"concurrent@{self.streams}"

    @property
    def processes_limit(self) -> PositiveInt:
        """
        :return: Number of streams as maximum worker processes
        """
        return self.streams

    @property
    def requests_limit(self) -> PositiveInt:
        """
        :return: Number of streams as maximum concurrent requests
        """
        return self.streams

    async def next_request_time(self, worker_index: NonNegativeInt) -> float:
        """
        Calculate next request time with stream-based distribution.

        When a rampup duration is configured, the first ``streams`` requests are
        spread linearly over it: request ``i`` (1-based) is scheduled at
        ``start + rampup_duration * i / streams``. Every other request is
        scheduled at the last recorded completion time, or at the start time when
        no completion has been recorded yet.

        :param worker_index: Unused; the shared request index drives the ramp
        :return: Ramped start time, last completion time, or the start time
        :raises RuntimeError: If called before init_processes_timings
        """
        _ = worker_index  # unused
        current_index = self.next_request_index()
        start_time = await self.get_processes_start_time()

        # next_request_index() is 1-based, so the comparison must be inclusive
        # for the final stream to be part of the ramp instead of firing at
        # start_time ahead of all the others.
        if current_index <= self.streams and self.rampup_duration > 0:
            # linearly spread start times for first concurrent requests across rampup
            return start_time + self.rampup_duration * (current_index / self.streams)

        if self._process_last_request_time is not None:
            return self._process_last_request_time

        return start_time

    def request_completed(self, request_info: RequestInfo):
        """
        Update timing state with completed request information.

        Stores the completion time so the next request can be scheduled from it.

        :param request_info: Completed request metadata; ignored when its
            ``completed_at`` is None
        """
        if request_info.completed_at is not None:
            self._process_last_request_time = request_info.completed_at
|
|
357
|
+
|
|
358
|
+
|
|
359
|
+
@SchedulingStrategy.register("throughput")
class ThroughputStrategy(SchedulingStrategy):
    """
    Schedule requests as early as possible, optionally capped and ramped.

    Every request is scheduled at the shared start time unless both a
    concurrency cap and a rampup duration are configured, in which case the
    initial requests are spread linearly across the rampup window.
    """

    type_: Literal["throughput"] = "throughput"  # type: ignore[assignment]
    max_concurrency: PositiveInt | None = Field(
        default=None,
        description="Maximum number of concurrent requests to schedule",
    )
    rampup_duration: NonNegativeFloat = Field(
        default=0.0,
        description=(
            "Duration in seconds to spread initial requests up to max_concurrency "
            "at the beginning of each strategy run"
        ),
    )

    def __str__(self) -> str:
        """
        :return: "throughput@" followed by the concurrency cap or "unlimited"
        """
        cap_label = self.max_concurrency or "unlimited"
        return f"throughput@{cap_label}"

    @property
    def processes_limit(self) -> PositiveInt | None:
        """
        :return: The configured max concurrency, None when unset
        """
        return self.max_concurrency

    @property
    def requests_limit(self) -> PositiveInt | None:
        """
        :return: The configured max concurrency, None when unset
        """
        return self.max_concurrency

    async def next_request_time(self, worker_index: int) -> float:
        """
        Compute the scheduled time of the next request.

        Without a concurrency cap or without a positive rampup duration the
        result is simply the start time. Otherwise request ``i`` (from the shared
        request index) is delayed by ``rampup_duration * i / max_concurrency``,
        capped at ``rampup_duration`` once ``i`` reaches the cap.

        :param worker_index: Ignored by this strategy
        :return: Start time, plus the ramp delay when ramping applies
        """
        del worker_index  # not needed for throughput scheduling
        begin = await self.get_processes_start_time()

        cap = self.max_concurrency
        ramp = self.rampup_duration
        if cap is None or not ramp > 0:
            return begin

        position = self.next_request_index()
        if position >= cap:
            # Past the ramp: every further request uses the full delay.
            delay = ramp
        else:
            delay = ramp * (position / float(cap))

        return begin + delay

    def request_completed(self, request_info: RequestInfo):
        """
        Accept a completion notification; this strategy keeps no state from it.

        :param request_info: Completed request metadata (ignored)
        """
        del request_info  # completions do not influence throughput scheduling
|
|
435
|
+
|
|
436
|
+
|
|
437
|
+
@SchedulingStrategy.register("constant")
class AsyncConstantStrategy(SchedulingStrategy):
    """
    Fixed-rate scheduling with uniform spacing between requests.

    Request ``i`` (from the shared request index) is scheduled ``i / rate``
    seconds after the shared start time, regardless of when earlier requests
    complete.
    """

    type_: Literal["constant"] = "constant"  # type: ignore[assignment]
    rate: float = Field(
        description="Request scheduling rate in requests per second",
        gt=0,
    )
    max_concurrency: PositiveInt | None = Field(
        default=None,
        description="Maximum number of concurrent requests to schedule",
    )

    def __str__(self) -> str:
        """
        :return: "constant@" followed by the rate with two decimals
        """
        return f"constant@{self.rate:.2f}"

    @property
    def processes_limit(self) -> PositiveInt | None:
        """
        :return: The configured max concurrency, None when unset
        """
        return self.max_concurrency

    @property
    def requests_limit(self) -> PositiveInt | None:
        """
        :return: The configured max concurrency, None when unset
        """
        return self.max_concurrency

    async def next_request_time(self, worker_index: PositiveInt) -> float:
        """
        Compute the scheduled time of the next request at the fixed rate.

        :param worker_index: Ignored by this strategy
        :return: Start time plus ``request index / rate`` seconds
        """
        del worker_index  # not needed for constant-rate scheduling

        position = self.next_request_index()
        begin = await self.get_processes_start_time()
        offset_seconds = position / self.rate

        return begin + offset_seconds

    def request_completed(self, request_info: RequestInfo):
        """
        Accept a completion notification; this strategy keeps no state from it.

        :param request_info: Completed request metadata (ignored)
        """
        del request_info  # completions do not influence constant-rate scheduling
|
|
500
|
+
|
|
501
|
+
|
|
502
|
+
@SchedulingStrategy.register("poisson")
class AsyncPoissonStrategy(SchedulingStrategy):
    """
    Poisson-distributed scheduling for realistic load simulation.

    Schedules requests following a Poisson process with exponentially distributed
    inter-arrival times. Each call to ``next_request_time`` draws a delay from an
    exponential distribution with the configured rate and adds it to a shared
    running offset, which starts at the benchmark start time.
    """

    type_: Literal["poisson"] = "poisson"  # type: ignore[assignment]
    rate: float = Field(
        description="Request scheduling rate in requests per second",
        gt=0,
    )
    max_concurrency: PositiveInt | None = Field(
        default=None,
        description="Maximum number of concurrent requests to schedule",
    )
    random_seed: int = Field(
        default=42,
        description="Random seed for Poisson distribution reproducibility",
    )

    # Created lazily on first use, seeded with random_seed.
    _random: random.Random | None = PrivateAttr(None)
    # Shared running timestamp of the most recently scheduled request.
    _offset: Synchronized[float] | None = PrivateAttr(None)

    def __str__(self) -> str:
        """
        :return: String identifier with rate value
        """
        return f"poisson@{self.rate:.2f}"

    @property
    def processes_limit(self) -> PositiveInt | None:
        """
        :return: Max concurrency if set, otherwise None for unlimited
        """
        return self.max_concurrency

    @property
    def requests_limit(self) -> PositiveInt | None:
        """
        :return: Max concurrency if set, otherwise None for unlimited
        """
        return self.max_concurrency

    def init_processes_timings(
        self, worker_count: PositiveInt, max_concurrency: PositiveInt
    ):
        """
        Initialize Poisson-specific timing state.

        Creates the shared offset value (placeholder -1.0) and then delegates to
        the base implementation for the remaining shared state.

        :param worker_count: Number of worker processes to coordinate
        :param max_concurrency: Maximum number of concurrent requests allowed
        """
        # Create the offset first so it exists by the time the base class has
        # created the shared init event.
        self._offset = Value("d", -1.0)
        super().init_processes_timings(worker_count, max_concurrency)

    def init_processes_start(self, start_time: float):
        """
        Initialize the offset time for Poisson timing calculations.

        Seeds the shared offset with the start time and only then delegates to
        the base implementation, which sets the init event. Seeding first
        guarantees that anything released by the event never adds its delay to
        the -1.0 placeholder.

        :param start_time: Unix timestamp when request processing should begin
        :raises RuntimeError: If called before init_processes_timings
        """
        if self._offset is None:
            raise RuntimeError(
                "_offset is None in init_processes_start; was "
                "init_processes_timings not called?"
            )
        with self._offset.get_lock():
            self._offset.value = start_time

        # The base call sets the init event, so it must run after the offset
        # has been seeded.
        super().init_processes_start(start_time)

    async def next_request_time(self, worker_index: NonNegativeInt) -> float:
        """
        Calculate next request time using exponential distribution.

        Draws an exponentially distributed delay, adds it to the shared offset
        and returns the resulting offset. The update and the read happen under
        the same lock so the returned value is exactly the one this call
        produced.

        :param worker_index: Unused for Poisson strategy
        :return: Next arrival time based on Poisson process
        :raises RuntimeError: If called before init_processes_timings
        """
        _ = worker_index  # unused
        _ = await self.get_processes_start_time()  # ensure offset is initialized

        if self._offset is None:
            raise RuntimeError(
                "_offset is None in next_request_time; was "
                "init_processes_timings not called?"
            )

        if self._random is None:
            self._random = random.Random(self.random_seed)

        next_delay = self._random.expovariate(self.rate)

        with self._offset.get_lock():
            self._offset.value += next_delay
            # Read while still holding the lock; another process may advance
            # the offset as soon as it is released.
            scheduled_time = self._offset.value

        return scheduled_time

    def request_completed(self, request_info: RequestInfo):
        """
        Handle request completion (no-op for Poisson strategy).

        :param request_info: Completed request metadata (unused)
        """
        _ = request_info  # request_info unused for async poisson strategy
|