guidellm 0.4.0a169__tar.gz → 0.4.0a180__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of guidellm might be problematic. See the package's page on its registry for more details.
- {guidellm-0.4.0a169/src/guidellm.egg-info → guidellm-0.4.0a180}/PKG-INFO +1 -1
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm/scheduler/constraints.py +1 -3
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm/scheduler/environments.py +2 -2
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm/scheduler/scheduler.py +1 -1
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm/scheduler/strategies.py +31 -4
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm/scheduler/worker.py +56 -30
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm/scheduler/worker_group.py +33 -31
- guidellm-0.4.0a180/src/guidellm/version.py +6 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180/src/guidellm.egg-info}/PKG-INFO +1 -1
- guidellm-0.4.0a169/src/guidellm/version.py +0 -6
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/LICENSE +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/MANIFEST.in +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/README.md +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/pyproject.toml +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/setup.cfg +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/setup.py +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm/__init__.py +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm/__main__.py +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm/backends/__init__.py +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm/backends/backend.py +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm/backends/openai.py +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm/backends/response_handlers.py +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm/benchmark/__init__.py +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm/benchmark/benchmarker.py +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm/benchmark/entrypoints.py +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm/benchmark/output.py +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm/benchmark/profile.py +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm/benchmark/progress.py +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm/benchmark/scenarios/__init__.py +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm/benchmark/scenarios/chat.json +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm/benchmark/scenarios/rag.json +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm/benchmark/schemas.py +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm/data/__init__.py +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm/data/collators.py +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm/data/deserializers/__init__.py +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm/data/deserializers/deserializer.py +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm/data/deserializers/file.py +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm/data/deserializers/huggingface.py +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm/data/deserializers/memory.py +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm/data/deserializers/synthetic.py +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm/data/loaders.py +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm/data/preprocessors/__init__.py +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm/data/preprocessors/formatters.py +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm/data/preprocessors/mappers.py +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm/data/preprocessors/preprocessor.py +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm/data/processor.py +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm/data/schemas.py +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm/data/utils/__init__.py +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm/data/utils/dataset.py +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm/extras/__init__.py +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm/extras/audio.py +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm/extras/vision.py +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm/logger.py +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm/mock_server/__init__.py +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm/mock_server/config.py +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm/mock_server/handlers/__init__.py +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm/mock_server/handlers/chat_completions.py +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm/mock_server/handlers/completions.py +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm/mock_server/handlers/tokenizer.py +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm/mock_server/models.py +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm/mock_server/server.py +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm/mock_server/utils.py +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm/preprocess/__init__.py +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm/preprocess/dataset.py +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm/presentation/__init__.py +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm/presentation/builder.py +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm/presentation/data_models.py +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm/presentation/injector.py +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm/scheduler/__init__.py +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm/scheduler/schemas.py +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm/schemas/__init__.py +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm/schemas/info.py +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm/schemas/request.py +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm/schemas/response.py +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm/schemas/stats.py +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm/settings.py +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm/utils/__init__.py +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm/utils/auto_importer.py +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm/utils/cli.py +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm/utils/colors.py +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm/utils/console.py +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm/utils/default_group.py +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm/utils/dict.py +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm/utils/encoding.py +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm/utils/functions.py +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm/utils/hf_datasets.py +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm/utils/hf_transformers.py +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm/utils/imports.py +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm/utils/messaging.py +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm/utils/mixins.py +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm/utils/pydantic_utils.py +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm/utils/random.py +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm/utils/registry.py +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm/utils/singleton.py +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm/utils/statistics.py +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm/utils/synchronous.py +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm/utils/text.py +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm/utils/typing.py +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm.egg-info/SOURCES.txt +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm.egg-info/dependency_links.txt +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm.egg-info/entry_points.txt +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm.egg-info/requires.txt +0 -0
- {guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm.egg-info/top_level.txt +0 -0
|
@@ -1005,9 +1005,7 @@ class RequestsExhaustedConstraint(StandardBaseModel, InfoMixin):
|
|
|
1005
1005
|
return self.model_dump()
|
|
1006
1006
|
|
|
1007
1007
|
def __call__(
|
|
1008
|
-
self,
|
|
1009
|
-
state: SchedulerState,
|
|
1010
|
-
request_info: RequestInfo, # noqa: ARG002
|
|
1008
|
+
self, state: SchedulerState, _request: RequestInfo
|
|
1011
1009
|
) -> SchedulerUpdateAction:
|
|
1012
1010
|
create_exceeded = state.created_requests >= self.num_requests
|
|
1013
1011
|
processed_exceeded = state.processed_requests >= self.num_requests
|
|
@@ -84,7 +84,7 @@ class Environment(ABC, Generic[RequestT, ResponseT], InfoMixin):
|
|
|
84
84
|
async def update_run_iteration(
|
|
85
85
|
self,
|
|
86
86
|
response: ResponseT | None,
|
|
87
|
-
request: RequestT,
|
|
87
|
+
request: RequestT | MultiTurnRequestT[RequestT],
|
|
88
88
|
request_info: RequestInfo,
|
|
89
89
|
state: SchedulerState,
|
|
90
90
|
):
|
|
@@ -201,7 +201,7 @@ class NonDistributedEnvironment(Environment[RequestT, ResponseT]):
|
|
|
201
201
|
async def update_run_iteration(
|
|
202
202
|
self,
|
|
203
203
|
response: ResponseT | None,
|
|
204
|
-
request: RequestT,
|
|
204
|
+
request: RequestT | MultiTurnRequestT[RequestT],
|
|
205
205
|
request_info: RequestInfo,
|
|
206
206
|
state: SchedulerState,
|
|
207
207
|
):
|
|
@@ -70,8 +70,8 @@ class SchedulingStrategy(PydanticClassRegistryMixin["SchedulingStrategy"], InfoM
|
|
|
70
70
|
description="Number of worker processes to use for this strategy",
|
|
71
71
|
ge=0,
|
|
72
72
|
)
|
|
73
|
-
max_concurrency: int = Field(
|
|
74
|
-
default=
|
|
73
|
+
max_concurrency: int | None = Field(
|
|
74
|
+
default=None,
|
|
75
75
|
description="Maximum number of concurrent requests to allow",
|
|
76
76
|
ge=0,
|
|
77
77
|
)
|
|
@@ -122,8 +122,8 @@ class SchedulingStrategy(PydanticClassRegistryMixin["SchedulingStrategy"], InfoM
|
|
|
122
122
|
self.startup_duration = startup_duration
|
|
123
123
|
|
|
124
124
|
self._processes_request_index = Value("i", 0)
|
|
125
|
-
self._processes_lock = Lock()
|
|
126
125
|
self._processes_start_time = Value("d", -1.0)
|
|
126
|
+
self._processes_lock = Lock()
|
|
127
127
|
|
|
128
128
|
def init_processes_start(self, start_time: float):
|
|
129
129
|
"""
|
|
@@ -137,6 +137,10 @@ class SchedulingStrategy(PydanticClassRegistryMixin["SchedulingStrategy"], InfoM
|
|
|
137
137
|
"SchedulingStrategy init_processes_start called before "
|
|
138
138
|
"init_processes_timings"
|
|
139
139
|
)
|
|
140
|
+
if self._processes_start_time is None:
|
|
141
|
+
raise RuntimeError(
|
|
142
|
+
"_processes_lock is not None but _processes_start_time is None"
|
|
143
|
+
)
|
|
140
144
|
|
|
141
145
|
with self._processes_lock:
|
|
142
146
|
self._processes_start_time.value = start_time
|
|
@@ -153,6 +157,10 @@ class SchedulingStrategy(PydanticClassRegistryMixin["SchedulingStrategy"], InfoM
|
|
|
153
157
|
"SchedulingStrategy get_processes_start_time called before "
|
|
154
158
|
"init_processes_timings"
|
|
155
159
|
)
|
|
160
|
+
if self._processes_start_time is None:
|
|
161
|
+
raise RuntimeError(
|
|
162
|
+
"_processes_lock is not None but _processes_start_time is None"
|
|
163
|
+
)
|
|
156
164
|
|
|
157
165
|
while self._cached_processes_start_time is None:
|
|
158
166
|
with self._processes_lock:
|
|
@@ -175,6 +183,10 @@ class SchedulingStrategy(PydanticClassRegistryMixin["SchedulingStrategy"], InfoM
|
|
|
175
183
|
"SchedulingStrategy next_request_index called before "
|
|
176
184
|
"init_processes_timings"
|
|
177
185
|
)
|
|
186
|
+
if self._processes_request_index is None:
|
|
187
|
+
raise RuntimeError(
|
|
188
|
+
"_processes_lock is not None but _processes_request_index is None"
|
|
189
|
+
)
|
|
178
190
|
|
|
179
191
|
with self._processes_lock:
|
|
180
192
|
self._processes_request_index.value += 1
|
|
@@ -369,7 +381,8 @@ class ThroughputStrategy(SchedulingStrategy):
|
|
|
369
381
|
start_time = await self.get_processes_start_time()
|
|
370
382
|
|
|
371
383
|
if (
|
|
372
|
-
self.
|
|
384
|
+
self.max_concurrency is not None
|
|
385
|
+
and self.startup_duration > 0
|
|
373
386
|
and (time.time() - start_time) < self.startup_duration
|
|
374
387
|
and (current_index := self.next_request_index()) <= self.max_concurrency
|
|
375
388
|
):
|
|
@@ -477,6 +490,8 @@ class AsyncPoissonStrategy(ThroughputStrategy):
|
|
|
477
490
|
:param startup_duration: Duration in seconds for request startup ramping
|
|
478
491
|
"""
|
|
479
492
|
super().init_processes_timings(worker_count, max_concurrency, startup_duration)
|
|
493
|
+
if self._processes_lock is None:
|
|
494
|
+
raise RuntimeError("_processes_lock is None in init_processes_timings")
|
|
480
495
|
with self._processes_lock:
|
|
481
496
|
self._offset = Value("d", -1.0)
|
|
482
497
|
|
|
@@ -487,6 +502,12 @@ class AsyncPoissonStrategy(ThroughputStrategy):
|
|
|
487
502
|
:param start_time: Unix timestamp when request processing should begin
|
|
488
503
|
"""
|
|
489
504
|
ThroughputStrategy.init_processes_start(self, start_time)
|
|
505
|
+
|
|
506
|
+
if self._processes_lock is None:
|
|
507
|
+
raise RuntimeError("_processes_lock is None in init_processes_start")
|
|
508
|
+
if self._offset is None:
|
|
509
|
+
raise RuntimeError("_offset is None in init_processes_start; was "
|
|
510
|
+
"init_processes_timings not called?")
|
|
490
511
|
with self._processes_lock:
|
|
491
512
|
self._offset.value = start_time
|
|
492
513
|
|
|
@@ -505,6 +526,12 @@ class AsyncPoissonStrategy(ThroughputStrategy):
|
|
|
505
526
|
|
|
506
527
|
next_delay = self._random.expovariate(self.rate)
|
|
507
528
|
|
|
529
|
+
if self._processes_lock is None:
|
|
530
|
+
raise RuntimeError("_processes_lock is None in next_request_time; was "
|
|
531
|
+
"init_processes_timings not called?")
|
|
532
|
+
if self._offset is None:
|
|
533
|
+
raise RuntimeError("_offset is None in next_request_time; was "
|
|
534
|
+
"init_processes_timings not called?")
|
|
508
535
|
with self._processes_lock:
|
|
509
536
|
self._offset.value += next_delay
|
|
510
537
|
|
|
@@ -23,11 +23,9 @@ try:
|
|
|
23
23
|
bool, "Flag indicating uvloop availability for event loop optimization"
|
|
24
24
|
] = True
|
|
25
25
|
except ImportError:
|
|
26
|
-
uvloop = None
|
|
26
|
+
uvloop = None # type: ignore[assignment] # Optional dependency
|
|
27
27
|
|
|
28
|
-
HAS_UVLOOP
|
|
29
|
-
bool, "Flag indicating uvloop availability for event loop optimization"
|
|
30
|
-
] = False
|
|
28
|
+
HAS_UVLOOP = False
|
|
31
29
|
|
|
32
30
|
|
|
33
31
|
from guidellm.scheduler.schemas import (
|
|
@@ -84,6 +82,10 @@ class WorkerProcess(Generic[RequestT, ResponseT]):
|
|
|
84
82
|
RequestT | MultiTurnRequestT[RequestT],
|
|
85
83
|
RequestInfo,
|
|
86
84
|
],
|
|
85
|
+
tuple[
|
|
86
|
+
RequestT | MultiTurnRequestT[RequestT],
|
|
87
|
+
RequestInfo,
|
|
88
|
+
],
|
|
87
89
|
],
|
|
88
90
|
backend: BackendInterface[RequestT, ResponseT],
|
|
89
91
|
strategy: SchedulingStrategy,
|
|
@@ -201,8 +203,11 @@ class WorkerProcess(Generic[RequestT, ResponseT]):
|
|
|
201
203
|
|
|
202
204
|
async def _stop_monitor(
|
|
203
205
|
self,
|
|
204
|
-
) ->
|
|
205
|
-
"""
|
|
206
|
+
) -> None:
|
|
207
|
+
"""
|
|
208
|
+
Monitor shutdown and error events for worker termination.
|
|
209
|
+
:raises RuntimeError if the work process received an error signal.
|
|
210
|
+
"""
|
|
206
211
|
exit_key = await wait_for_sync_objects(
|
|
207
212
|
{
|
|
208
213
|
"error_event": self.error_event,
|
|
@@ -322,7 +327,7 @@ class WorkerProcess(Generic[RequestT, ResponseT]):
|
|
|
322
327
|
"""Cancel all remaining queued requests until worker process terminates."""
|
|
323
328
|
while True:
|
|
324
329
|
try:
|
|
325
|
-
request: RequestT
|
|
330
|
+
request: RequestT | MultiTurnRequestT[RequestT]
|
|
326
331
|
request_info: RequestInfo
|
|
327
332
|
request, request_info = await self.messaging.get(
|
|
328
333
|
timeout=self.messaging.poll_interval
|
|
@@ -350,31 +355,19 @@ class WorkerProcess(Generic[RequestT, ResponseT]):
|
|
|
350
355
|
|
|
351
356
|
try:
|
|
352
357
|
# Pull request from the queue, update state, and send "pending" update
|
|
353
|
-
request, request_info = await self.
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
raise NotImplementedError("Multi-turn requests are not yet supported")
|
|
363
|
-
|
|
364
|
-
# Schedule the request
|
|
365
|
-
current_time = time.time()
|
|
366
|
-
request_info.timings.scheduled_at = current_time
|
|
367
|
-
if target_start > current_time:
|
|
368
|
-
await asyncio.sleep(target_start - current_time)
|
|
369
|
-
# Adapt delay so that scheduled at reflects the sleep time
|
|
370
|
-
request_info.timings.scheduled_at = target_start
|
|
371
|
-
|
|
372
|
-
# Process the request with the backend
|
|
373
|
-
request_info.timings.resolve_start = time.time()
|
|
374
|
-
self._send_update("in_progress", response, request, request_info)
|
|
375
|
-
async for resp, info in self.backend.resolve(request, request_info, None):
|
|
358
|
+
request, request_info = await self._dequeue_next_request(target_start)
|
|
359
|
+
|
|
360
|
+
# Schedule the request and send "in_progress" update
|
|
361
|
+
await self._schedule_request(request, request_info, target_start)
|
|
362
|
+
|
|
363
|
+
async for resp, info in self.backend.resolve( # type: ignore[attr-defined]
|
|
364
|
+
request, request_info, None
|
|
365
|
+
):
|
|
366
|
+
|
|
376
367
|
response = resp
|
|
377
368
|
request_info = info
|
|
369
|
+
if request_info is None:
|
|
370
|
+
raise RuntimeError("Received invalid request info from backend")
|
|
378
371
|
|
|
379
372
|
# Complete the request
|
|
380
373
|
request_info.timings.resolve_end = time.time()
|
|
@@ -397,6 +390,39 @@ class WorkerProcess(Generic[RequestT, ResponseT]):
|
|
|
397
390
|
if request_info is not None:
|
|
398
391
|
self.strategy.request_completed(request_info)
|
|
399
392
|
|
|
393
|
+
async def _dequeue_next_request(
|
|
394
|
+
self, target_start: float
|
|
395
|
+
) -> tuple[RequestT, RequestInfo]:
|
|
396
|
+
request, request_info = await self.messaging.get()
|
|
397
|
+
dequeued_time = time.time() # Ensure accurate dequeue timing
|
|
398
|
+
if request is None or request_info is None:
|
|
399
|
+
raise RuntimeError("Received invalid request or request info")
|
|
400
|
+
if isinstance(request, list | tuple):
|
|
401
|
+
raise NotImplementedError("Multi-turn requests are not yet supported")
|
|
402
|
+
|
|
403
|
+
request_info.timings.dequeued = dequeued_time
|
|
404
|
+
request_info.scheduler_node_id = self.messaging.worker_index or -1
|
|
405
|
+
request_info.timings.targeted_start = target_start
|
|
406
|
+
self._send_update("pending", None, request, request_info)
|
|
407
|
+
return request, request_info
|
|
408
|
+
|
|
409
|
+
async def _schedule_request(
|
|
410
|
+
self,
|
|
411
|
+
request: RequestT,
|
|
412
|
+
request_info: RequestInfo,
|
|
413
|
+
target_start: float
|
|
414
|
+
):
|
|
415
|
+
current_time = time.time()
|
|
416
|
+
request_info.timings.scheduled_at = current_time
|
|
417
|
+
if target_start > current_time:
|
|
418
|
+
await asyncio.sleep(target_start - current_time)
|
|
419
|
+
# Adapt delay so that scheduled at reflects the sleep time
|
|
420
|
+
request_info.timings.scheduled_at = target_start
|
|
421
|
+
|
|
422
|
+
# Process the request with the backend
|
|
423
|
+
request_info.timings.resolve_start = time.time()
|
|
424
|
+
self._send_update("in_progress", None, request, request_info)
|
|
425
|
+
|
|
400
426
|
def _send_update(
|
|
401
427
|
self,
|
|
402
428
|
new_status: Literal[
|
|
@@ -84,7 +84,7 @@ class WorkerProcessGroup(Generic[RequestT, ResponseT]):
|
|
|
84
84
|
backend: BackendInterface[RequestT, ResponseT],
|
|
85
85
|
strategy: SchedulingStrategy,
|
|
86
86
|
startup_duration: float,
|
|
87
|
-
**constraints:
|
|
87
|
+
**constraints: Constraint,
|
|
88
88
|
):
|
|
89
89
|
"""
|
|
90
90
|
Initialize a worker process group for distributed request processing.
|
|
@@ -232,7 +232,7 @@ class WorkerProcessGroup(Generic[RequestT, ResponseT]):
|
|
|
232
232
|
worker_index=rank,
|
|
233
233
|
max_buffer_send_size=None,
|
|
234
234
|
max_buffer_receive_size=per_proc_max_buffer_size,
|
|
235
|
-
),
|
|
235
|
+
), # The non-group worker lacks the SchedulerState type. Type err.
|
|
236
236
|
backend=self.backend,
|
|
237
237
|
strategy=self.strategy,
|
|
238
238
|
async_limit=async_limit,
|
|
@@ -478,9 +478,9 @@ class WorkerGroupState(Generic[RequestT, ResponseT]):
|
|
|
478
478
|
num_processes=len(processes),
|
|
479
479
|
start_time=start_time,
|
|
480
480
|
)
|
|
481
|
-
self.
|
|
482
|
-
self.
|
|
483
|
-
self.
|
|
481
|
+
self._queued_request_ids: set[str] = set()
|
|
482
|
+
self._pending_request_ids: set[str] = set()
|
|
483
|
+
self._processing_request_ids: set[str] = set()
|
|
484
484
|
|
|
485
485
|
def requests_generator(
|
|
486
486
|
self, requests: Iterable[RequestT | MultiTurnRequestT[RequestT]]
|
|
@@ -517,11 +517,13 @@ class WorkerGroupState(Generic[RequestT, ResponseT]):
|
|
|
517
517
|
)
|
|
518
518
|
state_update = self._locked_update(request_info)
|
|
519
519
|
request_info.timings.queued = time.time()
|
|
520
|
+
if self.messaging.buffer_receive_queue is None:
|
|
521
|
+
raise RuntimeError("buffer receive queue is None")
|
|
520
522
|
self.messaging.buffer_receive_queue.sync_put(
|
|
521
523
|
(None, request, request_info, state_update.state)
|
|
522
524
|
)
|
|
523
525
|
|
|
524
|
-
yield
|
|
526
|
+
yield request, request_info
|
|
525
527
|
|
|
526
528
|
if state_update.stop_queueing:
|
|
527
529
|
self.stop_send_requests_event.set()
|
|
@@ -530,8 +532,8 @@ class WorkerGroupState(Generic[RequestT, ResponseT]):
|
|
|
530
532
|
# Reached the end, inject a RequestsExhaustedConstraint to record
|
|
531
533
|
self._locked_update(
|
|
532
534
|
info=None,
|
|
533
|
-
|
|
534
|
-
"requests_exhausted": RequestsExhaustedConstraint(
|
|
535
|
+
add_constraints={
|
|
536
|
+
"requests_exhausted": RequestsExhaustedConstraint( # type: ignore[dict-item]
|
|
535
537
|
num_requests=count
|
|
536
538
|
)
|
|
537
539
|
},
|
|
@@ -610,10 +612,10 @@ class WorkerGroupState(Generic[RequestT, ResponseT]):
|
|
|
610
612
|
def _locked_update(
|
|
611
613
|
self,
|
|
612
614
|
info: RequestInfo | None = None,
|
|
613
|
-
|
|
615
|
+
add_constraints: dict[str, Constraint] | None = None,
|
|
614
616
|
) -> _StateUpdate:
|
|
615
617
|
with self._update_lock:
|
|
616
|
-
if add_constraints:
|
|
618
|
+
if add_constraints is not None:
|
|
617
619
|
self.constraints.update(add_constraints)
|
|
618
620
|
|
|
619
621
|
if info is not None:
|
|
@@ -631,34 +633,34 @@ class WorkerGroupState(Generic[RequestT, ResponseT]):
|
|
|
631
633
|
|
|
632
634
|
def _update_state_request_counts(self, info: RequestInfo):
|
|
633
635
|
if info.status == "queued":
|
|
634
|
-
self.
|
|
635
|
-
self._state.queued_requests = len(self.
|
|
636
|
+
self._queued_request_ids.add(info.request_id)
|
|
637
|
+
self._state.queued_requests = len(self._queued_request_ids)
|
|
636
638
|
self._state.created_requests += 1
|
|
637
639
|
elif info.status == "pending":
|
|
638
|
-
self.
|
|
639
|
-
self._state.queued_requests = len(self.
|
|
640
|
-
self.
|
|
641
|
-
self._state.pending_requests = len(self.
|
|
640
|
+
self._queued_request_ids.remove(info.request_id)
|
|
641
|
+
self._state.queued_requests = len(self._queued_request_ids)
|
|
642
|
+
self._pending_request_ids.add(info.request_id)
|
|
643
|
+
self._state.pending_requests = len(self._pending_request_ids)
|
|
642
644
|
elif info.status == "in_progress":
|
|
643
|
-
self.
|
|
644
|
-
self._state.pending_requests = len(self.
|
|
645
|
-
self.
|
|
646
|
-
self._state.processing_requests = len(self.
|
|
645
|
+
self._pending_request_ids.remove(info.request_id)
|
|
646
|
+
self._state.pending_requests = len(self._pending_request_ids)
|
|
647
|
+
self._processing_request_ids.add(info.request_id)
|
|
648
|
+
self._state.processing_requests = len(self._processing_request_ids)
|
|
647
649
|
elif info.status == "completed":
|
|
648
|
-
self.
|
|
649
|
-
self._state.processing_requests = len(self.
|
|
650
|
+
self._processing_request_ids.remove(info.request_id)
|
|
651
|
+
self._state.processing_requests = len(self._processing_request_ids)
|
|
650
652
|
self._state.processed_requests += 1
|
|
651
653
|
self._state.successful_requests += 1
|
|
652
654
|
elif info.status in ("errored", "cancelled"):
|
|
653
|
-
if info.request_id in self.
|
|
654
|
-
self.
|
|
655
|
-
self._state.queued_requests = len(self.
|
|
656
|
-
elif info.request_id in self.
|
|
657
|
-
self.
|
|
658
|
-
self._state.pending_requests = len(self.
|
|
659
|
-
elif info.request_id in self.
|
|
660
|
-
self.
|
|
661
|
-
self._state.processing_requests = len(self.
|
|
655
|
+
if info.request_id in self._queued_request_ids:
|
|
656
|
+
self._queued_request_ids.remove(info.request_id)
|
|
657
|
+
self._state.queued_requests = len(self._queued_request_ids)
|
|
658
|
+
elif info.request_id in self._pending_request_ids:
|
|
659
|
+
self._pending_request_ids.remove(info.request_id)
|
|
660
|
+
self._state.pending_requests = len(self._pending_request_ids)
|
|
661
|
+
elif info.request_id in self._processing_request_ids:
|
|
662
|
+
self._processing_request_ids.remove(info.request_id)
|
|
663
|
+
self._state.processing_requests = len(self._processing_request_ids)
|
|
662
664
|
|
|
663
665
|
self._state.processed_requests += 1
|
|
664
666
|
self._state.errored_requests += 1 if info.status == "errored" else 0
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{guidellm-0.4.0a169 → guidellm-0.4.0a180}/src/guidellm/mock_server/handlers/chat_completions.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|