guidellm 0.4.0a18__py3-none-any.whl → 0.4.0a155__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- guidellm/__init__.py +5 -2
- guidellm/__main__.py +451 -252
- guidellm/backends/__init__.py +33 -0
- guidellm/backends/backend.py +110 -0
- guidellm/backends/openai.py +355 -0
- guidellm/backends/response_handlers.py +455 -0
- guidellm/benchmark/__init__.py +53 -39
- guidellm/benchmark/benchmarker.py +148 -317
- guidellm/benchmark/entrypoints.py +466 -128
- guidellm/benchmark/output.py +517 -771
- guidellm/benchmark/profile.py +580 -280
- guidellm/benchmark/progress.py +568 -549
- guidellm/benchmark/scenarios/__init__.py +40 -0
- guidellm/benchmark/scenarios/chat.json +6 -0
- guidellm/benchmark/scenarios/rag.json +6 -0
- guidellm/benchmark/schemas.py +2085 -0
- guidellm/data/__init__.py +28 -4
- guidellm/data/collators.py +16 -0
- guidellm/data/deserializers/__init__.py +53 -0
- guidellm/data/deserializers/deserializer.py +109 -0
- guidellm/data/deserializers/file.py +222 -0
- guidellm/data/deserializers/huggingface.py +94 -0
- guidellm/data/deserializers/memory.py +192 -0
- guidellm/data/deserializers/synthetic.py +346 -0
- guidellm/data/loaders.py +145 -0
- guidellm/data/preprocessors/__init__.py +25 -0
- guidellm/data/preprocessors/formatters.py +412 -0
- guidellm/data/preprocessors/mappers.py +198 -0
- guidellm/data/preprocessors/preprocessor.py +29 -0
- guidellm/data/processor.py +30 -0
- guidellm/data/schemas.py +13 -0
- guidellm/data/utils/__init__.py +10 -0
- guidellm/data/utils/dataset.py +94 -0
- guidellm/data/utils/functions.py +18 -0
- guidellm/extras/__init__.py +4 -0
- guidellm/extras/audio.py +215 -0
- guidellm/extras/vision.py +242 -0
- guidellm/logger.py +2 -2
- guidellm/mock_server/__init__.py +8 -0
- guidellm/mock_server/config.py +84 -0
- guidellm/mock_server/handlers/__init__.py +17 -0
- guidellm/mock_server/handlers/chat_completions.py +280 -0
- guidellm/mock_server/handlers/completions.py +280 -0
- guidellm/mock_server/handlers/tokenizer.py +142 -0
- guidellm/mock_server/models.py +510 -0
- guidellm/mock_server/server.py +168 -0
- guidellm/mock_server/utils.py +302 -0
- guidellm/preprocess/dataset.py +23 -26
- guidellm/presentation/builder.py +2 -2
- guidellm/presentation/data_models.py +25 -21
- guidellm/presentation/injector.py +2 -3
- guidellm/scheduler/__init__.py +65 -26
- guidellm/scheduler/constraints.py +1035 -0
- guidellm/scheduler/environments.py +252 -0
- guidellm/scheduler/scheduler.py +140 -368
- guidellm/scheduler/schemas.py +272 -0
- guidellm/scheduler/strategies.py +519 -0
- guidellm/scheduler/worker.py +391 -420
- guidellm/scheduler/worker_group.py +707 -0
- guidellm/schemas/__init__.py +31 -0
- guidellm/schemas/info.py +159 -0
- guidellm/schemas/request.py +216 -0
- guidellm/schemas/response.py +119 -0
- guidellm/schemas/stats.py +228 -0
- guidellm/{config.py → settings.py} +32 -21
- guidellm/utils/__init__.py +95 -8
- guidellm/utils/auto_importer.py +98 -0
- guidellm/utils/cli.py +46 -2
- guidellm/utils/console.py +183 -0
- guidellm/utils/encoding.py +778 -0
- guidellm/utils/functions.py +134 -0
- guidellm/utils/hf_datasets.py +1 -2
- guidellm/utils/hf_transformers.py +4 -4
- guidellm/utils/imports.py +9 -0
- guidellm/utils/messaging.py +1118 -0
- guidellm/utils/mixins.py +115 -0
- guidellm/utils/pydantic_utils.py +411 -0
- guidellm/utils/random.py +3 -4
- guidellm/utils/registry.py +220 -0
- guidellm/utils/singleton.py +133 -0
- guidellm/{objects → utils}/statistics.py +341 -247
- guidellm/utils/synchronous.py +159 -0
- guidellm/utils/text.py +163 -50
- guidellm/utils/typing.py +41 -0
- guidellm/version.py +1 -1
- {guidellm-0.4.0a18.dist-info → guidellm-0.4.0a155.dist-info}/METADATA +33 -10
- guidellm-0.4.0a155.dist-info/RECORD +96 -0
- guidellm/backend/__init__.py +0 -23
- guidellm/backend/backend.py +0 -259
- guidellm/backend/openai.py +0 -705
- guidellm/backend/response.py +0 -136
- guidellm/benchmark/aggregator.py +0 -760
- guidellm/benchmark/benchmark.py +0 -837
- guidellm/benchmark/scenario.py +0 -104
- guidellm/data/prideandprejudice.txt.gz +0 -0
- guidellm/dataset/__init__.py +0 -22
- guidellm/dataset/creator.py +0 -213
- guidellm/dataset/entrypoints.py +0 -42
- guidellm/dataset/file.py +0 -92
- guidellm/dataset/hf_datasets.py +0 -62
- guidellm/dataset/in_memory.py +0 -132
- guidellm/dataset/synthetic.py +0 -287
- guidellm/objects/__init__.py +0 -18
- guidellm/objects/pydantic.py +0 -89
- guidellm/request/__init__.py +0 -18
- guidellm/request/loader.py +0 -284
- guidellm/request/request.py +0 -79
- guidellm/request/types.py +0 -10
- guidellm/scheduler/queues.py +0 -25
- guidellm/scheduler/result.py +0 -155
- guidellm/scheduler/strategy.py +0 -495
- guidellm-0.4.0a18.dist-info/RECORD +0 -62
- {guidellm-0.4.0a18.dist-info → guidellm-0.4.0a155.dist-info}/WHEEL +0 -0
- {guidellm-0.4.0a18.dist-info → guidellm-0.4.0a155.dist-info}/entry_points.txt +0 -0
- {guidellm-0.4.0a18.dist-info → guidellm-0.4.0a155.dist-info}/licenses/LICENSE +0 -0
- {guidellm-0.4.0a18.dist-info → guidellm-0.4.0a155.dist-info}/top_level.txt +0 -0
guidellm/scheduler/environments.py (new file)

@@ -0,0 +1,252 @@
+"""
+Environment abstractions for coordinating scheduler execution across distributed nodes.
+
+Provides abstractions that handle synchronization, timing coordination, error
+propagation, and lifecycle management for scheduler execution across single or
+multiple nodes. The Environment protocol defines the interface for distributed
+coordination while NonDistributedEnvironment provides a minimal implementation
+for single-node execution. Environments manage the complete execution lifecycle
+from parameter distribution through result aggregation.
+
+Execution Flow:
+1. sync_run_params() - Distribute workload and synchronize parameters
+2. sync_run_start() - Coordinate synchronized start time
+3. update_run_iteration() - Update state after each request iteration
+4. sync_run_error() - Handle and propagate errors across nodes
+5. sync_run_end() - Aggregate results and finalize execution
+"""
+
+from __future__ import annotations
+
+import time
+from abc import ABC, abstractmethod
+from collections.abc import AsyncIterator, Iterable
+from typing import Generic
+
+from guidellm.scheduler.constraints import Constraint
+from guidellm.scheduler.schemas import (
+    MultiTurnRequestT,
+    RequestT,
+    ResponseT,
+    SchedulerState,
+)
+from guidellm.scheduler.strategies import SchedulingStrategy
+from guidellm.schemas import RequestInfo
+from guidellm.settings import settings
+from guidellm.utils import InfoMixin
+
+__all__ = ["Environment", "NonDistributedEnvironment"]
+
+
+class Environment(ABC, Generic[RequestT, ResponseT], InfoMixin):
+    """
+    Abstract interface for coordinating scheduler execution across distributed nodes.
+
+    Defines the protocol for managing distributed scheduler execution including
+    parameter synchronization, timing coordination, state updates, error propagation,
+    and result aggregation. Implementations handle distributed coordination complexity
+    while providing a unified interface for scheduler orchestration.
+    """
+
+    @abstractmethod
+    async def sync_run_params(
+        self,
+        requests: Iterable[RequestT | MultiTurnRequestT[RequestT]],
+        strategy: SchedulingStrategy,
+        constraints: dict[str, Constraint],
+    ) -> tuple[
+        Iterable[RequestT | MultiTurnRequestT[RequestT]],
+        SchedulingStrategy,
+        dict[str, Constraint],
+    ]:
+        """
+        Synchronize execution parameters across nodes and resolve local scope.
+
+        :param requests: Complete set of requests to process across all nodes
+        :param strategy: Scheduling strategy to apply during execution
+        :param constraints: Runtime constraints to enforce during execution
+        :return: Tuple of (local_requests, strategy, constraints) for this node
+        :raises Exception: If parameter synchronization fails or nodes inconsistent
+        """
+        ...
+
+    @abstractmethod
+    async def sync_run_start(self) -> float:
+        """
+        Coordinate synchronized start time across all nodes.
+
+        :return: Unix timestamp when all nodes should begin processing
+        :raises Exception: If startup synchronization fails across nodes
+        """
+        ...
+
+    @abstractmethod
+    async def update_run_iteration(
+        self,
+        response: ResponseT | None,
+        request: RequestT,
+        request_info: RequestInfo,
+        state: SchedulerState,
+    ):
+        """
+        Update environment state with completed request iteration results.
+
+        :param response: Response generated for the request, if successful
+        :param request: The processed request
+        :param request_info: Metadata about request processing including timings
+        :param state: Current scheduler state with metrics and progress
+        :raises Exception: If state update fails or indicates critical errors
+        """
+        ...
+
+    @abstractmethod
+    async def sync_run_error(self, err: list[Exception] | Exception):
+        """
+        Handle and propagate errors across all active nodes.
+
+        :param err: The exception(s) that occurred during execution
+        """
+        ...
+
+    @abstractmethod
+    async def sync_run_end(
+        self,
+    ) -> AsyncIterator[
+        tuple[
+            ResponseT | None,
+            RequestT,
+            RequestInfo,
+            SchedulerState,
+        ]
+    ]:
+        """
+        Finalize execution and aggregate results from all nodes.
+
+        :return: Iterator of (response, request, request_info, state) tuples from
+            remote nodes in distributed environments, empty for non-distributed
+        :raises Exception: Any errors that occurred during execution
+        """
+        yield None  # type: ignore[misc]
+
+
+class NonDistributedEnvironment(Environment[RequestT, ResponseT]):
+    """
+    Single-node scheduler execution environment with minimal coordination overhead.
+
+    Implements the Environment interface with no-op synchronization for local testing,
+    development, and single-machine benchmarking. All synchronization methods return
+    immediately without distributed coordination logic.
+
+    Example:
+    ::
+        from guidellm.scheduler import (
+            MaxNumberConstraint,
+            NonDistributedEnvironment,
+            RequestInfo,
+            SchedulerState,
+            SynchronousStrategy,
+        )
+
+        env = NonDistributedEnvironment()
+        requests = [f"req_{ind}" for ind in range(5)]
+        strategy = SynchronousStrategy()
+        constraints = {"max_num": MaxNumberConstraint(max_num=5)}
+        state = SchedulerState()
+
+        local_req, local_strat, local_const = await env.sync_run_params(
+            requests, strategy, constraints
+        )
+        start_time = await env.sync_run_start()
+        for req in local_req:
+            state.processed_requests += 1
+            await env.update_run_iteration(f"resp_{req}", req, RequestInfo(), state)
+        async for nonlocal_req in env.sync_run_end():
+            state.processed_requests += 1
+    """
+
+    def __init__(self):
+        """
+        Initialize single-node environment with empty error storage.
+        """
+        self.run_errors: list[Exception] = []
+
+    async def sync_run_params(
+        self,
+        requests: Iterable[RequestT | MultiTurnRequestT[RequestT]],
+        strategy: SchedulingStrategy,
+        constraints: dict[str, Constraint],
+    ) -> tuple[
+        Iterable[RequestT | MultiTurnRequestT[RequestT]],
+        SchedulingStrategy,
+        dict[str, Constraint],
+    ]:
+        """
+        Return parameters unchanged for single-node execution.
+
+        :param requests: Requests to process locally
+        :param strategy: Scheduling strategy to apply during execution
+        :param constraints: Runtime constraints to enforce during execution
+        :return: Original (requests, strategy, constraints) tuple unchanged
+        """
+        return requests, strategy, constraints
+
+    async def sync_run_start(self) -> float:
+        """
+        Return current time plus configured delay for single-node startup.
+
+        :return: Unix timestamp when execution should begin
+        """
+        return time.time() + settings.scheduler_start_delay_non_distributed
+
+    async def update_run_iteration(
+        self,
+        response: ResponseT | None,
+        request: RequestT,
+        request_info: RequestInfo,
+        state: SchedulerState,
+    ):
+        """
+        No-op for single-node execution with no distributed state synchronization.
+
+        :param response: Response generated for the request, if successful
+        :param request: The processed request
+        :param request_info: Metadata about request processing including timings
+        :param state: Current scheduler state with metrics and progress
+        """
+
+    async def sync_run_error(self, err: Exception | list[Exception]):
+        """
+        Store error for later propagation during run finalization.
+
+        :param err: The exception(s) that occurred during execution
+        """
+        err = [err] if not isinstance(err, list) else err
+        self.run_errors.extend(err)
+
+    async def sync_run_end(
+        self,
+    ) -> AsyncIterator[
+        tuple[
+            ResponseT | None,
+            RequestT,
+            RequestInfo,
+            SchedulerState,
+        ]
+    ]:
+        """
+        Finalize single-node execution and propagate any stored errors.
+
+        :return: Empty iterator as there are no remote nodes
+        :raises Exception: Any error stored during execution via sync_run_error
+        """
+        if self.run_errors:
+            if len(self.run_errors) == 1:
+                raise self.run_errors[0]
+            else:
+                raise RuntimeError(
+                    f"Errors occurred during execution: {self.run_errors}"
+                )
+
+        if False:
+            # Force compiler to recognize as generator
+            yield None  # type: ignore[misc]
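For intuition about how the lifecycle above is meant to be extended beyond the shipped NonDistributedEnvironment, here is a minimal, hypothetical sketch of a two-node subclass. The class name TwoNodeShardEnvironment, the node_rank parameter, and the index-parity sharding are illustrative assumptions only; they are not part of the guidellm package. The sketch only assumes the module layout shown in the diff above (guidellm.scheduler.environments.Environment).

# Hypothetical sketch only -- not part of guidellm.
import time

from guidellm.scheduler.environments import Environment


class TwoNodeShardEnvironment(Environment[str, str]):
    """Illustrative two-node environment that shards requests by index parity."""

    def __init__(self, node_rank: int):
        self.node_rank = node_rank  # 0 or 1, assumed to be supplied by a launcher
        self.errors: list[Exception] = []

    async def sync_run_params(self, requests, strategy, constraints):
        # Keep only the requests this node owns; a real implementation would
        # also verify that both nodes agree on strategy and constraints.
        local = [req for ind, req in enumerate(requests) if ind % 2 == self.node_rank]
        return local, strategy, constraints

    async def sync_run_start(self) -> float:
        # A real implementation would negotiate a shared timestamp with the
        # peer node; this sketch simply starts one second from now.
        return time.time() + 1.0

    async def update_run_iteration(self, response, request, request_info, state):
        # Could broadcast per-request progress to the peer node; no-op here.
        pass

    async def sync_run_error(self, err):
        # Collect errors so sync_run_end() can surface them.
        self.errors.extend(err if isinstance(err, list) else [err])

    async def sync_run_end(self):
        # A real implementation would yield (response, request, info, state)
        # tuples received from the peer node before finishing.
        if self.errors:
            raise RuntimeError(f"Errors during run: {self.errors}")
        return
        yield  # unreachable; marks this method as an async generator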