guidellm 0.4.0a21__py3-none-any.whl → 0.4.0a169__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of guidellm might be problematic. Click here for more details.
- guidellm/__init__.py +5 -2
- guidellm/__main__.py +452 -252
- guidellm/backends/__init__.py +33 -0
- guidellm/backends/backend.py +110 -0
- guidellm/backends/openai.py +355 -0
- guidellm/backends/response_handlers.py +455 -0
- guidellm/benchmark/__init__.py +53 -39
- guidellm/benchmark/benchmarker.py +150 -317
- guidellm/benchmark/entrypoints.py +467 -128
- guidellm/benchmark/output.py +519 -771
- guidellm/benchmark/profile.py +580 -280
- guidellm/benchmark/progress.py +568 -549
- guidellm/benchmark/scenarios/__init__.py +40 -0
- guidellm/benchmark/scenarios/chat.json +6 -0
- guidellm/benchmark/scenarios/rag.json +6 -0
- guidellm/benchmark/schemas.py +2086 -0
- guidellm/data/__init__.py +28 -4
- guidellm/data/collators.py +16 -0
- guidellm/data/deserializers/__init__.py +53 -0
- guidellm/data/deserializers/deserializer.py +144 -0
- guidellm/data/deserializers/file.py +222 -0
- guidellm/data/deserializers/huggingface.py +94 -0
- guidellm/data/deserializers/memory.py +194 -0
- guidellm/data/deserializers/synthetic.py +348 -0
- guidellm/data/loaders.py +149 -0
- guidellm/data/preprocessors/__init__.py +25 -0
- guidellm/data/preprocessors/formatters.py +404 -0
- guidellm/data/preprocessors/mappers.py +198 -0
- guidellm/data/preprocessors/preprocessor.py +31 -0
- guidellm/data/processor.py +31 -0
- guidellm/data/schemas.py +13 -0
- guidellm/data/utils/__init__.py +6 -0
- guidellm/data/utils/dataset.py +94 -0
- guidellm/extras/__init__.py +4 -0
- guidellm/extras/audio.py +215 -0
- guidellm/extras/vision.py +242 -0
- guidellm/logger.py +2 -2
- guidellm/mock_server/__init__.py +8 -0
- guidellm/mock_server/config.py +84 -0
- guidellm/mock_server/handlers/__init__.py +17 -0
- guidellm/mock_server/handlers/chat_completions.py +280 -0
- guidellm/mock_server/handlers/completions.py +280 -0
- guidellm/mock_server/handlers/tokenizer.py +142 -0
- guidellm/mock_server/models.py +510 -0
- guidellm/mock_server/server.py +168 -0
- guidellm/mock_server/utils.py +302 -0
- guidellm/preprocess/dataset.py +23 -26
- guidellm/presentation/builder.py +2 -2
- guidellm/presentation/data_models.py +25 -21
- guidellm/presentation/injector.py +2 -3
- guidellm/scheduler/__init__.py +65 -26
- guidellm/scheduler/constraints.py +1035 -0
- guidellm/scheduler/environments.py +252 -0
- guidellm/scheduler/scheduler.py +140 -368
- guidellm/scheduler/schemas.py +272 -0
- guidellm/scheduler/strategies.py +519 -0
- guidellm/scheduler/worker.py +391 -420
- guidellm/scheduler/worker_group.py +707 -0
- guidellm/schemas/__init__.py +31 -0
- guidellm/schemas/info.py +159 -0
- guidellm/schemas/request.py +226 -0
- guidellm/schemas/response.py +119 -0
- guidellm/schemas/stats.py +228 -0
- guidellm/{config.py → settings.py} +32 -21
- guidellm/utils/__init__.py +95 -8
- guidellm/utils/auto_importer.py +98 -0
- guidellm/utils/cli.py +71 -2
- guidellm/utils/console.py +183 -0
- guidellm/utils/encoding.py +778 -0
- guidellm/utils/functions.py +134 -0
- guidellm/utils/hf_datasets.py +1 -2
- guidellm/utils/hf_transformers.py +4 -4
- guidellm/utils/imports.py +9 -0
- guidellm/utils/messaging.py +1118 -0
- guidellm/utils/mixins.py +115 -0
- guidellm/utils/pydantic_utils.py +411 -0
- guidellm/utils/random.py +3 -4
- guidellm/utils/registry.py +220 -0
- guidellm/utils/singleton.py +133 -0
- guidellm/{objects → utils}/statistics.py +341 -247
- guidellm/utils/synchronous.py +159 -0
- guidellm/utils/text.py +163 -50
- guidellm/utils/typing.py +41 -0
- guidellm/version.py +1 -1
- {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a169.dist-info}/METADATA +33 -10
- guidellm-0.4.0a169.dist-info/RECORD +95 -0
- guidellm/backend/__init__.py +0 -23
- guidellm/backend/backend.py +0 -259
- guidellm/backend/openai.py +0 -705
- guidellm/backend/response.py +0 -136
- guidellm/benchmark/aggregator.py +0 -760
- guidellm/benchmark/benchmark.py +0 -837
- guidellm/benchmark/scenario.py +0 -104
- guidellm/data/prideandprejudice.txt.gz +0 -0
- guidellm/dataset/__init__.py +0 -22
- guidellm/dataset/creator.py +0 -213
- guidellm/dataset/entrypoints.py +0 -42
- guidellm/dataset/file.py +0 -92
- guidellm/dataset/hf_datasets.py +0 -62
- guidellm/dataset/in_memory.py +0 -132
- guidellm/dataset/synthetic.py +0 -287
- guidellm/objects/__init__.py +0 -18
- guidellm/objects/pydantic.py +0 -89
- guidellm/request/__init__.py +0 -18
- guidellm/request/loader.py +0 -284
- guidellm/request/request.py +0 -79
- guidellm/request/types.py +0 -10
- guidellm/scheduler/queues.py +0 -25
- guidellm/scheduler/result.py +0 -155
- guidellm/scheduler/strategy.py +0 -495
- guidellm-0.4.0a21.dist-info/RECORD +0 -62
- {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a169.dist-info}/WHEEL +0 -0
- {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a169.dist-info}/entry_points.txt +0 -0
- {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a169.dist-info}/licenses/LICENSE +0 -0
- {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a169.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,272 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Core data structures and interfaces for the GuideLLM scheduler system.
|
|
3
|
+
|
|
4
|
+
Provides type-safe abstractions for distributed request processing, timing
|
|
5
|
+
measurements, and backend interfaces for benchmarking operations. Central to
|
|
6
|
+
the scheduler architecture, enabling request lifecycle tracking, backend
|
|
7
|
+
coordination, and state management across distributed worker processes.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import time
|
|
13
|
+
from collections.abc import AsyncIterator
|
|
14
|
+
from typing import Any, Generic, Literal, Protocol, TypeVar
|
|
15
|
+
|
|
16
|
+
from pydantic import Field
|
|
17
|
+
from typing_extensions import TypeAliasType, TypedDict
|
|
18
|
+
|
|
19
|
+
from guidellm.schemas import RequestInfo
|
|
20
|
+
from guidellm.utils import RegistryMixin, StandardBaseModel
|
|
21
|
+
from guidellm.utils.registry import RegistryObjT
|
|
22
|
+
|
|
23
|
+
# Public names re-exported by this module (kept in alphabetical order).
__all__ = [
    "BackendInterface",
    "BackendT",
    "MultiTurnRequestT",
    "RequestT",
    "ResponseT",
    "SchedulerMessagingPydanticRegistry",
    "SchedulerState",
    "SchedulerUpdateAction",
    "SchedulerUpdateActionProgress",
]
|
|
34
|
+
|
|
35
|
+
#: Generic request object type for scheduler processing
RequestT = TypeVar("RequestT")

#: Generic response object type returned by backend processing
ResponseT = TypeVar("ResponseT")
|
|
40
|
+
|
|
41
|
+
# Generic alias for a multi-turn conversation: a list or tuple whose items are
# either a bare request or a ``(request, float)`` pair. The float is presumably
# a per-turn delay, going by the description below -- confirm against callers.
MultiTurnRequestT = TypeAliasType(
    "MultiTurnRequestT",
    # The tuple arm must be variadic (``...``): a bare ``tuple[X]`` describes a
    # tuple of exactly one element, which cannot hold more than one turn.
    list[RequestT | tuple[RequestT, float]]
    | tuple[RequestT | tuple[RequestT, float], ...],
    type_params=(RequestT,),
)
"Multi-turn request structure supporting conversation history with optional delays"
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class SchedulerMessagingPydanticRegistry(RegistryMixin[RegistryObjT]):
    """
    Registry of the Pydantic types exchanged in scheduler inter-process messages.

    Adds no behavior of its own beyond what ``RegistryMixin`` provides; it exists
    as a dedicated, generic registration point for the Pydantic class types used
    when distributed scheduler components and worker processes communicate.
    """
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
class BackendInterface(Protocol, Generic[RequestT, ResponseT]):
    """
    Structural contract for backends that process scheduler requests.

    A backend exposes its capacity limits and metadata as properties and
    implements the startup, validation, request resolution, and shutdown
    lifecycle hooks below. For multi-process environments, all properties must
    be pickleable before ``process_startup`` is called.

    Example:
    ::
        class CustomBackend(BackendInterface):
            @property
            def processes_limit(self) -> int:
                return 4

            async def resolve(self, request, request_info, history=None):
                yield response, updated_request_info
    """

    @property
    def processes_limit(self) -> int | None:
        """
        :return: Maximum worker processes supported, or None if unlimited
        """
        ...

    @property
    def requests_limit(self) -> int | None:
        """
        :return: Maximum concurrent requests supported, or None if unlimited
        """
        ...

    @property
    def info(self) -> dict[str, Any]:
        """
        :return: Backend metadata including model initialization and configuration
        """
        ...

    async def process_startup(self) -> None:
        """
        Run the backend's initialization and startup procedures.

        :raises Exception: Implementation-specific exceptions for startup failures
        """
        ...

    async def validate(self) -> None:
        """
        Check the backend's configuration and operational status.

        :raises Exception: Implementation-specific exceptions for validation failures
        """
        ...

    async def process_shutdown(self) -> None:
        """
        Run the backend's cleanup and shutdown procedures.

        :raises Exception: Implementation-specific exceptions for shutdown failures
        """
        ...

    # NOTE(review): this is declared ``async def`` with no ``yield`` in the body,
    # while the class docstring example implements it as an async generator.
    # Static type checkers may read this declaration as a coroutine that returns
    # an AsyncIterator rather than as an async generator -- confirm the intended
    # signature before relying on it for type checking.
    async def resolve(
        self,
        request: RequestT,
        request_info: RequestInfo,
        history: list[tuple[RequestT, ResponseT]] | None = None,
    ) -> AsyncIterator[tuple[ResponseT, RequestInfo]]:
        """
        Process one request, yielding incremental response updates.

        :param request: The request object to process
        :param request_info: Scheduling metadata and timing information
        :param history: Conversation history for multi-turn requests
        :yield: Tuples of (response, updated_request_info) for each response chunk
        :raises Exception: Implementation-specific exceptions for processing failures
        """
        ...
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
#: Generic backend interface type for request processing
BackendT = TypeVar(
    "BackendT",
    bound=BackendInterface,
)
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
class SchedulerUpdateActionProgress(TypedDict, total=False):
|
|
139
|
+
"""
|
|
140
|
+
Progress tracking data for scheduler operations.
|
|
141
|
+
|
|
142
|
+
Provides estimates for remaining work in scheduler operations, including
|
|
143
|
+
fraction complete, request counts, and duration. Used by constraints and
|
|
144
|
+
monitoring systems to track execution progress and make termination decisions.
|
|
145
|
+
"""
|
|
146
|
+
|
|
147
|
+
remaining_fraction: float | None
|
|
148
|
+
remaining_requests: float | None
|
|
149
|
+
remaining_duration: float | None
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
class SchedulerUpdateAction(StandardBaseModel):
    """
    Control signal describing how the scheduler should proceed.

    Bundles a queuing directive, a processing directive, free-form metadata, and
    progress estimates. Constraints use it to communicate termination conditions
    and progress to scheduler components. Every field has a default, so an
    instance built with no arguments means "continue" for both directives.

    Example:
    ::
        action = SchedulerUpdateAction(
            request_queuing="stop",
            request_processing="continue",
            metadata={"reason": "max_requests_reached"}
        )
    """

    request_queuing: Literal["continue", "stop"] = Field(
        description="Action to take for request queuing operations",
        default="continue",
    )
    request_processing: Literal["continue", "stop_local", "stop_all"] = Field(
        description="Action to take for request processing operations",
        default="continue",
    )
    metadata: dict[str, Any] = Field(
        description="Additional context and data for the scheduler action",
        default_factory=dict,
    )
    progress: SchedulerUpdateActionProgress = Field(
        description="Progress information for the scheduler action",
        # Builds a fresh, empty progress mapping for each instance
        default_factory=lambda: SchedulerUpdateActionProgress(),
    )
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
class SchedulerState(StandardBaseModel):
    """
    Snapshot of a scheduler run's progress, timing, and request counters.

    Records when the run started and stopped, which constraints ended queuing
    and processing, the latest remaining-work estimates, and counts of requests
    in each lifecycle stage. Provides real-time metrics for monitoring and
    decision-making across distributed worker processes.

    Example:
    ::
        state = SchedulerState(node_id=0, num_processes=4)
        state.created_requests += 1
        state.queued_requests += 1
        completion_rate = state.processed_requests / state.created_requests
    """

    # --- Identity and timing ---
    node_id: int = Field(
        description="Unique identifier for this scheduler node",
        default=-1,
    )
    num_processes: int = Field(
        description="Number of worker processes in this scheduler",
        default=-1,
    )
    start_time: float = Field(
        description="Unix timestamp when the scheduler started",
        # Evaluated per instance, so it defaults to the construction time
        default_factory=time.time,
    )
    end_time: float | None = Field(
        description="Unix timestamp when the scheduler stopped",
        default=None,
    )
    end_queuing_time: float | None = Field(
        description="Unix timestamp when request queuing stopped",
        default=None,
    )
    end_queuing_constraints: dict[str, SchedulerUpdateAction] = Field(
        description="Constraints that triggered queuing termination",
        default_factory=dict,
    )
    end_processing_time: float | None = Field(
        description="Unix timestamp when request processing stopped",
        default=None,
    )
    end_processing_constraints: dict[str, SchedulerUpdateAction] = Field(
        description="Constraints that triggered processing termination",
        default_factory=dict,
    )
    scheduler_constraints: dict[str, SchedulerUpdateAction] = Field(
        description="Latest state from all constraints applied during scheduler run",
        default_factory=dict,
    )

    # --- Remaining-work estimates (None when unknown) ---
    remaining_fraction: float | None = Field(
        description="Estimated fraction of remaining progress, if known",
        default=None,
    )
    remaining_requests: float | None = Field(
        description="Estimated number of remaining requests to process, if known",
        default=None,
    )
    remaining_duration: float | None = Field(
        description="Estimated remaining time in seconds for scheduler run, if known",
        default=None,
    )

    # --- Request counters ---
    created_requests: int = Field(
        description="Total number of requests created",
        default=0,
    )
    queued_requests: int = Field(
        description="Total number of requests queued for processing",
        default=0,
    )
    pending_requests: int = Field(
        description="Number of requests pending processing within a worker",
        default=0,
    )
    processing_requests: int = Field(
        description="Number of requests currently being processed",
        default=0,
    )
    processed_requests: int = Field(
        description="Number of requests that completed processing",
        default=0,
    )
    successful_requests: int = Field(
        description="Number of requests that completed successfully",
        default=0,
    )
    errored_requests: int = Field(
        description="Number of requests that failed with errors",
        default=0,
    )
    cancelled_requests: int = Field(
        description="Number of requests that were cancelled",
        default=0,
    )
|