guidellm 0.3.1__py3-none-any.whl → 0.6.0a5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- guidellm/__init__.py +5 -2
- guidellm/__main__.py +524 -255
- guidellm/backends/__init__.py +33 -0
- guidellm/backends/backend.py +109 -0
- guidellm/backends/openai.py +340 -0
- guidellm/backends/response_handlers.py +428 -0
- guidellm/benchmark/__init__.py +69 -39
- guidellm/benchmark/benchmarker.py +160 -316
- guidellm/benchmark/entrypoints.py +560 -127
- guidellm/benchmark/outputs/__init__.py +24 -0
- guidellm/benchmark/outputs/console.py +633 -0
- guidellm/benchmark/outputs/csv.py +721 -0
- guidellm/benchmark/outputs/html.py +473 -0
- guidellm/benchmark/outputs/output.py +169 -0
- guidellm/benchmark/outputs/serialized.py +69 -0
- guidellm/benchmark/profiles.py +718 -0
- guidellm/benchmark/progress.py +553 -556
- guidellm/benchmark/scenarios/__init__.py +40 -0
- guidellm/benchmark/scenarios/chat.json +6 -0
- guidellm/benchmark/scenarios/rag.json +6 -0
- guidellm/benchmark/schemas/__init__.py +66 -0
- guidellm/benchmark/schemas/base.py +402 -0
- guidellm/benchmark/schemas/generative/__init__.py +55 -0
- guidellm/benchmark/schemas/generative/accumulator.py +841 -0
- guidellm/benchmark/schemas/generative/benchmark.py +163 -0
- guidellm/benchmark/schemas/generative/entrypoints.py +381 -0
- guidellm/benchmark/schemas/generative/metrics.py +927 -0
- guidellm/benchmark/schemas/generative/report.py +158 -0
- guidellm/data/__init__.py +34 -4
- guidellm/data/builders.py +541 -0
- guidellm/data/collators.py +16 -0
- guidellm/data/config.py +120 -0
- guidellm/data/deserializers/__init__.py +49 -0
- guidellm/data/deserializers/deserializer.py +141 -0
- guidellm/data/deserializers/file.py +223 -0
- guidellm/data/deserializers/huggingface.py +94 -0
- guidellm/data/deserializers/memory.py +194 -0
- guidellm/data/deserializers/synthetic.py +246 -0
- guidellm/data/entrypoints.py +52 -0
- guidellm/data/loaders.py +190 -0
- guidellm/data/preprocessors/__init__.py +27 -0
- guidellm/data/preprocessors/formatters.py +410 -0
- guidellm/data/preprocessors/mappers.py +196 -0
- guidellm/data/preprocessors/preprocessor.py +30 -0
- guidellm/data/processor.py +29 -0
- guidellm/data/schemas.py +175 -0
- guidellm/data/utils/__init__.py +6 -0
- guidellm/data/utils/dataset.py +94 -0
- guidellm/extras/__init__.py +4 -0
- guidellm/extras/audio.py +220 -0
- guidellm/extras/vision.py +242 -0
- guidellm/logger.py +2 -2
- guidellm/mock_server/__init__.py +8 -0
- guidellm/mock_server/config.py +84 -0
- guidellm/mock_server/handlers/__init__.py +17 -0
- guidellm/mock_server/handlers/chat_completions.py +280 -0
- guidellm/mock_server/handlers/completions.py +280 -0
- guidellm/mock_server/handlers/tokenizer.py +142 -0
- guidellm/mock_server/models.py +510 -0
- guidellm/mock_server/server.py +238 -0
- guidellm/mock_server/utils.py +302 -0
- guidellm/scheduler/__init__.py +69 -26
- guidellm/scheduler/constraints/__init__.py +49 -0
- guidellm/scheduler/constraints/constraint.py +325 -0
- guidellm/scheduler/constraints/error.py +411 -0
- guidellm/scheduler/constraints/factory.py +182 -0
- guidellm/scheduler/constraints/request.py +312 -0
- guidellm/scheduler/constraints/saturation.py +722 -0
- guidellm/scheduler/environments.py +252 -0
- guidellm/scheduler/scheduler.py +137 -368
- guidellm/scheduler/schemas.py +358 -0
- guidellm/scheduler/strategies.py +617 -0
- guidellm/scheduler/worker.py +413 -419
- guidellm/scheduler/worker_group.py +712 -0
- guidellm/schemas/__init__.py +65 -0
- guidellm/schemas/base.py +417 -0
- guidellm/schemas/info.py +188 -0
- guidellm/schemas/request.py +235 -0
- guidellm/schemas/request_stats.py +349 -0
- guidellm/schemas/response.py +124 -0
- guidellm/schemas/statistics.py +1018 -0
- guidellm/{config.py → settings.py} +31 -24
- guidellm/utils/__init__.py +71 -8
- guidellm/utils/auto_importer.py +98 -0
- guidellm/utils/cli.py +132 -5
- guidellm/utils/console.py +566 -0
- guidellm/utils/encoding.py +778 -0
- guidellm/utils/functions.py +159 -0
- guidellm/utils/hf_datasets.py +1 -2
- guidellm/utils/hf_transformers.py +4 -4
- guidellm/utils/imports.py +9 -0
- guidellm/utils/messaging.py +1118 -0
- guidellm/utils/mixins.py +115 -0
- guidellm/utils/random.py +3 -4
- guidellm/utils/registry.py +220 -0
- guidellm/utils/singleton.py +133 -0
- guidellm/utils/synchronous.py +159 -0
- guidellm/utils/text.py +163 -50
- guidellm/utils/typing.py +41 -0
- guidellm/version.py +2 -2
- guidellm-0.6.0a5.dist-info/METADATA +364 -0
- guidellm-0.6.0a5.dist-info/RECORD +109 -0
- guidellm/backend/__init__.py +0 -23
- guidellm/backend/backend.py +0 -259
- guidellm/backend/openai.py +0 -708
- guidellm/backend/response.py +0 -136
- guidellm/benchmark/aggregator.py +0 -760
- guidellm/benchmark/benchmark.py +0 -837
- guidellm/benchmark/output.py +0 -997
- guidellm/benchmark/profile.py +0 -409
- guidellm/benchmark/scenario.py +0 -104
- guidellm/data/prideandprejudice.txt.gz +0 -0
- guidellm/dataset/__init__.py +0 -22
- guidellm/dataset/creator.py +0 -213
- guidellm/dataset/entrypoints.py +0 -42
- guidellm/dataset/file.py +0 -92
- guidellm/dataset/hf_datasets.py +0 -62
- guidellm/dataset/in_memory.py +0 -132
- guidellm/dataset/synthetic.py +0 -287
- guidellm/objects/__init__.py +0 -18
- guidellm/objects/pydantic.py +0 -89
- guidellm/objects/statistics.py +0 -953
- guidellm/preprocess/__init__.py +0 -3
- guidellm/preprocess/dataset.py +0 -374
- guidellm/presentation/__init__.py +0 -28
- guidellm/presentation/builder.py +0 -27
- guidellm/presentation/data_models.py +0 -232
- guidellm/presentation/injector.py +0 -66
- guidellm/request/__init__.py +0 -18
- guidellm/request/loader.py +0 -284
- guidellm/request/request.py +0 -79
- guidellm/request/types.py +0 -10
- guidellm/scheduler/queues.py +0 -25
- guidellm/scheduler/result.py +0 -155
- guidellm/scheduler/strategy.py +0 -495
- guidellm-0.3.1.dist-info/METADATA +0 -329
- guidellm-0.3.1.dist-info/RECORD +0 -62
- {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/WHEEL +0 -0
- {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/entry_points.txt +0 -0
- {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/licenses/LICENSE +0 -0
- {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Pydantic schema models for GuideLLM operations.
|
|
3
|
+
|
|
4
|
+
Provides standardized data models and type definitions for generation requests,
|
|
5
|
+
responses, timing measurements, and statistics aggregation. These schemas ensure
|
|
6
|
+
type safety and consistent data handling across the benchmarking pipeline,
|
|
7
|
+
from request submission through backend processing to results compilation.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
from .base import (
|
|
13
|
+
BaseModelT,
|
|
14
|
+
ErroredT,
|
|
15
|
+
IncompleteT,
|
|
16
|
+
PydanticClassRegistryMixin,
|
|
17
|
+
RegisterClassT,
|
|
18
|
+
ReloadableBaseModel,
|
|
19
|
+
StandardBaseDict,
|
|
20
|
+
StandardBaseModel,
|
|
21
|
+
StatusBreakdown,
|
|
22
|
+
SuccessfulT,
|
|
23
|
+
TotalT,
|
|
24
|
+
)
|
|
25
|
+
from .info import RequestInfo, RequestTimings
|
|
26
|
+
from .request import (
|
|
27
|
+
GenerationRequest,
|
|
28
|
+
GenerationRequestArguments,
|
|
29
|
+
GenerativeRequestType,
|
|
30
|
+
UsageMetrics,
|
|
31
|
+
)
|
|
32
|
+
from .request_stats import GenerativeRequestStats
|
|
33
|
+
from .response import GenerationResponse
|
|
34
|
+
from .statistics import (
|
|
35
|
+
DistributionSummary,
|
|
36
|
+
FunctionObjT,
|
|
37
|
+
Percentiles,
|
|
38
|
+
StatusDistributionSummary,
|
|
39
|
+
)
|
|
40
|
+
|
|
41
|
+
__all__ = [
|
|
42
|
+
"BaseModelT",
|
|
43
|
+
"DistributionSummary",
|
|
44
|
+
"ErroredT",
|
|
45
|
+
"FunctionObjT",
|
|
46
|
+
"GenerationRequest",
|
|
47
|
+
"GenerationRequestArguments",
|
|
48
|
+
"GenerationResponse",
|
|
49
|
+
"GenerativeRequestStats",
|
|
50
|
+
"GenerativeRequestType",
|
|
51
|
+
"IncompleteT",
|
|
52
|
+
"Percentiles",
|
|
53
|
+
"PydanticClassRegistryMixin",
|
|
54
|
+
"RegisterClassT",
|
|
55
|
+
"ReloadableBaseModel",
|
|
56
|
+
"RequestInfo",
|
|
57
|
+
"RequestTimings",
|
|
58
|
+
"StandardBaseDict",
|
|
59
|
+
"StandardBaseModel",
|
|
60
|
+
"StatusBreakdown",
|
|
61
|
+
"StatusDistributionSummary",
|
|
62
|
+
"SuccessfulT",
|
|
63
|
+
"TotalT",
|
|
64
|
+
"UsageMetrics",
|
|
65
|
+
]
|
guidellm/schemas/base.py
ADDED
|
@@ -0,0 +1,417 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Pydantic utilities for polymorphic model serialization and registry integration.
|
|
3
|
+
|
|
4
|
+
Provides integration between Pydantic and the registry system, enabling
|
|
5
|
+
polymorphic serialization and deserialization of Pydantic models using
|
|
6
|
+
a discriminator field and dynamic class registry. Includes base model classes
|
|
7
|
+
with standardized configurations and generic status breakdown models for
|
|
8
|
+
structured result organization.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
from abc import ABC, abstractmethod
|
|
14
|
+
from typing import Any, ClassVar, Generic, TypeVar, cast, get_args, get_origin
|
|
15
|
+
|
|
16
|
+
from pydantic import BaseModel, ConfigDict, Field, GetCoreSchemaHandler
|
|
17
|
+
from pydantic_core import CoreSchema, core_schema
|
|
18
|
+
|
|
19
|
+
from guidellm.utils.registry import RegistryMixin
|
|
20
|
+
|
|
21
|
+
__all__ = [
|
|
22
|
+
"BaseModelT",
|
|
23
|
+
"ErroredT",
|
|
24
|
+
"IncompleteT",
|
|
25
|
+
"PydanticClassRegistryMixin",
|
|
26
|
+
"RegisterClassT",
|
|
27
|
+
"ReloadableBaseModel",
|
|
28
|
+
"StandardBaseDict",
|
|
29
|
+
"StandardBaseModel",
|
|
30
|
+
"StatusBreakdown",
|
|
31
|
+
"SuccessfulT",
|
|
32
|
+
"TotalT",
|
|
33
|
+
]
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
BaseModelT = TypeVar("BaseModelT", bound=BaseModel)
|
|
37
|
+
RegisterClassT = TypeVar("RegisterClassT", bound=type)
|
|
38
|
+
SuccessfulT = TypeVar("SuccessfulT")
|
|
39
|
+
ErroredT = TypeVar("ErroredT")
|
|
40
|
+
IncompleteT = TypeVar("IncompleteT")
|
|
41
|
+
TotalT = TypeVar("TotalT")
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class ReloadableBaseModel(BaseModel):
    """
    Base Pydantic model with schema reloading capabilities.

    Provides dynamic schema rebuilding functionality for models that need to
    update their validation schemas at runtime, particularly useful when
    working with registry-based polymorphic models where new types are
    registered after initial class definition.
    """

    model_config = ConfigDict(
        extra="ignore",
        use_enum_values=True,
        from_attributes=True,
        arbitrary_types_allowed=True,
    )

    @classmethod
    def reload_schema(cls, parents: bool = True) -> None:
        """
        Reload the class schema with updated registry information.

        Forces a complete rebuild of the Pydantic model schema to incorporate
        any changes made to associated registries or validation rules.

        :param parents: Whether to also rebuild schemas for any pydantic
            models whose fields reference this model.
        """
        cls.model_rebuild(force=True)

        if parents:
            cls.reload_parent_schemas()

    @classmethod
    def reload_parent_schemas(cls) -> None:
        """
        Rebuild the schemas of Pydantic models that reference this model.

        Collects every currently defined ``BaseModel`` subclass (other than
        ``cls`` itself) by walking ``__subclasses__()`` from ``BaseModel``, then,
        for each ``BaseModel`` class in this class's MRO, rebuilds the collected
        models that have a field annotated with that class.
        """
        potential_parents: set[type[BaseModel]] = {BaseModel}
        stack: list[type[BaseModel]] = [BaseModel]

        # Depth-first walk over the whole BaseModel subclass tree; the set
        # doubles as the "visited" marker for diamond-shaped hierarchies.
        while stack:
            current = stack.pop()
            for subclass in current.__subclasses__():
                if (
                    issubclass(subclass, BaseModel)
                    and subclass is not cls
                    and subclass not in potential_parents
                ):
                    potential_parents.add(subclass)
                    stack.append(subclass)

        for check in cls.__mro__:
            if isinstance(check, type) and issubclass(check, BaseModel):
                cls._reload_schemas_depending_on(check, potential_parents)

    @classmethod
    def _reload_schemas_depending_on(
        cls, target: type[BaseModel], types: set[type]
    ) -> None:
        """
        Rebuild every model in ``types`` that has a field using ``target``.

        Passes over ``types`` are repeated for as long as at least one rebuilt
        model's JSON schema differs from the one it had before the rebuild, so
        that changes can propagate through chains of referencing models.

        :param target: Model class whose dependants should be rebuilt
        :param types: Candidate classes to inspect for fields using ``target``
        """
        changed = True
        while changed:
            changed = False
            for candidate in types:
                if (
                    isinstance(candidate, type)
                    and issubclass(candidate, BaseModel)
                    and any(
                        cls._uses_type(target, field_info.annotation)
                        for field_info in candidate.model_fields.values()
                        if field_info.annotation is not None
                    )
                ):
                    # A model whose schema cannot be generated yet has no
                    # baseline to compare against; it is still rebuilt but
                    # does not trigger another pass.
                    try:
                        before = candidate.model_json_schema()
                    except Exception:  # noqa: BLE001
                        before = None
                    candidate.model_rebuild(force=True)
                    if before is not None:
                        after = candidate.model_json_schema()
                        changed |= before != after

    @classmethod
    def _uses_type(cls, target: type, candidate: Any) -> bool:
        """
        Check whether an annotation refers to ``target`` anywhere inside it.

        :param target: Class being searched for
        :param candidate: Annotation to inspect; may be a plain class or an
            arbitrarily nested generic alias / union
        :return: True if ``candidate`` is ``target``, a subclass of it, a
            generic whose origin is ``target`` or a subclass of it, or a
            generic with any (transitively nested) argument for which this
            holds; False otherwise
        """
        if target is candidate:
            return True

        origin = get_origin(candidate)

        if origin is None:
            return isinstance(candidate, type) and issubclass(candidate, target)

        if isinstance(origin, type) and (
            target is origin or issubclass(origin, target)
        ):
            return True

        # Recurse into every argument, not only plain classes: parameterised
        # generics such as ``List[Foo]`` are not ``type`` instances, so
        # filtering on ``isinstance(arg, type)`` would miss annotations like
        # ``Optional[List[Foo]]``. Non-type arguments (literal values,
        # ``...``, metadata objects) fall through to the ``origin is None``
        # branch above and yield False.
        return any(cls._uses_type(target, arg) for arg in get_args(candidate))
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
class StandardBaseModel(BaseModel):
    """
    Shared Pydantic base class carrying GuideLLM's standard model settings.

    The configuration ignores unknown input fields, stores enum members as
    their values, and permits population from object attributes. A small
    helper exposes the declared default of any field.

    Example:
    ::
        class MyModel(StandardBaseModel):
            name: str
            value: int = 42

        # Access default values
        default_value = MyModel.get_default("value")  # Returns 42
    """

    model_config = ConfigDict(
        from_attributes=True,
        use_enum_values=True,
        extra="ignore",
    )

    @classmethod
    def get_default(cls: type[BaseModel], field: str) -> Any:
        """
        Look up the declared default of a model field.

        :param field: Name of the field to get the default value for
        :return: The ``default`` attribute stored on the field's definition
        :raises KeyError: If the field does not exist in the model
        """
        field_info = cls.model_fields[field]
        return field_info.default
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
class StandardBaseDict(StandardBaseModel):
    """
    Variant of StandardBaseModel that keeps fields it does not declare.

    Overrides the inherited configuration so that extra input fields are
    accepted rather than ignored, and additionally permits arbitrary
    (non-Pydantic) types in annotations. Intended for loosely structured
    payloads where only some keys are known ahead of time.
    """

    model_config = ConfigDict(
        arbitrary_types_allowed=True,
        from_attributes=True,
        use_enum_values=True,
        extra="allow",
    )
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
class StatusBreakdown(BaseModel, Generic[SuccessfulT, ErroredT, IncompleteT, TotalT]):
    """
    Generic container grouping a result by request outcome.

    Holds one value per status bucket (successful, errored, incomplete) plus
    a combined total. Each bucket is independently typed through the class's
    type parameters, and every bucket defaults to ``None`` when not supplied.

    Example:
    ::
        from guidellm.schemas import StatusBreakdown

        # Define a breakdown for request counts
        breakdown = StatusBreakdown[int, int, int, int](
            successful=150,
            errored=5,
            incomplete=10,
            total=165
        )
    """

    successful: SuccessfulT = Field(
        default=None,  # type: ignore[assignment]
        description="Results or metrics for requests with successful completion status",
    )
    errored: ErroredT = Field(
        default=None,  # type: ignore[assignment]
        description="Results or metrics for requests with error completion status",
    )
    incomplete: IncompleteT = Field(
        default=None,  # type: ignore[assignment]
        description="Results or metrics for requests with incomplete processing status",
    )
    total: TotalT = Field(
        default=None,  # type: ignore[assignment]
        description="Aggregated results or metrics combining all status categories",
    )
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
class PydanticClassRegistryMixin(
    ReloadableBaseModel, RegistryMixin[type[BaseModelT]], ABC, Generic[BaseModelT]
):
    """
    Polymorphic Pydantic model mixin enabling registry-based dynamic instantiation.

    Integrates Pydantic validation with the registry system to enable polymorphic
    serialization and deserialization based on a discriminator field. Validation
    of the base type dispatches to the registered subclass whose registry name
    matches the discriminator value.

    Example:
    ::
        from guidellm.schemas import PydanticClassRegistryMixin

        class BaseConfig(PydanticClassRegistryMixin["BaseConfig"]):
            schema_discriminator: ClassVar[str] = "config_type"
            config_type: str = Field(description="Configuration type identifier")

            @classmethod
            def __pydantic_schema_base_type__(cls) -> type["BaseConfig"]:
                return BaseConfig

        @BaseConfig.register("database")
        class DatabaseConfig(BaseConfig):
            config_type: str = "database"
            connection_string: str = Field(description="Database connection string")

        # Dynamic instantiation based on discriminator
        config = BaseConfig.model_validate({
            "config_type": "database",
            "connection_string": "postgresql://localhost:5432/db"
        })

    :cvar schema_discriminator: Field name used for polymorphic type discrimination
    """

    schema_discriminator: ClassVar[str] = "model_type"

    def __new__(cls, *args, **kwargs):  # noqa: ARG004
        """
        Prevent direct instantiation of base classes that use this mixin.

        Only allows instantiation of concrete subclasses, not the class returned
        by ``__pydantic_schema_base_type__``.

        :raises TypeError: If ``cls`` is the polymorphic base type itself
        """
        base_type = cls.__pydantic_schema_base_type__()
        if cls is base_type:
            raise TypeError(f"only children of '{cls.__name__}' may be instantiated")
        return super().__new__(cls)

    @classmethod
    def register_decorator(
        cls, clazz: RegisterClassT, name: str | list[str] | None = None
    ) -> RegisterClassT:
        """
        Register a Pydantic model class with type validation and schema reload.

        Validates that the argument is a Pydantic BaseModel subclass before
        delegating to the parent registry implementation, then reloads this
        class's schema so the new type takes part in polymorphic validation.

        :param clazz: Pydantic model class to register in the polymorphic hierarchy
        :param name: Registry identifier(s) for the class, forwarded unchanged to
            the parent ``register_decorator``
        :return: The registered class unchanged for decorator chaining
        :raises TypeError: If clazz is not a class or not a Pydantic BaseModel
            subclass
        """
        # Guard with isinstance first: issubclass() itself raises an opaque
        # TypeError when handed a non-class object.
        if not (isinstance(clazz, type) and issubclass(clazz, BaseModel)):
            clazz_name = getattr(clazz, "__name__", repr(clazz))
            raise TypeError(
                f"Cannot register {clazz_name} as it is not a subclass of "
                "Pydantic BaseModel"
            )

        super().register_decorator(clazz, name=name)
        cls.reload_schema()

        return cast("RegisterClassT", clazz)

    @classmethod
    def __get_pydantic_core_schema__(
        cls, source_type: Any, handler: GetCoreSchemaHandler
    ) -> CoreSchema:
        """
        Generate polymorphic validation schema for dynamic type instantiation.

        When the schema is requested for the polymorphic base type, builds a
        tagged union over every registered class, keyed by registry name and
        discriminated on ``schema_discriminator``. If the registry is empty or
        unset, the fallback from ``__pydantic_generate_base_schema__`` is used
        instead. For any other source type the handler's regular schema for
        ``cls`` is returned.

        :param source_type: Type being processed for schema generation
        :param handler: Pydantic core schema generation handler
        :return: Tagged union schema for polymorphic validation or base schema
        """
        if source_type == cls.__pydantic_schema_base_type__():
            if not cls.registry:
                return cls.__pydantic_generate_base_schema__(handler)

            choices = {
                name: handler(model_class) for name, model_class in cls.registry.items()
            }

            return core_schema.tagged_union_schema(
                choices=choices,
                discriminator=cls.schema_discriminator,
            )

        return handler(cls)

    @classmethod
    @abstractmethod
    def __pydantic_schema_base_type__(cls) -> type[BaseModelT]:
        """
        Define the base type for polymorphic validation hierarchy.

        Must be implemented by subclasses to specify which type serves as the
        root of the polymorphic hierarchy for schema generation and validation.

        :return: Base class type for the polymorphic model hierarchy
        """
        ...

    @classmethod
    def __pydantic_generate_base_schema__(
        cls, handler: GetCoreSchemaHandler
    ) -> CoreSchema:
        """
        Generate fallback schema for polymorphic models without registry.

        Returns an "any" schema, used by ``__get_pydantic_core_schema__`` while
        the registry has no entries. The handler is accepted for overriding
        implementations but is not used here.

        :param handler: Pydantic core schema generation handler
        :return: Base CoreSchema that accepts any input
        """
        return core_schema.any_schema()

    @classmethod
    def auto_populate_registry(cls) -> bool:
        """
        Initialize registry with auto-discovery and reload validation schema.

        Delegates to the parent ``auto_populate_registry`` and then always
        reloads this class's schema so that any discovered types are included
        in polymorphic validation.

        :return: The value returned by the parent implementation (presumably
            True when the registry was populated by this call; confirm against
            ``RegistryMixin``)
        """
        populated = super().auto_populate_registry()
        cls.reload_schema()

        return populated

    @classmethod
    def registered_classes(cls) -> tuple[type[BaseModelT], ...]:
        """
        Get all registered pydantic classes from the registry.

        Triggers ``auto_populate_registry`` first when
        ``registry_auto_discovery`` is enabled so that discovered
        implementations are included.

        :return: Tuple of all registered classes including auto-discovered ones
        :raises ValueError: If the registry is still ``None`` at this point
        """
        if cls.registry_auto_discovery:
            cls.auto_populate_registry()

        if cls.registry is None:
            raise ValueError(
                "ClassRegistryMixin.registered_classes() must be called after "
                "registering classes with ClassRegistryMixin.register()."
            )

        return tuple(cls.registry.values())
|
guidellm/schemas/info.py
ADDED
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Core data structures and interfaces for the GuideLLM scheduler system.
|
|
3
|
+
|
|
4
|
+
Provides type-safe abstractions for distributed request processing, timing
|
|
5
|
+
measurements, and backend interfaces for benchmarking operations. Central to
|
|
6
|
+
the scheduler architecture, enabling request lifecycle tracking, backend
|
|
7
|
+
coordination, and state management across distributed worker processes.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import uuid
|
|
13
|
+
from typing import Literal
|
|
14
|
+
|
|
15
|
+
from pydantic import Field, computed_field
|
|
16
|
+
|
|
17
|
+
from guidellm.schemas.base import StandardBaseDict, StandardBaseModel
|
|
18
|
+
|
|
19
|
+
__all__ = ["RequestInfo", "RequestTimings"]
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class RequestTimings(StandardBaseDict):
    """
    Timestamps and iteration counters recorded over a request's lifecycle.

    Every timestamp field is optional and starts out as ``None``; the two
    iteration counters start at zero. Because the model derives from
    StandardBaseDict, additional undeclared timing keys are retained.
    """

    targeted_start: float | None = Field(
        description="Unix timestamp when request was initially targeted for execution",
        default=None,
    )
    queued: float | None = Field(
        description="Unix timestamp when request was placed into processing queue",
        default=None,
    )
    dequeued: float | None = Field(
        description="Unix timestamp when request was removed from queue for processing",
        default=None,
    )
    scheduled_at: float | None = Field(
        description="Unix timestamp when the request was scheduled for processing",
        default=None,
    )
    resolve_start: float | None = Field(
        description="Unix timestamp when backend resolution of the request began",
        default=None,
    )
    request_start: float | None = Field(
        description="Unix timestamp when the backend began processing the request",
        default=None,
    )
    first_request_iteration: float | None = Field(default=None)
    first_token_iteration: float | None = Field(default=None)
    last_token_iteration: float | None = Field(default=None)
    last_request_iteration: float | None = Field(default=None)
    request_iterations: int = Field(default=0)
    token_iterations: int = Field(default=0)
    request_end: float | None = Field(
        description="Unix timestamp when the backend completed processing the request",
        default=None,
    )
    resolve_end: float | None = Field(
        description="Unix timestamp when backend resolution of the request completed",
        default=None,
    )
    finalized: float | None = Field(
        description="Unix timestamp when request was processed by the scheduler",
        default=None,
    )

    @property
    def last_reported(self) -> float | None:
        """
        Get the most recent of the core lifecycle timestamps.

        Only ``queued``, ``dequeued``, ``scheduled_at``, ``resolve_start``,
        ``request_start``, ``request_end`` and ``resolve_end`` are considered.

        :return: The largest of those timestamps that is set, or None if none
            of them has been set
        """
        lifecycle_stamps = (
            self.queued,
            self.dequeued,
            self.scheduled_at,
            self.resolve_start,
            self.request_start,
            self.request_end,
            self.resolve_end,
        )
        return max(
            (stamp for stamp in lifecycle_stamps if stamp is not None),
            default=None,
        )
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
class RequestInfo(StandardBaseModel):
    """
    Complete information about a request in the scheduler system.

    Encapsulates the identifier, status, scheduler node/process identifiers,
    timing measurements, and error details for a request processed through
    the scheduler.

    Example:
    ::
        request = RequestInfo()
        request.status = "in_progress"
        start_time = request.started_at
        completion_time = request.completed_at
    """

    request_id: str = Field(
        description="Unique identifier for the request",
        default_factory=lambda: str(uuid.uuid4()),
    )
    status: Literal[
        "queued", "pending", "in_progress", "completed", "errored", "cancelled"
    ] = Field(description="Current processing status of the request", default="queued")
    scheduler_node_id: int = Field(
        description="ID/rank of the scheduler node handling the request",
        default=-1,
    )
    scheduler_process_id: int = Field(
        description="ID/rank of the node's scheduler process handling the request",
        default=-1,
    )
    scheduler_start_time: float = Field(
        description="Unix timestamp when scheduler processing began",
        default=-1,
    )
    timings: RequestTimings = Field(
        default_factory=RequestTimings,
        description="Timing measurements for the request lifecycle",
    )

    error: str | None = Field(
        default=None, description="Error message if the request status is 'errored'"
    )
    traceback: str | None = Field(
        default=None,
        description="Full traceback of the error if the request status is 'errored'",
    )

    @computed_field  # type: ignore[misc]
    @property
    def started_at(self) -> float | None:
        """
        Get the effective request processing start time.

        Prefers ``timings.request_start`` and falls back to
        ``timings.resolve_start`` when the former has not been set.

        :return: Unix timestamp when processing began, or None if not started
        """
        # Compare against None rather than relying on truthiness so that a
        # timestamp of 0.0 is not mistaken for "unset".
        if self.timings.request_start is not None:
            return self.timings.request_start
        return self.timings.resolve_start

    @computed_field  # type: ignore[misc]
    @property
    def completed_at(self) -> float | None:
        """
        Get the effective request processing completion time.

        Prefers ``timings.request_end`` and falls back to
        ``timings.resolve_end`` when the former has not been set.

        :return: Unix timestamp when processing completed, or None if not completed
        """
        # Same None-vs-falsy distinction as in ``started_at``.
        if self.timings.request_end is not None:
            return self.timings.request_end
        return self.timings.resolve_end

    def model_copy(self, **kwargs) -> RequestInfo:  # type: ignore[override]
        """
        Copy the request info, always giving the copy its own timings object.

        By default the copy is shallow except for ``timings``, which is
        replaced by a separate copy so that later timing updates on one
        instance do not affect the other.

        :param kwargs: Optional ``update`` (mapping of field values to set on
            the copy; takes precedence over the copied ``timings``) and ``deep``
            (whether to deep-copy; defaults to False). Any other keyword
            arguments are ignored.
        :return: New RequestInfo instance with independent timing objects
        """
        deep = bool(kwargs.get("deep", False))
        update: dict[str, object] = {"timings": self.timings.model_copy(deep=deep)}
        # Caller-supplied values are applied last so an explicit "timings"
        # override is respected.
        update.update(kwargs.get("update") or {})
        return super().model_copy(update=update, deep=deep)
|