guidellm 0.4.0a18__py3-none-any.whl → 0.4.0a155__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of guidellm might be problematic. Click here for more details.
- guidellm/__init__.py +5 -2
- guidellm/__main__.py +451 -252
- guidellm/backends/__init__.py +33 -0
- guidellm/backends/backend.py +110 -0
- guidellm/backends/openai.py +355 -0
- guidellm/backends/response_handlers.py +455 -0
- guidellm/benchmark/__init__.py +53 -39
- guidellm/benchmark/benchmarker.py +148 -317
- guidellm/benchmark/entrypoints.py +466 -128
- guidellm/benchmark/output.py +517 -771
- guidellm/benchmark/profile.py +580 -280
- guidellm/benchmark/progress.py +568 -549
- guidellm/benchmark/scenarios/__init__.py +40 -0
- guidellm/benchmark/scenarios/chat.json +6 -0
- guidellm/benchmark/scenarios/rag.json +6 -0
- guidellm/benchmark/schemas.py +2085 -0
- guidellm/data/__init__.py +28 -4
- guidellm/data/collators.py +16 -0
- guidellm/data/deserializers/__init__.py +53 -0
- guidellm/data/deserializers/deserializer.py +109 -0
- guidellm/data/deserializers/file.py +222 -0
- guidellm/data/deserializers/huggingface.py +94 -0
- guidellm/data/deserializers/memory.py +192 -0
- guidellm/data/deserializers/synthetic.py +346 -0
- guidellm/data/loaders.py +145 -0
- guidellm/data/preprocessors/__init__.py +25 -0
- guidellm/data/preprocessors/formatters.py +412 -0
- guidellm/data/preprocessors/mappers.py +198 -0
- guidellm/data/preprocessors/preprocessor.py +29 -0
- guidellm/data/processor.py +30 -0
- guidellm/data/schemas.py +13 -0
- guidellm/data/utils/__init__.py +10 -0
- guidellm/data/utils/dataset.py +94 -0
- guidellm/data/utils/functions.py +18 -0
- guidellm/extras/__init__.py +4 -0
- guidellm/extras/audio.py +215 -0
- guidellm/extras/vision.py +242 -0
- guidellm/logger.py +2 -2
- guidellm/mock_server/__init__.py +8 -0
- guidellm/mock_server/config.py +84 -0
- guidellm/mock_server/handlers/__init__.py +17 -0
- guidellm/mock_server/handlers/chat_completions.py +280 -0
- guidellm/mock_server/handlers/completions.py +280 -0
- guidellm/mock_server/handlers/tokenizer.py +142 -0
- guidellm/mock_server/models.py +510 -0
- guidellm/mock_server/server.py +168 -0
- guidellm/mock_server/utils.py +302 -0
- guidellm/preprocess/dataset.py +23 -26
- guidellm/presentation/builder.py +2 -2
- guidellm/presentation/data_models.py +25 -21
- guidellm/presentation/injector.py +2 -3
- guidellm/scheduler/__init__.py +65 -26
- guidellm/scheduler/constraints.py +1035 -0
- guidellm/scheduler/environments.py +252 -0
- guidellm/scheduler/scheduler.py +140 -368
- guidellm/scheduler/schemas.py +272 -0
- guidellm/scheduler/strategies.py +519 -0
- guidellm/scheduler/worker.py +391 -420
- guidellm/scheduler/worker_group.py +707 -0
- guidellm/schemas/__init__.py +31 -0
- guidellm/schemas/info.py +159 -0
- guidellm/schemas/request.py +216 -0
- guidellm/schemas/response.py +119 -0
- guidellm/schemas/stats.py +228 -0
- guidellm/{config.py → settings.py} +32 -21
- guidellm/utils/__init__.py +95 -8
- guidellm/utils/auto_importer.py +98 -0
- guidellm/utils/cli.py +46 -2
- guidellm/utils/console.py +183 -0
- guidellm/utils/encoding.py +778 -0
- guidellm/utils/functions.py +134 -0
- guidellm/utils/hf_datasets.py +1 -2
- guidellm/utils/hf_transformers.py +4 -4
- guidellm/utils/imports.py +9 -0
- guidellm/utils/messaging.py +1118 -0
- guidellm/utils/mixins.py +115 -0
- guidellm/utils/pydantic_utils.py +411 -0
- guidellm/utils/random.py +3 -4
- guidellm/utils/registry.py +220 -0
- guidellm/utils/singleton.py +133 -0
- guidellm/{objects → utils}/statistics.py +341 -247
- guidellm/utils/synchronous.py +159 -0
- guidellm/utils/text.py +163 -50
- guidellm/utils/typing.py +41 -0
- guidellm/version.py +1 -1
- {guidellm-0.4.0a18.dist-info → guidellm-0.4.0a155.dist-info}/METADATA +33 -10
- guidellm-0.4.0a155.dist-info/RECORD +96 -0
- guidellm/backend/__init__.py +0 -23
- guidellm/backend/backend.py +0 -259
- guidellm/backend/openai.py +0 -705
- guidellm/backend/response.py +0 -136
- guidellm/benchmark/aggregator.py +0 -760
- guidellm/benchmark/benchmark.py +0 -837
- guidellm/benchmark/scenario.py +0 -104
- guidellm/data/prideandprejudice.txt.gz +0 -0
- guidellm/dataset/__init__.py +0 -22
- guidellm/dataset/creator.py +0 -213
- guidellm/dataset/entrypoints.py +0 -42
- guidellm/dataset/file.py +0 -92
- guidellm/dataset/hf_datasets.py +0 -62
- guidellm/dataset/in_memory.py +0 -132
- guidellm/dataset/synthetic.py +0 -287
- guidellm/objects/__init__.py +0 -18
- guidellm/objects/pydantic.py +0 -89
- guidellm/request/__init__.py +0 -18
- guidellm/request/loader.py +0 -284
- guidellm/request/request.py +0 -79
- guidellm/request/types.py +0 -10
- guidellm/scheduler/queues.py +0 -25
- guidellm/scheduler/result.py +0 -155
- guidellm/scheduler/strategy.py +0 -495
- guidellm-0.4.0a18.dist-info/RECORD +0 -62
- {guidellm-0.4.0a18.dist-info → guidellm-0.4.0a155.dist-info}/WHEEL +0 -0
- {guidellm-0.4.0a18.dist-info → guidellm-0.4.0a155.dist-info}/entry_points.txt +0 -0
- {guidellm-0.4.0a18.dist-info → guidellm-0.4.0a155.dist-info}/licenses/LICENSE +0 -0
- {guidellm-0.4.0a18.dist-info → guidellm-0.4.0a155.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Pydantic schema models for GuideLLM operations.
|
|
3
|
+
|
|
4
|
+
Provides standardized data models and type definitions for generation requests,
|
|
5
|
+
responses, timing measurements, and statistics aggregation. These schemas ensure
|
|
6
|
+
type safety and consistent data handling across the benchmarking pipeline,
|
|
7
|
+
from request submission through backend processing to results compilation.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
from .info import RequestInfo, RequestTimings
|
|
13
|
+
from .request import (
|
|
14
|
+
GenerationRequest,
|
|
15
|
+
GenerationRequestArguments,
|
|
16
|
+
GenerativeRequestType,
|
|
17
|
+
UsageMetrics,
|
|
18
|
+
)
|
|
19
|
+
from .response import GenerationResponse
|
|
20
|
+
from .stats import GenerativeRequestStats
|
|
21
|
+
|
|
22
|
+
__all__ = [
|
|
23
|
+
"GenerationRequest",
|
|
24
|
+
"GenerationRequestArguments",
|
|
25
|
+
"GenerationResponse",
|
|
26
|
+
"GenerativeRequestStats",
|
|
27
|
+
"GenerativeRequestType",
|
|
28
|
+
"RequestInfo",
|
|
29
|
+
"RequestTimings",
|
|
30
|
+
"UsageMetrics",
|
|
31
|
+
]
|
guidellm/schemas/info.py
ADDED
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Core data structures and interfaces for the GuideLLM scheduler system.
|
|
3
|
+
|
|
4
|
+
Provides type-safe abstractions for distributed request processing, timing
|
|
5
|
+
measurements, and backend interfaces for benchmarking operations. Central to
|
|
6
|
+
the scheduler architecture, enabling request lifecycle tracking, backend
|
|
7
|
+
coordination, and state management across distributed worker processes.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import uuid
|
|
13
|
+
from typing import Literal
|
|
14
|
+
|
|
15
|
+
from pydantic import Field, computed_field
|
|
16
|
+
|
|
17
|
+
from guidellm.utils import StandardBaseDict, StandardBaseModel
|
|
18
|
+
|
|
19
|
+
__all__ = ["RequestInfo", "RequestTimings"]
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class RequestTimings(StandardBaseDict):
    """
    Timing measurements for tracking request lifecycle events.

    Provides comprehensive timing data for distributed request processing, capturing
    key timestamps from initial targeting through final completion. Essential for
    performance analysis, SLA monitoring, and debugging request processing bottlenecks
    across scheduler workers and backend systems.

    All fields except ``iterations`` are Unix timestamps (float seconds), and every
    field defaults to ``None`` until the corresponding lifecycle event has occurred.
    """

    # -- Scheduler-side lifecycle: target -> queue -> dequeue -> schedule --
    targeted_start: float | None = Field(
        default=None,
        description="Unix timestamp when request was initially targeted for execution",
    )
    queued: float | None = Field(
        default=None,
        description="Unix timestamp when request was placed into processing queue",
    )
    dequeued: float | None = Field(
        default=None,
        description="Unix timestamp when request was removed from queue for processing",
    )
    scheduled_at: float | None = Field(
        default=None,
        description="Unix timestamp when the request was scheduled for processing",
    )
    # -- Backend-side lifecycle: resolve -> request -> streaming -> end --
    resolve_start: float | None = Field(
        default=None,
        description="Unix timestamp when backend resolution of the request began",
    )
    request_start: float | None = Field(
        default=None,
        description="Unix timestamp when the backend began processing the request",
    )
    first_iteration: float | None = Field(
        default=None,
        description="Unix timestamp when the first iteration for a streaming began",
    )
    last_iteration: float | None = Field(
        default=None,
        description="Unix timestamp when the last iteration for a streaming completed",
    )
    iterations: int | None = Field(
        default=None,
        description="Total number of streaming update iterations performed",
    )
    request_end: float | None = Field(
        default=None,
        description="Unix timestamp when the backend completed processing the request",
    )
    resolve_end: float | None = Field(
        default=None,
        description="Unix timestamp when backend resolution of the request completed",
    )
    # Final scheduler bookkeeping once the request result has been consumed
    finalized: float | None = Field(
        default=None,
        description="Unix timestamp when request was processed by the scheduler",
    )
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
class RequestInfo(StandardBaseModel):
    """
    Complete information about a request in the scheduler system.

    Encapsulates all metadata, status tracking, and timing information for requests
    processed through the distributed scheduler. Provides comprehensive lifecycle
    tracking from initial queuing through final completion, including error handling
    and node identification for debugging and performance analysis.

    Example:
    ::
        request = RequestInfo()
        request.status = "in_progress"
        start_time = request.started_at
        completion_time = request.completed_at
    """

    # Auto-generated UUID4 string; correlates this info with its request/response
    request_id: str = Field(
        description="Unique identifier for the request",
        default_factory=lambda: str(uuid.uuid4()),
    )
    status: Literal[
        "queued", "pending", "in_progress", "completed", "errored", "cancelled"
    ] = Field(description="Current processing status of the request", default="queued")
    # -1 sentinels below mean "not yet assigned" by the scheduler
    scheduler_node_id: int = Field(
        description="ID/rank of the scheduler node handling the request",
        default=-1,
    )
    scheduler_process_id: int = Field(
        description="ID/rank of the node's scheduler process handling the request",
        default=-1,
    )
    scheduler_start_time: float = Field(
        description="Unix timestamp when scheduler processing began",
        default=-1,
    )
    timings: RequestTimings = Field(
        default_factory=RequestTimings,
        description="Timing measurements for the request lifecycle",
    )

    error: str | None = Field(
        default=None, description="Error message if the request status is 'errored'"
    )

    @computed_field  # type: ignore[misc]
    @property
    def started_at(self) -> float | None:
        """
        Get the effective request processing start time.

        Prefers the backend's ``request_start`` timestamp, falling back to
        ``resolve_start`` when ``request_start`` is unset (or falsy).

        :return: Unix timestamp when processing began, or None if not started
        """
        return self.timings.request_start or self.timings.resolve_start

    @computed_field  # type: ignore[misc]
    @property
    def completed_at(self) -> float | None:
        """
        Get the effective request processing completion time.

        Prefers the backend's ``request_end`` timestamp, falling back to
        ``resolve_end`` when ``request_end`` is unset (or falsy).

        :return: Unix timestamp when processing completed, or None if not completed
        """
        return self.timings.request_end or self.timings.resolve_end

    def model_copy(self, **_kwargs) -> RequestInfo:  # type: ignore[override] # noqa: ARG002
        """
        Create a deep copy of the request info with copied timing objects.

        Performs a shallow model copy while explicitly replacing ``timings`` with
        its own copy, so the returned instance's timings can be mutated without
        affecting this instance. Keyword arguments are accepted only for signature
        compatibility with pydantic's ``model_copy`` and are ignored.

        :param kwargs: Additional keyword arguments for model copying (ignored)
        :return: New RequestInfo instance with independent timing objects
        """
        return super().model_copy(
            update={
                "timings": self.timings.model_copy(),
            },
            deep=False,
        )
|
|
@@ -0,0 +1,216 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Request schema definitions for generation operations.
|
|
3
|
+
|
|
4
|
+
Contains request models and data structures used to define and execute generation
|
|
5
|
+
requests across different backend services. Provides standardized interfaces for
|
|
6
|
+
request arguments, usage metrics tracking, and request type definitions that enable
|
|
7
|
+
consistent interaction with various AI generation APIs.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import uuid
|
|
13
|
+
from typing import Any, Literal
|
|
14
|
+
|
|
15
|
+
from pydantic import Field, computed_field
|
|
16
|
+
|
|
17
|
+
from guidellm.utils import StandardBaseDict, StandardBaseModel
|
|
18
|
+
|
|
19
|
+
__all__ = [
|
|
20
|
+
"GenerationRequest",
|
|
21
|
+
"GenerationRequestArguments",
|
|
22
|
+
"GenerativeRequestType",
|
|
23
|
+
"UsageMetrics",
|
|
24
|
+
]
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
# Closed set of known request types; used to pick the backend endpoint when the
# request arguments do not carry an explicit url (see GenerationRequest.request_type).
GenerativeRequestType = Literal[
    "text_completions",
    "chat_completions",
    "audio_transcriptions",
    "audio_translations",
]
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class GenerationRequestArguments(StandardBaseDict):
    """
    HTTP request arguments for generation operations.

    Encapsulates all necessary HTTP request components including method, headers,
    parameters, and payload data required to execute generation requests against
    backend services. Supports file uploads and streaming responses.
    """

    # The HTTP verb to use; None means "let the backend decide"
    method: str | None = Field(
        default=None,
        description="The HTTP method to use for the request (e.g., 'POST', 'GET').",
    )
    stream: bool | None = Field(
        default=None,
        description="Whether to stream the response, if applicable.",
    )
    headers: dict[str, str] | None = Field(
        default=None,
        description="Any headers to include in the request, if applicable.",
    )
    params: dict[str, Any] | None = Field(
        default=None,
        description="Query parameters to include in the request, if applicable.",
    )
    body: dict[str, Any] | None = Field(
        default=None,
        description="Content to include in the main request body.",
    )
    files: dict[str, Any] | None = Field(
        default=None,
        description="Files to include in the request, if applicable.",
    )

    def model_combine(
        self, additional: GenerationRequestArguments | dict[str, Any]
    ) -> GenerationRequestArguments:
        """
        Merge additional request arguments into the current instance.

        Scalar fields (``method``, ``stream``) are overwritten when the additional
        value is not None, while mapping fields (``headers``, ``params``, ``body``,
        ``files``) are merged key-by-key with the additional values taking
        precedence over existing entries. The instance is modified in place.

        :param additional: Additional arguments to merge with current instance
        :return: This instance, updated with the merged arguments
        """
        additional_dict = (
            additional.model_dump()
            if isinstance(additional, GenerationRequestArguments)
            else additional
        )

        for overwrite in ("method", "stream"):
            if (val := additional_dict.get(overwrite)) is not None:
                setattr(self, overwrite, val)

        # Fix: the merge list previously contained "json_body", which is not a
        # declared field (the field is "body"), so body payloads were silently
        # never merged. Also coalesce the current value to {}: mapping fields
        # default to None, and ``{**None, **val}`` raises TypeError.
        for combine in ("headers", "params", "body", "files"):
            if (val := additional_dict.get(combine)) is not None:
                current = getattr(self, combine, None) or {}
                setattr(self, combine, {**current, **val})

        return self
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
class UsageMetrics(StandardBaseDict):
    """
    Multimodal usage metrics for generation requests.

    Tracks resource consumption across different modalities including text, images,
    video, and audio. Provides granular metrics for tokens, bytes, duration, and
    format-specific measurements to enable comprehensive usage monitoring and billing.
    All fields default to None, meaning "not measured" (distinct from a measured 0).
    """

    # Text stats
    text_tokens: int | None = Field(
        default=None, description="Number of text tokens processed/generated."
    )
    text_words: int | None = Field(
        default=None, description="Number of text words processed/generated."
    )
    text_characters: int | None = Field(
        default=None, description="Number of text characters processed/generated."
    )

    # Vision image stats
    image_tokens: int | None = Field(
        default=None, description="Number of image tokens processed/generated."
    )
    image_count: int | None = Field(
        default=None, description="Number of images processed/generated."
    )
    image_pixels: int | None = Field(
        default=None, description="Number of image pixels processed/generated."
    )
    image_bytes: int | None = Field(
        default=None, description="Number of image bytes processed/generated."
    )

    # Vision video stats
    video_tokens: int | None = Field(
        default=None, description="Number of video tokens processed/generated."
    )
    video_frames: int | None = Field(
        default=None, description="Number of video frames processed/generated."
    )
    video_seconds: float | None = Field(
        default=None, description="Duration of video processed/generated in seconds."
    )
    video_bytes: int | None = Field(
        default=None, description="Number of video bytes processed/generated."
    )

    # Audio stats
    audio_tokens: int | None = Field(
        default=None, description="Number of audio tokens processed/generated."
    )
    audio_samples: int | None = Field(
        default=None, description="Number of audio samples processed/generated."
    )
    audio_seconds: float | None = Field(
        default=None, description="Duration of audio processed/generated in seconds."
    )
    audio_bytes: int | None = Field(
        default=None, description="Number of audio bytes processed/generated."
    )

    @computed_field  # type: ignore[misc]
    @property
    def total_tokens(self) -> int | None:
        """
        Calculate total tokens across all modalities.

        :return: Sum of text, image, video, and audio tokens, or None if all
            modality token counts are None
        """
        counts = (
            self.text_tokens,
            self.image_tokens,
            self.video_tokens,
            self.audio_tokens,
        )
        if all(count is None for count in counts):
            return None
        # Fix: the previous ``(... ) + (...) or None`` collapsed a genuine total
        # of 0 (e.g. all counts explicitly 0) into None, contradicting the
        # documented contract that None means "all counts are None".
        return sum(count or 0 for count in counts)
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
class GenerationRequest(StandardBaseModel):
    """
    Complete request specification for backend generation operations.

    Encapsulates all components needed to execute a generation request including
    unique identification, request type specification, HTTP arguments, and input/output
    usage metrics. Serves as the primary interface between the scheduler and backend
    services for coordinating AI generation tasks.

    Example::
        request = GenerationRequest(
            request_type="text_completions",
            arguments=GenerationRequestArguments(
                method="POST",
                body={"prompt": "Hello world", "max_tokens": 100}
            )
        )
    """

    # Auto-generated UUID4 string; correlates the request with its response/info
    request_id: str = Field(
        default_factory=lambda: str(uuid.uuid4()),
        description="Unique identifier for the request.",
    )
    # Typically one of GenerativeRequestType, but any string is accepted to allow
    # custom backend endpoints
    request_type: GenerativeRequestType | str = Field(
        description=(
            "Type of request. If url is not provided in arguments, "
            "this will be used to determine the request url."
        ),
    )
    arguments: GenerationRequestArguments = Field(
        description=(
            "Payload for the request, structured as a dictionary of arguments to pass "
            "to the respective backend method. For example, can contain "
            "'json', 'headers', 'files', etc."
        )
    )
    # Metrics describing the request's input payload (e.g. prompt size)
    input_metrics: UsageMetrics = Field(
        default_factory=UsageMetrics,
        description="Input statistics including counts, sizes, and durations.",
    )
    # Expected/requested output metrics; may be overridden by actual response metrics
    output_metrics: UsageMetrics = Field(
        default_factory=UsageMetrics,
        description="Output statistics including counts, sizes, and durations.",
    )
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Backend response models for request and response handling.
|
|
3
|
+
|
|
4
|
+
Provides standardized response models for generation operations that capture
|
|
5
|
+
output text, usage metrics, and compilation of request statistics. Ensures
|
|
6
|
+
consistent data handling and statistics aggregation across different backend
|
|
7
|
+
implementations.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
from pydantic import Field
|
|
13
|
+
|
|
14
|
+
from guidellm.schemas.info import RequestInfo
|
|
15
|
+
from guidellm.schemas.request import GenerationRequest, UsageMetrics
|
|
16
|
+
from guidellm.schemas.stats import GenerativeRequestStats
|
|
17
|
+
from guidellm.utils import StandardBaseModel
|
|
18
|
+
|
|
19
|
+
__all__ = ["GenerationResponse"]
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class GenerationResponse(StandardBaseModel):
    """
    Response model for backend generation operations.

    Holds the output of a single generation request: the generated text plus the
    input/output usage metrics observed by the backend. Can merge its own metrics
    with those carried on the originating request to produce a complete
    GenerativeRequestStats record for analysis and monitoring.

    Example:
    ::
        response = GenerationResponse(
            request_id="req-123",
            request_args=None,
            text="Generated response text",
        )
        stats = response.compile_stats(request, info)
    """

    request_id: str = Field(
        description="Unique identifier matching the original GenerationRequest."
    )
    request_args: str | None = Field(
        description="Arguments passed to the backend for request processing."
    )
    text: str | None = Field(
        default=None,
        description="The generated response text.",
    )
    input_metrics: UsageMetrics = Field(
        default_factory=UsageMetrics,
        description="Token usage statistics from the input prompt.",
    )
    output_metrics: UsageMetrics = Field(
        default_factory=UsageMetrics,
        description="Token usage statistics from the generated output.",
    )

    @staticmethod
    def _overlay_metrics(base: UsageMetrics, override: UsageMetrics) -> UsageMetrics:
        """Return a new UsageMetrics with override's non-None fields layered on base."""
        merged = base.model_dump()
        merged.update(
            {key: val for key, val in override.model_dump().items() if val is not None}
        )
        return UsageMetrics(**merged)

    def compile_stats(
        self,
        request: GenerationRequest,
        info: RequestInfo,
        prefer_response: bool = True,
    ) -> GenerativeRequestStats:
        """
        Compile and return comprehensive request statistics.

        Merges metrics from the request and response objects into a single
        statistical record. When ``prefer_response`` is True, response-level
        metrics override request-level ones (and vice versa), so the actual
        execution data wins wherever it is available. For non-completed requests
        the request's output metrics are discarded (reset in place) since they
        are not valid.

        :param request: The original generation request containing input data
        :param info: Metadata and timing information for the request execution
        :param prefer_response: Whether to prefer response metrics over request
            metrics when both are available
        :return: A GenerativeRequestStats object containing detailed statistics
        :raises ValueError: When request IDs don't match between objects
        """
        if request.request_id != self.request_id:
            raise ValueError("Mismatched request IDs between request and response.")

        if info.request_id != self.request_id:
            raise ValueError("Mismatched request IDs between info and response.")

        if info.status != "completed":
            # Failed/cancelled requests have no valid output; reset in place
            request.output_metrics = UsageMetrics()

        if prefer_response:
            merged_input = self._overlay_metrics(request.input_metrics, self.input_metrics)
            merged_output = self._overlay_metrics(request.output_metrics, self.output_metrics)
        else:
            merged_input = self._overlay_metrics(self.input_metrics, request.input_metrics)
            merged_output = self._overlay_metrics(self.output_metrics, request.output_metrics)

        return GenerativeRequestStats(
            request_id=self.request_id,
            request_type=request.request_type,
            request_args=str(
                request.arguments.model_dump() if request.arguments else {}
            ),
            output=self.text,
            info=info,
            input_metrics=merged_input,
            output_metrics=merged_output,
        )
|