guidellm 0.4.0a21__py3-none-any.whl → 0.4.0a155__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of guidellm might be problematic. Click here for more details.

Files changed (116)
  1. guidellm/__init__.py +5 -2
  2. guidellm/__main__.py +451 -252
  3. guidellm/backends/__init__.py +33 -0
  4. guidellm/backends/backend.py +110 -0
  5. guidellm/backends/openai.py +355 -0
  6. guidellm/backends/response_handlers.py +455 -0
  7. guidellm/benchmark/__init__.py +53 -39
  8. guidellm/benchmark/benchmarker.py +148 -317
  9. guidellm/benchmark/entrypoints.py +466 -128
  10. guidellm/benchmark/output.py +517 -771
  11. guidellm/benchmark/profile.py +580 -280
  12. guidellm/benchmark/progress.py +568 -549
  13. guidellm/benchmark/scenarios/__init__.py +40 -0
  14. guidellm/benchmark/scenarios/chat.json +6 -0
  15. guidellm/benchmark/scenarios/rag.json +6 -0
  16. guidellm/benchmark/schemas.py +2085 -0
  17. guidellm/data/__init__.py +28 -4
  18. guidellm/data/collators.py +16 -0
  19. guidellm/data/deserializers/__init__.py +53 -0
  20. guidellm/data/deserializers/deserializer.py +109 -0
  21. guidellm/data/deserializers/file.py +222 -0
  22. guidellm/data/deserializers/huggingface.py +94 -0
  23. guidellm/data/deserializers/memory.py +192 -0
  24. guidellm/data/deserializers/synthetic.py +346 -0
  25. guidellm/data/loaders.py +145 -0
  26. guidellm/data/preprocessors/__init__.py +25 -0
  27. guidellm/data/preprocessors/formatters.py +412 -0
  28. guidellm/data/preprocessors/mappers.py +198 -0
  29. guidellm/data/preprocessors/preprocessor.py +29 -0
  30. guidellm/data/processor.py +30 -0
  31. guidellm/data/schemas.py +13 -0
  32. guidellm/data/utils/__init__.py +10 -0
  33. guidellm/data/utils/dataset.py +94 -0
  34. guidellm/data/utils/functions.py +18 -0
  35. guidellm/extras/__init__.py +4 -0
  36. guidellm/extras/audio.py +215 -0
  37. guidellm/extras/vision.py +242 -0
  38. guidellm/logger.py +2 -2
  39. guidellm/mock_server/__init__.py +8 -0
  40. guidellm/mock_server/config.py +84 -0
  41. guidellm/mock_server/handlers/__init__.py +17 -0
  42. guidellm/mock_server/handlers/chat_completions.py +280 -0
  43. guidellm/mock_server/handlers/completions.py +280 -0
  44. guidellm/mock_server/handlers/tokenizer.py +142 -0
  45. guidellm/mock_server/models.py +510 -0
  46. guidellm/mock_server/server.py +168 -0
  47. guidellm/mock_server/utils.py +302 -0
  48. guidellm/preprocess/dataset.py +23 -26
  49. guidellm/presentation/builder.py +2 -2
  50. guidellm/presentation/data_models.py +25 -21
  51. guidellm/presentation/injector.py +2 -3
  52. guidellm/scheduler/__init__.py +65 -26
  53. guidellm/scheduler/constraints.py +1035 -0
  54. guidellm/scheduler/environments.py +252 -0
  55. guidellm/scheduler/scheduler.py +140 -368
  56. guidellm/scheduler/schemas.py +272 -0
  57. guidellm/scheduler/strategies.py +519 -0
  58. guidellm/scheduler/worker.py +391 -420
  59. guidellm/scheduler/worker_group.py +707 -0
  60. guidellm/schemas/__init__.py +31 -0
  61. guidellm/schemas/info.py +159 -0
  62. guidellm/schemas/request.py +216 -0
  63. guidellm/schemas/response.py +119 -0
  64. guidellm/schemas/stats.py +228 -0
  65. guidellm/{config.py → settings.py} +32 -21
  66. guidellm/utils/__init__.py +95 -8
  67. guidellm/utils/auto_importer.py +98 -0
  68. guidellm/utils/cli.py +46 -2
  69. guidellm/utils/console.py +183 -0
  70. guidellm/utils/encoding.py +778 -0
  71. guidellm/utils/functions.py +134 -0
  72. guidellm/utils/hf_datasets.py +1 -2
  73. guidellm/utils/hf_transformers.py +4 -4
  74. guidellm/utils/imports.py +9 -0
  75. guidellm/utils/messaging.py +1118 -0
  76. guidellm/utils/mixins.py +115 -0
  77. guidellm/utils/pydantic_utils.py +411 -0
  78. guidellm/utils/random.py +3 -4
  79. guidellm/utils/registry.py +220 -0
  80. guidellm/utils/singleton.py +133 -0
  81. guidellm/{objects → utils}/statistics.py +341 -247
  82. guidellm/utils/synchronous.py +159 -0
  83. guidellm/utils/text.py +163 -50
  84. guidellm/utils/typing.py +41 -0
  85. guidellm/version.py +1 -1
  86. {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a155.dist-info}/METADATA +33 -10
  87. guidellm-0.4.0a155.dist-info/RECORD +96 -0
  88. guidellm/backend/__init__.py +0 -23
  89. guidellm/backend/backend.py +0 -259
  90. guidellm/backend/openai.py +0 -705
  91. guidellm/backend/response.py +0 -136
  92. guidellm/benchmark/aggregator.py +0 -760
  93. guidellm/benchmark/benchmark.py +0 -837
  94. guidellm/benchmark/scenario.py +0 -104
  95. guidellm/data/prideandprejudice.txt.gz +0 -0
  96. guidellm/dataset/__init__.py +0 -22
  97. guidellm/dataset/creator.py +0 -213
  98. guidellm/dataset/entrypoints.py +0 -42
  99. guidellm/dataset/file.py +0 -92
  100. guidellm/dataset/hf_datasets.py +0 -62
  101. guidellm/dataset/in_memory.py +0 -132
  102. guidellm/dataset/synthetic.py +0 -287
  103. guidellm/objects/__init__.py +0 -18
  104. guidellm/objects/pydantic.py +0 -89
  105. guidellm/request/__init__.py +0 -18
  106. guidellm/request/loader.py +0 -284
  107. guidellm/request/request.py +0 -79
  108. guidellm/request/types.py +0 -10
  109. guidellm/scheduler/queues.py +0 -25
  110. guidellm/scheduler/result.py +0 -155
  111. guidellm/scheduler/strategy.py +0 -495
  112. guidellm-0.4.0a21.dist-info/RECORD +0 -62
  113. {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a155.dist-info}/WHEEL +0 -0
  114. {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a155.dist-info}/entry_points.txt +0 -0
  115. {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a155.dist-info}/licenses/LICENSE +0 -0
  116. {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a155.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,31 @@
1
+ """
2
+ Pydantic schema models for GuideLLM operations.
3
+
4
+ Provides standardized data models and type definitions for generation requests,
5
+ responses, timing measurements, and statistics aggregation. These schemas ensure
6
+ type safety and consistent data handling across the benchmarking pipeline,
7
+ from request submission through backend processing to results compilation.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ from .info import RequestInfo, RequestTimings
13
+ from .request import (
14
+ GenerationRequest,
15
+ GenerationRequestArguments,
16
+ GenerativeRequestType,
17
+ UsageMetrics,
18
+ )
19
+ from .response import GenerationResponse
20
+ from .stats import GenerativeRequestStats
21
+
22
+ __all__ = [
23
+ "GenerationRequest",
24
+ "GenerationRequestArguments",
25
+ "GenerationResponse",
26
+ "GenerativeRequestStats",
27
+ "GenerativeRequestType",
28
+ "RequestInfo",
29
+ "RequestTimings",
30
+ "UsageMetrics",
31
+ ]
@@ -0,0 +1,159 @@
1
+ """
2
+ Core data structures and interfaces for the GuideLLM scheduler system.
3
+
4
+ Provides type-safe abstractions for distributed request processing, timing
5
+ measurements, and backend interfaces for benchmarking operations. Central to
6
+ the scheduler architecture, enabling request lifecycle tracking, backend
7
+ coordination, and state management across distributed worker processes.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import uuid
13
+ from typing import Literal
14
+
15
+ from pydantic import Field, computed_field
16
+
17
+ from guidellm.utils import StandardBaseDict, StandardBaseModel
18
+
19
+ __all__ = ["RequestInfo", "RequestTimings"]
20
+
21
+
22
class RequestTimings(StandardBaseDict):
    """
    Timing measurements for tracking request lifecycle events.

    Records Unix timestamps for each stage of a request's life: scheduler
    queueing and dispatch, backend resolution, streaming iterations, and final
    completion. Every field defaults to ``None``, which means the corresponding
    event has not occurred (or was not recorded) for the request. Useful for
    performance analysis, SLA monitoring, and locating processing bottlenecks
    across scheduler workers and backend systems.
    """

    # --- Scheduler-side queueing and dispatch ---
    targeted_start: float | None = Field(
        description="Unix timestamp when request was initially targeted for execution",
        default=None,
    )
    queued: float | None = Field(
        description="Unix timestamp when request was placed into processing queue",
        default=None,
    )
    dequeued: float | None = Field(
        description="Unix timestamp when request was removed from queue for processing",
        default=None,
    )
    scheduled_at: float | None = Field(
        description="Unix timestamp when the request was scheduled for processing",
        default=None,
    )

    # --- Backend resolution and processing ---
    resolve_start: float | None = Field(
        description="Unix timestamp when backend resolution of the request began",
        default=None,
    )
    request_start: float | None = Field(
        description="Unix timestamp when the backend began processing the request",
        default=None,
    )

    # --- Streaming iteration markers ---
    first_iteration: float | None = Field(
        description="Unix timestamp when the first iteration for a streaming began",
        default=None,
    )
    last_iteration: float | None = Field(
        description="Unix timestamp when the last iteration for a streaming completed",
        default=None,
    )
    iterations: int | None = Field(
        description="Total number of streaming update iterations performed",
        default=None,
    )

    # --- Completion and finalization ---
    request_end: float | None = Field(
        description="Unix timestamp when the backend completed processing the request",
        default=None,
    )
    resolve_end: float | None = Field(
        description="Unix timestamp when backend resolution of the request completed",
        default=None,
    )
    finalized: float | None = Field(
        description="Unix timestamp when request was processed by the scheduler",
        default=None,
    )
class RequestInfo(StandardBaseModel):
    """
    Complete information about a request in the scheduler system.

    Encapsulates all metadata, status tracking, and timing information for requests
    processed through the distributed scheduler. Provides comprehensive lifecycle
    tracking from initial queuing through final completion, including error handling
    and node identification for debugging and performance analysis.

    Example:
    ::
        request = RequestInfo()
        request.status = "in_progress"
        start_time = request.started_at
        completion_time = request.completed_at
    """

    request_id: str = Field(
        description="Unique identifier for the request",
        default_factory=lambda: str(uuid.uuid4()),
    )
    status: Literal[
        "queued", "pending", "in_progress", "completed", "errored", "cancelled"
    ] = Field(description="Current processing status of the request", default="queued")
    scheduler_node_id: int = Field(
        description="ID/rank of the scheduler node handling the request",
        default=-1,  # -1 sentinel: not yet assigned to a node
    )
    scheduler_process_id: int = Field(
        description="ID/rank of the node's scheduler process handling the request",
        default=-1,  # -1 sentinel: not yet assigned to a process
    )
    scheduler_start_time: float = Field(
        description="Unix timestamp when scheduler processing began",
        default=-1,  # -1 sentinel: scheduler processing has not started
    )
    timings: RequestTimings = Field(
        default_factory=RequestTimings,
        description="Timing measurements for the request lifecycle",
    )

    error: str | None = Field(
        default=None, description="Error message if the request status is 'errored'"
    )

    @computed_field  # type: ignore[misc]
    @property
    def started_at(self) -> float | None:
        """
        Get the effective request processing start time.

        Prefers the backend's request_start timestamp and falls back to
        resolve_start. NOTE(review): an exact 0.0 timestamp would be skipped
        by the ``or`` chain; real Unix timestamps are always nonzero.

        :return: Unix timestamp when processing began, or None if not started
        """
        return self.timings.request_start or self.timings.resolve_start

    @computed_field  # type: ignore[misc]
    @property
    def completed_at(self) -> float | None:
        """
        Get the effective request processing completion time.

        Prefers request_end, falling back to resolve_end.

        :return: Unix timestamp when processing completed, or None if not completed
        """
        return self.timings.request_end or self.timings.resolve_end

    def model_copy(self, **kwargs) -> RequestInfo:  # type: ignore[override]
        """
        Create a copy of the request info with an independent timings object.

        Fix: the previous implementation documented its keyword arguments but
        silently discarded all of them. Caller-supplied ``update`` entries are
        now applied on top of the forced ``timings`` copy (so an explicit
        ``update={"timings": ...}`` wins), and ``deep`` is honored, keeping the
        original default of a shallow copy.

        :param kwargs: Keyword arguments forwarded to pydantic's model_copy;
            supports ``update`` (field overrides) and ``deep`` (deep-copy flag)
        :return: New RequestInfo instance with independent timing objects
        """
        update: dict = {"timings": self.timings.model_copy()}
        caller_update = kwargs.get("update")
        if caller_update:
            update.update(caller_update)
        return super().model_copy(update=update, deep=kwargs.get("deep", False))
@@ -0,0 +1,216 @@
1
+ """
2
+ Request schema definitions for generation operations.
3
+
4
+ Contains request models and data structures used to define and execute generation
5
+ requests across different backend services. Provides standardized interfaces for
6
+ request arguments, usage metrics tracking, and request type definitions that enable
7
+ consistent interaction with various AI generation APIs.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import uuid
13
+ from typing import Any, Literal
14
+
15
+ from pydantic import Field, computed_field
16
+
17
+ from guidellm.utils import StandardBaseDict, StandardBaseModel
18
+
19
+ __all__ = [
20
+ "GenerationRequest",
21
+ "GenerationRequestArguments",
22
+ "GenerativeRequestType",
23
+ "UsageMetrics",
24
+ ]
25
+
26
+
27
# Request types natively understood by the built-in backends. Plain strings
# are also accepted where this alias is used (see GenerationRequest), keeping
# custom request types forward-compatible.
GenerativeRequestType = Literal[
    "text_completions",
    "chat_completions",
    "audio_transcriptions",
    "audio_translations",
]
35
class GenerationRequestArguments(StandardBaseDict):
    """
    HTTP request arguments for generation operations.

    Encapsulates all necessary HTTP request components including method, headers,
    parameters, and payload data required to execute generation requests against
    backend services. Supports file uploads and streaming responses.
    """

    method: str | None = Field(
        default=None,
        description="The HTTP method to use for the request (e.g., 'POST', 'GET').",
    )
    stream: bool | None = Field(
        default=None,
        description="Whether to stream the response, if applicable.",
    )
    headers: dict[str, str] | None = Field(
        default=None,
        description="Any headers to include in the request, if applicable.",
    )
    params: dict[str, Any] | None = Field(
        default=None,
        description="Query parameters to include in the request, if applicable.",
    )
    body: dict[str, Any] | None = Field(
        default=None,
        description="Content to include in the main request body.",
    )
    files: dict[str, Any] | None = Field(
        default=None,
        description="Files to include in the request, if applicable.",
    )

    def model_combine(
        self, additional: GenerationRequestArguments | dict[str, Any]
    ) -> GenerationRequestArguments:
        """
        Merge additional request arguments into this instance, in place.

        Scalar fields (``method``, ``stream``) are overwritten when the
        additional value is not None. Mapping fields (``headers``, ``params``,
        ``body``, ``files``) are merged key-by-key, with the additional values
        winning on key collisions.

        Fixes two defects in the original implementation:
        - it merged the nonexistent field ``json_body`` instead of ``body``,
          so request bodies were silently never combined;
        - merging into a field still set to None raised TypeError
          (``{**None, ...}``), because the attribute exists and the getattr
          default never applied; None now merges as an empty mapping.

        :param additional: Additional arguments to merge with current instance
        :return: This instance, updated with the merged arguments
        """
        additional_dict = (
            additional.model_dump()
            if isinstance(additional, GenerationRequestArguments)
            else additional
        )

        for overwrite in ("method", "stream"):
            if (val := additional_dict.get(overwrite)) is not None:
                setattr(self, overwrite, val)

        for combine in ("headers", "params", "body", "files"):
            if (val := additional_dict.get(combine)) is not None:
                # Treat an unset (None) field as an empty mapping before merge.
                current = getattr(self, combine) or {}
                setattr(self, combine, {**current, **val})

        return self
98
class UsageMetrics(StandardBaseDict):
    """
    Multimodal usage metrics for generation requests.

    Tracks resource consumption across different modalities including text, images,
    video, and audio. Provides granular metrics for tokens, bytes, duration, and
    format-specific measurements to enable comprehensive usage monitoring and billing.
    All fields default to None, meaning "not measured" (distinct from an explicit 0).
    """

    # Text stats
    text_tokens: int | None = Field(
        default=None, description="Number of text tokens processed/generated."
    )
    text_words: int | None = Field(
        default=None, description="Number of text words processed/generated."
    )
    text_characters: int | None = Field(
        default=None, description="Number of text characters processed/generated."
    )

    # Vision image stats
    image_tokens: int | None = Field(
        default=None, description="Number of image tokens processed/generated."
    )
    image_count: int | None = Field(
        default=None, description="Number of images processed/generated."
    )
    image_pixels: int | None = Field(
        default=None, description="Number of image pixels processed/generated."
    )
    image_bytes: int | None = Field(
        default=None, description="Number of image bytes processed/generated."
    )

    # Vision video stats
    video_tokens: int | None = Field(
        default=None, description="Number of video tokens processed/generated."
    )
    video_frames: int | None = Field(
        default=None, description="Number of video frames processed/generated."
    )
    video_seconds: float | None = Field(
        default=None, description="Duration of video processed/generated in seconds."
    )
    video_bytes: int | None = Field(
        default=None, description="Number of video bytes processed/generated."
    )

    # Audio stats
    audio_tokens: int | None = Field(
        default=None, description="Number of audio tokens processed/generated."
    )
    audio_samples: int | None = Field(
        default=None, description="Number of audio samples processed/generated."
    )
    audio_seconds: float | None = Field(
        default=None, description="Duration of audio processed/generated in seconds."
    )
    audio_bytes: int | None = Field(
        default=None, description="Number of audio bytes processed/generated."
    )

    @computed_field  # type: ignore[misc]
    @property
    def total_tokens(self) -> int | None:
        """
        Calculate total tokens across all modalities.

        Fix: the original expression ended in ``... or None``, which collapsed
        a legitimate total of 0 (e.g. every modality explicitly measured as 0)
        to None. Now None is returned only when no modality was measured at
        all, matching the documented contract.

        :return: Sum of text, image, video, and audio tokens, or None if all are None
        """
        token_counts = (
            self.text_tokens,
            self.image_tokens,
            self.video_tokens,
            self.audio_tokens,
        )
        if all(count is None for count in token_counts):
            return None
        return sum(count or 0 for count in token_counts)
172
+
173
class GenerationRequest(StandardBaseModel):
    """
    Complete request specification for backend generation operations.

    Bundles everything a backend needs to execute one generation task: a unique
    identifier, the request type, the HTTP arguments to send, and usage-metric
    containers for both the input and the (expected or measured) output. Acts
    as the primary hand-off object between the scheduler and backend services.

    Example::
        request = GenerationRequest(
            request_type="text_completions",
            arguments=GenerationRequestArguments(
                method="POST",
                body={"prompt": "Hello world", "max_tokens": 100}
            )
        )
    """

    # Auto-generated UUID4 string unless the caller supplies one.
    request_id: str = Field(
        default_factory=lambda: str(uuid.uuid4()),
        description="Unique identifier for the request.",
    )
    # Plain strings are accepted alongside the Literal alias so custom request
    # types remain usable.
    request_type: GenerativeRequestType | str = Field(
        description=(
            "Type of request. If url is not provided in arguments, "
            "this will be used to determine the request url."
        ),
    )
    arguments: GenerationRequestArguments = Field(
        description=(
            "Payload for the request, structured as a dictionary of arguments to pass "
            "to the respective backend method. For example, can contain "
            "'json', 'headers', 'files', etc."
        )
    )
    input_metrics: UsageMetrics = Field(
        default_factory=UsageMetrics,
        description="Input statistics including counts, sizes, and durations.",
    )
    output_metrics: UsageMetrics = Field(
        default_factory=UsageMetrics,
        description="Output statistics including counts, sizes, and durations.",
    )
@@ -0,0 +1,119 @@
1
+ """
2
+ Backend response models for request and response handling.
3
+
4
+ Provides standardized response models for generation operations that capture
5
+ output text, usage metrics, and compilation of request statistics. Ensures
6
+ consistent data handling and statistics aggregation across different backend
7
+ implementations.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ from pydantic import Field
13
+
14
+ from guidellm.schemas.info import RequestInfo
15
+ from guidellm.schemas.request import GenerationRequest, UsageMetrics
16
+ from guidellm.schemas.stats import GenerativeRequestStats
17
+ from guidellm.utils import StandardBaseModel
18
+
19
+ __all__ = ["GenerationResponse"]
20
+
21
+
22
class GenerationResponse(StandardBaseModel):
    """
    Response model for backend generation operations.

    Captures the output and metrics from a generation request, providing structured
    data for text output, token usage statistics, and compilation of detailed
    request statistics for analysis and monitoring purposes.

    Example:
    ::
        response = GenerationResponse(
            request_id="req-123",
            text="Generated response text",
            input_metrics=UsageMetrics(token_count=50),
            output_metrics=UsageMetrics(token_count=25)
        )
        stats = response.compile_stats(request, info)
    """

    request_id: str = Field(
        description="Unique identifier matching the original GenerationRequest."
    )
    request_args: str | None = Field(
        description="Arguments passed to the backend for request processing."
    )
    text: str | None = Field(
        default=None,
        description="The generated response text.",
    )
    input_metrics: UsageMetrics = Field(
        default_factory=UsageMetrics,
        description="Token usage statistics from the input prompt.",
    )
    output_metrics: UsageMetrics = Field(
        default_factory=UsageMetrics,
        description="Token usage statistics from the generated output.",
    )

    def compile_stats(
        self,
        request: GenerationRequest,
        info: RequestInfo,
        prefer_response: bool = True,
    ) -> GenerativeRequestStats:
        """
        Compile and return comprehensive request statistics.

        Merges the metric sets from the request and the response into a single
        statistical record. For each field, the preferred side's non-None value
        wins; None values fall back to the other side. By default the response
        side is preferred, since it reflects actual execution data.

        :param request: The original generation request containing input data
        :param info: Metadata and timing information for the request execution
        :param prefer_response: Whether to prefer response metrics over request
            metrics when both are available
        :return: A GenerativeRequestStats object containing detailed statistics
        :raises ValueError: When request IDs don't match between objects
        """
        if request.request_id != self.request_id:
            raise ValueError("Mismatched request IDs between request and response.")

        if info.request_id != self.request_id:
            raise ValueError("Mismatched request IDs between info and response.")

        if info.status != "completed":
            # The request never produced its planned output, so the
            # request-side output metrics are invalid; reset them.
            # NOTE(review): this intentionally mutates the caller's request.
            request.output_metrics = UsageMetrics()

        def overlay(preferred: UsageMetrics, fallback: UsageMetrics) -> UsageMetrics:
            # Start from the fallback values, then apply every non-None value
            # from the preferred metrics on top.
            merged = fallback.model_dump()
            merged.update(
                {key: val for key, val in preferred.model_dump().items()
                 if val is not None}
            )
            return UsageMetrics(**merged)

        if prefer_response:
            merged_input = overlay(self.input_metrics, request.input_metrics)
            merged_output = overlay(self.output_metrics, request.output_metrics)
        else:
            merged_input = overlay(request.input_metrics, self.input_metrics)
            merged_output = overlay(request.output_metrics, self.output_metrics)

        return GenerativeRequestStats(
            request_id=self.request_id,
            request_type=request.request_type,
            request_args=str(
                request.arguments.model_dump() if request.arguments else {}
            ),
            output=self.text,
            info=info,
            input_metrics=merged_input,
            output_metrics=merged_output,
        )