openlit 1.33.8__py3-none-any.whl → 1.33.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openlit/__helpers.py +88 -0
- openlit/__init__.py +4 -3
- openlit/instrumentation/ag2/ag2.py +5 -5
- openlit/instrumentation/ai21/__init__.py +4 -4
- openlit/instrumentation/ai21/ai21.py +370 -319
- openlit/instrumentation/ai21/async_ai21.py +371 -319
- openlit/instrumentation/anthropic/__init__.py +4 -4
- openlit/instrumentation/anthropic/anthropic.py +321 -189
- openlit/instrumentation/anthropic/async_anthropic.py +323 -190
- openlit/instrumentation/assemblyai/__init__.py +1 -1
- openlit/instrumentation/assemblyai/assemblyai.py +59 -43
- openlit/instrumentation/astra/astra.py +9 -9
- openlit/instrumentation/astra/async_astra.py +9 -9
- openlit/instrumentation/azure_ai_inference/__init__.py +4 -4
- openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py +406 -252
- openlit/instrumentation/azure_ai_inference/azure_ai_inference.py +406 -252
- openlit/instrumentation/bedrock/__init__.py +1 -1
- openlit/instrumentation/bedrock/bedrock.py +115 -58
- openlit/instrumentation/chroma/chroma.py +9 -9
- openlit/instrumentation/cohere/__init__.py +33 -10
- openlit/instrumentation/cohere/async_cohere.py +610 -0
- openlit/instrumentation/cohere/cohere.py +410 -219
- openlit/instrumentation/controlflow/controlflow.py +5 -5
- openlit/instrumentation/crawl4ai/async_crawl4ai.py +5 -5
- openlit/instrumentation/crawl4ai/crawl4ai.py +5 -5
- openlit/instrumentation/crewai/crewai.py +6 -4
- openlit/instrumentation/dynamiq/dynamiq.py +5 -5
- openlit/instrumentation/elevenlabs/async_elevenlabs.py +71 -46
- openlit/instrumentation/elevenlabs/elevenlabs.py +71 -51
- openlit/instrumentation/embedchain/embedchain.py +9 -9
- openlit/instrumentation/firecrawl/firecrawl.py +5 -5
- openlit/instrumentation/google_ai_studio/__init__.py +9 -9
- openlit/instrumentation/google_ai_studio/async_google_ai_studio.py +183 -219
- openlit/instrumentation/google_ai_studio/google_ai_studio.py +183 -220
- openlit/instrumentation/gpt4all/__init__.py +2 -2
- openlit/instrumentation/gpt4all/gpt4all.py +345 -220
- openlit/instrumentation/gpu/__init__.py +5 -5
- openlit/instrumentation/groq/__init__.py +2 -2
- openlit/instrumentation/groq/async_groq.py +356 -240
- openlit/instrumentation/groq/groq.py +356 -240
- openlit/instrumentation/haystack/haystack.py +5 -5
- openlit/instrumentation/julep/async_julep.py +5 -5
- openlit/instrumentation/julep/julep.py +5 -5
- openlit/instrumentation/langchain/__init__.py +13 -7
- openlit/instrumentation/langchain/async_langchain.py +384 -0
- openlit/instrumentation/langchain/langchain.py +105 -492
- openlit/instrumentation/letta/letta.py +11 -9
- openlit/instrumentation/litellm/__init__.py +4 -5
- openlit/instrumentation/litellm/async_litellm.py +318 -247
- openlit/instrumentation/litellm/litellm.py +314 -243
- openlit/instrumentation/llamaindex/llamaindex.py +5 -5
- openlit/instrumentation/mem0/mem0.py +5 -5
- openlit/instrumentation/milvus/milvus.py +9 -9
- openlit/instrumentation/mistral/__init__.py +6 -6
- openlit/instrumentation/mistral/async_mistral.py +423 -250
- openlit/instrumentation/mistral/mistral.py +420 -246
- openlit/instrumentation/multion/async_multion.py +6 -4
- openlit/instrumentation/multion/multion.py +6 -4
- openlit/instrumentation/ollama/__init__.py +8 -30
- openlit/instrumentation/ollama/async_ollama.py +385 -417
- openlit/instrumentation/ollama/ollama.py +384 -417
- openlit/instrumentation/openai/__init__.py +11 -230
- openlit/instrumentation/openai/async_openai.py +433 -410
- openlit/instrumentation/openai/openai.py +414 -394
- openlit/instrumentation/phidata/phidata.py +6 -4
- openlit/instrumentation/pinecone/pinecone.py +9 -9
- openlit/instrumentation/premai/__init__.py +2 -2
- openlit/instrumentation/premai/premai.py +262 -213
- openlit/instrumentation/qdrant/async_qdrant.py +9 -9
- openlit/instrumentation/qdrant/qdrant.py +9 -9
- openlit/instrumentation/reka/__init__.py +2 -2
- openlit/instrumentation/reka/async_reka.py +90 -52
- openlit/instrumentation/reka/reka.py +90 -52
- openlit/instrumentation/together/__init__.py +4 -4
- openlit/instrumentation/together/async_together.py +278 -236
- openlit/instrumentation/together/together.py +278 -236
- openlit/instrumentation/transformers/__init__.py +1 -1
- openlit/instrumentation/transformers/transformers.py +76 -45
- openlit/instrumentation/vertexai/__init__.py +14 -64
- openlit/instrumentation/vertexai/async_vertexai.py +330 -987
- openlit/instrumentation/vertexai/vertexai.py +330 -987
- openlit/instrumentation/vllm/__init__.py +1 -1
- openlit/instrumentation/vllm/vllm.py +66 -36
- openlit/otel/metrics.py +98 -7
- openlit/semcov/__init__.py +113 -80
- {openlit-1.33.8.dist-info → openlit-1.33.10.dist-info}/METADATA +1 -1
- openlit-1.33.10.dist-info/RECORD +122 -0
- {openlit-1.33.8.dist-info → openlit-1.33.10.dist-info}/WHEEL +1 -1
- openlit/instrumentation/openai/async_azure_openai.py +0 -900
- openlit/instrumentation/openai/azure_openai.py +0 -898
- openlit-1.33.8.dist-info/RECORD +0 -122
- {openlit-1.33.8.dist-info → openlit-1.33.10.dist-info}/LICENSE +0 -0
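A recurring change in the per-file diff below: the hand-built metric attribute dictionaries are replaced by a shared `create_metrics_attributes` helper, part of the 88 lines added to `openlit/__helpers.py`, which this view does not expand. A minimal sketch of the shape its call sites imply is given here; the function body and the string keys are assumptions based on the OpenTelemetry GenAI semantic conventions, not the actual implementation:

    from opentelemetry.sdk.resources import (
        SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
    )

    def create_metrics_attributes(service_name, deployment_environment, operation,
                                  system, request_model, server_address, server_port,
                                  response_model):
        # Hypothetical: one flat attribute dict shared by every metric instrument,
        # mirroring the keyword arguments used at the call sites in the diff below.
        return {
            TELEMETRY_SDK_NAME: "openlit",
            SERVICE_NAME: service_name,
            DEPLOYMENT_ENVIRONMENT: deployment_environment,
            "gen_ai.operation.name": operation,
            "gen_ai.system": system,
            "gen_ai.request.model": request_model,
            "server.address": server_address,
            "server.port": server_port,
            "gen_ai.response.model": response_model,
        }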
openlit/instrumentation/openai/openai.py
@@ -1,11 +1,11 @@
-# pylint: disable=duplicate-code, broad-exception-caught, too-many-statements, unused-argument, too-many-branches
 """
 Module for monitoring OpenAI API calls.
 """

 import logging
+import time
 from opentelemetry.trace import SpanKind, Status, StatusCode
-from opentelemetry.sdk.resources import TELEMETRY_SDK_NAME
+from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
 from openlit.__helpers import (
     get_chat_model_cost,
     get_embed_model_cost,
@@ -14,19 +14,22 @@ from openlit.__helpers import (
     openai_tokens,
     handle_exception,
     response_as_dict,
+    calculate_ttft,
+    calculate_tbt,
+    create_metrics_attributes,
+    set_server_address_and_port
 )
 from openlit.semcov import SemanticConvetion

 # Initialize logger for logging potential issues and operations
 logger = logging.getLogger(__name__)

-def chat_completions(gen_ai_endpoint, version, environment, application_name,
+def chat_completions(version, environment, application_name,
                      tracer, pricing_info, trace_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for chat completions to collect metrics.

     Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
         version: Version of the monitoring package.
         environment: Deployment environment (e.g., production, staging).
         application_name: Name of the application using the OpenAI API.
@@ -41,7 +44,7 @@ def chat_completions(gen_ai_endpoint, version, environment, application_name,
     class TracedSyncStream:
         """
         Wrapper for streaming responses to collect metrics and trace data.
-        Wraps the
+        Wraps the response to collect message IDs and aggregated response.

         This class implements the '__aiter__' and '__anext__' methods that
         handle asynchronous streaming responses.
@@ -54,6 +57,8 @@ def chat_completions(gen_ai_endpoint, version, environment, application_name,
                 wrapped,
                 span,
                 kwargs,
+                server_address,
+                server_port,
                 **args,
             ):
             self.__wrapped__ = wrapped
@@ -61,9 +66,20 @@ def chat_completions(gen_ai_endpoint, version, environment, application_name,
             # Placeholder for aggregating streaming response
             self._llmresponse = ""
             self._response_id = ""
+            self._response_model = ""
+            self._finish_reason = ""
+            self._openai_response_service_tier = ""
+            self._openai_system_fingerprint = ""

             self._args = args
             self._kwargs = kwargs
+            self._start_time = time.time()
+            self._end_time = None
+            self._timestamps = []
+            self._ttft = 0
+            self._tbt = 0
+            self._server_address = server_address
+            self._server_port = server_port

         def __enter__(self):
             self.__wrapped__.__enter__()
@@ -82,6 +98,14 @@ def chat_completions(gen_ai_endpoint, version, environment, application_name,
         def __next__(self):
             try:
                 chunk = self.__wrapped__.__next__()
+                end_time = time.time()
+                # Record the timestamp for the current chunk
+                self._timestamps.append(end_time)
+
+                if len(self._timestamps) == 1:
+                    # Calculate time to first chunk
+                    self._ttft = calculate_ttft(self._timestamps, self._start_time)
+
                 chunked = response_as_dict(chunk)
                 # Collect message IDs and aggregated response from events
                 if (len(chunked.get('choices')) > 0 and ('delta' in chunked.get('choices')[0] and
@@ -91,10 +115,18 @@ def chat_completions(gen_ai_endpoint, version, environment, application_name,
                     if content:
                         self._llmresponse += content
                 self._response_id = chunked.get('id')
+                self._response_model = chunked.get('model')
+                self._finish_reason = chunked.get('choices')[0].get('finish_reason')
+                self._openai_response_service_tier = chunked.get('service_tier')
+                self._openai_system_fingerprint = chunked.get('system_fingerprint')
                 return chunk
             except StopIteration:
                 # Handling exception ensure observability without disrupting operation
                 try:
+                    self._end_time = time.time()
+                    if len(self._timestamps) > 1:
+                        self._tbt = calculate_tbt(self._timestamps)
+
                     # Format 'messages' into a single string
                     message_prompt = self._kwargs.get("messages", "")
                     formatted_messages = []
@@ -109,7 +141,6 @@ def chat_completions(gen_ai_endpoint, version, environment, application_name,
                                     content_str_list.append(f'text: {item["text"]}')
                                 elif (item["type"] == "image_url" and
                                       not item["image_url"]["url"].startswith("data:")):
-                                    # pylint: disable=line-too-long
                                     content_str_list.append(f'image_url: {item["image_url"]["url"]}')
                             content_str = ", ".join(content_str_list)
                             formatted_messages.append(f"{role}: {content_str}")
@@ -117,57 +148,87 @@ def chat_completions(gen_ai_endpoint, version, environment, application_name,
                             formatted_messages.append(f"{role}: {content}")
                     prompt = "\n".join(formatted_messages)

+                    request_model = self._kwargs.get("model", "gpt-4o")
+
                     # Calculate tokens using input prompt and aggregated response
-                    prompt_tokens = openai_tokens(prompt,
-                                                  self._kwargs.get("model", "gpt-3.5-turbo"))
-                    completion_tokens = openai_tokens(self._llmresponse,
-                                                      self._kwargs.get("model", "gpt-3.5-turbo"))
+                    input_tokens = openai_tokens(prompt,
+                                                 request_model)
+                    output_tokens = openai_tokens(self._llmresponse,
+                                                  request_model)

                     # Calculate cost of the operation
-                    cost = get_chat_model_cost(self._kwargs.get("model", "gpt-3.5-turbo"),
-                                               pricing_info, prompt_tokens,
-                                               completion_tokens)
+                    cost = get_chat_model_cost(request_model,
+                                               pricing_info, input_tokens,
+                                               output_tokens)

-                    # Set Span attributes
+                    # Set Span attributes (OTel Semconv)
                     self._span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+                                             SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
                     self._span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
                                              SemanticConvetion.GEN_AI_SYSTEM_OPENAI)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                                             SemanticConvetion.GEN_AI_TYPE_CHAT)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                                             gen_ai_endpoint)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                             request_model)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
+                                             self._kwargs.get("seed", ""))
+                    self._span.set_attribute(SemanticConvetion.SERVER_PORT,
+                                             self._server_port)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+                                             self._kwargs.get("frequency_penalty", 0.0))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
+                                             self._kwargs.get("max_tokens", -1))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
+                                             self._kwargs.get("presence_penalty", 0.0))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES,
+                                             self._kwargs.get("stop", []))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
+                                             self._kwargs.get("temperature", 1.0))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
+                                             self._kwargs.get("top_p", 1.0))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
+                                             [self._finish_reason])
                     self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
                                              self._response_id)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                                             self._response_model)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+                                             input_tokens)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
+                                             output_tokens)
+                    self._span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                                             self._server_address)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SERVICE_TIER,
+                                             self._kwargs.get("service_tier", "auto"))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_SERVICE_TIER,
+                                             self._openai_response_service_tier)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_SYSTEM_FINGERPRINT,
+                                             self._openai_system_fingerprint)
+                    if isinstance(self._llmresponse, str):
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                                 "text")
+                    else:
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                                 "json")
+
+                    # Set Span attributes (Extra)
+                    self._span.set_attribute(DEPLOYMENT_ENVIRONMENT,
                                              environment)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                    self._span.set_attribute(SERVICE_NAME,
                                              application_name)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                             self._kwargs.get("model", "gpt-3.5-turbo"))
                     self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
                                              self._kwargs.get("user", ""))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
-                                             self._kwargs.get("top_p", 1.0))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
-                                             self._kwargs.get("max_tokens", -1))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
-                                             self._kwargs.get("temperature", 1.0))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
-                                             self._kwargs.get("presence_penalty", 0.0))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
-                                             self._kwargs.get("frequency_penalty", 0.0))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
-                                             self._kwargs.get("seed", ""))
                     self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
                                              True)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                                             prompt_tokens)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
-                                             completion_tokens)
                     self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                             prompt_tokens + completion_tokens)
+                                             input_tokens + output_tokens)
                     self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                              cost)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TBT,
+                                             self._tbt)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
+                                             self._ttft)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                                             version)
                     if trace_content:
                         self._span.add_event(
                             name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
@@ -181,31 +242,35 @@ def chat_completions(gen_ai_endpoint, version, environment, application_name,
                                 SemanticConvetion.GEN_AI_CONTENT_COMPLETION: self._llmresponse,
                             },
                         )
-
                     self._span.set_status(Status(StatusCode.OK))

                     if disable_metrics is False:
-                        attributes = {
-                            TELEMETRY_SDK_NAME:
-                                "openlit",
-                            SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                                application_name,
-                            SemanticConvetion.GEN_AI_SYSTEM:
-                                SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
-                            SemanticConvetion.GEN_AI_ENVIRONMENT:
-                                environment,
-                            SemanticConvetion.GEN_AI_TYPE:
-                                SemanticConvetion.GEN_AI_TYPE_CHAT,
-                            SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                                self._kwargs.get("model", "gpt-3.5-turbo")
-                        }
+                        attributes = create_metrics_attributes(
+                            service_name=application_name,
+                            deployment_environment=environment,
+                            operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
+                            system=SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
+                            request_model=request_model,
+                            server_address=self._server_address,
+                            server_port=self._server_port,
+                            response_model=self._response_model,
+                        )

-                        metrics["genai_requests"].add(1, attributes)
-                        metrics["genai_total_tokens"].add(
-                            prompt_tokens + completion_tokens, attributes
+                        metrics["genai_client_usage_tokens"].record(
+                            input_tokens + output_tokens, attributes
+                        )
+                        metrics["genai_client_operation_duration"].record(
+                            self._end_time - self._start_time, attributes
+                        )
+                        metrics["genai_server_tbt"].record(
+                            self._tbt, attributes
                         )
-                        metrics["genai_completion_tokens"].add(completion_tokens, attributes)
-                        metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
+                        metrics["genai_server_ttft"].record(
+                            self._ttft, attributes
+                        )
+                        metrics["genai_requests"].add(1, attributes)
+                        metrics["genai_completion_tokens"].add(output_tokens, attributes)
+                        metrics["genai_prompt_tokens"].add(input_tokens, attributes)
                         metrics["genai_cost"].record(cost, attributes)

                 except Exception as e:
@@ -234,20 +299,25 @@ def chat_completions(gen_ai_endpoint, version, environment, application_name,

         # Check if streaming is enabled for the API call
         streaming = kwargs.get("stream", False)
+        server_address, server_port = set_server_address_and_port(instance, "api.openai.com", 443)
+        request_model = kwargs.get("model", "gpt-4o")
+
+        span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"

         # pylint: disable=no-else-return
         if streaming:
             # Special handling for streaming response to accommodate the nature of data flow
             awaited_wrapped = wrapped(*args, **kwargs)
-            span = tracer.start_span(gen_ai_endpoint, kind=SpanKind.CLIENT)
+            span = tracer.start_span(span_name, kind=SpanKind.CLIENT)

-            return TracedSyncStream(awaited_wrapped, span, kwargs)
+            return TracedSyncStream(awaited_wrapped, span, kwargs, server_address, server_port)

         # Handling for non-streaming responses
         else:
-            with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
-
+            with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+                start_time = time.time()
                 response = wrapped(*args, **kwargs)
+                end_time = time.time()

                 response_dict = response_as_dict(response)

@@ -261,7 +331,6 @@ def chat_completions(gen_ai_endpoint, version, environment, application_name,

                     if isinstance(content, list):
                         content_str = ", ".join(
-                            # pylint: disable=line-too-long
                             f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
                             if "type" in item else f'text: {item["text"]}'
                             for item in content
@@ -271,38 +340,72 @@ def chat_completions(gen_ai_endpoint, version, environment, application_name,
                             formatted_messages.append(f"{role}: {content}")
                     prompt = "\n".join(formatted_messages)

-
+                    input_tokens = response_dict.get('usage').get('prompt_tokens')
+                    output_tokens = response_dict.get('usage').get('completion_tokens')
+
+                    # Calculate cost of the operation
+                    cost = get_chat_model_cost(request_model,
+                                               pricing_info, input_tokens,
+                                               output_tokens)
+
+                    # Set base span attribues (OTel Semconv)
                     span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                    span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+                                       SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
                     span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
                                        SemanticConvetion.GEN_AI_SYSTEM_OPENAI)
-                    span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                                       SemanticConvetion.GEN_AI_TYPE_CHAT)
-                    span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                                       gen_ai_endpoint)
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                       request_model)
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
+                                       kwargs.get("seed", ""))
+                    span.set_attribute(SemanticConvetion.SERVER_PORT,
+                                       server_port)
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+                                       kwargs.get("frequency_penalty", 0.0))
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
+                                       kwargs.get("max_tokens", -1))
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
+                                       kwargs.get("presence_penalty", 0.0))
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES,
+                                       kwargs.get("stop", []))
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
+                                       kwargs.get("temperature", 1.0))
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
+                                       kwargs.get("top_p", 1.0))
                     span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
                                        response_dict.get("id"))
-                    span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                                       response_dict.get('model'))
+                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+                                       input_tokens)
+                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
+                                       output_tokens)
+                    span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                                       server_address)
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SERVICE_TIER,
+                                       kwargs.get("service_tier", "auto"))
+                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_SERVICE_TIER,
+                                       response_dict.get('service_tier'))
+                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_SYSTEM_FINGERPRINT,
+                                       response_dict.get('system_fingerprint'))
+
+                    # Set base span attribues (Extras)
+                    span.set_attribute(DEPLOYMENT_ENVIRONMENT,
                                        environment)
-                    span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                    span.set_attribute(SERVICE_NAME,
                                        application_name)
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                       kwargs.get("model", "gpt-3.5-turbo"))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
-                                       kwargs.get("top_p", 1.0))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
-                                       kwargs.get("max_tokens", -1))
                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
                                        kwargs.get("user", ""))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
-                                       kwargs.get("temperature", 1.0))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
-                                       kwargs.get("presence_penalty", 0.0))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
-                                       kwargs.get("frequency_penalty", 0.0))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
-                                       kwargs.get("seed", ""))
                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
                                        False)
+                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+                                       input_tokens + output_tokens)
+                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                                       cost)
+                    span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
+                                       end_time - start_time)
+                    span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                                       version)
                     if trace_content:
                         span.add_event(
                             name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
@@ -311,93 +414,54 @@ def chat_completions(gen_ai_endpoint, version, environment, application_name,
                             },
                         )

-
-                    if "tools" not in kwargs:
-                        # Calculate cost of the operation
-                        cost = get_chat_model_cost(kwargs.get("model", "gpt-3.5-turbo"),
-                                                   pricing_info, response_dict.get('usage', {}).get('prompt_tokens', None),
-                                                   response_dict.get('usage', {}).get('completion_tokens', None))
-
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                                           response_dict.get('usage', {}).get('prompt_tokens', None))
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
-                                           response_dict.get('usage', {}).get('completion_tokens', None))
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                           response_dict.get('usage', {}).get('total_tokens', None))
+                    for i in range(kwargs.get('n',1)):
                         span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
-                                           [response_dict.get('choices')[0].get('finish_reason')])
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                                           cost)
-
-                        # Set span attributes for when n = 1 (default)
-                        if "n" not in kwargs or kwargs["n"] == 1:
-                            if trace_content:
-                                span.add_event(
-                                    name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
-                                    attributes={
-                                        SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response_dict.get('choices')[0].get("message").get("content"),
-                                    },
-                                )
-
-                        # Set span attributes for when n > 0
-                        else:
-                            i = 0
-                            while i < kwargs["n"] and trace_content is True:
-                                attribute_name = f"gen_ai.content.completion.{i}"
-                                span.add_event(
-                                    name=attribute_name,
-                                    attributes={
-                                        SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response_dict.get('choices')[i].get("message").get("content"),
-                                    },
-                                )
-                                i += 1
-
-                    # Return original response
-                    return response
-
-                # Set span attributes when tools is passed to the function call
-                elif "tools" in kwargs:
-                    # Calculate cost of the operation
-                    cost = get_chat_model_cost(kwargs.get("model", "gpt-3.5-turbo"),
-                                               pricing_info, response_dict.get('usage').get('prompt_tokens'),
-                                               response_dict.get('usage').get('completion_tokens'))
-                    span.add_event(
-                        name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
-                        attributes={
-                            SemanticConvetion.GEN_AI_CONTENT_COMPLETION: "Function called with tools",
-                        },
-                    )
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                                       response_dict.get('usage').get('prompt_tokens'))
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
-                                       response_dict.get('usage').get('completion_tokens'))
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                       response_dict.get('usage').get('total_tokens'))
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                                       cost)
+                                           [response_dict.get('choices')[i].get('finish_reason')])
+                        if trace_content:
+                            span.add_event(
+                                name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                                attributes={
+                                    # pylint: disable=line-too-long
+                                    SemanticConvetion.GEN_AI_CONTENT_COMPLETION: str(response_dict.get('choices')[i].get('message').get('content')),
+                                },
+                            )
+                        if kwargs.get('tools'):
+                            span.set_attribute(SemanticConvetion.GEN_AI_TOOL_CALLS,
+                                               str(response_dict.get('choices')[i].get('message').get('tool_calls')))
+
+                        if isinstance(response_dict.get('choices')[i].get('message').get('content'), str):
+                            span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                               "text")
+                        elif response_dict.get('choices')[i].get('message').get('content') is not None:
+                            span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                               "json")

                     span.set_status(Status(StatusCode.OK))

                     if disable_metrics is False:
-                        attributes = {
-                            TELEMETRY_SDK_NAME:
-                                "openlit",
-                            SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                                application_name,
-                            SemanticConvetion.GEN_AI_SYSTEM:
-                                SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
-                            SemanticConvetion.GEN_AI_ENVIRONMENT:
-                                environment,
-                            SemanticConvetion.GEN_AI_TYPE:
-                                SemanticConvetion.GEN_AI_TYPE_CHAT,
-                            SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                                kwargs.get("model", "gpt-3.5-turbo")
-                        }
+                        attributes = create_metrics_attributes(
+                            service_name=application_name,
+                            deployment_environment=environment,
+                            operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
+                            system=SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
+                            request_model=request_model,
+                            server_address=server_address,
+                            server_port=server_port,
+                            response_model=response_dict.get('model'),
+                        )

+                        metrics["genai_client_usage_tokens"].record(
+                            input_tokens + output_tokens, attributes
+                        )
+                        metrics["genai_client_operation_duration"].record(
+                            end_time - start_time, attributes
+                        )
+                        metrics["genai_server_ttft"].record(
+                            end_time - start_time, attributes
+                        )
                         metrics["genai_requests"].add(1, attributes)
-                        metrics["genai_total_tokens"].add(response_dict.get('usage').get('total_tokens'), attributes)
-                        metrics["genai_completion_tokens"].add(response_dict.get('usage').get('completion_tokens'), attributes)
-                        metrics["genai_prompt_tokens"].add(response_dict.get('usage').get('prompt_tokens'), attributes)
+                        metrics["genai_completion_tokens"].add(output_tokens, attributes)
+                        metrics["genai_prompt_tokens"].add(input_tokens, attributes)
                         metrics["genai_cost"].record(cost, attributes)

                     # Return original response
@@ -412,13 +476,12 @@ def chat_completions(gen_ai_endpoint, version, environment, application_name,

     return wrapper

-def embedding(gen_ai_endpoint, version, environment, application_name,
+def embedding(version, environment, application_name,
               tracer, pricing_info, trace_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for embeddings to collect metrics.

     Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
         version: Version of the monitoring package.
         environment: Deployment environment (e.g., production, staging).
         application_name: Name of the application using the OpenAI API.
@@ -447,40 +510,56 @@ def embedding(gen_ai_endpoint, version, environment, application_name,
         The response from the original 'embeddings' method.
         """

-        with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
+        server_address, server_port = set_server_address_and_port(instance, "api.openai.com", 443)
+        request_model = kwargs.get("model", "text-embedding-ada-002")
+
+        span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING} {request_model}"
+
+        with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+            start_time = time.time()
            response = wrapped(*args, **kwargs)
+            end_time = time.time()
+
            response_dict = response_as_dict(response)
            try:
+                input_tokens = response_dict.get('usage').get('prompt_tokens')
+
                # Calculate cost of the operation
-                cost = get_embed_model_cost(kwargs.get("model", "text-embedding-ada-002"),
-                                            pricing_info, response_dict.get('usage').get('prompt_tokens'))
+                cost = get_embed_model_cost(request_model,
+                                            pricing_info, input_tokens)

-                # Set Span attributes
+                # Set Span attributes (OTel Semconv)
                span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+                                   SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING)
                span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
                                   SemanticConvetion.GEN_AI_SYSTEM_OPENAI)
-                span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                                   SemanticConvetion.GEN_AI_TYPE_EMBEDDING)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                                   gen_ai_endpoint)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                   request_model)
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_ENCODING_FORMATS,
+                                   [kwargs.get('encoding_format', 'float')])
+                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                                   request_model)
+                span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                                   server_address)
+                span.set_attribute(SemanticConvetion.SERVER_PORT,
+                                   server_port)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+                                   input_tokens)
+
+                # Set Span attributes (Extras)
+                span.set_attribute(DEPLOYMENT_ENVIRONMENT,
                                   environment)
-                span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                span.set_attribute(SERVICE_NAME,
                                   application_name)
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                   kwargs.get("model", "text-embedding-ada-002"))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_EMBEDDING_FORMAT,
-                                   kwargs.get("encoding_format", "float"))
-                # span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_EMBEDDING_DIMENSION,
-                #                    kwargs.get("dimensions", "null"))
                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
                                   kwargs.get("user", ""))
-                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                                   response_dict.get('usage').get('prompt_tokens'))
                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                   response_dict.get('usage').get('total_tokens'))
+                                   input_tokens)
                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                   cost)
+                span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                                   version)

                if trace_content:
                    span.add_event(
@@ -493,26 +572,24 @@ def embedding(gen_ai_endpoint, version, environment, application_name,
                span.set_status(Status(StatusCode.OK))

                if disable_metrics is False:
-                    attributes = {
-                        TELEMETRY_SDK_NAME:
-                            "openlit",
-                        SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                            application_name,
-                        SemanticConvetion.GEN_AI_SYSTEM:
-                            SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
-                        SemanticConvetion.GEN_AI_ENVIRONMENT:
-                            environment,
-                        SemanticConvetion.GEN_AI_TYPE:
-                            SemanticConvetion.GEN_AI_TYPE_EMBEDDING,
-                        SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                            kwargs.get("model", "text-embedding-ada-002")
-                    }
-
+                    attributes = create_metrics_attributes(
+                        service_name=application_name,
+                        deployment_environment=environment,
+                        operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING,
+                        system=SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
+                        request_model=request_model,
+                        server_address=server_address,
+                        server_port=server_port,
+                        response_model=request_model,
+                    )
+                    metrics["genai_client_usage_tokens"].record(
+                        input_tokens, attributes
+                    )
+                    metrics["genai_client_operation_duration"].record(
+                        end_time - start_time, attributes
+                    )
                    metrics["genai_requests"].add(1, attributes)
-                    metrics["genai_total_tokens"].add(
-                        response_dict.get('usage').get('total_tokens'), attributes)
-                    metrics["genai_prompt_tokens"].add(
-                        response_dict.get('usage').get('prompt_tokens'), attributes)
+                    metrics["genai_prompt_tokens"].add(input_tokens, attributes)
                    metrics["genai_cost"].record(cost, attributes)

                # Return original response
@@ -527,118 +604,12 @@ def embedding(gen_ai_endpoint, version, environment, application_name,

     return wrapper

-def finetune(gen_ai_endpoint, version, environment, application_name,
-             tracer, pricing_info, trace_content, metrics, disable_metrics):
-    """
-    Generates a telemetry wrapper for fine-tuning jobs to collect metrics.
-
-    Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
-        version: Version of the monitoring package.
-        environment: Deployment environment (e.g., production, staging).
-        application_name: Name of the application using the OpenAI API.
-        tracer: OpenTelemetry tracer for creating spans.
-        pricing_info: Information used for calculating the cost of OpenAI usage.
-        trace_content: Flag indicating whether to trace the actual content.
-
-    Returns:
-        A function that wraps the fine tuning creation method to add telemetry.
-    """
-
-    def wrapper(wrapped, instance, args, kwargs):
-        """
-        Wraps the 'fine_tuning.jobs.create' API call to add telemetry.
-
-        This collects metrics such as execution time, usage stats, and handles errors
-        gracefully, adding details to the trace for observability.
-
-        Args:
-            wrapped: The original 'fine_tuning.jobs.create' method to be wrapped.
-            instance: The instance of the class where the original method is defined.
-            args: Positional arguments for the method.
-            kwargs: Keyword arguments for the method.
-
-        Returns:
-            The response from the original 'fine_tuning.jobs.create' method.
-        """
-
-        with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
-            response = wrapped(*args, **kwargs)
-
-            # Handling exception ensure observability without disrupting operation
-            try:
-                # Set Span attributes
-                span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                                   SemanticConvetion.GEN_AI_SYSTEM_OPENAI)
-                span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                                   SemanticConvetion.GEN_AI_TYPE_FINETUNING)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                                   gen_ai_endpoint)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
-                                   environment)
-                span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
-                                   application_name)
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                   kwargs.get("model", "gpt-3.5-turbo"))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TRAINING_FILE,
-                                   kwargs.get("training_file", ""))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_VALIDATION_FILE,
-                                   kwargs.get("validation_file", ""))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FINETUNE_BATCH_SIZE,
-                                   kwargs.get("hyperparameters.batch_size", "auto"))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FINETUNE_MODEL_LRM,
-                                   kwargs.get("hyperparameters.learning_rate_multiplier",
-                                              "auto"))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FINETUNE_MODEL_EPOCHS,
-                                   kwargs.get("hyperparameters.n_epochs", "auto"))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FINETUNE_MODEL_SUFFIX,
-                                   kwargs.get("suffix", ""))
-                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
-                                   response.id)
-                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                                   response.usage.prompt_tokens)
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FINETUNE_STATUS,
-                                   response.status)
-                span.set_status(Status(StatusCode.OK))
-
-                if disable_metrics is False:
-                    attributes = {
-                        TELEMETRY_SDK_NAME:
-                            "openlit",
-                        SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                            application_name,
-                        SemanticConvetion.GEN_AI_SYSTEM:
-                            SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
-                        SemanticConvetion.GEN_AI_ENVIRONMENT:
-                            environment,
-                        SemanticConvetion.GEN_AI_TYPE:
-                            SemanticConvetion.GEN_AI_TYPE_FINETUNING,
-                        SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                            kwargs.get("model", "gpt-3.5-turbo")
-                    }
-
-                    metrics["genai_requests"].add(1, attributes)
-
-                # Return original response
-                return response
-
-            except Exception as e:
-                handle_exception(span, e)
-                logger.error("Error in trace creation: %s", e)
-
-                # Return original response
-                return response
-
-    return wrapper
-
-def image_generate(gen_ai_endpoint, version, environment, application_name,
+def image_generate(version, environment, application_name,
                    tracer, pricing_info, trace_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for image generation to collect metrics.

     Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
         version: Version of the monitoring package.
         environment: Deployment environment (e.g., production, staging).
         application_name: Name of the application using the OpenAI API.
@@ -667,8 +638,16 @@ def image_generate(gen_ai_endpoint, version, environment, application_name,
        The response from the original 'images.generate' method.
        """

-        with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
+        server_address, server_port = set_server_address_and_port(instance, "api.openai.com", 443)
+        request_model = kwargs.get("model", "dall-e-2")
+
+        span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_IMAGE} {request_model}"
+
+        with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+            start_time = time.time()
            response = wrapped(*args, **kwargs)
+            end_time = time.time()
+
            images_count = 0

            try:
@@ -679,27 +658,35 @@ def image_generate(gen_ai_endpoint, version, environment, application_name,
                    image = "url"

                # Calculate cost of the operation
-                cost = get_image_model_cost(kwargs.get("model", "dall-e-2"),
+                cost = get_image_model_cost(request_model,
                                            pricing_info, kwargs.get("size", "1024x1024"),
                                            kwargs.get("quality", "standard"))

                for items in response.data:
-                    # Set Span attributes
+                    # Set Span attributes (OTel Semconv)
                    span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                    span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+                                       SemanticConvetion.GEN_AI_OPERATION_TYPE_IMAGE)
                    span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
                                       SemanticConvetion.GEN_AI_SYSTEM_OPENAI)
-                    span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                                       SemanticConvetion.GEN_AI_TYPE_IMAGE)
-                    span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                                       gen_ai_endpoint)
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                       request_model)
+                    span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                                       server_address)
+                    span.set_attribute(SemanticConvetion.SERVER_PORT,
+                                       server_port)
                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
                                       response.created)
-                    span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                                       request_model)
+                    span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                       "image")
+
+                    # Set Span attributes (Extras)
+                    span.set_attribute(DEPLOYMENT_ENVIRONMENT,
                                       environment)
-                    span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                    span.set_attribute(SERVICE_NAME,
                                       application_name)
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                       kwargs.get("model", "dall-e-2"))
                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IMAGE_SIZE,
                                       kwargs.get("size", "1024x1024"))
                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IMAGE_QUALITY,
@@ -710,6 +697,9 @@ def image_generate(gen_ai_endpoint, version, environment, application_name,
                                       items.revised_prompt if items.revised_prompt else "")
                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
                                       kwargs.get("user", ""))
+                    span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                                       version)
+
                    if trace_content:
                        span.add_event(
                            name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
@@ -717,7 +707,7 @@ def image_generate(gen_ai_endpoint, version, environment, application_name,
                                SemanticConvetion.GEN_AI_CONTENT_PROMPT: kwargs.get("prompt", ""),
                            },
                        )
-                        attribute_name = f"gen_ai.response.image.{images_count}"
+                        attribute_name = f"{SemanticConvetion.GEN_AI_RESPONSE_IMAGE}.{images_count}"
                        span.add_event(
                            name=attribute_name,
                            attributes={
@@ -732,21 +722,20 @@ def image_generate(gen_ai_endpoint, version, environment, application_name,
                span.set_status(Status(StatusCode.OK))

                if disable_metrics is False:
-                    attributes = {
-                        TELEMETRY_SDK_NAME:
-                            "openlit",
-                        SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                            application_name,
-                        SemanticConvetion.GEN_AI_SYSTEM:
-                            SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
-                        SemanticConvetion.GEN_AI_ENVIRONMENT:
-                            environment,
-                        SemanticConvetion.GEN_AI_TYPE:
-                            SemanticConvetion.GEN_AI_TYPE_IMAGE,
-                        SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                            kwargs.get("model", "dall-e-2")
-                    }
+                    attributes = create_metrics_attributes(
+                        service_name=application_name,
+                        deployment_environment=environment,
+                        operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_IMAGE,
+                        system=SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
+                        request_model=request_model,
+                        server_address=server_address,
+                        server_port=server_port,
+                        response_model=request_model,
+                    )

+                    metrics["genai_client_operation_duration"].record(
+                        end_time - start_time, attributes
+                    )
                    metrics["genai_requests"].add(1, attributes)
                    metrics["genai_cost"].record(cost, attributes)

@@ -762,13 +751,12 @@ def image_generate(gen_ai_endpoint, version, environment, application_name,

     return wrapper

-def image_variatons(gen_ai_endpoint, version, environment, application_name,
+def image_variatons(version, environment, application_name,
                     tracer, pricing_info, trace_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for creating image variations to collect metrics.

     Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
         version: Version of the monitoring package.
         environment: Deployment environment (e.g., production, staging).
         application_name: Name of the application using the OpenAI API.
@@ -797,8 +785,16 @@ def image_variatons(gen_ai_endpoint, version, environment, application_name,
        The response from the original 'images.create.variations' method.
        """

-        with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
+        server_address, server_port = set_server_address_and_port(instance, "api.openai.com", 443)
+        request_model = kwargs.get("model", "dall-e-2")
+
+        span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_IMAGE} {request_model}"
+
+        with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+            start_time = time.time()
            response = wrapped(*args, **kwargs)
+            end_time = time.time()
+
            images_count = 0

            try:
@@ -809,34 +805,45 @@ def image_variatons(gen_ai_endpoint, version, environment, application_name,
                    image = "url"

                # Calculate cost of the operation
-                cost = get_image_model_cost(kwargs.get("model", "dall-e-2"), pricing_info,
+                cost = get_image_model_cost(request_model, pricing_info,
                                            kwargs.get("size", "1024x1024"), "standard")

                for items in response.data:
-                    # Set Span attributes
+                    # Set Span attributes (OTel Semconv)
                    span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                    span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+                                       SemanticConvetion.GEN_AI_OPERATION_TYPE_IMAGE)
                    span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
                                       SemanticConvetion.GEN_AI_SYSTEM_OPENAI)
-                    span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                                       SemanticConvetion.GEN_AI_TYPE_IMAGE)
-                    span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                                       gen_ai_endpoint)
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                       request_model)
+                    span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                                       server_address)
+                    span.set_attribute(SemanticConvetion.SERVER_PORT,
+                                       server_port)
                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
                                       response.created)
-                    span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                                       request_model)
+                    span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                       "image")
+
+                    # Set Span attributes (Extras)
+                    span.set_attribute(DEPLOYMENT_ENVIRONMENT,
                                       environment)
-                    span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                    span.set_attribute(SERVICE_NAME,
                                       application_name)
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                       kwargs.get("model", "dall-e-2"))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
-                                       kwargs.get("user", ""))
                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IMAGE_SIZE,
                                       kwargs.get("size", "1024x1024"))
                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IMAGE_QUALITY,
                                       "standard")
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
+                                       kwargs.get("user", ""))
+                    span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                                       version)
+
                    if trace_content:
-                        attribute_name = f"gen_ai.response.image.{images_count}"
+                        attribute_name = f"{SemanticConvetion.GEN_AI_RESPONSE_IMAGE}.{images_count}"
                        span.add_event(
                            name=attribute_name,
                            attributes={
@@ -851,21 +858,20 @@ def image_variatons(gen_ai_endpoint, version, environment, application_name,
                span.set_status(Status(StatusCode.OK))

                if disable_metrics is False:
-                    attributes = {
-                        TELEMETRY_SDK_NAME:
-                            "openlit",
-                        SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                            application_name,
-                        SemanticConvetion.GEN_AI_SYSTEM:
-                            SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
-                        SemanticConvetion.GEN_AI_ENVIRONMENT:
-                            environment,
-                        SemanticConvetion.GEN_AI_TYPE:
-                            SemanticConvetion.GEN_AI_TYPE_IMAGE,
-                        SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                            kwargs.get("model", "dall-e-2")
-                    }
+                    attributes = create_metrics_attributes(
+                        service_name=application_name,
+                        deployment_environment=environment,
+                        operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_IMAGE,
+                        system=SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
+                        request_model=request_model,
+                        server_address=server_address,
+                        server_port=server_port,
+                        response_model=request_model,
+                    )

+                    metrics["genai_client_operation_duration"].record(
+                        end_time - start_time, attributes
+                    )
                    metrics["genai_requests"].add(1, attributes)
                    metrics["genai_cost"].record(cost, attributes)

@@ -881,13 +887,12 @@ def image_variatons(gen_ai_endpoint, version, environment, application_name,

     return wrapper

-def audio_create(gen_ai_endpoint, version, environment, application_name,
+def audio_create(version, environment, application_name,
                  tracer, pricing_info, trace_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for creating speech audio to collect metrics.

     Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
         version: Version of the monitoring package.
         environment: Deployment environment (e.g., production, staging).
         application_name: Name of the application using the OpenAI API.
@@ -916,28 +921,42 @@ def audio_create(gen_ai_endpoint, version, environment, application_name,
        The response from the original 'audio.speech.create' method.
        """

-        with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
+        server_address, server_port = set_server_address_and_port(instance, "api.openai.com", 443)
+        request_model = kwargs.get("model", "tts-1")
+
+        span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_AUDIO} {request_model}"
+
+        with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
+            start_time = time.time()
            response = wrapped(*args, **kwargs)
+            end_time = time.time()

            try:
                # Calculate cost of the operation
-                cost = get_audio_model_cost(kwargs.get("model", "tts-1"),
+                cost = get_audio_model_cost(request_model,
                                            pricing_info, kwargs.get("input", ""))

                # Set Span attributes
                span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+                                   SemanticConvetion.GEN_AI_OPERATION_TYPE_AUDIO)
                span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
                                   SemanticConvetion.GEN_AI_SYSTEM_OPENAI)
-                span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                                   SemanticConvetion.GEN_AI_TYPE_AUDIO)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                                   gen_ai_endpoint)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                   request_model)
+                span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                                   server_address)
+                span.set_attribute(SemanticConvetion.SERVER_PORT,
+                                   server_port)
+                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                                   request_model)
+                span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                   "speech")
+
+                span.set_attribute(DEPLOYMENT_ENVIRONMENT,
                                   environment)
-                span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                span.set_attribute(SERVICE_NAME,
                                   application_name)
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                   kwargs.get("model", "tts-1"))
                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_AUDIO_VOICE,
                                   kwargs.get("voice", "alloy"))
                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_AUDIO_RESPONSE_FORMAT,
@@ -946,6 +965,8 @@ def audio_create(gen_ai_endpoint, version, environment, application_name,
                                   kwargs.get("speed", 1))
                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                   cost)
+                span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                                   version)
                if trace_content:
                    span.add_event(
                        name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
@@ -957,21 +978,20 @@ def audio_create(gen_ai_endpoint, version, environment, application_name,
                span.set_status(Status(StatusCode.OK))

                if disable_metrics is False:
-                    attributes = {
-                        TELEMETRY_SDK_NAME:
-                            "openlit",
-                        SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                            application_name,
-                        SemanticConvetion.GEN_AI_SYSTEM:
-                            SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
-                        SemanticConvetion.GEN_AI_ENVIRONMENT:
-                            environment,
-                        SemanticConvetion.GEN_AI_TYPE:
-                            SemanticConvetion.GEN_AI_TYPE_AUDIO,
-                        SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                            kwargs.get("model", "tts-1")
-                    }
+                    attributes = create_metrics_attributes(
+                        service_name=application_name,
+                        deployment_environment=environment,
+                        operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_AUDIO,
+                        system=SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
+                        request_model=request_model,
+                        server_address=server_address,
+                        server_port=server_port,
+                        response_model=request_model,
+                    )

+                    metrics["genai_client_operation_duration"].record(
+                        end_time - start_time, attributes
+                    )
                    metrics["genai_requests"].add(1, attributes)
                    metrics["genai_cost"].record(cost, attributes)

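The streaming wrapper above timestamps every chunk and reports time-to-first-token and time-between-tokens through `calculate_ttft` and `calculate_tbt`, two more of the helpers added to `openlit/__helpers.py`. Their bodies are not expanded in this view; the following is a minimal sketch of what their call sites imply, not the actual implementation:

    def calculate_ttft(timestamps, start_time):
        # Time to first token: delay from request start to the first chunk arrival.
        # Assumed semantics; called as calculate_ttft(self._timestamps, self._start_time).
        if timestamps:
            return timestamps[0] - start_time
        return 0

    def calculate_tbt(timestamps):
        # Time between tokens: average gap between consecutive chunk arrivals.
        # Assumed semantics; called as calculate_tbt(self._timestamps) when more
        # than one chunk was received.
        if len(timestamps) > 1:
            gaps = [t2 - t1 for t1, t2 in zip(timestamps, timestamps[1:])]
            return sum(gaps) / len(gaps)
        return 0

Under these assumptions, a stream whose chunks arrive at 0.9 s, 1.0 s, and 1.2 s after a request started at 0 s would report a TTFT of 0.9 s and a TBT of 0.15 s, which matches how the span attributes GEN_AI_SERVER_TTFT and GEN_AI_SERVER_TBT and the genai_server_ttft/genai_server_tbt metrics are populated in the diff.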