openlit 1.33.7__py3-none-any.whl → 1.33.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openlit/__helpers.py +83 -0
- openlit/__init__.py +1 -1
- openlit/instrumentation/ag2/ag2.py +2 -2
- openlit/instrumentation/ai21/__init__.py +4 -4
- openlit/instrumentation/ai21/ai21.py +370 -319
- openlit/instrumentation/ai21/async_ai21.py +371 -319
- openlit/instrumentation/anthropic/__init__.py +4 -4
- openlit/instrumentation/anthropic/anthropic.py +321 -189
- openlit/instrumentation/anthropic/async_anthropic.py +323 -190
- openlit/instrumentation/assemblyai/__init__.py +1 -1
- openlit/instrumentation/assemblyai/assemblyai.py +59 -43
- openlit/instrumentation/astra/astra.py +4 -4
- openlit/instrumentation/astra/async_astra.py +4 -4
- openlit/instrumentation/azure_ai_inference/__init__.py +4 -4
- openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py +406 -252
- openlit/instrumentation/azure_ai_inference/azure_ai_inference.py +406 -252
- openlit/instrumentation/bedrock/__init__.py +1 -1
- openlit/instrumentation/bedrock/bedrock.py +115 -58
- openlit/instrumentation/chroma/chroma.py +4 -4
- openlit/instrumentation/cohere/__init__.py +33 -10
- openlit/instrumentation/cohere/async_cohere.py +610 -0
- openlit/instrumentation/cohere/cohere.py +410 -219
- openlit/instrumentation/controlflow/controlflow.py +2 -2
- openlit/instrumentation/crawl4ai/async_crawl4ai.py +2 -2
- openlit/instrumentation/crawl4ai/crawl4ai.py +2 -2
- openlit/instrumentation/crewai/crewai.py +2 -2
- openlit/instrumentation/dynamiq/dynamiq.py +2 -2
- openlit/instrumentation/elevenlabs/async_elevenlabs.py +73 -47
- openlit/instrumentation/elevenlabs/elevenlabs.py +73 -52
- openlit/instrumentation/embedchain/embedchain.py +4 -4
- openlit/instrumentation/firecrawl/firecrawl.py +2 -2
- openlit/instrumentation/google_ai_studio/__init__.py +9 -9
- openlit/instrumentation/google_ai_studio/async_google_ai_studio.py +183 -219
- openlit/instrumentation/google_ai_studio/google_ai_studio.py +183 -220
- openlit/instrumentation/gpt4all/gpt4all.py +17 -17
- openlit/instrumentation/groq/async_groq.py +14 -14
- openlit/instrumentation/groq/groq.py +14 -14
- openlit/instrumentation/haystack/haystack.py +2 -2
- openlit/instrumentation/julep/async_julep.py +2 -2
- openlit/instrumentation/julep/julep.py +2 -2
- openlit/instrumentation/langchain/langchain.py +36 -31
- openlit/instrumentation/letta/letta.py +6 -6
- openlit/instrumentation/litellm/async_litellm.py +20 -20
- openlit/instrumentation/litellm/litellm.py +20 -20
- openlit/instrumentation/llamaindex/llamaindex.py +2 -2
- openlit/instrumentation/mem0/mem0.py +2 -2
- openlit/instrumentation/milvus/milvus.py +4 -4
- openlit/instrumentation/mistral/async_mistral.py +18 -18
- openlit/instrumentation/mistral/mistral.py +18 -18
- openlit/instrumentation/multion/async_multion.py +2 -2
- openlit/instrumentation/multion/multion.py +2 -2
- openlit/instrumentation/ollama/async_ollama.py +29 -29
- openlit/instrumentation/ollama/ollama.py +29 -29
- openlit/instrumentation/openai/__init__.py +11 -230
- openlit/instrumentation/openai/async_openai.py +434 -409
- openlit/instrumentation/openai/openai.py +415 -393
- openlit/instrumentation/phidata/phidata.py +2 -2
- openlit/instrumentation/pinecone/pinecone.py +4 -4
- openlit/instrumentation/premai/premai.py +20 -20
- openlit/instrumentation/qdrant/async_qdrant.py +4 -4
- openlit/instrumentation/qdrant/qdrant.py +4 -4
- openlit/instrumentation/reka/async_reka.py +6 -6
- openlit/instrumentation/reka/reka.py +6 -6
- openlit/instrumentation/together/async_together.py +18 -18
- openlit/instrumentation/together/together.py +18 -18
- openlit/instrumentation/transformers/transformers.py +6 -6
- openlit/instrumentation/vertexai/async_vertexai.py +53 -53
- openlit/instrumentation/vertexai/vertexai.py +53 -53
- openlit/instrumentation/vllm/vllm.py +6 -6
- openlit/otel/metrics.py +98 -7
- openlit/semcov/__init__.py +113 -80
- {openlit-1.33.7.dist-info → openlit-1.33.9.dist-info}/METADATA +2 -1
- openlit-1.33.9.dist-info/RECORD +121 -0
- {openlit-1.33.7.dist-info → openlit-1.33.9.dist-info}/WHEEL +1 -1
- openlit/instrumentation/openai/async_azure_openai.py +0 -900
- openlit/instrumentation/openai/azure_openai.py +0 -898
- openlit-1.33.7.dist-info/RECORD +0 -122
- {openlit-1.33.7.dist-info → openlit-1.33.9.dist-info}/LICENSE +0 -0
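The bulk of the change lands in `openlit/instrumentation/openai/*.py` (the reconstructed diff for `async_openai.py` follows below): span and metric attributes move onto the OTel GenAI semantic conventions, the wrappers drop the `gen_ai_endpoint` parameter in favor of `{operation} {model}` span names, and streamed responses gain time-to-first-token (TTFT) and time-between-tokens (TBT) measurements. The sketch below illustrates that streaming-timing logic; the bodies of `calculate_ttft`/`calculate_tbt` and the simulated chunk delays are assumptions (only the two call sites are visible in the diff) — a minimal sketch, not openlit's implementation.

```python
import time

# Local stand-ins for openlit.__helpers.calculate_ttft / calculate_tbt
# (assumed behavior, inferred from how TracedAsyncStream calls them).
def calculate_ttft(timestamps, start_time):
    # Time from request start to the arrival of the first streamed chunk
    return timestamps[0] - start_time

def calculate_tbt(timestamps):
    # Average gap between consecutive streamed chunks
    gaps = [later - earlier for earlier, later in zip(timestamps, timestamps[1:])]
    return sum(gaps) / len(gaps)

start_time = time.time()
timestamps = []
ttft = 0

for delay in (0.12, 0.03, 0.04, 0.03):  # simulated chunk arrivals
    time.sleep(delay)
    timestamps.append(time.time())  # mirrors __anext__ recording each chunk
    if len(timestamps) == 1:
        ttft = calculate_ttft(timestamps, start_time)

tbt = calculate_tbt(timestamps) if len(timestamps) > 1 else 0
print(f"ttft={ttft:.3f}s tbt={tbt:.3f}s")
```

In the instrumented code these values are set on the span as `GEN_AI_SERVER_TTFT`/`GEN_AI_SERVER_TBT` and recorded into the new `genai_server_ttft`/`genai_server_tbt` histograms added in `openlit/otel/metrics.py`.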
--- a/openlit/instrumentation/openai/async_openai.py
+++ b/openlit/instrumentation/openai/async_openai.py
@@ -1,11 +1,11 @@
-# pylint: disable=duplicate-code, broad-exception-caught, too-many-statements, unused-argument, too-many-branches
 """
 Module for monitoring OpenAI API calls.
 """
 
 import logging
+import time
 from opentelemetry.trace import SpanKind, Status, StatusCode
-from opentelemetry.sdk.resources import TELEMETRY_SDK_NAME
+from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
 from openlit.__helpers import (
     get_chat_model_cost,
     get_embed_model_cost,
@@ -14,19 +14,22 @@ from openlit.__helpers import (
     openai_tokens,
     handle_exception,
     response_as_dict,
+    calculate_ttft,
+    calculate_tbt,
+    create_metrics_attributes,
+    set_server_address_and_port
 )
 from openlit.semcov import SemanticConvetion
 
 # Initialize logger for logging potential issues and operations
 logger = logging.getLogger(__name__)
 
-def async_chat_completions(gen_ai_endpoint, version, environment, application_name,
+def async_chat_completions(version, environment, application_name,
                     tracer, pricing_info, trace_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for chat completions to collect metrics.
 
     Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
         version: Version of the monitoring package.
         environment: Deployment environment (e.g., production, staging).
         application_name: Name of the application using the OpenAI API.
@@ -54,6 +57,8 @@ def async_chat_completions(gen_ai_endpoint, version, environment, application_na
                 wrapped,
                 span,
                 kwargs,
+                server_address,
+                server_port,
                 **args,
             ):
             self.__wrapped__ = wrapped
@@ -61,9 +66,20 @@ def async_chat_completions(gen_ai_endpoint, version, environment, application_na
             # Placeholder for aggregating streaming response
             self._llmresponse = ""
             self._response_id = ""
+            self._response_model = ""
+            self._finish_reason = ""
+            self._openai_response_service_tier = ""
+            self._openai_system_fingerprint = ""
 
             self._args = args
             self._kwargs = kwargs
+            self._start_time = time.time()
+            self._end_time = None
+            self._timestamps = []
+            self._ttft = 0
+            self._tbt = 0
+            self._server_address = server_address
+            self._server_port = server_port
 
         async def __aenter__(self):
             await self.__wrapped__.__aenter__()
@@ -82,6 +98,14 @@ def async_chat_completions(gen_ai_endpoint, version, environment, application_na
         async def __anext__(self):
             try:
                 chunk = await self.__wrapped__.__anext__()
+                end_time = time.time()
+                # Record the timestamp for the current chunk
+                self._timestamps.append(end_time)
+
+                if len(self._timestamps) == 1:
+                    # Calculate time to first chunk
+                    self._ttft = calculate_ttft(self._timestamps, self._start_time)
+
                 chunked = response_as_dict(chunk)
                 # Collect message IDs and aggregated response from events
                 if (len(chunked.get('choices')) > 0 and ('delta' in chunked.get('choices')[0] and
@@ -91,10 +115,18 @@ def async_chat_completions(gen_ai_endpoint, version, environment, application_na
                     if content:
                         self._llmresponse += content
                 self._response_id = chunked.get('id')
+                self._response_model = chunked.get('model')
+                self._finish_reason = chunked.get('choices')[0].get('finish_reason')
+                self._openai_response_service_tier = chunked.get('service_tier')
+                self._openai_system_fingerprint = chunked.get('system_fingerprint')
                 return chunk
             except StopAsyncIteration:
                 # Handling exception ensure observability without disrupting operation
                 try:
+                    self._end_time = time.time()
+                    if len(self._timestamps) > 1:
+                        self._tbt = calculate_tbt(self._timestamps)
+
                     # Format 'messages' into a single string
                     message_prompt = self._kwargs.get("messages", "")
                     formatted_messages = []
@@ -103,68 +135,100 @@ def async_chat_completions(gen_ai_endpoint, version, environment, application_na
                         content = message["content"]
 
                         if isinstance(content, list):
-                            content_str = ", ".join(
-                                # pylint: disable=line-too-long
-                                f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
-                                if "type" in item else f'text: {item["text"]}'
-                                for item in content
-                            )
+                            content_str_list = []
+                            for item in content:
+                                if item["type"] == "text":
+                                    content_str_list.append(f'text: {item["text"]}')
+                                elif (item["type"] == "image_url" and
+                                      not item["image_url"]["url"].startswith("data:")):
+                                    content_str_list.append(f'image_url: {item["image_url"]["url"]}')
+                            content_str = ", ".join(content_str_list)
                             formatted_messages.append(f"{role}: {content_str}")
                         else:
                             formatted_messages.append(f"{role}: {content}")
                     prompt = "\n".join(formatted_messages)
 
+                    request_model = self._kwargs.get("model", "gpt-4o")
+
                     # Calculate tokens using input prompt and aggregated response
-                    prompt_tokens = openai_tokens(prompt,
-                                                  self._kwargs.get("model", "gpt-3.5-turbo"))
-                    completion_tokens = openai_tokens(self._llmresponse,
-                                                      self._kwargs.get("model", "gpt-3.5-turbo"))
+                    input_tokens = openai_tokens(prompt,
+                                                 request_model)
+                    output_tokens = openai_tokens(self._llmresponse,
+                                                  request_model)
 
                     # Calculate cost of the operation
-                    cost = get_chat_model_cost(self._kwargs.get("model", "gpt-3.5-turbo"),
-                                               pricing_info, prompt_tokens,
-                                               completion_tokens)
+                    cost = get_chat_model_cost(request_model,
+                                               pricing_info, input_tokens,
+                                               output_tokens)
 
-                    # Set Span attributes
+                    # Set Span attributes (OTel Semconv)
                    self._span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+                                             SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
                    self._span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
                                              SemanticConvetion.GEN_AI_SYSTEM_OPENAI)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                                             SemanticConvetion.GEN_AI_TYPE_CHAT)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                                             gen_ai_endpoint)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                             request_model)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
+                                             self._kwargs.get("seed", ""))
+                    self._span.set_attribute(SemanticConvetion.SERVER_PORT,
+                                             self._server_port)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+                                             self._kwargs.get("frequency_penalty", 0.0))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
+                                             self._kwargs.get("max_tokens", -1))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
+                                             self._kwargs.get("presence_penalty", 0.0))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES,
+                                             self._kwargs.get("stop", []))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
+                                             self._kwargs.get("temperature", 1.0))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
+                                             self._kwargs.get("top_p", 1.0))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
+                                             [self._finish_reason])
                    self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
                                              self._response_id)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                                             self._response_model)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+                                             input_tokens)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
+                                             output_tokens)
+                    self._span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                                             self._server_address)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_OPENAI_REQUEST_SERVICE_TIER,
+                                             self._kwargs.get("service_tier", "auto"))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_OPENAI_RESPONSE_SERVICE_TIER,
+                                             self._openai_response_service_tier)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_OPENAI_RESPONSE_SYSTEM_FINGERPRINT,
+                                             self._openai_system_fingerprint)
+                    if isinstance(self._llmresponse, str):
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                                 "text")
+                    else:
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                                 "json")
+
+                    # Set Span attributes (Extra)
+                    self._span.set_attribute(DEPLOYMENT_ENVIRONMENT,
                                              environment)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                    self._span.set_attribute(SERVICE_NAME,
                                              application_name)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                             self._kwargs.get("model", "gpt-3.5-turbo"))
                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
                                              self._kwargs.get("user", ""))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
-                                             self._kwargs.get("top_p", 1.0))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
-                                             self._kwargs.get("max_tokens", -1))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
-                                             self._kwargs.get("temperature", 1.0))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
-                                             self._kwargs.get("presence_penalty", 0.0))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
-                                             self._kwargs.get("frequency_penalty", 0.0))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
-                                             self._kwargs.get("seed", ""))
                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
                                              True)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                                             prompt_tokens)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
-                                             completion_tokens)
                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                             prompt_tokens + completion_tokens)
+                                             input_tokens + output_tokens)
                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                              cost)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TBT,
+                                             self._tbt)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
+                                             self._ttft)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                                             version)
                    if trace_content:
                        self._span.add_event(
                            name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
@@ -178,31 +242,35 @@ def async_chat_completions(gen_ai_endpoint, version, environment, application_na
                                SemanticConvetion.GEN_AI_CONTENT_COMPLETION: self._llmresponse,
                            },
                        )
-
                    self._span.set_status(Status(StatusCode.OK))
 
                    if disable_metrics is False:
-                        attributes = {
-                            TELEMETRY_SDK_NAME:
-                                "openlit",
-                            SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                                application_name,
-                            SemanticConvetion.GEN_AI_SYSTEM:
-                                SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
-                            SemanticConvetion.GEN_AI_ENVIRONMENT:
-                                environment,
-                            SemanticConvetion.GEN_AI_TYPE:
-                                SemanticConvetion.GEN_AI_TYPE_CHAT,
-                            SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                                self._kwargs.get("model", "gpt-3.5-turbo")
-                        }
+                        attributes = create_metrics_attributes(
+                            service_name=application_name,
+                            deployment_environment=environment,
+                            operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
+                            system=SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
+                            request_model=request_model,
+                            server_address=self._server_address,
+                            server_port=self._server_port,
+                            response_model=self._response_model,
+                        )
 
-                        metrics["genai_requests"].add(1, attributes)
-                        metrics["genai_total_tokens"].add(
-                            prompt_tokens + completion_tokens, attributes
+                        metrics["genai_client_usage_tokens"].record(
+                            input_tokens + output_tokens, attributes
+                        )
+                        metrics["genai_client_operation_duration"].record(
+                            self._end_time - self._start_time, attributes
                        )
-                        metrics["genai_completion_tokens"].add(completion_tokens, attributes)
-                        metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
+                        metrics["genai_server_tbt"].record(
+                            self._tbt, attributes
+                        )
+                        metrics["genai_server_ttft"].record(
+                            self._ttft, attributes
+                        )
+                        metrics["genai_requests"].add(1, attributes)
+                        metrics["genai_completion_tokens"].add(output_tokens, attributes)
+                        metrics["genai_prompt_tokens"].add(input_tokens, attributes)
                        metrics["genai_cost"].record(cost, attributes)
 
                except Exception as e:
@@ -231,20 +299,25 @@ def async_chat_completions(gen_ai_endpoint, version, environment, application_na
 
        # Check if streaming is enabled for the API call
        streaming = kwargs.get("stream", False)
+        server_address, server_port = set_server_address_and_port(instance, "api.openai.com", 443)
+        request_model = kwargs.get("model", "gpt-4o")
+
+        span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
 
        # pylint: disable=no-else-return
        if streaming:
            # Special handling for streaming response to accommodate the nature of data flow
            awaited_wrapped = await wrapped(*args, **kwargs)
-            span = tracer.start_span(gen_ai_endpoint, kind=SpanKind.CLIENT)
+            span = tracer.start_span(span_name, kind=SpanKind.CLIENT)
 
-            return TracedAsyncStream(awaited_wrapped, span, kwargs)
+            return TracedAsyncStream(awaited_wrapped, span, kwargs, server_address, server_port)
 
        # Handling for non-streaming responses
        else:
-            # pylint: disable=line-too-long
-            with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
+            with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
+                start_time = time.time()
                response = await wrapped(*args, **kwargs)
+                end_time = time.time()
 
                response_dict = response_as_dict(response)
 
@@ -258,7 +331,6 @@ def async_chat_completions(gen_ai_endpoint, version, environment, application_na
 
                    if isinstance(content, list):
                        content_str = ", ".join(
-                            # pylint: disable=line-too-long
                            f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
                            if "type" in item else f'text: {item["text"]}'
                            for item in content
@@ -268,38 +340,72 @@ def async_chat_completions(gen_ai_endpoint, version, environment, application_na
                        formatted_messages.append(f"{role}: {content}")
                prompt = "\n".join(formatted_messages)
 
-                # Set base span attribues
+                input_tokens = response_dict.get('usage').get('prompt_tokens')
+                output_tokens = response_dict.get('usage').get('completion_tokens')
+
+                # Calculate cost of the operation
+                cost = get_chat_model_cost(request_model,
+                                           pricing_info, input_tokens,
+                                           output_tokens)
+
+                # Set base span attribues (OTel Semconv)
                span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+                                   SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
                span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
                                   SemanticConvetion.GEN_AI_SYSTEM_OPENAI)
-                span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                                   SemanticConvetion.GEN_AI_TYPE_CHAT)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                                   gen_ai_endpoint)
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                   request_model)
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
+                                   kwargs.get("seed", ""))
+                span.set_attribute(SemanticConvetion.SERVER_PORT,
+                                   server_port)
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+                                   kwargs.get("frequency_penalty", 0.0))
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
+                                   kwargs.get("max_tokens", -1))
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
+                                   kwargs.get("presence_penalty", 0.0))
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES,
+                                   kwargs.get("stop", []))
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
+                                   kwargs.get("temperature", 1.0))
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
+                                   kwargs.get("top_p", 1.0))
                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
                                   response_dict.get("id"))
-                span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                                   response_dict.get('model'))
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+                                   input_tokens)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
+                                   output_tokens)
+                span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                                   server_address)
+                span.set_attribute(SemanticConvetion.GEN_AI_OPENAI_REQUEST_SERVICE_TIER,
+                                   kwargs.get("service_tier", "auto"))
+                span.set_attribute(SemanticConvetion.GEN_AI_OPENAI_RESPONSE_SERVICE_TIER,
+                                   response_dict.get('service_tier'))
+                span.set_attribute(SemanticConvetion.GEN_AI_OPENAI_RESPONSE_SYSTEM_FINGERPRINT,
+                                   response_dict.get('system_fingerprint'))
+
+                # Set base span attribues (Extras)
+                span.set_attribute(DEPLOYMENT_ENVIRONMENT,
                                   environment)
-                span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                span.set_attribute(SERVICE_NAME,
                                   application_name)
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                   kwargs.get("model", "gpt-3.5-turbo"))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
-                                   kwargs.get("top_p", 1.0))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
-                                   kwargs.get("max_tokens", -1))
                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
                                   kwargs.get("user", ""))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
-                                   kwargs.get("temperature", 1.0))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
-                                   kwargs.get("presence_penalty", 0.0))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
-                                   kwargs.get("frequency_penalty", 0.0))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
-                                   kwargs.get("seed", ""))
                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
                                   False)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+                                   input_tokens + output_tokens)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                                   cost)
+                span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
+                                   end_time - start_time)
+                span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                                   version)
                if trace_content:
                    span.add_event(
                        name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
@@ -308,93 +414,54 @@ def async_chat_completions(gen_ai_endpoint, version, environment, application_na
                        },
                    )
 
-                # Set span attributes when tools is not passed to the function call
-                if "tools" not in kwargs:
-                    # Calculate cost of the operation
-                    cost = get_chat_model_cost(kwargs.get("model", "gpt-3.5-turbo"),
-                                               pricing_info, response_dict.get('usage', {}).get('prompt_tokens', None),
-                                               response_dict.get('usage', {}).get('completion_tokens', None))
-
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                                       response_dict.get('usage', {}).get('prompt_tokens', None))
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
-                                       response_dict.get('usage', {}).get('completion_tokens', None))
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                       response_dict.get('usage', {}).get('total_tokens', None))
+                for i in range(kwargs.get('n',1)):
                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
-                                       [response_dict.get('choices')[0].get('finish_reason')])
[… 18 removed lines lost in extraction: the old n == 1 / n > 1 completion-event branches …]
-                            span.add_event(
-                                name=attribute_name,
-                                attributes={
-                                    SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response_dict.get('choices')[i].get("message").get("content"),
-                                },
-                            )
-                            i += 1
-
-                    # Return original response
-                    return response
-
-                # Set span attributes when tools is passed to the function call
-                elif "tools" in kwargs:
-                    # Calculate cost of the operation
-                    cost = get_chat_model_cost(kwargs.get("model", "gpt-3.5-turbo"),
-                                               pricing_info, response_dict.get('usage').get('prompt_tokens'),
-                                               response_dict.get('usage').get('completion_tokens'))
-                    span.add_event(
-                        name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
-                        attributes={
-                            SemanticConvetion.GEN_AI_CONTENT_COMPLETION: "Function called with tools",
-                        },
-                    )
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                                       response_dict.get('usage').get('prompt_tokens'))
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
-                                       response_dict.get('usage').get('completion_tokens'))
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                       response_dict.get('usage').get('total_tokens'))
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                                       cost)
+                                       [response_dict.get('choices')[i].get('finish_reason')])
+                    if trace_content:
+                        span.add_event(
+                            name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                            attributes={
+                                # pylint: disable=line-too-long
+                                SemanticConvetion.GEN_AI_CONTENT_COMPLETION: str(response_dict.get('choices')[i].get('message').get('content')),
+                            },
+                        )
+                    if kwargs.get('tools'):
+                        span.set_attribute(SemanticConvetion.GEN_AI_TOOL_CALLS,
+                                           str(response_dict.get('choices')[i].get('message').get('tool_calls')))
+
+                    if isinstance(response_dict.get('choices')[i].get('message').get('content'), str):
+                        span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                           "text")
+                    elif response_dict.get('choices')[i].get('message').get('content') is not None:
+                        span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                           "json")
 
                span.set_status(Status(StatusCode.OK))
 
                if disable_metrics is False:
-                    attributes = {
-                        TELEMETRY_SDK_NAME:
-                            "openlit",
-                        SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                            application_name,
-                        SemanticConvetion.GEN_AI_SYSTEM:
-                            SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
-                        SemanticConvetion.GEN_AI_ENVIRONMENT:
-                            environment,
-                        SemanticConvetion.GEN_AI_TYPE:
-                            SemanticConvetion.GEN_AI_TYPE_CHAT,
-                        SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                            kwargs.get("model", "gpt-3.5-turbo")
-                    }
+                    attributes = create_metrics_attributes(
+                        service_name=application_name,
+                        deployment_environment=environment,
+                        operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
+                        system=SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
+                        request_model=request_model,
+                        server_address=server_address,
+                        server_port=server_port,
+                        response_model=response_dict.get('model'),
+                    )
 
+                    metrics["genai_client_usage_tokens"].record(
+                        input_tokens + output_tokens, attributes
+                    )
+                    metrics["genai_client_operation_duration"].record(
+                        end_time - start_time, attributes
+                    )
+                    metrics["genai_server_ttft"].record(
+                        end_time - start_time, attributes
+                    )
                    metrics["genai_requests"].add(1, attributes)
-                    metrics["genai_total_tokens"].add(response_dict.get('usage').get('total_tokens'), attributes)
-                    metrics["genai_completion_tokens"].add(response_dict.get('usage').get('completion_tokens'), attributes)
-                    metrics["genai_prompt_tokens"].add(response_dict.get('usage').get('prompt_tokens'), attributes)
+                    metrics["genai_completion_tokens"].add(output_tokens, attributes)
+                    metrics["genai_prompt_tokens"].add(input_tokens, attributes)
                    metrics["genai_cost"].record(cost, attributes)
 
                # Return original response
@@ -409,20 +476,19 @@ def async_chat_completions(gen_ai_endpoint, version, environment, application_na
 
    return wrapper
 
-def async_embedding(gen_ai_endpoint, version, environment, application_name,
-                    tracer, pricing_info, trace_content, metrics, disable_metrics):
+def async_embedding(version, environment, application_name,
+                    tracer, pricing_info, trace_content, metrics, disable_metrics):
    """
    Generates a telemetry wrapper for embeddings to collect metrics.
-
+
    Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
        version: Version of the monitoring package.
        environment: Deployment environment (e.g., production, staging).
        application_name: Name of the application using the OpenAI API.
        tracer: OpenTelemetry tracer for creating spans.
        pricing_info: Information used for calculating the cost of OpenAI usage.
        trace_content: Flag indicating whether to trace the actual content.
-
+
    Returns:
        A function that wraps the embeddings method to add telemetry.
    """
@@ -444,40 +510,56 @@ def async_embedding(gen_ai_endpoint, version, environment, application_name,
            The response from the original 'embeddings' method.
        """
 
-        with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
+        server_address, server_port = set_server_address_and_port(instance, "api.openai.com", 443)
+        request_model = kwargs.get("model", "text-embedding-ada-002")
+
+        span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING} {request_model}"
+
+        with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+            start_time = time.time()
            response = await wrapped(*args, **kwargs)
+            end_time = time.time()
+
            response_dict = response_as_dict(response)
            try:
+                input_tokens = response_dict.get('usage').get('prompt_tokens')
+
                # Calculate cost of the operation
-                cost = get_embed_model_cost(kwargs.get("model", "text-embedding-ada-002"),
-                                            pricing_info, response_dict.get('usage').get('prompt_tokens'))
+                cost = get_embed_model_cost(request_model,
+                                            pricing_info, input_tokens)
 
-                # Set Span attributes
+                # Set Span attributes (OTel Semconv)
                span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+                                   SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING)
                span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
                                   SemanticConvetion.GEN_AI_SYSTEM_OPENAI)
-                span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                                   SemanticConvetion.GEN_AI_TYPE_EMBEDDING)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                                   gen_ai_endpoint)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                   request_model)
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_ENCODING_FORMATS,
+                                   [kwargs.get('encoding_format', 'float')])
+                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                                   request_model)
+                span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                                   server_address)
+                span.set_attribute(SemanticConvetion.SERVER_PORT,
+                                   server_port)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+                                   input_tokens)
+
+                # Set Span attributes (Extras)
+                span.set_attribute(DEPLOYMENT_ENVIRONMENT,
                                   environment)
-                span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                span.set_attribute(SERVICE_NAME,
                                   application_name)
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                   kwargs.get("model", "text-embedding-ada-002"))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_EMBEDDING_FORMAT,
-                                   kwargs.get("encoding_format", "float"))
-                # span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_EMBEDDING_DIMENSION,
-                #                    kwargs.get("dimensions", "null"))
                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
                                   kwargs.get("user", ""))
-                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                                   response_dict.get('usage').get('prompt_tokens'))
                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                   response_dict.get('usage').get('total_tokens'))
+                                   input_tokens)
                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                   cost)
+                span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                                   version)
 
                if trace_content:
                    span.add_event(
|
|
490
572
|
span.set_status(Status(StatusCode.OK))
|
491
573
|
|
492
574
|
if disable_metrics is False:
|
493
|
-
attributes =
|
494
|
-
|
495
|
-
|
496
|
-
SemanticConvetion.
|
497
|
-
|
498
|
-
|
499
|
-
|
500
|
-
|
501
|
-
|
502
|
-
|
503
|
-
|
504
|
-
|
505
|
-
|
506
|
-
|
507
|
-
|
575
|
+
attributes = create_metrics_attributes(
|
576
|
+
service_name=application_name,
|
577
|
+
deployment_environment=environment,
|
578
|
+
operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING,
|
579
|
+
system=SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
|
580
|
+
request_model=request_model,
|
581
|
+
server_address=server_address,
|
582
|
+
server_port=server_port,
|
583
|
+
response_model=request_model,
|
584
|
+
)
|
585
|
+
metrics["genai_client_usage_tokens"].record(
|
586
|
+
input_tokens, attributes
|
587
|
+
)
|
588
|
+
metrics["genai_client_operation_duration"].record(
|
589
|
+
end_time - start_time, attributes
|
590
|
+
)
|
508
591
|
metrics["genai_requests"].add(1, attributes)
|
509
|
-
metrics["
|
510
|
-
response_dict.get('usage').get('total_tokens'), attributes)
|
511
|
-
metrics["genai_prompt_tokens"].add(
|
512
|
-
response_dict.get('usage').get('prompt_tokens'), attributes)
|
592
|
+
metrics["genai_prompt_tokens"].add(input_tokens, attributes)
|
513
593
|
metrics["genai_cost"].record(cost, attributes)
|
514
594
|
|
515
595
|
# Return original response
|
@@ -524,125 +604,19 @@ def async_embedding(gen_ai_endpoint, version, environment, application_name,
|
|
524
604
|
|
525
605
|
return wrapper
|
526
606
|
|
527
|
-
def
|
607
|
+
def async_image_generate(version, environment, application_name,
|
528
608
|
tracer, pricing_info, trace_content, metrics, disable_metrics):
|
529
609
|
"""
|
530
|
-
Generates a telemetry wrapper for fine-tuning jobs to collect metrics.
|
531
|
-
|
532
|
-
Args:
|
533
|
-
gen_ai_endpoint: Endpoint identifier for logging and tracing.
|
534
|
-
version: Version of the monitoring package.
|
535
|
-
environment: Deployment environment (e.g., production, staging).
|
536
|
-
application_name: Name of the application using the OpenAI API.
|
537
|
-
tracer: OpenTelemetry tracer for creating spans.
|
538
|
-
pricing_info: Information used for calculating the cost of OpenAI usage.
|
539
|
-
trace_content: Flag indicating whether to trace the actual content.
|
540
|
-
|
541
|
-
Returns:
|
542
|
-
A function that wraps the fine tuning creation method to add telemetry.
|
543
|
-
"""
|
544
|
-
|
545
|
-
async def wrapper(wrapped, instance, args, kwargs):
|
546
|
-
"""
|
547
|
-
Wraps the 'fine_tuning.jobs.create' API call to add telemetry.
|
548
|
-
|
549
|
-
This collects metrics such as execution time, usage stats, and handles errors
|
550
|
-
gracefully, adding details to the trace for observability.
|
551
|
-
|
552
|
-
Args:
|
553
|
-
wrapped: The original 'fine_tuning.jobs.create' method to be wrapped.
|
554
|
-
instance: The instance of the class where the original method is defined.
|
555
|
-
args: Positional arguments for the method.
|
556
|
-
kwargs: Keyword arguments for the method.
|
557
|
-
|
558
|
-
Returns:
|
559
|
-
The response from the original 'fine_tuning.jobs.create' method.
|
560
|
-
"""
|
561
|
-
|
562
|
-
with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
|
563
|
-
response = await wrapped(*args, **kwargs)
|
564
|
-
|
565
|
-
# Handling exception ensure observability without disrupting operation
|
566
|
-
try:
|
567
|
-
# Set Span attributes
|
568
|
-
span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
|
569
|
-
span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
|
570
|
-
SemanticConvetion.GEN_AI_SYSTEM_OPENAI)
|
571
|
-
span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
|
572
|
-
SemanticConvetion.GEN_AI_TYPE_FINETUNING)
|
573
|
-
span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
|
574
|
-
gen_ai_endpoint)
|
575
|
-
span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
|
576
|
-
environment)
|
577
|
-
span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
|
578
|
-
application_name)
|
579
|
-
span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
|
580
|
-
kwargs.get("model", "gpt-3.5-turbo"))
|
581
|
-
span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TRAINING_FILE,
|
582
|
-
kwargs.get("training_file", ""))
|
583
|
-
span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_VALIDATION_FILE,
|
584
|
-
kwargs.get("validation_file", ""))
|
585
|
-
span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FINETUNE_BATCH_SIZE,
|
586
|
-
kwargs.get("hyperparameters.batch_size", "auto"))
|
587
|
-
span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FINETUNE_MODEL_LRM,
|
588
|
-
kwargs.get("hyperparameters.learning_rate_multiplier",
|
589
|
-
"auto"))
|
590
|
-
span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FINETUNE_MODEL_EPOCHS,
|
591
|
-
kwargs.get("hyperparameters.n_epochs", "auto"))
|
592
|
-
span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FINETUNE_MODEL_SUFFIX,
|
593
|
-
kwargs.get("suffix", ""))
|
594
|
-
span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
|
595
|
-
response.id)
|
596
|
-
span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
|
597
|
-
response.usage.prompt_tokens)
|
598
|
-
span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FINETUNE_STATUS,
|
599
|
-
response.status)
|
600
|
-
span.set_status(Status(StatusCode.OK))
|
601
|
-
|
602
|
-
if disable_metrics is False:
|
603
|
-
attributes = {
|
604
|
-
TELEMETRY_SDK_NAME:
|
605
|
-
"openlit",
|
606
|
-
SemanticConvetion.GEN_AI_APPLICATION_NAME:
|
607
|
-
application_name,
|
608
|
-
SemanticConvetion.GEN_AI_SYSTEM:
|
609
|
-
SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
|
610
|
-
SemanticConvetion.GEN_AI_ENVIRONMENT:
|
611
|
-
environment,
|
612
|
-
SemanticConvetion.GEN_AI_TYPE:
|
613
|
-
SemanticConvetion.GEN_AI_TYPE_FINETUNING,
|
614
|
-
SemanticConvetion.GEN_AI_REQUEST_MODEL:
|
615
|
-
kwargs.get("model", "gpt-3.5-turbo")
|
616
|
-
}
|
617
|
-
|
618
|
-
metrics["genai_requests"].add(1, attributes)
|
619
|
-
|
620
|
-
# Return original response
|
621
|
-
return response
|
622
|
-
|
623
|
-
except Exception as e:
|
624
|
-
handle_exception(span, e)
|
625
|
-
logger.error("Error in trace creation: %s", e)
|
626
|
-
|
627
|
-
# Return original response
|
628
|
-
return response
|
629
|
-
|
630
|
-
return wrapper
|
631
|
-
|
632
|
-
def async_image_generate(gen_ai_endpoint, version, environment, application_name,
|
633
|
-
tracer, pricing_info, trace_content, metrics, disable_metrics):
|
634
|
-
"""
|
635
610
|
Generates a telemetry wrapper for image generation to collect metrics.
|
636
|
-
|
611
|
+
|
637
612
|
Args:
|
638
|
-
gen_ai_endpoint: Endpoint identifier for logging and tracing.
|
639
613
|
version: Version of the monitoring package.
|
640
614
|
environment: Deployment environment (e.g., production, staging).
|
641
615
|
application_name: Name of the application using the OpenAI API.
|
642
616
|
tracer: OpenTelemetry tracer for creating spans.
|
643
617
|
pricing_info: Information used for calculating the cost of OpenAI image generation.
|
644
618
|
trace_content: Flag indicating whether to trace the input prompt and generated images.
|
645
|
-
|
619
|
+
|
646
620
|
Returns:
|
647
621
|
A function that wraps the image generation method to add telemetry.
|
648
622
|
"""
|
@@ -664,8 +638,16 @@ def async_image_generate(gen_ai_endpoint, version, environment, application_name
            The response from the original 'images.generate' method.
        """
 
-        with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
+        server_address, server_port = set_server_address_and_port(instance, "api.openai.com", 443)
+        request_model = kwargs.get("model", "dall-e-2")
+
+        span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_IMAGE} {request_model}"
+
+        with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+            start_time = time.time()
            response = await wrapped(*args, **kwargs)
+            end_time = time.time()
+
            images_count = 0
 
            try:
|
|
675
657
|
else:
|
676
658
|
image = "url"
|
677
659
|
|
660
|
+
request_model = kwargs.get("model", "dall-e-2")
|
661
|
+
|
678
662
|
# Calculate cost of the operation
|
679
|
-
cost = get_image_model_cost(
|
663
|
+
cost = get_image_model_cost(request_model,
|
680
664
|
pricing_info, kwargs.get("size", "1024x1024"),
|
681
665
|
kwargs.get("quality", "standard"))
|
682
666
|
|
683
667
|
for items in response.data:
|
684
|
-
# Set Span attributes
|
668
|
+
# Set Span attributes (OTel Semconv)
|
685
669
|
span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
|
670
|
+
span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
|
671
|
+
SemanticConvetion.GEN_AI_OPERATION_TYPE_IMAGE)
|
686
672
|
span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
|
687
673
|
SemanticConvetion.GEN_AI_SYSTEM_OPENAI)
|
688
|
-
span.set_attribute(SemanticConvetion.
|
689
|
-
|
690
|
-
span.set_attribute(SemanticConvetion.
|
691
|
-
|
674
|
+
span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
|
675
|
+
request_model)
|
676
|
+
span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
|
677
|
+
server_address)
|
678
|
+
span.set_attribute(SemanticConvetion.SERVER_PORT,
|
679
|
+
server_port)
|
692
680
|
span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
|
693
681
|
response.created)
|
694
|
-
span.set_attribute(SemanticConvetion.
|
682
|
+
span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
|
683
|
+
request_model)
|
684
|
+
span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
|
685
|
+
"image")
|
686
|
+
|
687
|
+
# Set Span attributes (Extras)
|
688
|
+
span.set_attribute(DEPLOYMENT_ENVIRONMENT,
|
695
689
|
environment)
|
696
|
-
span.set_attribute(
|
690
|
+
span.set_attribute(SERVICE_NAME,
|
697
691
|
application_name)
|
698
|
-
span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
|
699
|
-
kwargs.get("model", "dall-e-2"))
|
700
692
|
span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IMAGE_SIZE,
|
701
693
|
kwargs.get("size", "1024x1024"))
|
702
694
|
span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IMAGE_QUALITY,
|
@@ -707,6 +699,9 @@ def async_image_generate(gen_ai_endpoint, version, environment, application_name
|
|
707
699
|
items.revised_prompt if items.revised_prompt else "")
|
708
700
|
span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
|
709
701
|
kwargs.get("user", ""))
|
702
|
+
span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
|
703
|
+
version)
|
704
|
+
|
710
705
|
if trace_content:
|
711
706
|
span.add_event(
|
712
707
|
name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
|
@@ -714,7 +709,7 @@ def async_image_generate(gen_ai_endpoint, version, environment, application_name
|
|
714
709
|
SemanticConvetion.GEN_AI_CONTENT_PROMPT: kwargs.get("prompt", ""),
|
715
710
|
},
|
716
711
|
)
|
717
|
-
attribute_name = f"
|
712
|
+
attribute_name = f"{SemanticConvetion.GEN_AI_RESPONSE_IMAGE}.{images_count}"
|
718
713
|
span.add_event(
|
719
714
|
name=attribute_name,
|
720
715
|
attributes={
|
@@ -729,21 +724,20 @@ def async_image_generate(gen_ai_endpoint, version, environment, application_name
|
|
729
724
|
span.set_status(Status(StatusCode.OK))
|
730
725
|
|
731
726
|
if disable_metrics is False:
|
732
|
-
attributes =
|
733
|
-
|
734
|
-
|
735
|
-
SemanticConvetion.
|
736
|
-
|
737
|
-
|
738
|
-
|
739
|
-
|
740
|
-
|
741
|
-
|
742
|
-
SemanticConvetion.GEN_AI_TYPE_IMAGE,
|
743
|
-
SemanticConvetion.GEN_AI_REQUEST_MODEL:
|
744
|
-
kwargs.get("model", "dall-e-2")
|
745
|
-
}
|
727
|
+
attributes = create_metrics_attributes(
|
728
|
+
service_name=application_name,
|
729
|
+
deployment_environment=environment,
|
730
|
+
operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_IMAGE,
|
731
|
+
system=SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
|
732
|
+
request_model=request_model,
|
733
|
+
server_address=server_address,
|
734
|
+
server_port=server_port,
|
735
|
+
response_model=request_model,
|
736
|
+
)
|
746
737
|
|
738
|
+
metrics["genai_client_operation_duration"].record(
|
739
|
+
end_time - start_time, attributes
|
740
|
+
)
|
747
741
|
metrics["genai_requests"].add(1, attributes)
|
748
742
|
metrics["genai_cost"].record(cost, attributes)
|
749
743
|
|
@@ -759,20 +753,19 @@ def async_image_generate(gen_ai_endpoint, version, environment, application_name
|
|
759
753
|
|
760
754
|
return wrapper
|
761
755
|
|
762
|
-
def async_image_variatons(
|
763
|
-
|
756
|
+
def async_image_variatons(version, environment, application_name,
|
757
|
+
tracer, pricing_info, trace_content, metrics, disable_metrics):
|
764
758
|
"""
|
765
759
|
Generates a telemetry wrapper for creating image variations to collect metrics.
|
766
|
-
|
760
|
+
|
767
761
|
Args:
|
768
|
-
gen_ai_endpoint: Endpoint identifier for logging and tracing.
|
769
762
|
version: Version of the monitoring package.
|
770
763
|
environment: Deployment environment (e.g., production, staging).
|
771
764
|
application_name: Name of the application using the OpenAI API.
|
772
765
|
tracer: OpenTelemetry tracer for creating spans.
|
773
766
|
pricing_info: Information used for calculating the cost of generating image variations.
|
774
767
|
trace_content: Flag indicating whether to trace the input image and generated variations.
|
775
|
-
|
768
|
+
|
776
769
|
Returns:
|
777
770
|
A function that wraps the image variations creation method to add telemetry.
|
778
771
|
"""
|
@@ -794,8 +787,16 @@ def async_image_variatons(gen_ai_endpoint, version, environment, application_nam
|
|
794
787
|
The response from the original 'images.create.variations' method.
|
795
788
|
"""
|
796
789
|
|
797
|
-
|
790
|
+
server_address, server_port = set_server_address_and_port(instance, "api.openai.com", 443)
|
791
|
+
request_model = kwargs.get("model", "dall-e-2")
|
792
|
+
|
793
|
+
span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_IMAGE} {request_model}"
|
794
|
+
|
795
|
+
with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
|
796
|
+
start_time = time.time()
|
798
797
|
response = await wrapped(*args, **kwargs)
|
798
|
+
end_time = time.time()
|
799
|
+
|
799
800
|
images_count = 0
|
800
801
|
|
801
802
|
try:
|
@@ -806,34 +807,45 @@ def async_image_variatons(gen_ai_endpoint, version, environment, application_nam
|
|
806
807
|
image = "url"
|
807
808
|
|
808
809
|
# Calculate cost of the operation
|
809
|
-
cost = get_image_model_cost(
|
810
|
+
cost = get_image_model_cost(request_model, pricing_info,
|
810
811
|
kwargs.get("size", "1024x1024"), "standard")
|
811
812
|
|
812
813
|
for items in response.data:
|
813
|
-
# Set Span attributes
|
814
|
+
# Set Span attributes (OTel Semconv)
|
814
815
|
span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
|
816
|
+
span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
|
817
|
+
SemanticConvetion.GEN_AI_OPERATION_TYPE_IMAGE)
|
815
818
|
span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
|
816
819
|
SemanticConvetion.GEN_AI_SYSTEM_OPENAI)
|
817
|
-
span.set_attribute(SemanticConvetion.
|
818
|
-
|
819
|
-
span.set_attribute(SemanticConvetion.
|
820
|
-
|
820
|
+
span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
|
821
|
+
request_model)
|
822
|
+
span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
|
823
|
+
server_address)
|
824
|
+
span.set_attribute(SemanticConvetion.SERVER_PORT,
|
825
|
+
server_port)
|
821
826
|
span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
|
822
827
|
response.created)
|
823
|
-
span.set_attribute(SemanticConvetion.
|
828
|
+
span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
|
829
|
+
request_model)
|
830
|
+
span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
|
831
|
+
"image")
|
832
|
+
|
833
|
+
# Set Span attributes (Extras)
|
834
|
+
span.set_attribute(DEPLOYMENT_ENVIRONMENT,
|
824
835
|
environment)
|
825
|
-
span.set_attribute(
|
836
|
+
span.set_attribute(SERVICE_NAME,
|
826
837
|
application_name)
|
827
|
-
span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
|
828
|
-
kwargs.get("model", "dall-e-2"))
|
829
|
-
span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
|
830
|
-
kwargs.get("user", ""))
|
831
838
|
span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IMAGE_SIZE,
|
832
839
|
kwargs.get("size", "1024x1024"))
|
833
840
|
span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IMAGE_QUALITY,
|
834
841
|
"standard")
|
842
|
+
span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
|
843
|
+
kwargs.get("user", ""))
|
844
|
+
span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
|
845
|
+
version)
|
846
|
+
|
835
847
|
if trace_content:
|
836
|
-
attribute_name = f"
|
848
|
+
attribute_name = f"{SemanticConvetion.GEN_AI_RESPONSE_IMAGE}.{images_count}"
|
837
849
|
span.add_event(
|
838
850
|
name=attribute_name,
|
839
851
|
attributes={
|
@@ -848,21 +860,20 @@ def async_image_variatons(gen_ai_endpoint, version, environment, application_nam
|
|
848
860
|
span.set_status(Status(StatusCode.OK))
|
849
861
|
|
850
862
|
if disable_metrics is False:
|
851
|
-
attributes =
|
852
|
-
|
853
|
-
|
854
|
-
SemanticConvetion.
|
855
|
-
|
856
|
-
|
857
|
-
|
858
|
-
|
859
|
-
|
860
|
-
|
861
|
-
SemanticConvetion.GEN_AI_TYPE_IMAGE,
|
862
|
-
SemanticConvetion.GEN_AI_REQUEST_MODEL:
|
863
|
-
kwargs.get("model", "dall-e-2")
|
864
|
-
}
|
863
|
+
attributes = create_metrics_attributes(
|
864
|
+
service_name=application_name,
|
865
|
+
deployment_environment=environment,
|
866
|
+
operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_IMAGE,
|
867
|
+
system=SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
|
868
|
+
request_model=request_model,
|
869
|
+
server_address=server_address,
|
870
|
+
server_port=server_port,
|
871
|
+
response_model=request_model,
|
872
|
+
)
|
865
873
|
|
874
|
+
metrics["genai_client_operation_duration"].record(
|
875
|
+
end_time - start_time, attributes
|
876
|
+
)
|
866
877
|
metrics["genai_requests"].add(1, attributes)
|
867
878
|
metrics["genai_cost"].record(cost, attributes)
|
868
879
|
|
@@ -878,20 +889,19 @@ def async_image_variatons(gen_ai_endpoint, version, environment, application_nam
|
|
878
889
|
|
879
890
|
return wrapper
|
880
891
|
|
881
|
-
def async_audio_create(
|
882
|
-
|
892
|
+
def async_audio_create(version, environment, application_name,
|
893
|
+
tracer, pricing_info, trace_content, metrics, disable_metrics):
|
883
894
|
"""
|
884
895
|
Generates a telemetry wrapper for creating speech audio to collect metrics.
|
885
|
-
|
896
|
+
|
886
897
|
Args:
|
887
|
-
gen_ai_endpoint: Endpoint identifier for logging and tracing.
|
888
898
|
version: Version of the monitoring package.
|
889
899
|
environment: Deployment environment (e.g., production, staging).
|
890
900
|
application_name: Name of the application using the OpenAI API.
|
891
901
|
tracer: OpenTelemetry tracer for creating spans.
|
892
902
|
pricing_info: Information used for calculating the cost of generating speech audio.
|
893
903
|
trace_content: Flag indicating whether to trace the input text and generated audio.
|
894
|
-
|
904
|
+
|
895
905
|
Returns:
|
896
906
|
A function that wraps the speech audio creation method to add telemetry.
|
897
907
|
"""
|
@@ -913,28 +923,42 @@ def async_audio_create(gen_ai_endpoint, version, environment, application_name,
            The response from the original 'audio.speech.create' method.
        """
 
-        with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
+        server_address, server_port = set_server_address_and_port(instance, "api.openai.com", 443)
+        request_model = kwargs.get("model", "tts-1")
+
+        span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_AUDIO} {request_model}"
+
+        with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
+            start_time = time.time()
            response = await wrapped(*args, **kwargs)
+            end_time = time.time()
 
            try:
                # Calculate cost of the operation
-                cost = get_audio_model_cost(kwargs.get("model", "tts-1"),
+                cost = get_audio_model_cost(request_model,
                                            pricing_info, kwargs.get("input", ""))
 
                # Set Span attributes
                span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+                                   SemanticConvetion.GEN_AI_OPERATION_TYPE_AUDIO)
                span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
                                   SemanticConvetion.GEN_AI_SYSTEM_OPENAI)
-                span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                                   SemanticConvetion.GEN_AI_TYPE_AUDIO)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                                   gen_ai_endpoint)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                   request_model)
+                span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                                   server_address)
+                span.set_attribute(SemanticConvetion.SERVER_PORT,
+                                   server_port)
+                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                                   request_model)
+                span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                   "speech")
+
+                span.set_attribute(DEPLOYMENT_ENVIRONMENT,
                                   environment)
-                span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                span.set_attribute(SERVICE_NAME,
                                   application_name)
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                   kwargs.get("model", "tts-1"))
                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_AUDIO_VOICE,
                                   kwargs.get("voice", "alloy"))
                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_AUDIO_RESPONSE_FORMAT,
|
|
943
967
|
kwargs.get("speed", 1))
|
944
968
|
span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
|
945
969
|
cost)
|
970
|
+
span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
|
971
|
+
version)
|
946
972
|
if trace_content:
|
947
973
|
span.add_event(
|
948
974
|
name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
|
@@ -954,21 +980,20 @@ def async_audio_create(gen_ai_endpoint, version, environment, application_name,
|
|
954
980
|
span.set_status(Status(StatusCode.OK))
|
955
981
|
|
956
982
|
if disable_metrics is False:
|
957
|
-
attributes =
|
958
|
-
|
959
|
-
|
960
|
-
SemanticConvetion.
|
961
|
-
|
962
|
-
|
963
|
-
|
964
|
-
|
965
|
-
|
966
|
-
|
967
|
-
SemanticConvetion.GEN_AI_TYPE_AUDIO,
|
968
|
-
SemanticConvetion.GEN_AI_REQUEST_MODEL:
|
969
|
-
kwargs.get("model", "tts-1")
|
970
|
-
}
|
983
|
+
attributes = create_metrics_attributes(
|
984
|
+
service_name=application_name,
|
985
|
+
deployment_environment=environment,
|
986
|
+
operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_AUDIO,
|
987
|
+
system=SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
|
988
|
+
request_model=request_model,
|
989
|
+
server_address=server_address,
|
990
|
+
server_port=server_port,
|
991
|
+
response_model=request_model,
|
992
|
+
)
|
971
993
|
|
994
|
+
metrics["genai_client_operation_duration"].record(
|
995
|
+
end_time - start_time, attributes
|
996
|
+
)
|
972
997
|
metrics["genai_requests"].add(1, attributes)
|
973
998
|
metrics["genai_cost"].record(cost, attributes)
|
974
999
|
|