openlit 1.33.8__py3-none-any.whl → 1.33.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openlit/__helpers.py +88 -0
- openlit/__init__.py +4 -3
- openlit/instrumentation/ag2/ag2.py +5 -5
- openlit/instrumentation/ai21/__init__.py +4 -4
- openlit/instrumentation/ai21/ai21.py +370 -319
- openlit/instrumentation/ai21/async_ai21.py +371 -319
- openlit/instrumentation/anthropic/__init__.py +4 -4
- openlit/instrumentation/anthropic/anthropic.py +321 -189
- openlit/instrumentation/anthropic/async_anthropic.py +323 -190
- openlit/instrumentation/assemblyai/__init__.py +1 -1
- openlit/instrumentation/assemblyai/assemblyai.py +59 -43
- openlit/instrumentation/astra/astra.py +9 -9
- openlit/instrumentation/astra/async_astra.py +9 -9
- openlit/instrumentation/azure_ai_inference/__init__.py +4 -4
- openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py +406 -252
- openlit/instrumentation/azure_ai_inference/azure_ai_inference.py +406 -252
- openlit/instrumentation/bedrock/__init__.py +1 -1
- openlit/instrumentation/bedrock/bedrock.py +115 -58
- openlit/instrumentation/chroma/chroma.py +9 -9
- openlit/instrumentation/cohere/__init__.py +33 -10
- openlit/instrumentation/cohere/async_cohere.py +610 -0
- openlit/instrumentation/cohere/cohere.py +410 -219
- openlit/instrumentation/controlflow/controlflow.py +5 -5
- openlit/instrumentation/crawl4ai/async_crawl4ai.py +5 -5
- openlit/instrumentation/crawl4ai/crawl4ai.py +5 -5
- openlit/instrumentation/crewai/crewai.py +6 -4
- openlit/instrumentation/dynamiq/dynamiq.py +5 -5
- openlit/instrumentation/elevenlabs/async_elevenlabs.py +71 -46
- openlit/instrumentation/elevenlabs/elevenlabs.py +71 -51
- openlit/instrumentation/embedchain/embedchain.py +9 -9
- openlit/instrumentation/firecrawl/firecrawl.py +5 -5
- openlit/instrumentation/google_ai_studio/__init__.py +9 -9
- openlit/instrumentation/google_ai_studio/async_google_ai_studio.py +183 -219
- openlit/instrumentation/google_ai_studio/google_ai_studio.py +183 -220
- openlit/instrumentation/gpt4all/__init__.py +2 -2
- openlit/instrumentation/gpt4all/gpt4all.py +345 -220
- openlit/instrumentation/gpu/__init__.py +5 -5
- openlit/instrumentation/groq/__init__.py +2 -2
- openlit/instrumentation/groq/async_groq.py +356 -240
- openlit/instrumentation/groq/groq.py +356 -240
- openlit/instrumentation/haystack/haystack.py +5 -5
- openlit/instrumentation/julep/async_julep.py +5 -5
- openlit/instrumentation/julep/julep.py +5 -5
- openlit/instrumentation/langchain/__init__.py +13 -7
- openlit/instrumentation/langchain/async_langchain.py +384 -0
- openlit/instrumentation/langchain/langchain.py +105 -492
- openlit/instrumentation/letta/letta.py +11 -9
- openlit/instrumentation/litellm/__init__.py +4 -5
- openlit/instrumentation/litellm/async_litellm.py +318 -247
- openlit/instrumentation/litellm/litellm.py +314 -243
- openlit/instrumentation/llamaindex/llamaindex.py +5 -5
- openlit/instrumentation/mem0/mem0.py +5 -5
- openlit/instrumentation/milvus/milvus.py +9 -9
- openlit/instrumentation/mistral/__init__.py +6 -6
- openlit/instrumentation/mistral/async_mistral.py +423 -250
- openlit/instrumentation/mistral/mistral.py +420 -246
- openlit/instrumentation/multion/async_multion.py +6 -4
- openlit/instrumentation/multion/multion.py +6 -4
- openlit/instrumentation/ollama/__init__.py +8 -30
- openlit/instrumentation/ollama/async_ollama.py +385 -417
- openlit/instrumentation/ollama/ollama.py +384 -417
- openlit/instrumentation/openai/__init__.py +11 -230
- openlit/instrumentation/openai/async_openai.py +433 -410
- openlit/instrumentation/openai/openai.py +414 -394
- openlit/instrumentation/phidata/phidata.py +6 -4
- openlit/instrumentation/pinecone/pinecone.py +9 -9
- openlit/instrumentation/premai/__init__.py +2 -2
- openlit/instrumentation/premai/premai.py +262 -213
- openlit/instrumentation/qdrant/async_qdrant.py +9 -9
- openlit/instrumentation/qdrant/qdrant.py +9 -9
- openlit/instrumentation/reka/__init__.py +2 -2
- openlit/instrumentation/reka/async_reka.py +90 -52
- openlit/instrumentation/reka/reka.py +90 -52
- openlit/instrumentation/together/__init__.py +4 -4
- openlit/instrumentation/together/async_together.py +278 -236
- openlit/instrumentation/together/together.py +278 -236
- openlit/instrumentation/transformers/__init__.py +1 -1
- openlit/instrumentation/transformers/transformers.py +76 -45
- openlit/instrumentation/vertexai/__init__.py +14 -64
- openlit/instrumentation/vertexai/async_vertexai.py +330 -987
- openlit/instrumentation/vertexai/vertexai.py +330 -987
- openlit/instrumentation/vllm/__init__.py +1 -1
- openlit/instrumentation/vllm/vllm.py +66 -36
- openlit/otel/metrics.py +98 -7
- openlit/semcov/__init__.py +113 -80
- {openlit-1.33.8.dist-info → openlit-1.33.10.dist-info}/METADATA +1 -1
- openlit-1.33.10.dist-info/RECORD +122 -0
- {openlit-1.33.8.dist-info → openlit-1.33.10.dist-info}/WHEEL +1 -1
- openlit/instrumentation/openai/async_azure_openai.py +0 -900
- openlit/instrumentation/openai/azure_openai.py +0 -898
- openlit-1.33.8.dist-info/RECORD +0 -122
- {openlit-1.33.8.dist-info → openlit-1.33.10.dist-info}/LICENSE +0 -0
@@ -1,11 +1,11 @@
-# pylint: disable=duplicate-code, broad-exception-caught, too-many-statements, unused-argument, too-many-branches
 """
 Module for monitoring OpenAI API calls.
 """
 
 import logging
+import time
 from opentelemetry.trace import SpanKind, Status, StatusCode
-from opentelemetry.sdk.resources import TELEMETRY_SDK_NAME
+from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
 from openlit.__helpers import (
     get_chat_model_cost,
     get_embed_model_cost,
@@ -14,19 +14,22 @@ from openlit.__helpers import (
     openai_tokens,
     handle_exception,
     response_as_dict,
+    calculate_ttft,
+    calculate_tbt,
+    create_metrics_attributes,
+    set_server_address_and_port
 )
 from openlit.semcov import SemanticConvetion
 
 # Initialize logger for logging potential issues and operations
 logger = logging.getLogger(__name__)
 
-def async_chat_completions(gen_ai_endpoint, version, environment, application_name,
+def async_chat_completions(version, environment, application_name,
                            tracer, pricing_info, trace_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for chat completions to collect metrics.
 
     Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
         version: Version of the monitoring package.
         environment: Deployment environment (e.g., production, staging).
        application_name: Name of the application using the OpenAI API.
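
The four new imports above come from openlit/__helpers.py, which grows by 88 lines in this release; their implementations are not part of this file's diff. Judging purely from the call sites below (a minimal sketch under that assumption, not the packaged code):

    # Sketch of the timing helpers, inferred from how the stream wrapper calls them;
    # the shipped openlit/__helpers.py is authoritative.
    from typing import List

    def calculate_ttft(timestamps: List[float], start_time: float) -> float:
        # Time to first token: gap between request start and the first streamed chunk.
        if timestamps:
            return timestamps[0] - start_time
        return 0.0

    def calculate_tbt(timestamps: List[float]) -> float:
        # Time between tokens: average gap between consecutive chunk arrivals.
        if len(timestamps) > 1:
            diffs = [t2 - t1 for t1, t2 in zip(timestamps, timestamps[1:])]
            return sum(diffs) / len(diffs)
        return 0.0
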
@@ -41,7 +44,7 @@ def async_chat_completions(gen_ai_endpoint, version, environment, application_name,
     class TracedAsyncStream:
         """
         Wrapper for streaming responses to collect metrics and trace data.
-        Wraps the
+        Wraps the response to collect message IDs and aggregated response.
 
         This class implements the '__aiter__' and '__anext__' methods that
         handle asynchronous streaming responses.
@@ -54,6 +57,8 @@ def async_chat_completions(gen_ai_endpoint, version, environment, application_name,
                 wrapped,
                 span,
                 kwargs,
+                server_address,
+                server_port,
                 **args,
             ):
             self.__wrapped__ = wrapped
@@ -61,9 +66,20 @@ def async_chat_completions(gen_ai_endpoint, version, environment, application_name,
             # Placeholder for aggregating streaming response
            self._llmresponse = ""
            self._response_id = ""
+            self._response_model = ""
+            self._finish_reason = ""
+            self._openai_response_service_tier = ""
+            self._openai_system_fingerprint = ""
 
            self._args = args
            self._kwargs = kwargs
+            self._start_time = time.time()
+            self._end_time = None
+            self._timestamps = []
+            self._ttft = 0
+            self._tbt = 0
+            self._server_address = server_address
+            self._server_port = server_port
 
         async def __aenter__(self):
             await self.__wrapped__.__aenter__()
@@ -82,6 +98,14 @@ def async_chat_completions(gen_ai_endpoint, version, environment, application_name,
         async def __anext__(self):
             try:
                 chunk = await self.__wrapped__.__anext__()
+                end_time = time.time()
+                # Record the timestamp for the current chunk
+                self._timestamps.append(end_time)
+
+                if len(self._timestamps) == 1:
+                    # Calculate time to first chunk
+                    self._ttft = calculate_ttft(self._timestamps, self._start_time)
+
                 chunked = response_as_dict(chunk)
                 # Collect message IDs and aggregated response from events
                 if (len(chunked.get('choices')) > 0 and ('delta' in chunked.get('choices')[0] and
@@ -91,10 +115,18 @@ def async_chat_completions(gen_ai_endpoint, version, environment, application_name,
                     if content:
                         self._llmresponse += content
                 self._response_id = chunked.get('id')
+                self._response_model = chunked.get('model')
+                self._finish_reason = chunked.get('choices')[0].get('finish_reason')
+                self._openai_response_service_tier = chunked.get('service_tier')
+                self._openai_system_fingerprint = chunked.get('system_fingerprint')
                 return chunk
             except StopAsyncIteration:
                 # Handling exception ensure observability without disrupting operation
                 try:
+                    self._end_time = time.time()
+                    if len(self._timestamps) > 1:
+                        self._tbt = calculate_tbt(self._timestamps)
+
                     # Format 'messages' into a single string
                     message_prompt = self._kwargs.get("messages", "")
                     formatted_messages = []
@@ -103,68 +135,100 @@ def async_chat_completions(gen_ai_endpoint, version, environment, application_name,
                         content = message["content"]
 
                         if isinstance(content, list):
-                            content_str = ", ".join(
-                                # pylint: disable=line-too-long
-                                f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
-                                if "type" in item else f'text: {item["text"]}'
-                                for item in content
-                            )
+                            content_str_list = []
+                            for item in content:
+                                if item["type"] == "text":
+                                    content_str_list.append(f'text: {item["text"]}')
+                                elif (item["type"] == "image_url" and
+                                      not item["image_url"]["url"].startswith("data:")):
+                                    content_str_list.append(f'image_url: {item["image_url"]["url"]}')
+                            content_str = ", ".join(content_str_list)
                             formatted_messages.append(f"{role}: {content_str}")
                         else:
                             formatted_messages.append(f"{role}: {content}")
                     prompt = "\n".join(formatted_messages)
 
+                    request_model = self._kwargs.get("model", "gpt-4o")
+
                     # Calculate tokens using input prompt and aggregated response
-                    prompt_tokens = openai_tokens(prompt,
-                                                  self._kwargs.get("model", "gpt-3.5-turbo"))
-                    completion_tokens = openai_tokens(self._llmresponse,
-                                                      self._kwargs.get("model", "gpt-3.5-turbo"))
+                    input_tokens = openai_tokens(prompt,
+                                                 request_model)
+                    output_tokens = openai_tokens(self._llmresponse,
+                                                  request_model)
 
                     # Calculate cost of the operation
-                    cost = get_chat_model_cost(self._kwargs.get("model", "gpt-3.5-turbo"),
-                                               pricing_info, prompt_tokens,
-                                               completion_tokens)
+                    cost = get_chat_model_cost(request_model,
+                                               pricing_info, input_tokens,
+                                               output_tokens)
 
-                    # Set Span attributes
+                    # Set Span attributes (OTel Semconv)
                     self._span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+                                             SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
                     self._span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
                                              SemanticConvetion.GEN_AI_SYSTEM_OPENAI)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                                             SemanticConvetion.GEN_AI_TYPE_CHAT)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                                             gen_ai_endpoint)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                             request_model)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
+                                             self._kwargs.get("seed", ""))
+                    self._span.set_attribute(SemanticConvetion.SERVER_PORT,
+                                             self._server_port)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+                                             self._kwargs.get("frequency_penalty", 0.0))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
+                                             self._kwargs.get("max_tokens", -1))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
+                                             self._kwargs.get("presence_penalty", 0.0))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES,
+                                             self._kwargs.get("stop", []))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
+                                             self._kwargs.get("temperature", 1.0))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
+                                             self._kwargs.get("top_p", 1.0))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
+                                             [self._finish_reason])
                     self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
                                              self._response_id)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                                             self._response_model)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+                                             input_tokens)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
+                                             output_tokens)
+                    self._span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                                             self._server_address)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SERVICE_TIER,
+                                             self._kwargs.get("service_tier", "auto"))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_SERVICE_TIER,
+                                             self._openai_response_service_tier)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_SYSTEM_FINGERPRINT,
+                                             self._openai_system_fingerprint)
+                    if isinstance(self._llmresponse, str):
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                                 "text")
+                    else:
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                                 "json")
+
+                    # Set Span attributes (Extra)
+                    self._span.set_attribute(DEPLOYMENT_ENVIRONMENT,
                                              environment)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                    self._span.set_attribute(SERVICE_NAME,
                                              application_name)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                             self._kwargs.get("model", "gpt-3.5-turbo"))
                     self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
                                              self._kwargs.get("user", ""))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
-                                             self._kwargs.get("top_p", 1.0))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
-                                             self._kwargs.get("max_tokens", -1))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
-                                             self._kwargs.get("temperature", 1.0))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
-                                             self._kwargs.get("presence_penalty", 0.0))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
-                                             self._kwargs.get("frequency_penalty", 0.0))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
-                                             self._kwargs.get("seed", ""))
                     self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
                                              True)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                                             prompt_tokens)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
-                                             completion_tokens)
                     self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                             prompt_tokens + completion_tokens)
+                                             input_tokens + output_tokens)
                     self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                              cost)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TBT,
+                                             self._tbt)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
+                                             self._ttft)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                                             version)
                     if trace_content:
                         self._span.add_event(
                             name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
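
openai_tokens predates this release; the hunk above only swaps its outputs from prompt_tokens/completion_tokens to input_tokens/output_tokens. A tiktoken-based helper of roughly this shape would match how it is called here (a sketch under that assumption, not the packaged implementation):

    import tiktoken

    def openai_tokens(text: str, model: str) -> int:
        # Count tokens for cost estimation, falling back to a generic
        # encoding when tiktoken does not recognize the model name.
        try:
            encoding = tiktoken.encoding_for_model(model)
        except KeyError:
            encoding = tiktoken.get_encoding("cl100k_base")
        return len(encoding.encode(text))
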
@@ -178,31 +242,35 @@ def async_chat_completions(gen_ai_endpoint, version, environment, application_name,
                                 SemanticConvetion.GEN_AI_CONTENT_COMPLETION: self._llmresponse,
                             },
                         )
-
                     self._span.set_status(Status(StatusCode.OK))
 
                     if disable_metrics is False:
-                        attributes = {
-                            TELEMETRY_SDK_NAME:
-                                "openlit",
-                            SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                                application_name,
-                            SemanticConvetion.GEN_AI_SYSTEM:
-                                SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
-                            SemanticConvetion.GEN_AI_ENVIRONMENT:
-                                environment,
-                            SemanticConvetion.GEN_AI_TYPE:
-                                SemanticConvetion.GEN_AI_TYPE_CHAT,
-                            SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                                self._kwargs.get("model", "gpt-3.5-turbo")
-                        }
+                        attributes = create_metrics_attributes(
+                            service_name=application_name,
+                            deployment_environment=environment,
+                            operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
+                            system=SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
+                            request_model=request_model,
+                            server_address=self._server_address,
+                            server_port=self._server_port,
+                            response_model=self._response_model,
+                        )
 
-                        metrics["genai_requests"].add(1, attributes)
-                        metrics["genai_total_tokens"].add(
-                            prompt_tokens + completion_tokens, attributes
+                        metrics["genai_client_usage_tokens"].record(
+                            input_tokens + output_tokens, attributes
+                        )
+                        metrics["genai_client_operation_duration"].record(
+                            self._end_time - self._start_time, attributes
                         )
-                        metrics["genai_completion_tokens"].add(completion_tokens, attributes)
-                        metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
+                        metrics["genai_server_tbt"].record(
+                            self._tbt, attributes
+                        )
+                        metrics["genai_server_ttft"].record(
+                            self._ttft, attributes
+                        )
+                        metrics["genai_requests"].add(1, attributes)
+                        metrics["genai_completion_tokens"].add(output_tokens, attributes)
+                        metrics["genai_prompt_tokens"].add(input_tokens, attributes)
                         metrics["genai_cost"].record(cost, attributes)
 
                 except Exception as e:
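
Every metrics block in the new code funnels through create_metrics_attributes instead of a hand-built dict, so counters and histograms share one attribute set. A hypothetical reading of its contract, inferred from the keyword arguments used in this diff (the attribute keys below are illustrative only; the real helper ships in openlit/__helpers.py):

    # Hypothetical shape of create_metrics_attributes; keys are assumptions.
    def create_metrics_attributes(service_name, deployment_environment, operation,
                                  system, request_model, server_address, server_port,
                                  response_model):
        # One shared attribute set keeps every recorded metric dimensionally consistent.
        return {
            "telemetry.sdk.name": "openlit",
            "service.name": service_name,
            "deployment.environment": deployment_environment,
            "gen_ai.operation.name": operation,
            "gen_ai.system": system,
            "gen_ai.request.model": request_model,
            "server.address": server_address,
            "server.port": server_port,
            "gen_ai.response.model": response_model,
        }
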
@@ -231,20 +299,25 @@ def async_chat_completions(gen_ai_endpoint, version, environment, application_name,
 
         # Check if streaming is enabled for the API call
         streaming = kwargs.get("stream", False)
+        server_address, server_port = set_server_address_and_port(instance, "api.openai.com", 443)
+        request_model = kwargs.get("model", "gpt-4o")
+
+        span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
 
         # pylint: disable=no-else-return
         if streaming:
             # Special handling for streaming response to accommodate the nature of data flow
             awaited_wrapped = await wrapped(*args, **kwargs)
-            span = tracer.start_span(gen_ai_endpoint, kind=SpanKind.CLIENT)
+            span = tracer.start_span(span_name, kind=SpanKind.CLIENT)
 
-            return TracedAsyncStream(awaited_wrapped, span, kwargs)
+            return TracedAsyncStream(awaited_wrapped, span, kwargs, server_address, server_port)
 
         # Handling for non-streaming responses
         else:
-            # pylint: disable=line-too-long
-            with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
+            with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
+                start_time = time.time()
                 response = await wrapped(*args, **kwargs)
+                end_time = time.time()
 
                 response_dict = response_as_dict(response)
 
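
set_server_address_and_port is called the same way before every wrapped operation. A minimal sketch of what it plausibly does, inferred from the call set_server_address_and_port(instance, "api.openai.com", 443); the attribute probed here is an assumption:

    from urllib.parse import urlparse

    def set_server_address_and_port(instance, default_address, default_port):
        # Prefer the client's configured base_url (proxies, Azure endpoints,
        # OpenAI-compatible gateways), falling back to the canonical host and port.
        base_url = str(getattr(instance, "base_url", "") or "")
        parsed = urlparse(base_url)
        return parsed.hostname or default_address, parsed.port or default_port

This is what lets the new server.address/server.port span attributes and metric dimensions reflect non-default endpoints.
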
@@ -258,7 +331,6 @@ def async_chat_completions(gen_ai_endpoint, version, environment, application_name,
 
                         if isinstance(content, list):
                             content_str = ", ".join(
-                                # pylint: disable=line-too-long
                                 f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
                                 if "type" in item else f'text: {item["text"]}'
                                 for item in content
@@ -268,38 +340,72 @@ def async_chat_completions(gen_ai_endpoint, version, environment, application_name,
                             formatted_messages.append(f"{role}: {content}")
                     prompt = "\n".join(formatted_messages)
 
-                    # Set base span attribues
+                    input_tokens = response_dict.get('usage').get('prompt_tokens')
+                    output_tokens = response_dict.get('usage').get('completion_tokens')
+
+                    # Calculate cost of the operation
+                    cost = get_chat_model_cost(request_model,
+                                               pricing_info, input_tokens,
+                                               output_tokens)
+
+                    # Set base span attribues (OTel Semconv)
                     span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                    span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+                                       SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
                     span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
                                        SemanticConvetion.GEN_AI_SYSTEM_OPENAI)
-                    span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                                       SemanticConvetion.GEN_AI_TYPE_CHAT)
-                    span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                                       gen_ai_endpoint)
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                       request_model)
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
+                                       kwargs.get("seed", ""))
+                    span.set_attribute(SemanticConvetion.SERVER_PORT,
+                                       server_port)
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+                                       kwargs.get("frequency_penalty", 0.0))
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
+                                       kwargs.get("max_tokens", -1))
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
+                                       kwargs.get("presence_penalty", 0.0))
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES,
+                                       kwargs.get("stop", []))
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
+                                       kwargs.get("temperature", 1.0))
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
+                                       kwargs.get("top_p", 1.0))
                     span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
                                        response_dict.get("id"))
-                    span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                                       response_dict.get('model'))
+                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+                                       input_tokens)
+                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
+                                       output_tokens)
+                    span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                                       server_address)
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SERVICE_TIER,
+                                       kwargs.get("service_tier", "auto"))
+                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_SERVICE_TIER,
+                                       response_dict.get('service_tier'))
+                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_SYSTEM_FINGERPRINT,
+                                       response_dict.get('system_fingerprint'))
+
+                    # Set base span attribues (Extras)
+                    span.set_attribute(DEPLOYMENT_ENVIRONMENT,
                                        environment)
-                    span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                    span.set_attribute(SERVICE_NAME,
                                        application_name)
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                       kwargs.get("model", "gpt-3.5-turbo"))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
-                                       kwargs.get("top_p", 1.0))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
-                                       kwargs.get("max_tokens", -1))
                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
                                        kwargs.get("user", ""))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
-                                       kwargs.get("temperature", 1.0))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
-                                       kwargs.get("presence_penalty", 0.0))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
-                                       kwargs.get("frequency_penalty", 0.0))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
-                                       kwargs.get("seed", ""))
                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
                                        False)
+                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+                                       input_tokens + output_tokens)
+                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                                       cost)
+                    span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
+                                       end_time - start_time)
+                    span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                                       version)
                     if trace_content:
                         span.add_event(
                             name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
@@ -308,93 +414,54 @@ def async_chat_completions(gen_ai_endpoint, version, environment, application_name,
                             },
                         )
 
-                    # Set span attributes when tools is not passed to the function call
-                    if "tools" not in kwargs:
-                        # Calculate cost of the operation
-                        cost = get_chat_model_cost(kwargs.get("model", "gpt-3.5-turbo"),
-                                                   pricing_info, response_dict.get('usage', {}).get('prompt_tokens', None),
-                                                   response_dict.get('usage', {}).get('completion_tokens', None))
-
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                                           response_dict.get('usage', {}).get('prompt_tokens', None))
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
-                                           response_dict.get('usage', {}).get('completion_tokens', None))
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                           response_dict.get('usage', {}).get('total_tokens', None))
+                    for i in range(kwargs.get('n',1)):
                         span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
-                                           [response_dict.get('choices')[0].get('finish_reason')])
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                                           cost)
-
-                        # Set span attributes for when n = 1 (default)
-                        if "n" not in kwargs or kwargs["n"] == 1:
-                            if trace_content:
-                                span.add_event(
-                                    name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
-                                    attributes={
-                                        SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response_dict.get('choices')[0].get("message").get("content"),
-                                    },
-                                )
-
-                        # Set span attributes for when n > 0
-                        else:
-                            i = 0
-                            while i < kwargs['n'] and trace_content is True:
-                                attribute_name = f"gen_ai.content.completion.{i}"
-                                span.add_event(
-                                    name=attribute_name,
-                                    attributes={
-                                        SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response_dict.get('choices')[i].get("message").get("content"),
-                                    },
-                                )
-                                i += 1
-
-                        # Return original response
-                        return response
-
-                    # Set span attributes when tools is passed to the function call
-                    elif "tools" in kwargs:
-                        # Calculate cost of the operation
-                        cost = get_chat_model_cost(kwargs.get("model", "gpt-3.5-turbo"),
-                                                   pricing_info, response_dict.get('usage').get('prompt_tokens'),
-                                                   response_dict.get('usage').get('completion_tokens'))
-                        span.add_event(
-                            name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
-                            attributes={
-                                SemanticConvetion.GEN_AI_CONTENT_COMPLETION: "Function called with tools",
-                            },
-                        )
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                                           response_dict.get('usage').get('prompt_tokens'))
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
-                                           response_dict.get('usage').get('completion_tokens'))
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                           response_dict.get('usage').get('total_tokens'))
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                                           cost)
+                                           [response_dict.get('choices')[i].get('finish_reason')])
+                        if trace_content:
+                            span.add_event(
+                                name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                                attributes={
+                                    # pylint: disable=line-too-long
+                                    SemanticConvetion.GEN_AI_CONTENT_COMPLETION: str(response_dict.get('choices')[i].get('message').get('content')),
+                                },
+                            )
+                        if kwargs.get('tools'):
+                            span.set_attribute(SemanticConvetion.GEN_AI_TOOL_CALLS,
+                                               str(response_dict.get('choices')[i].get('message').get('tool_calls')))
+
+                        if isinstance(response_dict.get('choices')[i].get('message').get('content'), str):
+                            span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                               "text")
+                        elif response_dict.get('choices')[i].get('message').get('content') is not None:
+                            span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                               "json")
 
                     span.set_status(Status(StatusCode.OK))
 
                     if disable_metrics is False:
-                        attributes = {
-                            TELEMETRY_SDK_NAME:
-                                "openlit",
-                            SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                                application_name,
-                            SemanticConvetion.GEN_AI_SYSTEM:
-                                SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
-                            SemanticConvetion.GEN_AI_ENVIRONMENT:
-                                environment,
-                            SemanticConvetion.GEN_AI_TYPE:
-                                SemanticConvetion.GEN_AI_TYPE_CHAT,
-                            SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                                kwargs.get("model", "gpt-3.5-turbo")
-                        }
+                        attributes = create_metrics_attributes(
+                            service_name=application_name,
+                            deployment_environment=environment,
+                            operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
+                            system=SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
+                            request_model=request_model,
+                            server_address=server_address,
+                            server_port=server_port,
+                            response_model=response_dict.get('model'),
+                        )
 
+                        metrics["genai_client_usage_tokens"].record(
+                            input_tokens + output_tokens, attributes
+                        )
+                        metrics["genai_client_operation_duration"].record(
+                            end_time - start_time, attributes
+                        )
+                        metrics["genai_server_ttft"].record(
+                            end_time - start_time, attributes
+                        )
                         metrics["genai_requests"].add(1, attributes)
-                        metrics["genai_total_tokens"].add(response_dict.get('usage').get('total_tokens'), attributes)
-                        metrics["genai_completion_tokens"].add(response_dict.get('usage').get('completion_tokens'), attributes)
-                        metrics["genai_prompt_tokens"].add(response_dict.get('usage').get('prompt_tokens'), attributes)
+                        metrics["genai_completion_tokens"].add(output_tokens, attributes)
+                        metrics["genai_prompt_tokens"].add(input_tokens, attributes)
                         metrics["genai_cost"].record(cost, attributes)
 
                     # Return original response
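
Worth noting across the chat hunks above: span names switch from the fixed endpoint string (the removed gen_ai_endpoint argument) to the OTel GenAI convention of "{operation} {request model}". Assuming GEN_AI_OPERATION_TYPE_CHAT is the string "chat", the effect is:

    # Assumes GEN_AI_OPERATION_TYPE_CHAT == "chat"; the old name was whatever
    # openlit 1.33.8 passed as gen_ai_endpoint for this wrapper.
    request_model = "gpt-4o"
    span_name = f"chat {request_model}"   # new: "chat gpt-4o", one span name per model
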
@@ -409,20 +476,19 @@ def async_chat_completions(gen_ai_endpoint, version, environment, application_name,
 
     return wrapper
 
-def async_embedding(gen_ai_endpoint, version, environment, application_name,
-                    tracer, pricing_info, trace_content, metrics, disable_metrics):
+def async_embedding(version, environment, application_name,
+                    tracer, pricing_info, trace_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for embeddings to collect metrics.
-
+
     Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
         version: Version of the monitoring package.
         environment: Deployment environment (e.g., production, staging).
         application_name: Name of the application using the OpenAI API.
         tracer: OpenTelemetry tracer for creating spans.
         pricing_info: Information used for calculating the cost of OpenAI usage.
         trace_content: Flag indicating whether to trace the actual content.
-
+
     Returns:
         A function that wraps the embeddings method to add telemetry.
     """
@@ -444,40 +510,56 @@ def async_embedding(gen_ai_endpoint, version, environment, application_name,
         The response from the original 'embeddings' method.
         """
 
-        with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
+        server_address, server_port = set_server_address_and_port(instance, "api.openai.com", 443)
+        request_model = kwargs.get("model", "text-embedding-ada-002")
+
+        span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING} {request_model}"
+
+        with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+            start_time = time.time()
             response = await wrapped(*args, **kwargs)
+            end_time = time.time()
+
             response_dict = response_as_dict(response)
             try:
+                input_tokens = response_dict.get('usage').get('prompt_tokens')
+
                 # Calculate cost of the operation
-                cost = get_embed_model_cost(kwargs.get("model", "text-embedding-ada-002"),
-                                            pricing_info, response_dict.get('usage').get('prompt_tokens'))
+                cost = get_embed_model_cost(request_model,
+                                            pricing_info, input_tokens)
 
-                # Set Span attributes
+                # Set Span attributes (OTel Semconv)
                 span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+                                   SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING)
                 span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
                                    SemanticConvetion.GEN_AI_SYSTEM_OPENAI)
-                span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                                   SemanticConvetion.GEN_AI_TYPE_EMBEDDING)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                                   gen_ai_endpoint)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                   request_model)
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_ENCODING_FORMATS,
+                                   [kwargs.get('encoding_format', 'float')])
+                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                                   request_model)
+                span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                                   server_address)
+                span.set_attribute(SemanticConvetion.SERVER_PORT,
+                                   server_port)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+                                   input_tokens)
+
+                # Set Span attributes (Extras)
+                span.set_attribute(DEPLOYMENT_ENVIRONMENT,
                                    environment)
-                span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                span.set_attribute(SERVICE_NAME,
                                    application_name)
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                   kwargs.get("model", "text-embedding-ada-002"))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_EMBEDDING_FORMAT,
-                                   kwargs.get("encoding_format", "float"))
-                # span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_EMBEDDING_DIMENSION,
-                #                    kwargs.get("dimensions", "null"))
                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
                                    kwargs.get("user", ""))
-                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                                   response_dict.get('usage').get('prompt_tokens'))
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                   response_dict.get('usage').get('total_tokens'))
+                                   input_tokens)
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                    cost)
+                span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                                   version)
 
                 if trace_content:
                     span.add_event(
@@ -490,26 +572,24 @@ def async_embedding(gen_ai_endpoint, version, environment, application_name,
                 span.set_status(Status(StatusCode.OK))
 
                 if disable_metrics is False:
-                    attributes = {
-                        TELEMETRY_SDK_NAME:
-                            "openlit",
-                        SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                            application_name,
-                        SemanticConvetion.GEN_AI_SYSTEM:
-                            SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
-                        SemanticConvetion.GEN_AI_ENVIRONMENT:
-                            environment,
-                        SemanticConvetion.GEN_AI_TYPE:
-                            SemanticConvetion.GEN_AI_TYPE_EMBEDDING,
-                        SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                            kwargs.get("model", "text-embedding-ada-002")
-                    }
-
+                    attributes = create_metrics_attributes(
+                        service_name=application_name,
+                        deployment_environment=environment,
+                        operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING,
+                        system=SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
+                        request_model=request_model,
+                        server_address=server_address,
+                        server_port=server_port,
+                        response_model=request_model,
+                    )
+                    metrics["genai_client_usage_tokens"].record(
+                        input_tokens, attributes
+                    )
+                    metrics["genai_client_operation_duration"].record(
+                        end_time - start_time, attributes
+                    )
                     metrics["genai_requests"].add(1, attributes)
-                    metrics["genai_total_tokens"].add(
-                        response_dict.get('usage').get('total_tokens'), attributes)
-                    metrics["genai_prompt_tokens"].add(
-                        response_dict.get('usage').get('prompt_tokens'), attributes)
+                    metrics["genai_prompt_tokens"].add(input_tokens, attributes)
                     metrics["genai_cost"].record(cost, attributes)
 
                 # Return original response
@@ -524,125 +604,19 @@ def async_embedding(gen_ai_endpoint, version, environment, application_name,
 
     return wrapper
 
-def async_finetune(gen_ai_endpoint, version, environment, application_name,
+def async_image_generate(version, environment, application_name,
                    tracer, pricing_info, trace_content, metrics, disable_metrics):
     """
-    Generates a telemetry wrapper for fine-tuning jobs to collect metrics.
-
-    Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
-        version: Version of the monitoring package.
-        environment: Deployment environment (e.g., production, staging).
-        application_name: Name of the application using the OpenAI API.
-        tracer: OpenTelemetry tracer for creating spans.
-        pricing_info: Information used for calculating the cost of OpenAI usage.
-        trace_content: Flag indicating whether to trace the actual content.
-
-    Returns:
-        A function that wraps the fine tuning creation method to add telemetry.
-    """
-
-    async def wrapper(wrapped, instance, args, kwargs):
-        """
-        Wraps the 'fine_tuning.jobs.create' API call to add telemetry.
-
-        This collects metrics such as execution time, usage stats, and handles errors
-        gracefully, adding details to the trace for observability.
-
-        Args:
-            wrapped: The original 'fine_tuning.jobs.create' method to be wrapped.
-            instance: The instance of the class where the original method is defined.
-            args: Positional arguments for the method.
-            kwargs: Keyword arguments for the method.
-
-        Returns:
-            The response from the original 'fine_tuning.jobs.create' method.
-        """
-
-        with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
-            response = await wrapped(*args, **kwargs)
-
-            # Handling exception ensure observability without disrupting operation
-            try:
-                # Set Span attributes
-                span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                                   SemanticConvetion.GEN_AI_SYSTEM_OPENAI)
-                span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                                   SemanticConvetion.GEN_AI_TYPE_FINETUNING)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                                   gen_ai_endpoint)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
-                                   environment)
-                span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
-                                   application_name)
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                   kwargs.get("model", "gpt-3.5-turbo"))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TRAINING_FILE,
-                                   kwargs.get("training_file", ""))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_VALIDATION_FILE,
-                                   kwargs.get("validation_file", ""))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FINETUNE_BATCH_SIZE,
-                                   kwargs.get("hyperparameters.batch_size", "auto"))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FINETUNE_MODEL_LRM,
-                                   kwargs.get("hyperparameters.learning_rate_multiplier",
-                                              "auto"))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FINETUNE_MODEL_EPOCHS,
-                                   kwargs.get("hyperparameters.n_epochs", "auto"))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FINETUNE_MODEL_SUFFIX,
-                                   kwargs.get("suffix", ""))
-                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
-                                   response.id)
-                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                                   response.usage.prompt_tokens)
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FINETUNE_STATUS,
-                                   response.status)
-                span.set_status(Status(StatusCode.OK))
-
-                if disable_metrics is False:
-                    attributes = {
-                        TELEMETRY_SDK_NAME:
-                            "openlit",
-                        SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                            application_name,
-                        SemanticConvetion.GEN_AI_SYSTEM:
-                            SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
-                        SemanticConvetion.GEN_AI_ENVIRONMENT:
-                            environment,
-                        SemanticConvetion.GEN_AI_TYPE:
-                            SemanticConvetion.GEN_AI_TYPE_FINETUNING,
-                        SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                            kwargs.get("model", "gpt-3.5-turbo")
-                    }
-
-                    metrics["genai_requests"].add(1, attributes)
-
-                # Return original response
-                return response
-
-            except Exception as e:
-                handle_exception(span, e)
-                logger.error("Error in trace creation: %s", e)
-
-                # Return original response
-                return response
-
-    return wrapper
-
-def async_image_generate(gen_ai_endpoint, version, environment, application_name,
-                         tracer, pricing_info, trace_content, metrics, disable_metrics):
-    """
     Generates a telemetry wrapper for image generation to collect metrics.
-
+
     Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
         version: Version of the monitoring package.
         environment: Deployment environment (e.g., production, staging).
         application_name: Name of the application using the OpenAI API.
         tracer: OpenTelemetry tracer for creating spans.
         pricing_info: Information used for calculating the cost of OpenAI image generation.
         trace_content: Flag indicating whether to trace the input prompt and generated images.
-
+
     Returns:
         A function that wraps the image generation method to add telemetry.
     """
@@ -664,8 +638,16 @@ def async_image_generate(gen_ai_endpoint, version, environment, application_name,
         The response from the original 'images.generate' method.
         """
 
-        with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
+        server_address, server_port = set_server_address_and_port(instance, "api.openai.com", 443)
+        request_model = kwargs.get("model", "dall-e-2")
+
+        span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_IMAGE} {request_model}"
+
+        with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+            start_time = time.time()
             response = await wrapped(*args, **kwargs)
+            end_time = time.time()
+
             images_count = 0
 
             try:
@@ -676,27 +658,35 @@ def async_image_generate(gen_ai_endpoint, version, environment, application_name,
                     image = "url"
 
                 # Calculate cost of the operation
-                cost = get_image_model_cost(kwargs.get("model", "dall-e-2"),
+                cost = get_image_model_cost(request_model,
                                             pricing_info, kwargs.get("size", "1024x1024"),
                                             kwargs.get("quality", "standard"))
 
                 for items in response.data:
-                    # Set Span attributes
+                    # Set Span attributes (OTel Semconv)
                     span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                    span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+                                       SemanticConvetion.GEN_AI_OPERATION_TYPE_IMAGE)
                     span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
                                        SemanticConvetion.GEN_AI_SYSTEM_OPENAI)
-                    span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                                       SemanticConvetion.GEN_AI_TYPE_IMAGE)
-                    span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                                       gen_ai_endpoint)
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                       request_model)
+                    span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                                       server_address)
+                    span.set_attribute(SemanticConvetion.SERVER_PORT,
+                                       server_port)
                     span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
                                        response.created)
-                    span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                                       request_model)
+                    span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                       "image")
+
+                    # Set Span attributes (Extras)
+                    span.set_attribute(DEPLOYMENT_ENVIRONMENT,
                                        environment)
-                    span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                    span.set_attribute(SERVICE_NAME,
                                        application_name)
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                       kwargs.get("model", "dall-e-2"))
                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IMAGE_SIZE,
                                        kwargs.get("size", "1024x1024"))
                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IMAGE_QUALITY,
@@ -707,6 +697,9 @@ def async_image_generate(gen_ai_endpoint, version, environment, application_name,
                                        items.revised_prompt if items.revised_prompt else "")
                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
                                        kwargs.get("user", ""))
+                    span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                                       version)
+
                     if trace_content:
                         span.add_event(
                             name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
@@ -714,7 +707,7 @@ def async_image_generate(gen_ai_endpoint, version, environment, application_name,
                                 SemanticConvetion.GEN_AI_CONTENT_PROMPT: kwargs.get("prompt", ""),
                             },
                         )
-                    attribute_name = f"gen_ai.response.image.{images_count}"
+                    attribute_name = f"{SemanticConvetion.GEN_AI_RESPONSE_IMAGE}.{images_count}"
                     span.add_event(
                         name=attribute_name,
                         attributes={
@@ -729,21 +722,20 @@ def async_image_generate(gen_ai_endpoint, version, environment, application_name,
                 span.set_status(Status(StatusCode.OK))
 
                 if disable_metrics is False:
-                    attributes = {
-                        TELEMETRY_SDK_NAME:
-                            "openlit",
-                        SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                            application_name,
-                        SemanticConvetion.GEN_AI_SYSTEM:
-                            SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
-                        SemanticConvetion.GEN_AI_ENVIRONMENT:
-                            environment,
-                        SemanticConvetion.GEN_AI_TYPE:
-                            SemanticConvetion.GEN_AI_TYPE_IMAGE,
-                        SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                            kwargs.get("model", "dall-e-2")
-                    }
+                    attributes = create_metrics_attributes(
+                        service_name=application_name,
+                        deployment_environment=environment,
+                        operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_IMAGE,
+                        system=SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
+                        request_model=request_model,
+                        server_address=server_address,
+                        server_port=server_port,
+                        response_model=request_model,
+                    )
 
+                    metrics["genai_client_operation_duration"].record(
+                        end_time - start_time, attributes
+                    )
                     metrics["genai_requests"].add(1, attributes)
                     metrics["genai_cost"].record(cost, attributes)
 
@@ -759,20 +751,19 @@ def async_image_generate(gen_ai_endpoint, version, environment, application_name,
 
     return wrapper
 
-def async_image_variatons(gen_ai_endpoint, version, environment, application_name,
-                          tracer, pricing_info, trace_content, metrics, disable_metrics):
+def async_image_variatons(version, environment, application_name,
+                          tracer, pricing_info, trace_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for creating image variations to collect metrics.
-
+
     Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
         version: Version of the monitoring package.
         environment: Deployment environment (e.g., production, staging).
         application_name: Name of the application using the OpenAI API.
         tracer: OpenTelemetry tracer for creating spans.
         pricing_info: Information used for calculating the cost of generating image variations.
         trace_content: Flag indicating whether to trace the input image and generated variations.
-
+
     Returns:
         A function that wraps the image variations creation method to add telemetry.
     """
@@ -794,8 +785,16 @@ def async_image_variatons(gen_ai_endpoint, version, environment, application_name,
         The response from the original 'images.create.variations' method.
         """
 
-        with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
+        server_address, server_port = set_server_address_and_port(instance, "api.openai.com", 443)
+        request_model = kwargs.get("model", "dall-e-2")
+
+        span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_IMAGE} {request_model}"
+
+        with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+            start_time = time.time()
             response = await wrapped(*args, **kwargs)
+            end_time = time.time()
+
             images_count = 0
 
             try:
@@ -806,34 +805,45 @@ def async_image_variatons(gen_ai_endpoint, version, environment, application_name,
                     image = "url"
 
                 # Calculate cost of the operation
-                cost = get_image_model_cost(kwargs.get("model", "dall-e-2"), pricing_info,
+                cost = get_image_model_cost(request_model, pricing_info,
                                             kwargs.get("size", "1024x1024"), "standard")
 
                 for items in response.data:
-                    # Set Span attributes
+                    # Set Span attributes (OTel Semconv)
                     span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                    span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+                                       SemanticConvetion.GEN_AI_OPERATION_TYPE_IMAGE)
                     span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
                                        SemanticConvetion.GEN_AI_SYSTEM_OPENAI)
-                    span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                                       SemanticConvetion.GEN_AI_TYPE_IMAGE)
-                    span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                                       gen_ai_endpoint)
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                       request_model)
+                    span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                                       server_address)
+                    span.set_attribute(SemanticConvetion.SERVER_PORT,
+                                       server_port)
                     span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
                                        response.created)
-                    span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                                       request_model)
+                    span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                       "image")
+
+                    # Set Span attributes (Extras)
+                    span.set_attribute(DEPLOYMENT_ENVIRONMENT,
                                        environment)
-                    span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                    span.set_attribute(SERVICE_NAME,
                                        application_name)
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                       kwargs.get("model", "dall-e-2"))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
-                                       kwargs.get("user", ""))
                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IMAGE_SIZE,
                                        kwargs.get("size", "1024x1024"))
                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IMAGE_QUALITY,
                                        "standard")
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
+                                       kwargs.get("user", ""))
+                    span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                                       version)
+
                     if trace_content:
-                        attribute_name = f"gen_ai.response.image.{images_count}"
+                        attribute_name = f"{SemanticConvetion.GEN_AI_RESPONSE_IMAGE}.{images_count}"
                         span.add_event(
                             name=attribute_name,
                             attributes={
@@ -848,21 +858,20 @@ def async_image_variatons(gen_ai_endpoint, version, environment, application_name,
                 span.set_status(Status(StatusCode.OK))
 
                 if disable_metrics is False:
-                    attributes = {
-                        TELEMETRY_SDK_NAME:
-                            "openlit",
-                        SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                            application_name,
-                        SemanticConvetion.GEN_AI_SYSTEM:
-                            SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
-                        SemanticConvetion.GEN_AI_ENVIRONMENT:
-                            environment,
-                        SemanticConvetion.GEN_AI_TYPE:
-                            SemanticConvetion.GEN_AI_TYPE_IMAGE,
-                        SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                            kwargs.get("model", "dall-e-2")
-                    }
+                    attributes = create_metrics_attributes(
+                        service_name=application_name,
+                        deployment_environment=environment,
+                        operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_IMAGE,
+                        system=SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
+                        request_model=request_model,
+                        server_address=server_address,
+                        server_port=server_port,
+                        response_model=request_model,
+                    )
 
+                    metrics["genai_client_operation_duration"].record(
+                        end_time - start_time, attributes
+                    )
                     metrics["genai_requests"].add(1, attributes)
                     metrics["genai_cost"].record(cost, attributes)
 
@@ -878,20 +887,19 @@ def async_image_variatons(gen_ai_endpoint, version, environment, application_name,
 
     return wrapper
 
-def async_audio_create(gen_ai_endpoint, version, environment, application_name,
-                       tracer, pricing_info, trace_content, metrics, disable_metrics):
+def async_audio_create(version, environment, application_name,
+                       tracer, pricing_info, trace_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for creating speech audio to collect metrics.
-
+
    Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
        version: Version of the monitoring package.
        environment: Deployment environment (e.g., production, staging).
        application_name: Name of the application using the OpenAI API.
        tracer: OpenTelemetry tracer for creating spans.
        pricing_info: Information used for calculating the cost of generating speech audio.
        trace_content: Flag indicating whether to trace the input text and generated audio.
-
+
    Returns:
        A function that wraps the speech audio creation method to add telemetry.
    """
@@ -913,28 +921,42 @@ def async_audio_create(gen_ai_endpoint, version, environment, application_name,
         The response from the original 'audio.speech.create' method.
         """
 
-        with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
+        server_address, server_port = set_server_address_and_port(instance, "api.openai.com", 443)
+        request_model = kwargs.get("model", "tts-1")
+
+        span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_AUDIO} {request_model}"
+
+        with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
+            start_time = time.time()
             response = await wrapped(*args, **kwargs)
+            end_time = time.time()
 
             try:
                 # Calculate cost of the operation
-                cost = get_audio_model_cost(kwargs.get("model", "tts-1"),
+                cost = get_audio_model_cost(request_model,
                                             pricing_info, kwargs.get("input", ""))
 
                 # Set Span attributes
                 span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+                                   SemanticConvetion.GEN_AI_OPERATION_TYPE_AUDIO)
                 span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
                                    SemanticConvetion.GEN_AI_SYSTEM_OPENAI)
-                span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                                   SemanticConvetion.GEN_AI_TYPE_AUDIO)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                                   gen_ai_endpoint)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                   request_model)
+                span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                                   server_address)
+                span.set_attribute(SemanticConvetion.SERVER_PORT,
+                                   server_port)
+                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                                   request_model)
+                span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                   "speech")
+
+                span.set_attribute(DEPLOYMENT_ENVIRONMENT,
                                    environment)
-                span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                span.set_attribute(SERVICE_NAME,
                                    application_name)
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                   kwargs.get("model", "tts-1"))
                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_AUDIO_VOICE,
                                    kwargs.get("voice", "alloy"))
                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_AUDIO_RESPONSE_FORMAT,
@@ -943,6 +965,8 @@ def async_audio_create(gen_ai_endpoint, version, environment, application_name,
                                    kwargs.get("speed", 1))
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                    cost)
+                span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                                   version)
                 if trace_content:
                     span.add_event(
                         name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
@@ -954,21 +978,20 @@ def async_audio_create(gen_ai_endpoint, version, environment, application_name,
                 span.set_status(Status(StatusCode.OK))
 
                 if disable_metrics is False:
-                    attributes = {
-                        TELEMETRY_SDK_NAME:
-                            "openlit",
-                        SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                            application_name,
-                        SemanticConvetion.GEN_AI_SYSTEM:
-                            SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
-                        SemanticConvetion.GEN_AI_ENVIRONMENT:
-                            environment,
-                        SemanticConvetion.GEN_AI_TYPE:
-                            SemanticConvetion.GEN_AI_TYPE_AUDIO,
-                        SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                            kwargs.get("model", "tts-1")
-                    }
+                    attributes = create_metrics_attributes(
+                        service_name=application_name,
+                        deployment_environment=environment,
+                        operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_AUDIO,
+                        system=SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
+                        request_model=request_model,
+                        server_address=server_address,
+                        server_port=server_port,
+                        response_model=request_model,
+                    )
 
+                    metrics["genai_client_operation_duration"].record(
+                        end_time - start_time, attributes
+                    )
                    metrics["genai_requests"].add(1, attributes)
                    metrics["genai_cost"].record(cost, attributes)
 