openlit 1.33.8__py3-none-any.whl → 1.33.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openlit/__helpers.py +83 -0
- openlit/__init__.py +1 -1
- openlit/instrumentation/ag2/ag2.py +2 -2
- openlit/instrumentation/ai21/__init__.py +4 -4
- openlit/instrumentation/ai21/ai21.py +370 -319
- openlit/instrumentation/ai21/async_ai21.py +371 -319
- openlit/instrumentation/anthropic/__init__.py +4 -4
- openlit/instrumentation/anthropic/anthropic.py +321 -189
- openlit/instrumentation/anthropic/async_anthropic.py +323 -190
- openlit/instrumentation/assemblyai/__init__.py +1 -1
- openlit/instrumentation/assemblyai/assemblyai.py +59 -43
- openlit/instrumentation/astra/astra.py +4 -4
- openlit/instrumentation/astra/async_astra.py +4 -4
- openlit/instrumentation/azure_ai_inference/__init__.py +4 -4
- openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py +406 -252
- openlit/instrumentation/azure_ai_inference/azure_ai_inference.py +406 -252
- openlit/instrumentation/bedrock/__init__.py +1 -1
- openlit/instrumentation/bedrock/bedrock.py +115 -58
- openlit/instrumentation/chroma/chroma.py +4 -4
- openlit/instrumentation/cohere/__init__.py +33 -10
- openlit/instrumentation/cohere/async_cohere.py +610 -0
- openlit/instrumentation/cohere/cohere.py +410 -219
- openlit/instrumentation/controlflow/controlflow.py +2 -2
- openlit/instrumentation/crawl4ai/async_crawl4ai.py +2 -2
- openlit/instrumentation/crawl4ai/crawl4ai.py +2 -2
- openlit/instrumentation/crewai/crewai.py +2 -2
- openlit/instrumentation/dynamiq/dynamiq.py +2 -2
- openlit/instrumentation/elevenlabs/async_elevenlabs.py +73 -47
- openlit/instrumentation/elevenlabs/elevenlabs.py +73 -52
- openlit/instrumentation/embedchain/embedchain.py +4 -4
- openlit/instrumentation/firecrawl/firecrawl.py +2 -2
- openlit/instrumentation/google_ai_studio/__init__.py +9 -9
- openlit/instrumentation/google_ai_studio/async_google_ai_studio.py +183 -219
- openlit/instrumentation/google_ai_studio/google_ai_studio.py +183 -220
- openlit/instrumentation/gpt4all/gpt4all.py +17 -17
- openlit/instrumentation/groq/async_groq.py +14 -14
- openlit/instrumentation/groq/groq.py +14 -14
- openlit/instrumentation/haystack/haystack.py +2 -2
- openlit/instrumentation/julep/async_julep.py +2 -2
- openlit/instrumentation/julep/julep.py +2 -2
- openlit/instrumentation/langchain/langchain.py +36 -31
- openlit/instrumentation/letta/letta.py +6 -6
- openlit/instrumentation/litellm/async_litellm.py +20 -20
- openlit/instrumentation/litellm/litellm.py +20 -20
- openlit/instrumentation/llamaindex/llamaindex.py +2 -2
- openlit/instrumentation/mem0/mem0.py +2 -2
- openlit/instrumentation/milvus/milvus.py +4 -4
- openlit/instrumentation/mistral/async_mistral.py +18 -18
- openlit/instrumentation/mistral/mistral.py +18 -18
- openlit/instrumentation/multion/async_multion.py +2 -2
- openlit/instrumentation/multion/multion.py +2 -2
- openlit/instrumentation/ollama/async_ollama.py +29 -29
- openlit/instrumentation/ollama/ollama.py +29 -29
- openlit/instrumentation/openai/__init__.py +11 -230
- openlit/instrumentation/openai/async_openai.py +434 -409
- openlit/instrumentation/openai/openai.py +415 -393
- openlit/instrumentation/phidata/phidata.py +2 -2
- openlit/instrumentation/pinecone/pinecone.py +4 -4
- openlit/instrumentation/premai/premai.py +20 -20
- openlit/instrumentation/qdrant/async_qdrant.py +4 -4
- openlit/instrumentation/qdrant/qdrant.py +4 -4
- openlit/instrumentation/reka/async_reka.py +6 -6
- openlit/instrumentation/reka/reka.py +6 -6
- openlit/instrumentation/together/async_together.py +18 -18
- openlit/instrumentation/together/together.py +18 -18
- openlit/instrumentation/transformers/transformers.py +6 -6
- openlit/instrumentation/vertexai/async_vertexai.py +53 -53
- openlit/instrumentation/vertexai/vertexai.py +53 -53
- openlit/instrumentation/vllm/vllm.py +6 -6
- openlit/otel/metrics.py +98 -7
- openlit/semcov/__init__.py +113 -80
- {openlit-1.33.8.dist-info → openlit-1.33.9.dist-info}/METADATA +1 -1
- openlit-1.33.9.dist-info/RECORD +121 -0
- {openlit-1.33.8.dist-info → openlit-1.33.9.dist-info}/WHEEL +1 -1
- openlit/instrumentation/openai/async_azure_openai.py +0 -900
- openlit/instrumentation/openai/azure_openai.py +0 -898
- openlit-1.33.8.dist-info/RECORD +0 -122
- {openlit-1.33.8.dist-info → openlit-1.33.9.dist-info}/LICENSE +0 -0
openlit/instrumentation/openai/openai.py (+415 -393):

@@ -1,11 +1,11 @@
-# pylint: disable=duplicate-code, broad-exception-caught, too-many-statements, unused-argument, too-many-branches
 """
 Module for monitoring OpenAI API calls.
 """
 
 import logging
+import time
 from opentelemetry.trace import SpanKind, Status, StatusCode
-from opentelemetry.sdk.resources import TELEMETRY_SDK_NAME
+from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
 from openlit.__helpers import (
     get_chat_model_cost,
     get_embed_model_cost,
@@ -14,19 +14,22 @@ from openlit.__helpers import (
     openai_tokens,
     handle_exception,
     response_as_dict,
+    calculate_ttft,
+    calculate_tbt,
+    create_metrics_attributes,
+    set_server_address_and_port
 )
 from openlit.semcov import SemanticConvetion
 
 # Initialize logger for logging potential issues and operations
 logger = logging.getLogger(__name__)
 
-def chat_completions(gen_ai_endpoint, version, environment, application_name,
+def chat_completions(version, environment, application_name,
                      tracer, pricing_info, trace_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for chat completions to collect metrics.
 
     Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
         version: Version of the monitoring package.
         environment: Deployment environment (e.g., production, staging).
         application_name: Name of the application using the OpenAI API.
@@ -54,6 +57,8 @@ def chat_completions(gen_ai_endpoint, version, environment, application_name,
                 wrapped,
                 span,
                 kwargs,
+                server_address,
+                server_port,
                 **args,
             ):
             self.__wrapped__ = wrapped
@@ -61,9 +66,20 @@ def chat_completions(gen_ai_endpoint, version, environment, application_name,
             # Placeholder for aggregating streaming response
             self._llmresponse = ""
             self._response_id = ""
+            self._response_model = ""
+            self._finish_reason = ""
+            self._openai_response_service_tier = ""
+            self._openai_system_fingerprint = ""
 
             self._args = args
             self._kwargs = kwargs
+            self._start_time = time.time()
+            self._end_time = None
+            self._timestamps = []
+            self._ttft = 0
+            self._tbt = 0
+            self._server_address = server_address
+            self._server_port = server_port
 
         def __enter__(self):
             self.__wrapped__.__enter__()
@@ -82,6 +98,14 @@ def chat_completions(gen_ai_endpoint, version, environment, application_name,
         def __next__(self):
             try:
                 chunk = self.__wrapped__.__next__()
+                end_time = time.time()
+                # Record the timestamp for the current chunk
+                self._timestamps.append(end_time)
+
+                if len(self._timestamps) == 1:
+                    # Calculate time to first chunk
+                    self._ttft = calculate_ttft(self._timestamps, self._start_time)
+
                 chunked = response_as_dict(chunk)
                 # Collect message IDs and aggregated response from events
                 if (len(chunked.get('choices')) > 0 and ('delta' in chunked.get('choices')[0] and
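The `calculate_ttft` / `calculate_tbt` helpers used above come from `openlit/__helpers.py` (+83 lines in this release), which is not part of this diff. From the call sites — `calculate_ttft(self._timestamps, self._start_time)` on the first chunk, and `calculate_tbt(self._timestamps)` once more than one chunk has arrived — a minimal sketch of what they compute would be:

```python
# Minimal sketch, inferred from the call sites above; the real
# implementations live in openlit/__helpers.py and are not shown here.
def calculate_ttft(timestamps, start_time):
    # Time to first token: delay from request start to the first streamed chunk.
    return timestamps[0] - start_time

def calculate_tbt(timestamps):
    # Time between tokens: average gap between consecutive chunk arrivals.
    gaps = [later - earlier for earlier, later in zip(timestamps, timestamps[1:])]
    return sum(gaps) / len(gaps)
```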
@@ -91,10 +115,18 @@ def chat_completions(gen_ai_endpoint, version, environment, application_name,
                     if content:
                         self._llmresponse += content
                 self._response_id = chunked.get('id')
+                self._response_model = chunked.get('model')
+                self._finish_reason = chunked.get('choices')[0].get('finish_reason')
+                self._openai_response_service_tier = chunked.get('service_tier')
+                self._openai_system_fingerprint = chunked.get('system_fingerprint')
                 return chunk
             except StopIteration:
                 # Handling exception ensure observability without disrupting operation
                 try:
+                    self._end_time = time.time()
+                    if len(self._timestamps) > 1:
+                        self._tbt = calculate_tbt(self._timestamps)
+
                     # Format 'messages' into a single string
                     message_prompt = self._kwargs.get("messages", "")
                     formatted_messages = []
@@ -109,7 +141,6 @@ def chat_completions(gen_ai_endpoint, version, environment, application_name,
                                     content_str_list.append(f'text: {item["text"]}')
                                 elif (item["type"] == "image_url" and
                                       not item["image_url"]["url"].startswith("data:")):
-                                    # pylint: disable=line-too-long
                                     content_str_list.append(f'image_url: {item["image_url"]["url"]}')
                             content_str = ", ".join(content_str_list)
                             formatted_messages.append(f"{role}: {content_str}")
@@ -117,57 +148,87 @@ def chat_completions(gen_ai_endpoint, version, environment, application_name,
                             formatted_messages.append(f"{role}: {content}")
                     prompt = "\n".join(formatted_messages)
 
+                    request_model = self._kwargs.get("model", "gpt-4o")
+
                     # Calculate tokens using input prompt and aggregated response
-                    prompt_tokens = openai_tokens(prompt,
-                                                  self._kwargs.get("model", "gpt-3.5-turbo"))
-                    completion_tokens = openai_tokens(self._llmresponse,
-                                                      self._kwargs.get("model", "gpt-3.5-turbo"))
+                    input_tokens = openai_tokens(prompt,
+                                                 request_model)
+                    output_tokens = openai_tokens(self._llmresponse,
+                                                  request_model)
 
                     # Calculate cost of the operation
-                    cost = get_chat_model_cost(self._kwargs.get("model", "gpt-3.5-turbo"),
-                                               pricing_info, prompt_tokens,
-                                               completion_tokens)
+                    cost = get_chat_model_cost(request_model,
+                                               pricing_info, input_tokens,
+                                               output_tokens)
 
-                    # Set Span attributes
+                    # Set Span attributes (OTel Semconv)
                     self._span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+                                             SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
                     self._span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
                                              SemanticConvetion.GEN_AI_SYSTEM_OPENAI)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                                             SemanticConvetion.GEN_AI_TYPE_CHAT)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                                             gen_ai_endpoint)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                             request_model)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
+                                             self._kwargs.get("seed", ""))
+                    self._span.set_attribute(SemanticConvetion.SERVER_PORT,
+                                             self._server_port)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+                                             self._kwargs.get("frequency_penalty", 0.0))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
+                                             self._kwargs.get("max_tokens", -1))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
+                                             self._kwargs.get("presence_penalty", 0.0))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES,
+                                             self._kwargs.get("stop", []))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
+                                             self._kwargs.get("temperature", 1.0))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
+                                             self._kwargs.get("top_p", 1.0))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
+                                             [self._finish_reason])
                     self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
                                              self._response_id)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                                             self._response_model)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+                                             input_tokens)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
+                                             output_tokens)
+                    self._span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                                             self._server_address)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_OPENAI_REQUEST_SERVICE_TIER,
+                                             self._kwargs.get("service_tier", "auto"))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_OPENAI_RESPONSE_SERVICE_TIER,
+                                             self._openai_response_service_tier)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_OPENAI_RESPONSE_SYSTEM_FINGERPRINT,
+                                             self._openai_system_fingerprint)
+                    if isinstance(self._llmresponse, str):
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                                 "text")
+                    else:
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                                 "json")
+
+                    # Set Span attributes (Extra)
+                    self._span.set_attribute(DEPLOYMENT_ENVIRONMENT,
                                              environment)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                    self._span.set_attribute(SERVICE_NAME,
                                              application_name)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                             self._kwargs.get("model", "gpt-3.5-turbo"))
                     self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
                                              self._kwargs.get("user", ""))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
-                                             self._kwargs.get("top_p", 1.0))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
-                                             self._kwargs.get("max_tokens", -1))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
-                                             self._kwargs.get("temperature", 1.0))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
-                                             self._kwargs.get("presence_penalty", 0.0))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
-                                             self._kwargs.get("frequency_penalty", 0.0))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
-                                             self._kwargs.get("seed", ""))
                     self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
                                              True)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                                             prompt_tokens)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
-                                             completion_tokens)
                     self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                             prompt_tokens + completion_tokens)
+                                             input_tokens + output_tokens)
                     self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                              cost)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TBT,
+                                             self._tbt)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
+                                             self._ttft)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                                             version)
                     if trace_content:
                         self._span.add_event(
                             name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
@@ -181,31 +242,35 @@ def chat_completions(gen_ai_endpoint, version, environment, application_name,
                                 SemanticConvetion.GEN_AI_CONTENT_COMPLETION: self._llmresponse,
                             },
                         )
-
                     self._span.set_status(Status(StatusCode.OK))
 
                     if disable_metrics is False:
-                        attributes = {
-                            TELEMETRY_SDK_NAME:
-                                "openlit",
-                            SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                                application_name,
-                            SemanticConvetion.GEN_AI_SYSTEM:
-                                SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
-                            SemanticConvetion.GEN_AI_ENVIRONMENT:
-                                environment,
-                            SemanticConvetion.GEN_AI_TYPE:
-                                SemanticConvetion.GEN_AI_TYPE_CHAT,
-                            SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                                self._kwargs.get("model", "gpt-3.5-turbo")
-                        }
+                        attributes = create_metrics_attributes(
+                            service_name=application_name,
+                            deployment_environment=environment,
+                            operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
+                            system=SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
+                            request_model=request_model,
+                            server_address=self._server_address,
+                            server_port=self._server_port,
+                            response_model=self._response_model,
+                        )
 
-                        metrics["genai_requests"].add(1, attributes)
-                        metrics["genai_total_tokens"].add(
-                            prompt_tokens + completion_tokens, attributes
+                        metrics["genai_client_usage_tokens"].record(
+                            input_tokens + output_tokens, attributes
+                        )
+                        metrics["genai_client_operation_duration"].record(
+                            self._end_time - self._start_time, attributes
+                        )
+                        metrics["genai_server_tbt"].record(
+                            self._tbt, attributes
                         )
-                        metrics["genai_completion_tokens"].add(completion_tokens, attributes)
-                        metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
+                        metrics["genai_server_ttft"].record(
+                            self._ttft, attributes
+                        )
+                        metrics["genai_requests"].add(1, attributes)
+                        metrics["genai_completion_tokens"].add(output_tokens, attributes)
+                        metrics["genai_prompt_tokens"].add(input_tokens, attributes)
                         metrics["genai_cost"].record(cost, attributes)
 
                 except Exception as e:
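Both the streaming path above and the non-streaming path below now build their metric attributes through the new shared `create_metrics_attributes` helper instead of a hand-built dict. Its body is in `openlit/__helpers.py` (not shown in this diff); judging only by the keyword arguments at the call sites, it plausibly returns something like:

```python
# Assumed shape only — the attribute keys below are guesses in the OTel
# semantic-convention style; the actual helper is in openlit/__helpers.py.
def create_metrics_attributes(service_name, deployment_environment, operation,
                              system, request_model, server_address, server_port,
                              response_model):
    return {
        "telemetry.sdk.name": "openlit",
        "service.name": service_name,
        "deployment.environment": deployment_environment,
        "gen_ai.operation.name": operation,
        "gen_ai.system": system,
        "gen_ai.request.model": request_model,
        "server.address": server_address,
        "server.port": server_port,
        "gen_ai.response.model": response_model,
    }
```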
@@ -234,20 +299,25 @@ def chat_completions(gen_ai_endpoint, version, environment, application_name,
 
         # Check if streaming is enabled for the API call
         streaming = kwargs.get("stream", False)
+        server_address, server_port = set_server_address_and_port(instance, "api.openai.com", 443)
+        request_model = kwargs.get("model", "gpt-4o")
+
+        span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
 
         # pylint: disable=no-else-return
         if streaming:
             # Special handling for streaming response to accommodate the nature of data flow
             awaited_wrapped = wrapped(*args, **kwargs)
-            span = tracer.start_span(gen_ai_endpoint, kind=SpanKind.CLIENT)
+            span = tracer.start_span(span_name, kind=SpanKind.CLIENT)
 
-            return TracedSyncStream(awaited_wrapped, span, kwargs)
+            return TracedSyncStream(awaited_wrapped, span, kwargs, server_address, server_port)
 
         # Handling for non-streaming responses
         else:
-            # pylint: disable=line-too-long
-            with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
+            with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+                start_time = time.time()
                 response = wrapped(*args, **kwargs)
+                end_time = time.time()
 
                 response_dict = response_as_dict(response)
 
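Span naming changes with this hunk: 1.33.8 named spans after the wrapped endpoint (the removed `gen_ai_endpoint` argument), while 1.33.9 uses the `"{operation} {model}"` pattern from the OTel GenAI conventions. A quick check of what the f-string produces, assuming `GEN_AI_OPERATION_TYPE_CHAT` resolves to `"chat"` (the constant lives in `openlit/semcov/__init__.py`, also updated in this release but not shown here):

```python
# Assumes SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT == "chat".
operation = "chat"
request_model = "gpt-4o"
span_name = f"{operation} {request_model}"
print(span_name)  # -> "chat gpt-4o"
```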
@@ -261,7 +331,6 @@ def chat_completions(gen_ai_endpoint, version, environment, application_name,
 
                         if isinstance(content, list):
                             content_str = ", ".join(
-                                # pylint: disable=line-too-long
                                 f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
                                 if "type" in item else f'text: {item["text"]}'
                                 for item in content
@@ -271,38 +340,72 @@ def chat_completions(gen_ai_endpoint, version, environment, application_name,
                             formatted_messages.append(f"{role}: {content}")
                     prompt = "\n".join(formatted_messages)
 
-                    # Set base span attribues
+                    input_tokens = response_dict.get('usage').get('prompt_tokens')
+                    output_tokens = response_dict.get('usage').get('completion_tokens')
+
+                    # Calculate cost of the operation
+                    cost = get_chat_model_cost(request_model,
+                                               pricing_info, input_tokens,
+                                               output_tokens)
+
+                    # Set base span attribues (OTel Semconv)
                     span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                    span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+                                       SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
                     span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
                                        SemanticConvetion.GEN_AI_SYSTEM_OPENAI)
-                    span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                                       SemanticConvetion.GEN_AI_TYPE_CHAT)
-                    span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                                       gen_ai_endpoint)
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                       request_model)
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
+                                       kwargs.get("seed", ""))
+                    span.set_attribute(SemanticConvetion.SERVER_PORT,
+                                       server_port)
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+                                       kwargs.get("frequency_penalty", 0.0))
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
+                                       kwargs.get("max_tokens", -1))
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
+                                       kwargs.get("presence_penalty", 0.0))
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES,
+                                       kwargs.get("stop", []))
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
+                                       kwargs.get("temperature", 1.0))
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
+                                       kwargs.get("top_p", 1.0))
                     span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
                                        response_dict.get("id"))
-                    span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                                       response_dict.get('model'))
+                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+                                       input_tokens)
+                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
+                                       output_tokens)
+                    span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                                       server_address)
+                    span.set_attribute(SemanticConvetion.GEN_AI_OPENAI_REQUEST_SERVICE_TIER,
+                                       kwargs.get("service_tier", "auto"))
+                    span.set_attribute(SemanticConvetion.GEN_AI_OPENAI_RESPONSE_SERVICE_TIER,
+                                       response_dict.get('service_tier'))
+                    span.set_attribute(SemanticConvetion.GEN_AI_OPENAI_RESPONSE_SYSTEM_FINGERPRINT,
+                                       response_dict.get('system_fingerprint'))
+
+                    # Set base span attribues (Extras)
+                    span.set_attribute(DEPLOYMENT_ENVIRONMENT,
                                        environment)
-                    span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                    span.set_attribute(SERVICE_NAME,
                                        application_name)
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                       kwargs.get("model", "gpt-3.5-turbo"))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
-                                       kwargs.get("top_p", 1.0))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
-                                       kwargs.get("max_tokens", -1))
                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
                                        kwargs.get("user", ""))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
-                                       kwargs.get("temperature", 1.0))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
-                                       kwargs.get("presence_penalty", 0.0))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
-                                       kwargs.get("frequency_penalty", 0.0))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
-                                       kwargs.get("seed", ""))
                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
                                        False)
+                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+                                       input_tokens + output_tokens)
+                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                                       cost)
+                    span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
+                                       end_time - start_time)
+                    span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                                       version)
                     if trace_content:
                         span.add_event(
                             name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
@@ -311,93 +414,54 @@ def chat_completions(gen_ai_endpoint, version, environment, application_name,
                             },
                         )
 
-                    # Set span attributes when tools is not passed to the function call
-                    if "tools" not in kwargs:
-                        # Calculate cost of the operation
-                        cost = get_chat_model_cost(kwargs.get("model", "gpt-3.5-turbo"),
-                                                   pricing_info, response_dict.get('usage', {}).get('prompt_tokens', None),
-                                                   response_dict.get('usage', {}).get('completion_tokens', None))
-
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                                           response_dict.get('usage', {}).get('prompt_tokens', None))
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
-                                           response_dict.get('usage', {}).get('completion_tokens', None))
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                           response_dict.get('usage', {}).get('total_tokens', None))
+                    for i in range(kwargs.get('n',1)):
                         span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
-                                           [response_dict.get('choices')[0].get('finish_reason')])
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                                           cost)
-
-                        # Set span attributes for when n = 1 (default)
-                        if "n" not in kwargs or kwargs["n"] == 1:
-                            if trace_content:
-                                span.add_event(
-                                    name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
-                                    attributes={
-                                        SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response_dict.get('choices')[0].get("message").get("content"),
-                                    },
-                                )
-
-                        # Set span attributes for when n > 0
-                        else:
-                            i = 0
-                            while i < kwargs["n"] and trace_content is True:
-                                attribute_name = f"gen_ai.content.completion.{i}"
-                                span.add_event(
-                                    name=attribute_name,
-                                    attributes={
-                                        SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response_dict.get('choices')[i].get("message").get("content"),
-                                    },
-                                )
-                                i += 1
-
-                        # Return original response
-                        return response
-
-                    # Set span attributes when tools is passed to the function call
-                    elif "tools" in kwargs:
-                        # Calculate cost of the operation
-                        cost = get_chat_model_cost(kwargs.get("model", "gpt-3.5-turbo"),
-                                                   pricing_info, response_dict.get('usage').get('prompt_tokens'),
-                                                   response_dict.get('usage').get('completion_tokens'))
-                        span.add_event(
-                            name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
-                            attributes={
-                                SemanticConvetion.GEN_AI_CONTENT_COMPLETION: "Function called with tools",
-                            },
-                        )
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                                           response_dict.get('usage').get('prompt_tokens'))
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
-                                           response_dict.get('usage').get('completion_tokens'))
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                           response_dict.get('usage').get('total_tokens'))
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                                           cost)
+                                           [response_dict.get('choices')[i].get('finish_reason')])
+                        if trace_content:
+                            span.add_event(
+                                name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                                attributes={
+                                    # pylint: disable=line-too-long
+                                    SemanticConvetion.GEN_AI_CONTENT_COMPLETION: str(response_dict.get('choices')[i].get('message').get('content')),
+                                },
+                            )
+                        if kwargs.get('tools'):
+                            span.set_attribute(SemanticConvetion.GEN_AI_TOOL_CALLS,
+                                               str(response_dict.get('choices')[i].get('message').get('tool_calls')))
+
+                        if isinstance(response_dict.get('choices')[i].get('message').get('content'), str):
+                            span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                               "text")
+                        elif response_dict.get('choices')[i].get('message').get('content') is not None:
+                            span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                               "json")
 
                     span.set_status(Status(StatusCode.OK))
 
                     if disable_metrics is False:
-                        attributes = {
-                            TELEMETRY_SDK_NAME:
-                                "openlit",
-                            SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                                application_name,
-                            SemanticConvetion.GEN_AI_SYSTEM:
-                                SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
-                            SemanticConvetion.GEN_AI_ENVIRONMENT:
-                                environment,
-                            SemanticConvetion.GEN_AI_TYPE:
-                                SemanticConvetion.GEN_AI_TYPE_CHAT,
-                            SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                                kwargs.get("model", "gpt-3.5-turbo")
-                        }
+                        attributes = create_metrics_attributes(
+                            service_name=application_name,
+                            deployment_environment=environment,
+                            operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
+                            system=SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
+                            request_model=request_model,
+                            server_address=server_address,
+                            server_port=server_port,
+                            response_model=response_dict.get('model'),
+                        )
 
+                        metrics["genai_client_usage_tokens"].record(
+                            input_tokens + output_tokens, attributes
+                        )
+                        metrics["genai_client_operation_duration"].record(
+                            end_time - start_time, attributes
+                        )
+                        metrics["genai_server_ttft"].record(
+                            end_time - start_time, attributes
+                        )
                         metrics["genai_requests"].add(1, attributes)
-                        metrics["genai_total_tokens"].add(response_dict.get('usage').get('total_tokens'), attributes)
-                        metrics["genai_completion_tokens"].add(response_dict.get('usage').get('completion_tokens'), attributes)
-                        metrics["genai_prompt_tokens"].add(response_dict.get('usage').get('prompt_tokens'), attributes)
+                        metrics["genai_completion_tokens"].add(output_tokens, attributes)
+                        metrics["genai_prompt_tokens"].add(input_tokens, attributes)
                         metrics["genai_cost"].record(cost, attributes)
 
                     # Return original response
@@ -412,13 +476,12 @@ def chat_completions(gen_ai_endpoint, version, environment, application_name,
 
     return wrapper
 
-def embedding(gen_ai_endpoint, version, environment, application_name,
+def embedding(version, environment, application_name,
               tracer, pricing_info, trace_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for embeddings to collect metrics.
 
     Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
         version: Version of the monitoring package.
         environment: Deployment environment (e.g., production, staging).
         application_name: Name of the application using the OpenAI API.
@@ -447,40 +510,56 @@ def embedding(gen_ai_endpoint, version, environment, application_name,
         The response from the original 'embeddings' method.
         """
 
-        with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
+        server_address, server_port = set_server_address_and_port(instance, "api.openai.com", 443)
+        request_model = kwargs.get("model", "text-embedding-ada-002")
+
+        span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING} {request_model}"
+
+        with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+            start_time = time.time()
             response = wrapped(*args, **kwargs)
+            end_time = time.time()
+
            response_dict = response_as_dict(response)
            try:
+                input_tokens = response_dict.get('usage').get('prompt_tokens')
+
                 # Calculate cost of the operation
-                cost = get_embed_model_cost(kwargs.get("model", "text-embedding-ada-002"),
-                                            pricing_info, response_dict.get('usage').get('prompt_tokens'))
+                cost = get_embed_model_cost(request_model,
+                                            pricing_info, input_tokens)
 
-                # Set Span attributes
+                # Set Span attributes (OTel Semconv)
                 span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+                                   SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING)
                 span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
                                    SemanticConvetion.GEN_AI_SYSTEM_OPENAI)
-                span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                                   SemanticConvetion.GEN_AI_TYPE_EMBEDDING)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                                   gen_ai_endpoint)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                   request_model)
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_ENCODING_FORMATS,
+                                   [kwargs.get('encoding_format', 'float')])
+                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                                   request_model)
+                span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                                   server_address)
+                span.set_attribute(SemanticConvetion.SERVER_PORT,
+                                   server_port)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+                                   input_tokens)
+
+                # Set Span attributes (Extras)
+                span.set_attribute(DEPLOYMENT_ENVIRONMENT,
                                    environment)
-                span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                span.set_attribute(SERVICE_NAME,
                                    application_name)
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                   kwargs.get("model", "text-embedding-ada-002"))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_EMBEDDING_FORMAT,
-                                   kwargs.get("encoding_format", "float"))
-                # span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_EMBEDDING_DIMENSION,
-                #                    kwargs.get("dimensions", "null"))
                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
                                    kwargs.get("user", ""))
-                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                                   response_dict.get('usage').get('prompt_tokens'))
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                   response_dict.get('usage').get('total_tokens'))
+                                   input_tokens)
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                    cost)
+                span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                                   version)
 
                 if trace_content:
                     span.add_event(
@@ -493,26 +572,24 @@ def embedding(gen_ai_endpoint, version, environment, application_name,
                 span.set_status(Status(StatusCode.OK))
 
                 if disable_metrics is False:
-                    attributes = {
-                        TELEMETRY_SDK_NAME:
-                            "openlit",
-                        SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                            application_name,
-                        SemanticConvetion.GEN_AI_SYSTEM:
-                            SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
-                        SemanticConvetion.GEN_AI_ENVIRONMENT:
-                            environment,
-                        SemanticConvetion.GEN_AI_TYPE:
-                            SemanticConvetion.GEN_AI_TYPE_EMBEDDING,
-                        SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                            kwargs.get("model", "text-embedding-ada-002")
-                    }
-
+                    attributes = create_metrics_attributes(
+                        service_name=application_name,
+                        deployment_environment=environment,
+                        operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING,
+                        system=SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
+                        request_model=request_model,
+                        server_address=server_address,
+                        server_port=server_port,
+                        response_model=request_model,
+                    )
+                    metrics["genai_client_usage_tokens"].record(
+                        input_tokens, attributes
+                    )
+                    metrics["genai_client_operation_duration"].record(
+                        end_time - start_time, attributes
+                    )
                     metrics["genai_requests"].add(1, attributes)
-                    metrics["genai_total_tokens"].add(
-                        response_dict.get('usage').get('total_tokens'), attributes)
-                    metrics["genai_prompt_tokens"].add(
-                        response_dict.get('usage').get('prompt_tokens'), attributes)
+                    metrics["genai_prompt_tokens"].add(input_tokens, attributes)
                     metrics["genai_cost"].record(cost, attributes)
 
                 # Return original response
@@ -527,118 +604,12 @@ def embedding(gen_ai_endpoint, version, environment, application_name,
 
     return wrapper
 
-def finetune(gen_ai_endpoint, version, environment, application_name,
-             tracer, pricing_info, trace_content, metrics, disable_metrics):
-    """
-    Generates a telemetry wrapper for fine-tuning jobs to collect metrics.
-
-    Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
-        version: Version of the monitoring package.
-        environment: Deployment environment (e.g., production, staging).
-        application_name: Name of the application using the OpenAI API.
-        tracer: OpenTelemetry tracer for creating spans.
-        pricing_info: Information used for calculating the cost of OpenAI usage.
-        trace_content: Flag indicating whether to trace the actual content.
-
-    Returns:
-        A function that wraps the fine tuning creation method to add telemetry.
-    """
-
-    def wrapper(wrapped, instance, args, kwargs):
-        """
-        Wraps the 'fine_tuning.jobs.create' API call to add telemetry.
-
-        This collects metrics such as execution time, usage stats, and handles errors
-        gracefully, adding details to the trace for observability.
-
-        Args:
-            wrapped: The original 'fine_tuning.jobs.create' method to be wrapped.
-            instance: The instance of the class where the original method is defined.
-            args: Positional arguments for the method.
-            kwargs: Keyword arguments for the method.
-
-        Returns:
-            The response from the original 'fine_tuning.jobs.create' method.
-        """
-
-        with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
-            response = wrapped(*args, **kwargs)
-
-            # Handling exception ensure observability without disrupting operation
-            try:
-                # Set Span attributes
-                span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                                   SemanticConvetion.GEN_AI_SYSTEM_OPENAI)
-                span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                                   SemanticConvetion.GEN_AI_TYPE_FINETUNING)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                                   gen_ai_endpoint)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
-                                   environment)
-                span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
-                                   application_name)
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                   kwargs.get("model", "gpt-3.5-turbo"))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TRAINING_FILE,
-                                   kwargs.get("training_file", ""))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_VALIDATION_FILE,
-                                   kwargs.get("validation_file", ""))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FINETUNE_BATCH_SIZE,
-                                   kwargs.get("hyperparameters.batch_size", "auto"))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FINETUNE_MODEL_LRM,
-                                   kwargs.get("hyperparameters.learning_rate_multiplier",
-                                              "auto"))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FINETUNE_MODEL_EPOCHS,
-                                   kwargs.get("hyperparameters.n_epochs", "auto"))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FINETUNE_MODEL_SUFFIX,
-                                   kwargs.get("suffix", ""))
-                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
-                                   response.id)
-                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                                   response.usage.prompt_tokens)
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FINETUNE_STATUS,
-                                   response.status)
-                span.set_status(Status(StatusCode.OK))
-
-                if disable_metrics is False:
-                    attributes = {
-                        TELEMETRY_SDK_NAME:
-                            "openlit",
-                        SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                            application_name,
-                        SemanticConvetion.GEN_AI_SYSTEM:
-                            SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
-                        SemanticConvetion.GEN_AI_ENVIRONMENT:
-                            environment,
-                        SemanticConvetion.GEN_AI_TYPE:
-                            SemanticConvetion.GEN_AI_TYPE_FINETUNING,
-                        SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                            kwargs.get("model", "gpt-3.5-turbo")
-                    }
-
-                    metrics["genai_requests"].add(1, attributes)
-
-                # Return original response
-                return response
-
-            except Exception as e:
-                handle_exception(span, e)
-                logger.error("Error in trace creation: %s", e)
-
-                # Return original response
-                return response
-
-    return wrapper
-
-def image_generate(gen_ai_endpoint, version, environment, application_name,
+def image_generate(version, environment, application_name,
                    tracer, pricing_info, trace_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for image generation to collect metrics.
 
     Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
         version: Version of the monitoring package.
         environment: Deployment environment (e.g., production, staging).
         application_name: Name of the application using the OpenAI API.
@@ -667,8 +638,16 @@ def image_generate(gen_ai_endpoint, version, environment, application_name,
         The response from the original 'images.generate' method.
         """
 
-        with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
+        server_address, server_port = set_server_address_and_port(instance, "api.openai.com", 443)
+        request_model = kwargs.get("model", "dall-e-2")
+
+        span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_IMAGE} {request_model}"
+
+        with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+            start_time = time.time()
             response = wrapped(*args, **kwargs)
+            end_time = time.time()
+
             images_count = 0
 
             try:
@@ -678,28 +657,38 @@ def image_generate(gen_ai_endpoint, version, environment, application_name,
                 else:
                     image = "url"
 
+                request_model = kwargs.get("model", "dall-e-2")
+
                 # Calculate cost of the operation
-                cost = get_image_model_cost(kwargs.get("model", "dall-e-2"),
+                cost = get_image_model_cost(request_model,
                                             pricing_info, kwargs.get("size", "1024x1024"),
                                             kwargs.get("quality", "standard"))
 
                 for items in response.data:
-                    # Set Span attributes
+                    # Set Span attributes (OTel Semconv)
                     span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                    span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+                                       SemanticConvetion.GEN_AI_OPERATION_TYPE_IMAGE)
                     span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
                                        SemanticConvetion.GEN_AI_SYSTEM_OPENAI)
-                    span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                                       SemanticConvetion.GEN_AI_TYPE_IMAGE)
-                    span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                                       gen_ai_endpoint)
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                       request_model)
+                    span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                                       server_address)
+                    span.set_attribute(SemanticConvetion.SERVER_PORT,
+                                       server_port)
                     span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
                                        response.created)
-                    span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                                       request_model)
+                    span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                       "image")
+
+                    # Set Span attributes (Extras)
+                    span.set_attribute(DEPLOYMENT_ENVIRONMENT,
                                        environment)
-                    span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                    span.set_attribute(SERVICE_NAME,
                                        application_name)
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                       kwargs.get("model", "dall-e-2"))
                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IMAGE_SIZE,
                                        kwargs.get("size", "1024x1024"))
                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IMAGE_QUALITY,
@@ -710,6 +699,9 @@ def image_generate(gen_ai_endpoint, version, environment, application_name,
                                        items.revised_prompt if items.revised_prompt else "")
                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
                                        kwargs.get("user", ""))
+                    span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                                       version)
+
                     if trace_content:
                         span.add_event(
                             name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
@@ -717,7 +709,7 @@ def image_generate(gen_ai_endpoint, version, environment, application_name,
                                 SemanticConvetion.GEN_AI_CONTENT_PROMPT: kwargs.get("prompt", ""),
                             },
                         )
-                        attribute_name = f"gen_ai.response.image.{images_count}"
+                        attribute_name = f"{SemanticConvetion.GEN_AI_RESPONSE_IMAGE}.{images_count}"
                         span.add_event(
                             name=attribute_name,
                             attributes={
@@ -732,21 +724,20 @@ def image_generate(gen_ai_endpoint, version, environment, application_name,
                 span.set_status(Status(StatusCode.OK))
 
                 if disable_metrics is False:
-                    attributes = {
-                        TELEMETRY_SDK_NAME:
-                            "openlit",
-                        SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                            application_name,
-                        SemanticConvetion.GEN_AI_SYSTEM:
-                            SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
-                        SemanticConvetion.GEN_AI_ENVIRONMENT:
-                            environment,
-                        SemanticConvetion.GEN_AI_TYPE:
-                            SemanticConvetion.GEN_AI_TYPE_IMAGE,
-                        SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                            kwargs.get("model", "dall-e-2")
-                    }
+                    attributes = create_metrics_attributes(
+                        service_name=application_name,
+                        deployment_environment=environment,
+                        operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_IMAGE,
+                        system=SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
+                        request_model=request_model,
+                        server_address=server_address,
+                        server_port=server_port,
+                        response_model=request_model,
+                    )
 
+                    metrics["genai_client_operation_duration"].record(
+                        end_time - start_time, attributes
+                    )
                     metrics["genai_requests"].add(1, attributes)
                     metrics["genai_cost"].record(cost, attributes)
 
@@ -762,13 +753,12 @@ def image_generate(gen_ai_endpoint, version, environment, application_name,
 
     return wrapper
 
-def image_variatons(gen_ai_endpoint, version, environment, application_name,
+def image_variatons(version, environment, application_name,
                     tracer, pricing_info, trace_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for creating image variations to collect metrics.
 
     Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
         version: Version of the monitoring package.
         environment: Deployment environment (e.g., production, staging).
         application_name: Name of the application using the OpenAI API.
@@ -797,8 +787,16 @@ def image_variatons(gen_ai_endpoint, version, environment, application_name,
         The response from the original 'images.create.variations' method.
         """
 
-        with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
+        server_address, server_port = set_server_address_and_port(instance, "api.openai.com", 443)
+        request_model = kwargs.get("model", "dall-e-2")
+
+        span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_IMAGE} {request_model}"
+
+        with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+            start_time = time.time()
             response = wrapped(*args, **kwargs)
+            end_time = time.time()
+
             images_count = 0
 
             try:
@@ -809,34 +807,45 @@ def image_variatons(gen_ai_endpoint, version, environment, application_name,
                     image = "url"
 
                 # Calculate cost of the operation
-                cost = get_image_model_cost(kwargs.get("model", "dall-e-2"), pricing_info,
+                cost = get_image_model_cost(request_model, pricing_info,
                                             kwargs.get("size", "1024x1024"), "standard")
 
                 for items in response.data:
-                    # Set Span attributes
+                    # Set Span attributes (OTel Semconv)
                     span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                    span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+                                       SemanticConvetion.GEN_AI_OPERATION_TYPE_IMAGE)
                     span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
                                        SemanticConvetion.GEN_AI_SYSTEM_OPENAI)
-                    span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                                       SemanticConvetion.GEN_AI_TYPE_IMAGE)
-                    span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                                       gen_ai_endpoint)
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                       request_model)
+                    span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                                       server_address)
+                    span.set_attribute(SemanticConvetion.SERVER_PORT,
+                                       server_port)
                     span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
                                        response.created)
-                    span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                                       request_model)
+                    span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                       "image")
+
+                    # Set Span attributes (Extras)
+                    span.set_attribute(DEPLOYMENT_ENVIRONMENT,
                                        environment)
-                    span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                    span.set_attribute(SERVICE_NAME,
                                        application_name)
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                       kwargs.get("model", "dall-e-2"))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
-                                       kwargs.get("user", ""))
                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IMAGE_SIZE,
                                        kwargs.get("size", "1024x1024"))
                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IMAGE_QUALITY,
                                        "standard")
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
+                                       kwargs.get("user", ""))
+                    span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                                       version)
+
                     if trace_content:
-                        attribute_name = f"gen_ai.response.image.{images_count}"
+                        attribute_name = f"{SemanticConvetion.GEN_AI_RESPONSE_IMAGE}.{images_count}"
                         span.add_event(
                             name=attribute_name,
                             attributes={
@@ -851,21 +860,20 @@ def image_variatons(gen_ai_endpoint, version, environment, application_name,
                 span.set_status(Status(StatusCode.OK))
 
                 if disable_metrics is False:
-                    attributes = {
-                        TELEMETRY_SDK_NAME:
-                            "openlit",
-                        SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                            application_name,
-                        SemanticConvetion.GEN_AI_SYSTEM:
-                            SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
-                        SemanticConvetion.GEN_AI_ENVIRONMENT:
-                            environment,
-                        SemanticConvetion.GEN_AI_TYPE:
-                            SemanticConvetion.GEN_AI_TYPE_IMAGE,
-                        SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                            kwargs.get("model", "dall-e-2")
-                    }
+                    attributes = create_metrics_attributes(
+                        service_name=application_name,
+                        deployment_environment=environment,
+                        operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_IMAGE,
+                        system=SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
+                        request_model=request_model,
+                        server_address=server_address,
+                        server_port=server_port,
+                        response_model=request_model,
+                    )
 
+                    metrics["genai_client_operation_duration"].record(
+                        end_time - start_time, attributes
+                    )
                     metrics["genai_requests"].add(1, attributes)
                     metrics["genai_cost"].record(cost, attributes)
 
@@ -881,13 +889,12 @@ def image_variatons(gen_ai_endpoint, version, environment, application_name,
 
     return wrapper
 
-def audio_create(gen_ai_endpoint, version, environment, application_name,
+def audio_create(version, environment, application_name,
                  tracer, pricing_info, trace_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for creating speech audio to collect metrics.
 
    Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
         version: Version of the monitoring package.
         environment: Deployment environment (e.g., production, staging).
         application_name: Name of the application using the OpenAI API.
@@ -916,28 +923,42 @@ def audio_create(gen_ai_endpoint, version, environment, application_name,
         The response from the original 'audio.speech.create' method.
         """
 
-        with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
+        server_address, server_port = set_server_address_and_port(instance, "api.openai.com", 443)
+        request_model = kwargs.get("model", "tts-1")
+
+        span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_AUDIO} {request_model}"
+
+        with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
+            start_time = time.time()
             response = wrapped(*args, **kwargs)
+            end_time = time.time()
 
             try:
                 # Calculate cost of the operation
-                cost = get_audio_model_cost(kwargs.get("model", "tts-1"),
+                cost = get_audio_model_cost(request_model,
                                             pricing_info, kwargs.get("input", ""))
 
                 # Set Span attributes
                 span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+                                   SemanticConvetion.GEN_AI_OPERATION_TYPE_AUDIO)
                 span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
                                    SemanticConvetion.GEN_AI_SYSTEM_OPENAI)
-                span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                                   SemanticConvetion.GEN_AI_TYPE_AUDIO)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                                   gen_ai_endpoint)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                   request_model)
+                span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                                   server_address)
+                span.set_attribute(SemanticConvetion.SERVER_PORT,
+                                   server_port)
+                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                                   request_model)
+                span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                   "speech")
+
+                span.set_attribute(DEPLOYMENT_ENVIRONMENT,
                                    environment)
-                span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                span.set_attribute(SERVICE_NAME,
                                    application_name)
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                   kwargs.get("model", "tts-1"))
                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_AUDIO_VOICE,
                                    kwargs.get("voice", "alloy"))
                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_AUDIO_RESPONSE_FORMAT,
@@ -946,6 +967,8 @@ def audio_create(gen_ai_endpoint, version, environment, application_name,
                                    kwargs.get("speed", 1))
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                    cost)
+                span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                                   version)
                 if trace_content:
                     span.add_event(
                         name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
@@ -957,21 +980,20 @@ def audio_create(gen_ai_endpoint, version, environment, application_name,
                 span.set_status(Status(StatusCode.OK))
 
                 if disable_metrics is False:
-                    attributes = {
-                        TELEMETRY_SDK_NAME:
-                            "openlit",
-                        SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                            application_name,
-                        SemanticConvetion.GEN_AI_SYSTEM:
-                            SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
-                        SemanticConvetion.GEN_AI_ENVIRONMENT:
-                            environment,
-                        SemanticConvetion.GEN_AI_TYPE:
-                            SemanticConvetion.GEN_AI_TYPE_AUDIO,
-                        SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                            kwargs.get("model", "tts-1")
-                    }
+                    attributes = create_metrics_attributes(
+                        service_name=application_name,
+                        deployment_environment=environment,
+                        operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_AUDIO,
+                        system=SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
+                        request_model=request_model,
+                        server_address=server_address,
+                        server_port=server_port,
+                        response_model=request_model,
+                    )
 
+                    metrics["genai_client_operation_duration"].record(
+                        end_time - start_time, attributes
+                    )
                    metrics["genai_requests"].add(1, attributes)
                    metrics["genai_cost"].record(cost, attributes)
 
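Taken together, each factory in this file now derives everything it needs (server address, model, span name) from the wrapped call itself instead of a per-endpoint `gen_ai_endpoint` string, which is what let `openlit/instrumentation/openai/__init__.py` shed its per-endpoint wiring (+11 −230 per the file list above). A hypothetical registration following the `wrapt` pattern openlit uses for its instrumentations — the module and attribute paths are assumptions based on the OpenAI v1 SDK layout:

```python
# Hypothetical wiring, for illustration only; the real registration lives in
# openlit/instrumentation/openai/__init__.py and may differ in detail.
from wrapt import wrap_function_wrapper
from openlit.instrumentation.openai.openai import chat_completions

def instrument_chat(version, environment, application_name,
                    tracer, pricing_info, trace_content, metrics, disable_metrics):
    wrap_function_wrapper(
        "openai.resources.chat.completions",  # assumed module path (OpenAI v1 SDK)
        "Completions.create",                 # assumed attribute path
        chat_completions(version, environment, application_name,
                         tracer, pricing_info, trace_content, metrics, disable_metrics),
    )
```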