openlit 1.29.1__py3-none-any.whl → 1.30.2__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.
- openlit/__helpers.py +14 -0
- openlit/__init__.py +11 -3
- openlit/evals/utils.py +5 -2
- openlit/guard/utils.py +5 -1
- openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py +0 -1
- openlit/instrumentation/azure_ai_inference/azure_ai_inference.py +0 -1
- openlit/instrumentation/crewai/__init__.py +50 -0
- openlit/instrumentation/crewai/crewai.py +149 -0
- openlit/instrumentation/google_ai_studio/google_ai_studio.py +0 -1
- openlit/instrumentation/litellm/__init__.py +54 -0
- openlit/instrumentation/litellm/async_litellm.py +406 -0
- openlit/instrumentation/litellm/litellm.py +406 -0
- openlit/instrumentation/openai/async_openai.py +236 -170
- openlit/instrumentation/openai/openai.py +208 -156
- openlit/otel/metrics.py +6 -3
- openlit/otel/tracing.py +5 -2
- openlit/semcov/__init__.py +21 -0
- {openlit-1.29.1.dist-info → openlit-1.30.2.dist-info}/METADATA +11 -11
- {openlit-1.29.1.dist-info → openlit-1.30.2.dist-info}/RECORD +21 -16
- {openlit-1.29.1.dist-info → openlit-1.30.2.dist-info}/LICENSE +0 -0
- {openlit-1.29.1.dist-info → openlit-1.30.2.dist-info}/WHEEL +0 -0
openlit/instrumentation/openai/async_openai.py

@@ -6,8 +6,15 @@ Module for monitoring OpenAI API calls.
 import logging
 from opentelemetry.trace import SpanKind, Status, StatusCode
 from opentelemetry.sdk.resources import TELEMETRY_SDK_NAME
-from openlit.__helpers import
-
+from openlit.__helpers import (
+    get_chat_model_cost,
+    get_embed_model_cost,
+    get_audio_model_cost,
+    get_image_model_cost,
+    openai_tokens,
+    handle_exception,
+    response_as_dict,
+)
 from openlit.semcov import SemanticConvetion
 
 # Initialize logger for logging potential issues and operations
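Several of the newly imported helpers do the heavy lifting in the hunks below. For orientation, here is a hypothetical sketch of openai_tokens; the real body lives in openlit/__helpers.py (which this release grows by 14 lines) and is not shown in this diff:

    # Hypothetical sketch, not the package's actual code.
    import tiktoken

    def openai_tokens(text, model):
        """Count tokens for `text` using the tokenizer associated with `model`."""
        try:
            encoding = tiktoken.encoding_for_model(model)
        except KeyError:
            # Assumed fallback for model names tiktoken does not know.
            encoding = tiktoken.get_encoding("cl100k_base")
        return len(encoding.encode(text))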
@@ -31,10 +38,184 @@ def async_chat_completions(gen_ai_endpoint, version, environment, application_name,
     A function that wraps the chat completions method to add telemetry.
     """
 
+    class TracedAsyncStream:
+        """
+        Wrapper for streaming responses to collect metrics and trace data.
+        Wraps the 'openai.AsyncStream' response to collect message IDs and aggregated response.
+
+        This class implements the '__aiter__' and '__anext__' methods that
+        handle asynchronous streaming responses.
+
+        This class also implements '__aenter__' and '__aexit__' methods that
+        handle asynchronous context management protocol.
+        """
+        def __init__(
+                self,
+                wrapped,
+                span,
+                kwargs,
+                **args,
+            ):
+            self.__wrapped__ = wrapped
+            self._span = span
+            # Placeholder for aggregating streaming response
+            self._llmresponse = ""
+            self._response_id = ""
+
+            self._args = args
+            self._kwargs = kwargs
+
+        async def __aenter__(self):
+            await self.__wrapped__.__aenter__()
+            return self
+
+        async def __aexit__(self, exc_type, exc_value, traceback):
+            await self.__wrapped__.__aexit__(exc_type, exc_value, traceback)
+
+        def __aiter__(self):
+            return self
+
+        async def __getattr__(self, name):
+            """Delegate attribute access to the wrapped object."""
+            return getattr(await self.__wrapped__, name)
+
+        async def __anext__(self):
+            try:
+                chunk = await self.__wrapped__.__anext__()
+                chunked = response_as_dict(chunk)
+                # Collect message IDs and aggregated response from events
+                if (len(chunked.get('choices')) > 0 and ('delta' in chunked.get('choices')[0] and
+                        'content' in chunked.get('choices')[0].get('delta'))):
+
+                    content = chunked.get('choices')[0].get('delta').get('content')
+                    if content:
+                        self._llmresponse += content
+                    self._response_id = chunked.get('id')
+                return chunk
+            except StopAsyncIteration:
+                # Handling exception ensure observability without disrupting operation
+                try:
+                    # Format 'messages' into a single string
+                    message_prompt = self._kwargs.get("messages", "")
+                    formatted_messages = []
+                    for message in message_prompt:
+                        role = message["role"]
+                        content = message["content"]
+
+                        if isinstance(content, list):
+                            content_str = ", ".join(
+                                # pylint: disable=line-too-long
+                                f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
+                                if "type" in item else f'text: {item["text"]}'
+                                for item in content
+                            )
+                            formatted_messages.append(f"{role}: {content_str}")
+                        else:
+                            formatted_messages.append(f"{role}: {content}")
+                    prompt = "\n".join(formatted_messages)
+
+                    # Calculate tokens using input prompt and aggregated response
+                    prompt_tokens = openai_tokens(prompt,
+                                                  self._kwargs.get("model", "gpt-3.5-turbo"))
+                    completion_tokens = openai_tokens(self._llmresponse,
+                                                      self._kwargs.get("model", "gpt-3.5-turbo"))
+
+                    # Calculate cost of the operation
+                    cost = get_chat_model_cost(self._kwargs.get("model", "gpt-3.5-turbo"),
+                                               pricing_info, prompt_tokens,
+                                               completion_tokens)
+
+                    # Set Span attributes
+                    self._span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+                                             SemanticConvetion.GEN_AI_SYSTEM_OPENAI)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
+                                             SemanticConvetion.GEN_AI_TYPE_CHAT)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
+                                             gen_ai_endpoint)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
+                                             self._response_id)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                                             environment)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                                             application_name)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                             self._kwargs.get("model", "gpt-3.5-turbo"))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
+                                             self._kwargs.get("user", ""))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
+                                             self._kwargs.get("top_p", 1.0))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
+                                             self._kwargs.get("max_tokens", -1))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
+                                             self._kwargs.get("temperature", 1.0))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
+                                             self._kwargs.get("presence_penalty", 0.0))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+                                             self._kwargs.get("frequency_penalty", 0.0))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
+                                             self._kwargs.get("seed", ""))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
+                                             True)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
+                                             prompt_tokens)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
+                                             completion_tokens)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+                                             prompt_tokens + completion_tokens)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                                             cost)
+                    if trace_content:
+                        self._span.add_event(
+                            name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                            attributes={
+                                SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+                            },
+                        )
+                        self._span.add_event(
+                            name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                            attributes={
+                                SemanticConvetion.GEN_AI_CONTENT_COMPLETION: self._llmresponse,
+                            },
+                        )
+
+                    self._span.set_status(Status(StatusCode.OK))
+
+                    if disable_metrics is False:
+                        attributes = {
+                            TELEMETRY_SDK_NAME:
+                                "openlit",
+                            SemanticConvetion.GEN_AI_APPLICATION_NAME:
+                                application_name,
+                            SemanticConvetion.GEN_AI_SYSTEM:
+                                SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
+                            SemanticConvetion.GEN_AI_ENVIRONMENT:
+                                environment,
+                            SemanticConvetion.GEN_AI_TYPE:
+                                SemanticConvetion.GEN_AI_TYPE_CHAT,
+                            SemanticConvetion.GEN_AI_REQUEST_MODEL:
+                                self._kwargs.get("model", "gpt-3.5-turbo")
+                        }
+
+                        metrics["genai_requests"].add(1, attributes)
+                        metrics["genai_total_tokens"].add(
+                            prompt_tokens + completion_tokens, attributes
+                        )
+                        metrics["genai_completion_tokens"].add(completion_tokens, attributes)
+                        metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
+                        metrics["genai_cost"].record(cost, attributes)
+
+                except Exception as e:
+                    handle_exception(self._span, e)
+                    logger.error("Error in trace creation: %s", e)
+                finally:
+                    self._span.end()
+                raise
+
     async def wrapper(wrapped, instance, args, kwargs):
         """
         Wraps the 'chat.completions' API call to add telemetry.
-
+
         This collects metrics such as execution time, cost, and token usage, and handles errors
         gracefully, adding details to the trace for observability.
 
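The new TracedAsyncStream is transparent to callers: chunks pass through unchanged, and the span is finalized only when the stream is exhausted. A simplified illustration of that lifecycle (in the package the class is local to async_chat_completions and is constructed by the wrapper itself; `client`, `span`, and `msgs` are assumed names):

    # Illustrative only -- the instrumentation builds this for you.
    stream = await client.chat.completions.create(
        model="gpt-3.5-turbo", messages=msgs, stream=True
    )
    traced = TracedAsyncStream(stream, span, kwargs={"model": "gpt-3.5-turbo", "messages": msgs})

    async for chunk in traced:   # each chunk is yielded untouched
        print(chunk)             # while _llmresponse accumulates the deltas
    # On StopAsyncIteration, __anext__ records tokens, cost, and span
    # attributes, ends the span, and re-raises so the loop ends normally.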
@@ -54,140 +235,10 @@ def async_chat_completions(gen_ai_endpoint, version, environment, application_name,
         # pylint: disable=no-else-return
         if streaming:
             # Special handling for streaming response to accommodate the nature of data flow
-
-
-            # Placeholder for aggregating streaming response
-            llmresponse = ""
-
-            # Loop through streaming events capturing relevant details
-            async for chunk in await wrapped(*args, **kwargs):
-                # Collect message IDs and aggregated response from events
-                if len(chunk.choices) > 0:
-                    # pylint: disable=line-too-long
-                    if hasattr(chunk.choices[0], "delta") and hasattr(chunk.choices[0].delta, "content"):
-                        content = chunk.choices[0].delta.content
-                        if content:
-                            llmresponse += content
-                yield chunk
-                response_id = chunk.id
-
-            # Handling exception ensure observability without disrupting operation
-            try:
-                # Format 'messages' into a single string
-                message_prompt = kwargs.get("messages", "")
-                formatted_messages = []
-                for message in message_prompt:
-                    role = message["role"]
-                    content = message["content"]
-
-                    if isinstance(content, list):
-                        content_str = ", ".join(
-                            # pylint: disable=line-too-long
-                            f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
-                            if "type" in item else f'text: {item["text"]}'
-                            for item in content
-                        )
-                        formatted_messages.append(f"{role}: {content_str}")
-                    else:
-                        formatted_messages.append(f"{role}: {content}")
-                prompt = "\n".join(formatted_messages)
-
-                # Calculate tokens using input prompt and aggregated response
-                prompt_tokens = openai_tokens(prompt,
-                                              kwargs.get("model", "gpt-3.5-turbo"))
-                completion_tokens = openai_tokens(llmresponse,
-                                                  kwargs.get("model", "gpt-3.5-turbo"))
+            awaited_wrapped = await wrapped(*args, **kwargs)
+            span = tracer.start_span(gen_ai_endpoint, kind=SpanKind.CLIENT)
 
-
-                cost = get_chat_model_cost(kwargs.get("model", "gpt-3.5-turbo"),
-                                           pricing_info, prompt_tokens,
-                                           completion_tokens)
-
-                # Set Span attributes
-                span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                                   SemanticConvetion.GEN_AI_SYSTEM_OPENAI)
-                span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                                   SemanticConvetion.GEN_AI_TYPE_CHAT)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                                   gen_ai_endpoint)
-                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
-                                   response_id)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
-                                   environment)
-                span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
-                                   application_name)
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                   kwargs.get("model", "gpt-3.5-turbo"))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
-                                   kwargs.get("user", ""))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
-                                   kwargs.get("top_p", 1.0))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
-                                   kwargs.get("max_tokens", -1))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
-                                   kwargs.get("temperature", 1.0))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
-                                   kwargs.get("presence_penalty", 0.0))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
-                                   kwargs.get("frequency_penalty", 0.0))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
-                                   kwargs.get("seed", ""))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
-                                   True)
-                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                                   prompt_tokens)
-                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
-                                   completion_tokens)
-                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                   prompt_tokens + completion_tokens)
-                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                                   cost)
-                if trace_content:
-                    span.add_event(
-                        name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
-                        attributes={
-                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
-                        },
-                    )
-                    span.add_event(
-                        name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
-                        attributes={
-                            SemanticConvetion.GEN_AI_CONTENT_COMPLETION: llmresponse,
-                        },
-                    )
-
-                span.set_status(Status(StatusCode.OK))
-
-                if disable_metrics is False:
-                    attributes = {
-                        TELEMETRY_SDK_NAME:
-                            "openlit",
-                        SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                            application_name,
-                        SemanticConvetion.GEN_AI_SYSTEM:
-                            SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
-                        SemanticConvetion.GEN_AI_ENVIRONMENT:
-                            environment,
-                        SemanticConvetion.GEN_AI_TYPE:
-                            SemanticConvetion.GEN_AI_TYPE_CHAT,
-                        SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                            kwargs.get("model", "gpt-3.5-turbo")
-                    }
-
-                    metrics["genai_requests"].add(1, attributes)
-                    metrics["genai_total_tokens"].add(
-                        prompt_tokens + completion_tokens, attributes
-                    )
-                    metrics["genai_completion_tokens"].add(completion_tokens, attributes)
-                    metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
-                    metrics["genai_cost"].record(cost, attributes)
-
-            except Exception as e:
-                handle_exception(span, e)
-                logger.error("Error in trace creation: %s", e)
-
-            return stream_generator()
+            return TracedAsyncStream(awaited_wrapped, span, kwargs)
 
         # Handling for non-streaming responses
         else:
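The practical effect of this hunk: instead of returning a local async generator (which supports iteration only), the wrapper now returns TracedAsyncStream, which also forwards the async context-manager protocol of the underlying openai.AsyncStream. A hedged sketch of the caller pattern this plausibly preserves (`client` is an assumed AsyncOpenAI instance):

    # Both consumption styles keep working after instrumentation:
    stream = await client.chat.completions.create(model="gpt-3.5-turbo",
                                                  messages=msgs, stream=True)
    async with stream:                # needs __aenter__/__aexit__ -- async generators lack these
        async for chunk in stream:    # needs __aiter__/__anext__
            ...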
@@ -195,6 +246,8 @@ def async_chat_completions(gen_ai_endpoint, version, environment, application_name,
             with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
                 response = await wrapped(*args, **kwargs)
 
+                response_dict = response_as_dict(response)
+
                 try:
                     # Format 'messages' into a single string
                     message_prompt = kwargs.get("messages", "")
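response_as_dict normalizes the SDK's response objects into plain dicts so the hunks below can use chained .get(...) lookups safely. Its body is not visible in this diff (the file list only shows openlit/__helpers.py +14 -0), so the following is a hypothetical reconstruction:

    # Hypothetical sketch of the helper, not the package's actual code.
    def response_as_dict(response):
        """Normalize an SDK response object (or dict) into a plain dict."""
        if isinstance(response, dict):
            return response
        if hasattr(response, "model_dump"):   # pydantic v2 objects (openai>=1.x)
            return response.model_dump()
        if hasattr(response, "to_dict"):
            return response.to_dict()
        return vars(response)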
@@ -224,7 +277,7 @@ def async_chat_completions(gen_ai_endpoint, version, environment, application_name,
                     span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
                                        gen_ai_endpoint)
                     span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
-
+                                       response_dict.get("id"))
                     span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
                                        environment)
                     span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
@@ -255,23 +308,21 @@ def async_chat_completions(gen_ai_endpoint, version, environment, application_name,
                             },
                         )
 
-                    span.set_status(Status(StatusCode.OK))
-
                     # Set span attributes when tools is not passed to the function call
                     if "tools" not in kwargs:
                         # Calculate cost of the operation
                         cost = get_chat_model_cost(kwargs.get("model", "gpt-3.5-turbo"),
-                                                   pricing_info,
-
+                                                   pricing_info, response_dict.get('usage', {}).get('prompt_tokens', None),
+                                                   response_dict.get('usage', {}).get('completion_tokens', None))
 
                         span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-
+                                           response_dict.get('usage', {}).get('prompt_tokens', None))
                         span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
-
+                                           response_dict.get('usage', {}).get('completion_tokens', None))
                         span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-
+                                           response_dict.get('usage', {}).get('total_tokens', None))
                         span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
-
+                                           [response_dict.get('choices', [])[0].get('finish_reason', None)])
                         span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                            cost)
 
@@ -281,7 +332,7 @@ def async_chat_completions(gen_ai_endpoint, version, environment, application_name,
                             span.add_event(
                                 name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
                                 attributes={
-                                    SemanticConvetion.GEN_AI_CONTENT_COMPLETION:
+                                    SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response_dict.get('choices', [])[0].get("message").get("content"),
                                 },
                             )
 
@@ -293,7 +344,7 @@ def async_chat_completions(gen_ai_endpoint, version, environment, application_name,
                                 span.add_event(
                                     name=attribute_name,
                                     attributes={
-                                        SemanticConvetion.GEN_AI_CONTENT_COMPLETION:
+                                        SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response_dict.get('choices')[i].get("message").get("content"),
                                     },
                                 )
                                 i += 1
@@ -305,9 +356,8 @@ def async_chat_completions(gen_ai_endpoint, version, environment, application_name,
                     elif "tools" in kwargs:
                         # Calculate cost of the operation
                         cost = get_chat_model_cost(kwargs.get("model", "gpt-3.5-turbo"),
-                                                   pricing_info,
-
-
+                                                   pricing_info, response_dict.get('usage').get('prompt_tokens'),
+                                                   response_dict.get('usage').get('completion_tokens'))
                         span.add_event(
                             name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
                             attributes={
@@ -315,11 +365,11 @@ def async_chat_completions(gen_ai_endpoint, version, environment, application_name,
                             },
                         )
                         span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-
+                                           response_dict.get('usage').get('prompt_tokens'))
                         span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
-
+                                           response_dict.get('usage').get('completion_tokens'))
                         span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-
+                                           response_dict.get('usage').get('total_tokens'))
                         span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                            cost)
 
@@ -342,9 +392,9 @@ def async_chat_completions(gen_ai_endpoint, version, environment, application_name,
                         }
 
                         metrics["genai_requests"].add(1, attributes)
-                        metrics["genai_total_tokens"].add(
-                        metrics["genai_completion_tokens"].add(
-                        metrics["genai_prompt_tokens"].add(
+                        metrics["genai_total_tokens"].add(response_dict.get('usage').get('total_tokens'), attributes)
+                        metrics["genai_completion_tokens"].add(response_dict.get('usage').get('completion_tokens'), attributes)
+                        metrics["genai_prompt_tokens"].add(response_dict.get('usage').get('prompt_tokens'), attributes)
                         metrics["genai_cost"].record(cost, attributes)
 
                     # Return original response
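The metrics dict consulted throughout these hunks is created elsewhere (openlit/otel/metrics.py changes by +6 -3 in this release). A minimal sketch of how such instruments are typically registered with the OpenTelemetry metrics API; the instrument names below are assumptions keyed to the dict keys above:

    # Assumed setup, mirroring the keys used by the instrumentation.
    from opentelemetry import metrics as otel_metrics

    meter = otel_metrics.get_meter("openlit", version="1.30.2")
    metrics = {
        "genai_requests": meter.create_counter("genai.total.requests"),
        "genai_prompt_tokens": meter.create_counter("genai.usage.prompt_tokens"),
        "genai_completion_tokens": meter.create_counter("genai.usage.completion_tokens"),
        "genai_total_tokens": meter.create_counter("genai.usage.total_tokens"),
        "genai_cost": meter.create_histogram("genai.usage.cost"),
    }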
@@ -363,7 +413,7 @@ def async_embedding(gen_ai_endpoint, version, environment, application_name,
                     tracer, pricing_info, trace_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for embeddings to collect metrics.
-
+
     Args:
         gen_ai_endpoint: Endpoint identifier for logging and tracing.
         version: Version of the monitoring package.
@@ -372,7 +422,7 @@ def async_embedding(gen_ai_endpoint, version, environment, application_name,
         tracer: OpenTelemetry tracer for creating spans.
         pricing_info: Information used for calculating the cost of OpenAI usage.
        trace_content: Flag indicating whether to trace the actual content.
-
+
     Returns:
         A function that wraps the embeddings method to add telemetry.
     """
@@ -418,8 +468,8 @@ def async_embedding(gen_ai_endpoint, version, environment, application_name,
                                        kwargs.get("model", "text-embedding-ada-002"))
                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_EMBEDDING_FORMAT,
                                    kwargs.get("encoding_format", "float"))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_EMBEDDING_DIMENSION,
-
+                # span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_EMBEDDING_DIMENSION,
+                #                    kwargs.get("dimensions", "null"))
                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
                                    kwargs.get("user", ""))
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
@@ -475,7 +525,7 @@ def async_finetune(gen_ai_endpoint, version, environment, application_name,
                    tracer, pricing_info, trace_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for fine-tuning jobs to collect metrics.
-
+
     Args:
         gen_ai_endpoint: Endpoint identifier for logging and tracing.
         version: Version of the monitoring package.
@@ -484,7 +534,7 @@ def async_finetune(gen_ai_endpoint, version, environment, application_name,
         tracer: OpenTelemetry tracer for creating spans.
         pricing_info: Information used for calculating the cost of OpenAI usage.
         trace_content: Flag indicating whether to trace the actual content.
-
+
     Returns:
         A function that wraps the fine tuning creation method to add telemetry.
     """
@@ -509,13 +559,14 @@ def async_finetune(gen_ai_endpoint, version, environment, application_name,
         with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
             response = await wrapped(*args, **kwargs)
 
+            # Handling exception ensure observability without disrupting operation
             try:
                 # Set Span attributes
                 span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
                 span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
                                    SemanticConvetion.GEN_AI_SYSTEM_OPENAI)
                 span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-
+                                   SemanticConvetion.GEN_AI_TYPE_FINETUNING)
                 span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
                                    gen_ai_endpoint)
                 span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
@@ -546,7 +597,22 @@ def async_finetune(gen_ai_endpoint, version, environment, application_name,
                 span.set_status(Status(StatusCode.OK))
 
                 if disable_metrics is False:
-
+                    attributes = {
+                        TELEMETRY_SDK_NAME:
+                            "openlit",
+                        SemanticConvetion.GEN_AI_APPLICATION_NAME:
+                            application_name,
+                        SemanticConvetion.GEN_AI_SYSTEM:
+                            SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
+                        SemanticConvetion.GEN_AI_ENVIRONMENT:
+                            environment,
+                        SemanticConvetion.GEN_AI_TYPE:
+                            SemanticConvetion.GEN_AI_TYPE_FINETUNING,
+                        SemanticConvetion.GEN_AI_REQUEST_MODEL:
+                            kwargs.get("model", "gpt-3.5-turbo")
+                    }
+
+                    metrics["genai_requests"].add(1, attributes)
 
                 # Return original response
                 return response
@@ -564,7 +630,7 @@ def async_image_generate(gen_ai_endpoint, version, environment, application_name,
                          tracer, pricing_info, trace_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for image generation to collect metrics.
-
+
     Args:
         gen_ai_endpoint: Endpoint identifier for logging and tracing.
         version: Version of the monitoring package.
@@ -573,7 +639,7 @@ def async_image_generate(gen_ai_endpoint, version, environment, application_name,
         tracer: OpenTelemetry tracer for creating spans.
         pricing_info: Information used for calculating the cost of OpenAI image generation.
         trace_content: Flag indicating whether to trace the input prompt and generated images.
-
+
     Returns:
         A function that wraps the image generation method to add telemetry.
     """
@@ -694,7 +760,7 @@ def async_image_variatons(gen_ai_endpoint, version, environment, application_name,
                           tracer, pricing_info, trace_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for creating image variations to collect metrics.
-
+
     Args:
         gen_ai_endpoint: Endpoint identifier for logging and tracing.
         version: Version of the monitoring package.
@@ -703,7 +769,7 @@ def async_image_variatons(gen_ai_endpoint, version, environment, application_name,
         tracer: OpenTelemetry tracer for creating spans.
         pricing_info: Information used for calculating the cost of generating image variations.
         trace_content: Flag indicating whether to trace the input image and generated variations.
-
+
     Returns:
         A function that wraps the image variations creation method to add telemetry.
     """
@@ -813,7 +879,7 @@ def async_audio_create(gen_ai_endpoint, version, environment, application_name,
                        tracer, pricing_info, trace_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for creating speech audio to collect metrics.
-
+
     Args:
         gen_ai_endpoint: Endpoint identifier for logging and tracing.
         version: Version of the monitoring package.
@@ -822,7 +888,7 @@ def async_audio_create(gen_ai_endpoint, version, environment, application_name,
         tracer: OpenTelemetry tracer for creating spans.
         pricing_info: Information used for calculating the cost of generating speech audio.
         trace_content: Flag indicating whether to trace the input text and generated audio.
-
+
     Returns:
         A function that wraps the speech audio creation method to add telemetry.
     """